ADDED: write_term/2 options format_float(+Format) and format_integer(…

…+Format) Using a format seems a more sensible way to control writing numbers from write_term/2. This patch removes the previously introduced radix(+Radix) option. If we want this back we will implement it on top of format_integer(+Format). This patch also contains a lot of small code cleanup and some optimization to low-level I/O.
SWI-Prolog · Nov 15, 2024 · a49fe8c · a49fe8c
1 parent 841a487
commit a49fe8c
Show file tree

Hide file tree

Showing 13 changed files with 200 additions and 175 deletions.
diff --git a/man/builtin.doc b/man/builtin.doc
@@ -6535,6 +6535,12 @@ a space (default) or newline if the \term{nl}{true} option is also
 given.\footnote{Compatible with
 \href{http://eclipseclp.org/doc/bips/kernel/ioterm/write_term-3.html}{ECLiPSe}}
 
+    \termitem{float_format}{+Atom}
+Print floating point numbers using format/2 as \term{format}{Atom,
+[Float]}. The default is \verb$~h$. This option is compatible with
+SICStus. See format/2 for valid format specifiers and the
+\const{integer_format} option for additional comments.
+
     \termitem{ignore_ops}{Bool}
 If \const{true}, the generic term representation (<functor>(<args>
 \ldots)) will be used for all terms.  Otherwise (default), operators
@@ -6543,6 +6549,16 @@ flag also stops the syntactic sugar notation for lists and brace terms.
 In SWI-Prolog, these are controlled by the separate options
 \const{dotlists} and \const{brace_terms}}.
 
+    \termitem{integer_format}{+Atom}
+Print integers using format/2 as \term{format}{Atom, [Int]}.  The
+default is \verb$~d$. This allows to print integers using an
+alternative \jargon{radix}, using e.g.\, \verb$~16r$ or \verb$0x~16r$
+or to use digit grouping using e.g.\ \verb$~D$. Note that the user
+is reponsible to provide a format that produces valid Prolog syntax if
+the term must be readable by Prolog.  The format must accept exactly
+one argument.  If that is not satisfied, printing an integer results
+in an exception.  See format/2 for for valid format specifiers.
+
     \termitem{max_depth}{Integer}
 If the term is nested deeper than \arg{Integer}, print the remainder
 as ellipses (\ldots).  A 0 (zero) value (default) imposes no depth limit.
@@ -9700,12 +9716,12 @@ stream and uses the position information of the temporary stream to
 update its notion of the position.  Notable ansi_format/3 cooperates
 properly in callbacks.\footnote{As of version 8.3.30}.
 
-Numeric conversion (\chr{d}, \chr{D}, \chr{e}, \chr{E}, \chr{f}, \chr{g}
-and \chr{G}) accept an arithmetic expression as argument. This is
-introduced to handle rational numbers transparently (see
-\secref{rational}).  The floating point conversions allow for unlimited
-precision for printing rational numbers in decimal form.  E.g., the
-following will write as many 3's as you want by changing the `50'.
+Numeric conversion (\chr{d}, \chr{D}, \chr{e}, \chr{E}, \chr{f},
+\chr{g}, \chr{G}, \chr{h} and \chr{H}) accept an arithmetic expression
+as argument. This is introduced to handle rational numbers transparently
+(see \secref{rational}). The floating point conversions allow for
+unlimited precision for printing rational numbers in decimal form. E.g.,
+the following will write as many 3's as you want by changing the `50'.
 
 \begin{code}
 ?- format('~50f', [10 rdiv 3]).

diff --git a/src/ATOMS b/src/ATOMS
@@ -338,6 +338,7 @@ A flag			"flag"
 A flag_value		"flag_value"
 A float			"float"
 A float_format		"float_format"
+A float_format_specifier "~h"
 A float_fractional_part	"float_fractional_part"
 A float_integer_part	"float_integer_part"
 A float_overflow	"float_overflow"
@@ -441,9 +442,11 @@ A inserted_char		"inserted_char"
 A instantiation_error	"instantiation_error"
 A int			"int"
 A int64_t		"int64_t"
+A int_format_specifier  "~d"
 A int_overflow		"int_overflow"
 A integer		"integer"
 A integer_expression	"integer_expression"
+A integer_format	"integer_format"
 A interrupt		"interrupt"
 A invalid		"invalid"
 A io_error		"io_error"

diff --git a/src/os/pl-fmt.c b/src/os/pl-fmt.c
@@ -86,7 +86,8 @@ typedef struct
 			  } \
 			}
 #define FMT_ERROR(fmt)	return PL_error(NULL, 0, NULL, ERR_FORMAT, fmt)
-#define FMT_ARG(c, a)	return PL_error(NULL, 0, NULL, ERR_FORMAT_ARG, c, a)
+#define FMT_ARG(s, a)	return PL_error(NULL, 0, NULL, ERR_FORMAT_ARG, s, a)
+#define FMT_ARGC(c, a)  do { char f[2] = {(char)c}; FMT_ARG(f, a); } while(0)
 #define FMT_EXEPTION()	return false
 
 
@@ -96,14 +97,24 @@ static PL_locale prolog_locale =
 };
 
 
-static int
-update_column(int col, int c)
-{ switch(c)
-  { case '\n':	return 0;
-    case '\r':  return 0;
-    case '\t':	return (col + 1) | 0x7;
-    case '\b':	return (col <= 0 ? 0 : col - 1);
-    default:	return col + 1;
+static inline void
+update_column(format_state *state, int c)
+{ if ( likely(c >= ' ') )
+  { state->column++;
+  } else
+  { switch(c)
+    { case '\n':
+	state->column = 0;
+	break;
+      case '\t':
+	state->column = (state->column+1)|0x7;
+      case '\b':
+	if ( likely(state->column>0) )
+	  state->column--;
+	break;
+      default:
+	state->column++;
+    }
   }
 }
 
@@ -114,7 +125,7 @@ UTF-8 format in the state's `buffer'.   The  `buffered' field represents
 the number of UTF-8 characters in the buffer.
 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 
-static WUNUSED int
+static WUNUSED bool
 outchr(format_state *state, int chr)
 { if ( state->pending_rubber )
   { if ( chr > 0x7f )
@@ -134,7 +145,7 @@ outchr(format_state *state, int chr)
       return false;
   }
 
-  state->column = update_column(state->column, chr);
+  update_column(state, chr);
 
   return true;
 }
@@ -161,35 +172,50 @@ outstring(format_state *state, const char *s, size_t len)
   }
 
   for(q=s; q < e; q++)
-    state->column = update_column(state->column, *q&0xff);
+    update_column(state, *q&0xff);
 
   return true;
 }
 
 
-static WUNUSED int
+static WUNUSED bool
 oututf8(format_state *state, const char *s, size_t len)
 { const char *e = &s[len];
 
-  while(s<e)
-  { int chr;
+  if ( !state->pending_rubber )
+  { while(s<e)
+    { int chr = s[0];
 
-    PL_utf8_code_point(&s, e, &chr);
-    if ( !outchr(state, chr) )
-      return false;
+      if ( likely(chr < 0x80) )
+	s++;
+      else
+	PL_utf8_code_point(&s, e, &chr);
+
+      if ( Sputcode(chr, state->out) < 0 )
+	return false;
+      update_column(state, chr);
+    }
+  } else
+  { while(s<e)
+    { int chr;
+
+      PL_utf8_code_point(&s, e, &chr);
+      if ( !outchr(state, chr) )
+	return false;
+    }
   }
 
   return true;
 }
 
 
-static WUNUSED int
+static WUNUSED bool
 oututf80(format_state *state, const char *s)
 { return oututf8(state, s, strlen(s));
 }
 
 
-static WUNUSED int
+static WUNUSED bool
 outtext(format_state *state, PL_chars_t *txt)
 { switch(txt->encoding)
   { case ENC_ISO_LATIN_1:
@@ -218,9 +244,6 @@ outtext(format_state *state, PL_chars_t *txt)
 
 #define format_predicates (GD->format.predicates)
 
-static int	update_column(int, Char);
-static bool	do_format(IOSTREAM *fd, PL_chars_t *fmt,
-			  int ac, term_t av, Module m);
 static void	distribute_rubber(struct rubber *, int, int);
 static WUNUSED int emit_rubber(format_state *state);
 
@@ -462,7 +485,7 @@ end_sub_format(sub_state *state, int rc)
 		*       ACTUAL FORMATTING	*
 		********************************/
 
-static bool
+bool
 do_format(IOSTREAM *fd, PL_chars_t *fmt, int argc, term_t argv, Module m)
 { GET_LD
   format_state state;			/* complete state */
@@ -609,23 +632,25 @@ do_format(IOSTREAM *fd, PL_chars_t *fmt, int argc, term_t argv, Module m)
 	      case 'G':			/* shortest of 'f' and 'E' */
 	      case 'h':
 	      case 'H':			/* Precise */
-		{ AR_CTX
-		  number n;
-		  union {
-		  tmp_buffer b;
+		{ number n;
+		  union
+		  { tmp_buffer b;
 		    buffer b1;
 		  } u;
 		  PL_locale *l;
+		  AR_CTX
 
 		  NEED_ARG;
 		  AR_BEGIN();
-		  if ( !valueExpression(argv, &n) )
-		  { char f[2];
-
-		    f[0] = (char)c;
-		    f[1] = EOS;
-		    AR_CLEANUP();
-		    FMT_ARG(f, argv); /* returns error */
+		  if ( !PL_get_number(argv, &n) )
+		  { if ( !valueExpression(argv, &n) )
+		    { char f[2];
+
+		      f[0] = (char)c;
+		      f[1] = EOS;
+		      AR_CLEANUP();
+		      FMT_ARG(f, argv); /* returns error */
+		    }
 		  }
 		  SHIFT;
 
@@ -651,22 +676,26 @@ do_format(IOSTREAM *fd, PL_chars_t *fmt, int argc, term_t argv, Module m)
 	      case 'r':			/* radix number */
 	      case 'R':			/* Radix number */
 	      case 'I':			/* Prolog 1_000_000 */
-		{ AR_CTX
-		  number i;
+		{ number i;
 		  tmp_buffer b;
 		  char *si;
+		  AR_CTX
 
 		  NEED_ARG;
 		  AR_BEGIN();
-		  if ( !valueExpression(argv, &i) ||
-		       !toIntegerNumber(&i, 0) )
-		  { char f[2];
-
-		    f[0] = (char)c;
-		    f[1] = EOS;
-		    AR_CLEANUP();
-		    FMT_ARG(f, argv);
+		  if ( !PL_get_number(argv, &i) )
+		  { if ( !valueExpression(argv, &i) )
+		    { AR_CLEANUP();
+		      FMT_ARGC(c, argv); /* return with error */
+		    }
 		  }
+		  if ( !isIntegerNumber(&i) )
+		  { if ( !toIntegerNumber(&i, 0) )
+		    { AR_CLEANUP();
+		      FMT_ARGC(c, argv);
+		    }
+		  }
+
 		  SHIFT;
 		  initBuffer(&b);
 		  if ( c == 'd' || c == 'D' )
@@ -894,7 +923,7 @@ do_format(IOSTREAM *fd, PL_chars_t *fmt, int argc, term_t argv, Module m)
 		    { rc = false;
 		      goto out;
 		    }
-		    state.column = update_column(state.column, '\n');
+		    update_column(&state, '\n');
 
 		    state.rub[0].where = state.buffered;
 		    state.rub[0].pad = nl_and_reindent;

diff --git a/src/os/pl-fmt.h b/src/os/pl-fmt.h
@@ -5,7 +5,7 @@
     WWW:           http://www.swi-prolog.org
     Copyright (c)  2024, University of Amsterdam
                          VU University Amsterdam
-		         CWI, Amsterdam
+			 CWI, Amsterdam
     All rights reserved.
 
     Redistribution and use in source and binary forms, with or without
@@ -39,5 +39,7 @@
 
 COMMON(char *)	formatInteger(PL_locale *locale, int div, int radix,
 			      bool smll, Number n, Buffer out);
+COMMON(bool)	do_format(IOSTREAM *fd, PL_chars_t *fmt,
+			  int argc, term_t argv, Module m);
 
 #endif /*FMT_H_INCLUDED*/
diff --git a/src/os/pl-stream.c b/src/os/pl-stream.c
@@ -730,7 +730,7 @@ static inline void
 update_linepos(IOSTREAM *s, int c)
 { IOPOS *p = s->position;
 
-  if ( c > '\r' )			/* speedup the 99% case a bit */
+  if ( likely(c > '\r') )	/* speedup the 99% case a bit */
   { p->linepos++;
     return;
   }
@@ -942,7 +942,6 @@ put_code(int c, IOSTREAM *s)
 	  return -1;
 	break;
       }
-    simple:
       if ( put_byte(c, s) < 0 )
 	return -1;
       break;
@@ -952,7 +951,9 @@ put_code(int c, IOSTREAM *s)
 	  return -1;
 	break;
       }
-      goto simple;
+      if ( put_byte(c, s) < 0 )
+	return -1;
+      break;
     case ENC_ANSI:
     { char b[PL_MB_LEN_MAX];
       size_t n;
@@ -981,8 +982,11 @@ put_code(int c, IOSTREAM *s)
     { char buf[6];
       char *p, *end;
 
-      if ( c < 128 )
-	goto simple;
+      if ( likely(c < 128) )
+      { if ( put_byte(c, s) < 0 )
+	  return -1;
+	break;
+      }
 
       end = utf8_put_char(buf, c);
       for(p=buf; p<end; p++)

diff --git a/src/os/pl-string.c b/src/os/pl-string.c
@@ -67,18 +67,6 @@ remove_string(char *s)
 		 *	     NUMBERS		*
 		 *******************************/
 
-/*  Return the character representing some digit.
-
- ** Fri Jun 10 10:45:40 1988  [email protected] (Jan Wielemaker)  */
-
-char
-digitName(int n, int smll)
-{ if (n <= 9)
-    return (char)(n + '0');
-  return (char)(n + (smll ? 'a' : 'A') - 10);
-}
-
-
 /*  Return the value of a digit when transforming a number of base 'b'.
     Return '-1' if it is an illegal digit.