From 9afe98ddd7506561f70b4356fbde69731e339042 Mon Sep 17 00:00:00 2001
From: Kaz Kylheku <kaz@kylheku.com>
Date: Mon, 12 Jun 2017 20:00:20 -0700
Subject: int-str: allow radix to be #\c for C conventions.

* lib.c (int_str): Support #\c base, recognizing
0x as hex and leading 0 as octal. We don't rely on
the wcstol function's ability to detect these prefixes,
but scan for them ourselves.

* txr.1: Documented.
---
 lib.c | 60 ++++++++++++++++++++++++++++++++++++++++++------------------
 txr.1 | 32 ++++++++++++++++++++++++++++----
 2 files changed, 70 insertions(+), 22 deletions(-)

diff --git a/lib.c b/lib.c
index f53c467b..951d540e 100644
--- a/lib.c
+++ b/lib.c
@@ -4327,34 +4327,53 @@ val int_str(val str, val base)
   wchar_t *ptr;
   long value;
   cnum b = c_num(default_arg(base, num_fast(10)));
+  int zerox = 0, octzero = 0, minus = 0, flip = 0;
 
-  /* Standard C idiocy: if base is 16, strtoul and its siblings
-     still recognize the 0x prefix. */
-  if (b == 16) {
-    switch (wcs[0]) {
-    case '+':
-    case '-':
-      switch (wcs[1]) {
-      case '0':
-        switch (wcs[2]) {
-        case 'x': case 'X':
-          return zero;
-        }
-      }
-      break;
+  switch (wcs[0]) {
+  case '-':
+    minus = 1;
+    /* fallthrough */
+  case '+':
+    switch (wcs[1]) {
     case '0':
-      switch (wcs[1]) {
+      switch (wcs[2]) {
       case 'x': case 'X':
-        return zero;
+        zerox = 1;
+        wcs += 3;
+        flip = minus;
+        break;
+      default:
+        octzero = 1;
+        break;
       }
-      break;
     }
+    break;
+  case '0':
+    switch (wcs[1]) {
+    case 'x': case 'X':
+      zerox = 1;
+      wcs += 2;
+    default:
+      octzero = 1;
+    }
+    break;
+  }
+
+  if (base == chr('c')) {
+    b = (zerox ? 16 : (octzero ? 8 : 10));
+  } else if (b == 16) {
+    /* If base is 16, strtoul and its siblings
+       still recognize the 0x prefix. We don't want that;
+       except if base is the character #\c. Otherwise,
+       it is a zero with trailing junk. */
+    if (zerox)
+      return zero;
   } else if (b < 2 || b > 36) {
      uw_throwf(error_s, lit("int-str: invalid base ~s"), base, nao);
   }
 
   /* TODO: detect if we have wcstoll */
-  value = wcstol(wcs, &ptr, b ? b : 10);
+  value = wcstol(wcs, &ptr, b);
 
   if (value == 0 && ptr == wcs)
     return nil;
@@ -4371,6 +4390,8 @@ val int_str(val str, val base)
     if (err != MP_OKAY)
       return nil;
 
+    if (flip)
+      mp_neg(mp(bignum), mp(bignum));
     /* If wcstol overflowed, but the range of long is smaller than
        that of fixnums, that means that the value might not
        actually be a bignum, and so we must normalize.
@@ -4379,6 +4400,9 @@ val int_str(val str, val base)
     return (LONG_MAX < NUM_MAX) ? normalize(bignum) : bignum;
   }
 
+  if (flip)
+    value = -value;
+
   if (value >= NUM_MIN && value <= NUM_MAX)
     return num(value);
 
diff --git a/txr.1 b/txr.1
index 08b9c0ce..e46d52a0 100644
--- a/txr.1
+++ b/txr.1
@@ -33489,8 +33489,13 @@ ignored.
 The
 .code int-str
 function converts a string of digits in the specified
-radix to an integer value. If the radix isn't specified, it defaults to 10.
-Otherwise it must be an integer in the range 2 to 36.
+.meta radix
+to an integer value. If
+.meta radix
+isn't specified, it defaults to 10.
+Otherwise it must be an integer in the range 2 to 36, or else the character
+.codn #\c .
+
 For radices above 10, letters of the alphabet
 are used for digits:
 .code A
@@ -33499,11 +33504,30 @@ represent a digit whose value is 10,
 represents 11 and
 so forth until
 .codn Z .
-For values of radix above 36, the returned value is
-unspecified. Upper and lower case letters are recognized.
+Upper and lower case letters are recognized.
 Any character which is not a digit of the specified radix is regarded
 as the start of trailing junk at which the extraction of the digits stops.
 
+When
+.meta radix
+is specified as the character object
+.codn #\c ,
+this indicates that a C-language-style integer constant should be
+recognized.  If, after any optional sign, the remainder of
+.meta string
+begins with the character pair
+.codn 0x ,
+then that pair is considered removed from the string, and it is treated
+as base 16 (hexadecimal).  If, after any optional sign, the remainder of
+.meta string
+begins with a leading zero not followed by
+.codn x ,
+then the radix is taken to be 8 (octal). In scanning these formats, the
+.code int-str
+function is not otherwise constrained by C language representational
+limitations. Specifically, the input values are taken to be the printed
+representation of arbitrary-precision integers and treated accordingly.
+
 The
 .code flo-str
 function converts a floating-point decimal notation to a nearby
-- 
cgit v1.2.3