summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Kaz Kylheku <kaz@kylheku.com>, 2017-06-12 20:00:20 -0700
committer: Kaz Kylheku <kaz@kylheku.com>, 2017-06-12 20:00:20 -0700
commit: 9afe98ddd7506561f70b4356fbde69731e339042 (patch)
tree: 20cf813f0330826ae03f32e381638aab05274521
parent: 2f9728db321c67ee6c41372914a4a56a5809356d (diff)
download: txr-9afe98ddd7506561f70b4356fbde69731e339042.tar.gz
txr-9afe98ddd7506561f70b4356fbde69731e339042.tar.bz2
txr-9afe98ddd7506561f70b4356fbde69731e339042.zip
int-str: allow radix to be #\c for C conventions.
* lib.c (int_str): Support #\c base, recognizing 0x as hex and leading 0 as octal. We don't rely on the wcstol function's ability to do this conversion, but scan the prefix ourselves.
* txr.1: Documented.
-rw-r--r-- lib.c 60
-rw-r--r-- txr.1 32
2 files changed, 70 insertions, 22 deletions
diff --git a/lib.c b/lib.c
index f53c467b..951d540e 100644
--- a/lib.c
+++ b/lib.c
@@ -4327,34 +4327,53 @@ val int_str(val str, val base)
wchar_t *ptr;
long value;
cnum b = c_num(default_arg(base, num_fast(10)));
+ int zerox = 0, octzero = 0, minus = 0, flip = 0;
- /* Standard C idiocy: if base is 16, strtoul and its siblings
- still recognize the 0x prefix. */
- if (b == 16) {
- switch (wcs[0]) {
- case '+':
- case '-':
- switch (wcs[1]) {
- case '0':
- switch (wcs[2]) {
- case 'x': case 'X':
- return zero;
- }
- }
- break;
+ switch (wcs[0]) {
+ case '-':
+ minus = 1;
+ /* fallthrough */
+ case '+':
+ switch (wcs[1]) {
case '0':
- switch (wcs[1]) {
+ switch (wcs[2]) {
case 'x': case 'X':
- return zero;
+ zerox = 1;
+ wcs += 3;
+ flip = minus;
+ break;
+ default:
+ octzero = 1;
+ break;
}
- break;
}
+ break;
+ case '0':
+ switch (wcs[1]) {
+ case 'x': case 'X':
+ zerox = 1;
+ wcs += 2;
+ default:
+ octzero = 1;
+ }
+ break;
+ }
+
+ if (base == chr('c')) {
+ b = (zerox ? 16 : (octzero ? 8 : 10));
+ } else if (b == 16) {
+ /* If base is 16, strtoul and its siblings
+ still recognize the 0x prefix. We don't want that;
+ except if base is the character #\c. Otherwise,
+ it is a zero with trailing junk. */
+ if (zerox)
+ return zero;
} else if (b < 2 || b > 36) {
uw_throwf(error_s, lit("int-str: invalid base ~s"), base, nao);
}
/* TODO: detect if we have wcstoll */
- value = wcstol(wcs, &ptr, b ? b : 10);
+ value = wcstol(wcs, &ptr, b);
if (value == 0 && ptr == wcs)
return nil;
@@ -4371,6 +4390,8 @@ val int_str(val str, val base)
if (err != MP_OKAY)
return nil;
+ if (flip)
+ mp_neg(mp(bignum), mp(bignum));
/* If wcstol overflowed, but the range of long is smaller than
that of fixnums, that means that the value might not
actually be a bignum, and so we must normalize.
@@ -4379,6 +4400,9 @@ val int_str(val str, val base)
return (LONG_MAX < NUM_MAX) ? normalize(bignum) : bignum;
}
+ if (flip)
+ value = -value;
+
if (value >= NUM_MIN && value <= NUM_MAX)
return num(value);
diff --git a/txr.1 b/txr.1
index 08b9c0ce..e46d52a0 100644
--- a/txr.1
+++ b/txr.1
@@ -33489,8 +33489,13 @@ ignored.
The
.code int-str
function converts a string of digits in the specified
-radix to an integer value. If the radix isn't specified, it defaults to 10.
-Otherwise it must be an integer in the range 2 to 36.
+.meta radix
+to an integer value. If
+.meta radix
+isn't specified, it defaults to 10.
+Otherwise it must be an integer in the range 2 to 36, or else the character
+.codn #\c .
+
For radices above 10, letters of the alphabet
are used for digits:
.code A
@@ -33499,11 +33504,30 @@ represent a digit whose value is 10,
represents 11 and
so forth until
.codn Z .
-For values of radix above 36, the returned value is
-unspecified. Upper and lower case letters are recognized.
+Upper and lower case letters are recognized.
Any character which is not a digit of the specified radix is regarded
as the start of trailing junk at which the extraction of the digits stops.
+When
+.meta radix
+is specified as the character object
+.codn #\c ,
+this indicates that a C-language-style integer constant should be
+recognized. If, after any optional sign, the remainder of
+.meta string
+begins with the character pair
+.code 0x
+then that pair is considered removed from the string, and it is treated
+as base 16 (hexadecimal). If, after any optional sign, the remainder of
+.meta string
+begins with a leading zero not followed by
+.codn x ,
+then the radix is taken to be 8 (octal). In scanning these formats, the
+.code int-str
+function is not otherwise constrained by C language representational
+limitations. Specifically, the input values are taken to be the printed
+representation of arbitrary-precision integers and treated accordingly.
+
The
.code flo-str
function converts a floating-point decimal notation to a nearby