From 9afe98ddd7506561f70b4356fbde69731e339042 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Mon, 12 Jun 2017 20:00:20 -0700 Subject: int-str: allow radix to be #\c for C conventions. * lib.c (int_str): Support #\c base, recognizing 0x as hex and leading 0 as octal. We don't rely on the wcstol function's ability to do this conversion, but scan it ourselves. * txr.1: Documented. --- lib.c | 60 ++++++++++++++++++++++++++++++++++++++++++------------------ txr.1 | 32 ++++++++++++++++++++++++++++---- 2 files changed, 70 insertions(+), 22 deletions(-) diff --git a/lib.c b/lib.c index f53c467b..951d540e 100644 --- a/lib.c +++ b/lib.c @@ -4327,34 +4327,53 @@ val int_str(val str, val base) wchar_t *ptr; long value; cnum b = c_num(default_arg(base, num_fast(10))); + int zerox = 0, octzero = 0, minus = 0, flip = 0; - /* Standard C idiocy: if base is 16, strtoul and its siblings - still recognize the 0x prefix. */ - if (b == 16) { - switch (wcs[0]) { - case '+': - case '-': - switch (wcs[1]) { - case '0': - switch (wcs[2]) { - case 'x': case 'X': - return zero; - } - } - break; + switch (wcs[0]) { + case '-': + minus = 1; + /* fallthrough */ + case '+': + switch (wcs[1]) { case '0': - switch (wcs[1]) { + switch (wcs[2]) { case 'x': case 'X': - return zero; + zerox = 1; + wcs += 3; + flip = minus; + break; + default: + octzero = 1; + break; } - break; } + break; + case '0': + switch (wcs[1]) { + case 'x': case 'X': + zerox = 1; + wcs += 2; + default: + octzero = 1; + } + break; + } + + if (base == chr('c')) { + b = (zerox ? 16 : (octzero ? 8 : 10)); + } else if (b == 16) { + /* If base is 16, strtoul and its siblings + still recognize the 0x prefix. We don't want that; + except if base is the character #\c. Otherwise, + it is a zero with trailing junk. */ + if (zerox) + return zero; } else if (b < 2 || b > 36) { uw_throwf(error_s, lit("int-str: invalid base ~s"), base, nao); } /* TODO: detect if we have wcstoll */ - value = wcstol(wcs, &ptr, b ? 
b : 10); + value = wcstol(wcs, &ptr, b); if (value == 0 && ptr == wcs) return nil; @@ -4371,6 +4390,8 @@ val int_str(val str, val base) if (err != MP_OKAY) return nil; + if (flip) + mp_neg(mp(bignum), mp(bignum)); /* If wcstol overflowed, but the range of long is smaller than that of fixnums, that means that the value might not actually be a bignum, and so we must normalize. @@ -4379,6 +4400,9 @@ val int_str(val str, val base) return (LONG_MAX < NUM_MAX) ? normalize(bignum) : bignum; } + if (flip) + value = -value; + if (value >= NUM_MIN && value <= NUM_MAX) return num(value); diff --git a/txr.1 b/txr.1 index 08b9c0ce..e46d52a0 100644 --- a/txr.1 +++ b/txr.1 @@ -33489,8 +33489,13 @@ ignored. The .code int-str function converts a string of digits in the specified -radix to an integer value. If the radix isn't specified, it defaults to 10. -Otherwise it must be an integer in the range 2 to 36. +.meta radix +to an integer value. If +.meta radix +isn't specified, it defaults to 10. +Otherwise it must be an integer in the range 2 to 36, or else the character +.codn #\c . + For radices above 10, letters of the alphabet are used for digits: .code A @@ -33499,11 +33504,30 @@ represent a digit whose value is 10, represents 11 and so forth until .codn Z . -For values of radix above 36, the returned value is -unspecified. Upper and lower case letters are recognized. +Upper and lower case letters are recognized. Any character which is not a digit of the specified radix is regarded as the start of trailing junk at which the extraction of the digits stops. +When +.meta radix +is specified as the character object +.codn #\c , +this indicates that a C-language-style integer constant should be +recognized. If, after any optional sign, the remainder of +.meta string +begins with the character pair +.code 0x +then that pair is considered removed from the string, and it is treated +as base 16 (hexadecimal). 
If, after any optional sign, the remainder of +.meta string +begins with a leading zero not followed by +.codn x , +then the radix is taken to be 8 (octal). In scanning these formats, the +.code int-str +function is not otherwise constrained by C language representational +limitations. Specifically, the input values are taken to be the printed +representation of arbitrary-precision integers and treated accordingly. + The .code flo-str function converts a floating-point decimal notation to a nearby -- cgit v1.2.3