From 3fcfbdf34170d6ee499e5ecb78cb4072c098f9d0 Mon Sep 17 00:00:00 2001 From: Kaz Kylheku Date: Thu, 21 Apr 2016 20:42:10 -0700 Subject: Harmonize rules for string and character printing. In this patch we change which character objects are printed using hex escapes, and which characters are printed as hex when printing string literals. * lib.c (obj_print_impl): Add DEL (U+7F) to the list of character objects which are printed as hex. In a string literal, it's already printed as \x7F. Use upper case hex rather than lower case. (out_str_char): Copy the rules used by obj_print_impl for deciding what string constituents to print as hex and how to print them. So for instance, U+80 to U+9F will now print in hex, as will the whole U+D800 to U+DFFF range (rather than just U+DC00 to U+DCFF), the BOM code U+FFFE, U+FFFF and anything higher. --- lib.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/lib.c b/lib.c index 0c648534..677cce60 100644 --- a/lib.c +++ b/lib.c @@ -8679,11 +8679,22 @@ void out_str_char(wchar_t ch, val out, int *semi_flag) case '\\': put_string(lit("\\\\"), out); break; case 27: put_string(lit("\\e"), out); break; default: - if ((ch >= ' ' && ch != 127 && ch < 0xDC00) || ch > 0xDCFF) { - put_char(chr(ch), out); - } else { - format(out, lit("\\x~,02X"), num(ch), nao); - *semi_flag = 1; + { + val fmt = nil; + + if ((ch < 0x20) || (ch >= 0x7F && ch < 0xA0)) + fmt = lit("\\x~,02X"); + else if ((ch >= 0xD800 && ch < 0xE000) || ch == 0xFFFE || ch == 0xFFFF) + fmt = lit("\\x~,04X"); + else if (ch >= 0xFFFF) + fmt = lit("\\x~,06X"); + else + put_char(chr(ch), out); + + if (fmt) { + format(out, fmt, num(ch), nao); + *semi_flag = 1; + } } } } @@ -8862,6 +8873,7 @@ finish: put_char(obj, out); } else { wchar_t ch = c_chr(obj); + val fmt = nil; put_string(lit("#\\"), out); switch (ch) { @@ -8877,14 +8889,17 @@ finish: case ' ': put_string(lit("space"), out); break; case 0xDC00: put_string(lit("pnul"), out); break; 
default: - if ((ch < 0x20) || (ch >= 0x80 && ch < 0xA0)) - format(out, lit("x~,02x"), num(ch), nao); + if ((ch < 0x20) || (ch >= 0x7F && ch < 0xA0)) + fmt = lit("x~,02X"); else if ((ch >= 0xD800 && ch < 0xE000) || ch == 0xFFFE || ch == 0xFFFF) - format(out, lit("x~,04x"), num(ch), nao); + fmt = lit("x~,04X"); else if (ch >= 0xFFFF) - format(out, lit("x~,06x"), num(ch), nao); + fmt = lit("x~,06X"); else put_char(chr(ch), out); + + if (fmt) + format(out, fmt, num(ch), nao); } } break; -- cgit v1.2.3