/* Copyright 2009-2022 * Kaz Kylheku * Vancouver, Canada * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include "config.h" #include "lib.h" #include "hash.h" #include "signal.h" #include "unwind.h" #include "match.h" #include "filter.h" #include "eval.h" #include "stream.h" #include "buf.h" val filters_s; val filter_k, lfilt_k, rfilt_k, tohtml_k, fromhtml_k; val tohtml_star_k; val upcase_k, downcase_k; val topercent_k, frompercent_k, tourl_k, fromurl_k, tobase64_k, frombase64_k; val tobase64url_k, frombase64url_k; val tonumber_k, toint_k, tofloat_k, hextoint_k; static val make_trie(void) { return make_hash(hash_weak_none, nil); } static val trie_add(val trie, val key, val value) { val node, i, len = length_str(key); for (node = trie, i = zero; lt(i, len); i = plus(i, one)) { val ch = chr_str(key, i); val newnode_p; loc place = gethash_l(lit("trie-add"), node, ch, mkcloc(newnode_p)); if (newnode_p) set(place, make_hash(hash_weak_none, nil)); node = deref(place); } set_hash_userdata(node, value); return node; } /* * Reduce the storage requirement for a the trie, by applying * these rules: * * 1. All leaf-level nodes (i.e. empty hash tables) are replaced by * just the node value (hash user data). * 2. All hash tables with a single transition (i.e. hash tables * containing one element) and which have no node value * are replaced by a cons cell, whose CAR is the transition * character, and whose CDR is the transition. */ static void trie_compress(loc ptrie) { val trie = deref(ptrie); if (hashp(trie)) { val count = hash_count(trie); val value = get_hash_userdata(trie); if (zerop(count)) { set(ptrie, value); } else if (count == one && nilp(value)) { struct hash_iter hi; val cell = (us_hash_iter_init(&hi, trie), hash_iter_next(&hi)); set(ptrie, cons(us_car(cell), us_cdr(cell))); trie_compress(cdr_l(deref(ptrie))); } else { val cell; struct hash_iter hi; us_hash_iter_init(&hi, trie); for (cell = hash_iter_next(&hi); cell; cell = hash_iter_next(&hi)) trie_compress(mkloc(*us_cdr_p(cell), cell)); } } else if (consp(trie)) { trie_compress(cdr_l(trie)); } } static val trie_compress_intrinsic(val ptrie) { trie_compress(mkcloc(ptrie)); return ptrie; } static val regex_from_trie(val trie) { switch (type(trie)) { case NIL: return t; case CONS: { val a = car(trie); val d = cdr(trie); switch (type(d)) { case CONS: { val rx = regex_from_trie(d); if (consp(rx) && car(rx) == compound_s) return cons(compound_s, cons(a, cdr(rx))); return list(compound_s, a, rx, nao); } case COBJ: if (d->co.cls == hash_cls) return list(compound_s, a, regex_from_trie(d), nao); /* fallthrough */ default: return a; } } case COBJ: if (trie->co.cls == hash_cls) { if (zerop(hash_count(trie))) { return tnil(!get_hash_userdata(trie)); } else { val out = nil; val cell; struct hash_iter hi; us_hash_iter_init(&hi, trie); while ((cell = hash_iter_next(&hi)) != nil) { val a = us_car(cell); val d = us_cdr(cell); val rx = if2(consp(d) || hashp(d), regex_from_trie(d)); val ry = if3(consp(rx) && car(rx) == compound_s, cons(compound_s, cons(a, cdr(rx))), if3(rx, list(compound_s, a, rx, nao), a)); if (out) out = list(or_s, ry, out, nao); else out = ry; } if (get_hash_userdata(trie)) out = list(or_s, nil, out, nao); return out; } } /* fallthrough */ default: uw_throwf(error_s, lit("regex-from-trie: bad trie element ~s"), trie, nao); } } val trie_lookup_begin(val trie) { return trie; } val trie_value_at(val node) { if (hashp(node)) return get_hash_userdata(node); if (consp(node)) return nil; if (functionp(node)) return nil; return node; } val trie_lookup_feed_char(val node, val ch) { if (hashp(node)) return gethash(node, ch); if (functionp(node)) return funcall1(node, ch); if (consp(node) && ch == car(node)) return cdr(node); return nil; } static val string_tree_filter(val tree, val filter) { return filter_string_tree(filter, tree); } static val compound_filter(val filter_list, val string) { return reduce_left(func_n2(string_tree_filter), filter_list, string, nil); } val get_filter(val spec) { if (consp(spec)) { if (car(spec) == fun_k) { return pa_123_2(func_n3(match_filter), second(spec), rest(rest(spec))); } else { val filter_list = mapcar(func_n1(get_filter), spec); if (memqual(nil, filter_list)) return nil; return pa_12_2(func_n2(compound_filter), filter_list); } } return gethash(filters, spec); } struct filter_pair { const wchli_t *key, *value; }; static val build_filter(struct filter_pair *pair, val compress_p) { int i; val trie = make_trie(); for (i = 0; pair[i].key; i++) trie_add(trie, static_str(pair[i].key), static_str(pair[i].value)); if (compress_p) trie_compress(mkcloc(trie)); return trie; } static val build_filter_from_list(val list) { val trie = make_trie(); val iter; for (iter = list; iter; iter = cdr(iter)) { val tuple = reverse(car(iter)); mapcar(pa_123_2(func_n3(trie_add), trie, first(tuple)), rest(tuple)); } trie_compress(mkcloc(trie)); return trie; } static val trie_filter_string(val filter, val str) { val len = length_str(str); val i; val out = string(L""); for (i = zero; lt(i, len); ) { val node = trie_lookup_begin(filter); val match = nil; val subst = nil; val j; for (j = i; lt(j, len); j = plus(j, one)) { val ch = chr_str(str, j); val nnode = trie_lookup_feed_char(node, ch); val nsubst; if (!nnode) break; if ((nsubst = trie_value_at(nnode))) { match = j; subst = nsubst; } node = nnode; } if (match) { string_extend(out, subst, nil); i = plus(match, one); } else { string_extend(out, chr_str(str, i), nil); i = plus(i, one); } } return out; } val filter_string_tree(val filter, val obj) { val self = lit("filter-string-tree"); switch (type(obj)) { case NIL: return nil; case CONS: return mapcar(pa_12_2(func_n2(filter_string_tree), filter), obj); default: { val type = typeof(filter); if (type == null_s) return obj; if (type == hash_s || type == cons_s) return trie_filter_string(filter, obj); else if (type == fun_s) return funcall1(filter, obj); uw_throwf(type_error_s, lit("~a: ~s isn't a filter"), self, filter, nao); } } } val filter_equal(val lfilt, val rfilt, val left, val right) { return equal(filter_string_tree(lfilt, left), filter_string_tree(rfilt, right)); } val register_filter(val sym, val table) { return sethash(filters, sym, build_filter_from_list(table)); } static struct filter_pair tohtml_table[] = { { wli("<"), wli("<") }, { wli(">"), wli(">") }, { wli("&"), wli("&") }, { wli("\""), wli(""") }, { wli("'"), wli("'") }, { 0, 0 } }; static struct filter_pair tohtml_star_table[] = { { wli("<"), wli("<") }, { wli(">"), wli(">") }, { wli("&"), wli("&") }, { 0, 0 } }; static struct filter_pair fromhtml_table[] = { { wli("""), wli("\"") }, { wli("&"), wli("&") }, { wli("'"), wli("'") }, { wli("<"), wli("<") }, { wli(">"), wli(">") }, { wli(" "), wli("\x00A0") }, { wli("¡"), wli("\x00A1") }, { wli("¢"), wli("\x00A2") }, { wli("£"), wli("\x00A3") }, { wli("¤"), wli("\x00A4") }, { wli("¥"), wli("\x00A5") }, { wli("¦"), wli("\x00A6") }, { wli("§"), wli("\x00A7") }, { wli("¨"), wli("\x00A8") }, { wli("©"), wli("\x00A9") }, { wli("ª"), wli("\x00AA") }, { wli("«"), wli("\x00AB") }, { wli("¬"), wli("\x00AC") }, { wli("­"), wli("\x00AD") }, { wli("®"), wli("\x00AE") }, { wli("¯"), wli("\x00AF") }, { wli("°"), wli("\x00B0") }, { wli("±"), wli("\x00B1") }, { wli("²"), wli("\x00B2") }, { wli("³"), wli("\x00B3") }, { wli("´"), wli("\x00B4") }, { wli("µ"), wli("\x00B5") }, { wli("¶"), wli("\x00B6") }, { wli("·"), wli("\x00B7") }, { wli("¸"), wli("\x00B8") }, { wli("¹"), wli("\x00B9") }, { wli("º"), wli("\x00BA") }, { wli("»"), wli("\x00BB") }, { wli("¼"), wli("\x00BC") }, { wli("½"), wli("\x00BD") }, { wli("¾"), wli("\x00BE") }, { wli("¿"), wli("\x00BF") }, { wli("À"), wli("\x00C0") }, { wli("Á"), wli("\x00C1") }, { wli("Â"), wli("\x00C2") }, { wli("Ã"), wli("\x00C3") }, { wli("Ä"), wli("\x00C4") }, { wli("Å"), wli("\x00C5") }, { wli("Æ"), wli("\x00C6") }, { wli("Ç"), wli("\x00C7") }, { wli("È"), wli("\x00C8") }, { wli("É"), wli("\x00C9") }, { wli("Ê"), wli("\x00CA") }, { wli("Ë"), wli("\x00CB") }, { wli("Ì"), wli("\x00CC") }, { wli("Í"), wli("\x00CD") }, { wli("Î"), wli("\x00CE") }, { wli("Ï"), wli("\x00CF") }, { wli("Ð"), wli("\x00D0") }, { wli("Ñ"), wli("\x00D1") }, { wli("Ò"), wli("\x00D2") }, { wli("Ó"), wli("\x00D3") }, { wli("Ô"), wli("\x00D4") }, { wli("Õ"), wli("\x00D5") }, { wli("Ö"), wli("\x00D6") }, { wli("×"), wli("\x00D7") }, { wli("Ø"), wli("\x00D8") }, { wli("Ù"), wli("\x00D9") }, { wli("Ú"), wli("\x00DA") }, { wli("Û"), wli("\x00DB") }, { wli("Ü"), wli("\x00DC") }, { wli("Ý"), wli("\x00DD") }, { wli("Þ"), wli("\x00DE") }, { wli("ß"), wli("\x00DF") }, { wli("à"), wli("\x00E0") }, { wli("á"), wli("\x00E1") }, { wli("â"), wli("\x00E2") }, { wli("ã"), wli("\x00E3") }, { wli("ä"), wli("\x00E4") }, { wli("å"), wli("\x00E5") }, { wli("æ"), wli("\x00E6") }, { wli("ç"), wli("\x00E7") }, { wli("è"), wli("\x00E8") }, { wli("é"), wli("\x00E9") }, { wli("ê"), wli("\x00EA") }, { wli("ë"), wli("\x00EB") }, { wli("ì"), wli("\x00EC") }, { wli("í"), wli("\x00ED") }, { wli("î"), wli("\x00EE") }, { wli("ï"), wli("\x00EF") }, { wli("ð"), wli("\x00F0") }, { wli("ñ"), wli("\x00F1") }, { wli("ò"), wli("\x00F2") }, { wli("ó"), wli("\x00F3") }, { wli("ô"), wli("\x00F4") }, { wli("õ"), wli("\x00F5") }, { wli("ö"), wli("\x00F6") }, { wli("÷"), wli("\x00F7") }, { wli("ø"), wli("\x00F8") }, { wli("ù"), wli("\x00F9") }, { wli("ú"), wli("\x00FA") }, { wli("û"), wli("\x00FB") }, { wli("ü"), wli("\x00FC") }, { wli("ý"), wli("\x00FD") }, { wli("þ"), wli("\x00FE") }, { wli("ÿ"), wli("\x00FF") }, { wli("Œ"), wli("\x0152") }, { wli("œ"), wli("\x0153") }, { wli("Š"), wli("\x0160") }, { wli("š"), wli("\x0161") }, { wli("Ÿ"), wli("\x0178") }, { wli("ƒ"), wli("\x0192") }, { wli("ˆ"), wli("\x02C6") }, { wli("˜"), wli("\x02DC") }, { wli("Α"), wli("\x0391") }, { wli("Β"), wli("\x0392") }, { wli("Γ"), wli("\x0393") }, { wli("Δ"), wli("\x0394") }, { wli("Ε"), wli("\x0395") }, { wli("Ζ"), wli("\x0396") }, { wli("Η"), wli("\x0397") }, { wli("Θ"), wli("\x0398") }, { wli("Ι"), wli("\x0399") }, { wli("Κ"), wli("\x039A") }, { wli("Λ"), wli("\x039B") }, { wli("Μ"), wli("\x039C") }, { wli("Ν"), wli("\x039D") }, { wli("Ξ"), wli("\x039E") }, { wli("Ο"), wli("\x039F") }, { wli("Π"), wli("\x03A0") }, { wli("Ρ"), wli("\x03A1") }, { wli("Σ"), wli("\x03A3") }, { wli("Τ"), wli("\x03A4") }, { wli("Υ"), wli("\x03A5") }, { wli("Φ"), wli("\x03A6") }, { wli("Χ"), wli("\x03A7") }, { wli("Ψ"), wli("\x03A8") }, { wli("Ω"), wli("\x03A9") }, { wli("α"), wli("\x03B1") }, { wli("β"), wli("\x03B2") }, { wli("γ"), wli("\x03B3") }, { wli("δ"), wli("\x03B4") }, { wli("ε"), wli("\x03B5") }, { wli("ζ"), wli("\x03B6") }, { wli("η"), wli("\x03B7") }, { wli("θ"), wli("\x03B8") }, { wli("ι"), wli("\x03B9") }, { wli("κ"), wli("\x03BA") }, { wli("λ"), wli("\x03BB") }, { wli("μ"), wli("\x03BC") }, { wli("ν"), wli("\x03BD") }, { wli("ξ"), wli("\x03BE") }, { wli("ο"), wli("\x03BF") }, { wli("π"), wli("\x03C0") }, { wli("ρ"), wli("\x03C1") }, { wli("ς"), wli("\x03C2") }, { wli("σ"), wli("\x03C3") }, { wli("τ"), wli("\x03C4") }, { wli("υ"), wli("\x03C5") }, { wli("φ"), wli("\x03C6") }, { wli("χ"), wli("\x03C7") }, { wli("ψ"), wli("\x03C8") }, { wli("ω"), wli("\x03C9") }, { wli("ϑ"), wli("\x03D1") }, { wli("ϒ"), wli("\x03D2") }, { wli("ϖ"), wli("\x03D6") }, { wli(" "), wli("\x2002") }, { wli(" "), wli("\x2003") }, { wli(" "), wli("\x2009") }, { wli("‌"), wli("\x200C") }, { wli("‍"), wli("\x200D") }, { wli("‎"), wli("\x200E") }, { wli("‏"), wli("\x200F") }, { wli("–"), wli("\x2013") }, { wli("—"), wli("\x2014") }, { wli("‘"), wli("\x2018") }, { wli("’"), wli("\x2019") }, { wli("‚"), wli("\x201A") }, { wli("“"), wli("\x201C") }, { wli("”"), wli("\x201D") }, { wli("„"), wli("\x201E") }, { wli("†"), wli("\x2020") }, { wli("‡"), wli("\x2021") }, { wli("•"), wli("\x2022") }, { wli("…"), wli("\x2026") }, { wli("‰"), wli("\x2030") }, { wli("′"), wli("\x2032") }, { wli("″"), wli("\x2033") }, { wli("‹"), wli("\x2039") }, { wli("›"), wli("\x203A") }, { wli("‾"), wli("\x203E") }, { wli("⁄"), wli("\x2044") }, { wli("€"), wli("\x20AC") }, { wli("ℑ"), wli("\x2111") }, { wli("℘"), wli("\x2118") }, { wli("ℜ"), wli("\x211C") }, { wli("™"), wli("\x2122") }, { wli("ℵ"), wli("\x2135") }, { wli("←"), wli("\x2190") }, { wli("↑"), wli("\x2191") }, { wli("→"), wli("\x2192") }, { wli("↓"), wli("\x2193") }, { wli("↔"), wli("\x2194") }, { wli("↵"), wli("\x21B5") }, { wli("⇐"), wli("\x21D0") }, { wli("⇑"), wli("\x21D1") }, { wli("⇒"), wli("\x21D2") }, { wli("⇓"), wli("\x21D3") }, { wli("⇔"), wli("\x21D4") }, { wli("∀"), wli("\x2200") }, { wli("∂"), wli("\x2202") }, { wli("∃"), wli("\x2203") }, { wli("∅"), wli("\x2205") }, { wli("∇"), wli("\x2207") }, { wli("∈"), wli("\x2208") }, { wli("∉"), wli("\x2209") }, { wli("∋"), wli("\x220B") }, { wli("∏"), wli("\x220F") }, { wli("∑"), wli("\x2211") }, { wli("−"), wli("\x2212") }, { wli("∗"), wli("\x2217") }, { wli("√"), wli("\x221A") }, { wli("∝"), wli("\x221D") }, { wli("∞"), wli("\x221E") }, { wli("∠"), wli("\x2220") }, { wli("∧"), wli("\x2227") }, { wli("∨"), wli("\x2228") }, { wli("∩"), wli("\x2229") }, { wli("∪"), wli("\x222A") }, { wli("∫"), wli("\x222B") }, { wli("∴"), wli("\x2234") }, { wli("∼"), wli("\x223C") }, { wli("≅"), wli("\x2245") }, { wli("≈"), wli("\x2248") }, { wli("≠"), wli("\x2260") }, { wli("≡"), wli("\x2261") }, { wli("≤"), wli("\x2264") }, { wli("≥"), wli("\x2265") }, { wli("⊂"), wli("\x2282") }, { wli("⊃"), wli("\x2283") }, { wli("⊄"), wli("\x2284") }, { wli("⊆"), wli("\x2286") }, { wli("⊇"), wli("\x2287") }, { wli("⊕"), wli("\x2295") }, { wli("⊗"), wli("\x2297") }, { wli("⊥"), wli("\x22A5") }, { wli("⋅"), wli("\x22C5") }, { wli("⌈"), wli("\x2308") }, { wli("⌉"), wli("\x2309") }, { wli("⌊"), wli("\x230A") }, { wli("⌋"), wli("\x230B") }, { wli("⟨"), wli("\x2329") }, { wli("⟩"), wli("\x232A") }, { wli("◊"), wli("\x25CA") }, { wli("♠"), wli("\x2660") }, { wli("♣"), wli("\x2663") }, { wli("♥"), wli("\x2665") }, { wli("♦"), wli("\x2666") }, { 0, 0 } }; static int digit_value(int digit) { if (digit >= '0' && digit <= '9') return digit - '0'; if (digit >= 'A' && digit <= 'F') return digit - 'A' + 10; if (digit >= 'a' && digit <= 'f') return digit - 'a' + 10; internal_error("bad digit"); } static val html_hex_continue(val hexlist, val ch) { if (iswxdigit(c_chr(ch))) { return func_f1(cons(ch, hexlist), html_hex_continue); } if (ch == chr(';')) { wchar_t out[2] = { 0 }; val iter; if (nilp(hexlist)) return nil; for (iter = nreverse(hexlist); iter; iter = cdr(iter)) { val hexch = car(iter); int val = digit_value(c_chr(hexch)); out[0] <<= 4; out[0] |= val; } return string(out); } else { return nil; } } static val html_dec_continue(val declist, val ch) { if (iswdigit(c_chr(ch))) { return func_f1(cons(ch, declist), html_dec_continue); } if (ch == chr(';')) { wchar_t out[2] = { 0 }; val iter; for (iter = nreverse(declist); iter; iter = cdr(iter)) { val decch = car(iter); int val = c_chr(decch) - '0'; out[0] *= 10; out[0] += val; } return string(out); } else { return nil; } } static val html_numeric_handler(val ch) { if (ch == chr('x')) return func_f1(nil, html_hex_continue); if (!iswdigit(c_chr(ch))) return nil; return func_f1(cons(ch, nil), html_dec_continue); } static int is_url_reserved(int ch) { return (ch <= 0x20 || ch >= 0x7F || strchr(":/?#[]@!$&'()*+,;=%", ch) != 0); } val url_encode(val str, val space_plus) { val self = lit("url-encode"); val in_byte = make_string_byte_input_stream(str); val out = make_string_output_stream(); val ch; while ((ch = get_byte(in_byte)) != nil) { int c = c_num(ch, self); if (space_plus && c == ' ') put_char(chr('+'), out); else if (is_url_reserved(c)) format(out, lit("%~1X~1X"), num_fast(c >> 4), num_fast(c & 0xf), nao); else put_char(chr_int(ch), out); } return get_string_from_stream(out); } val url_decode(val str, val space_plus) { val self = lit("url-encode"); val in = make_string_input_stream(str); val out = make_string_output_stream(); for (;;) { val ch = get_char(in); if (ch == chr('%')) { val ch2 = get_char(in); val ch3 = get_char(in); if (ch2 && ch3 && chr_isxdigit(ch2) && chr_isxdigit(ch3)) { int byte = digit_value(c_num(ch2, self)) << 4 | digit_value(c_num(ch3, self)); put_byte(num_fast(byte), out); } else { put_char(ch, out); if (!ch2) break; put_char(ch2, out); if (!ch3) break; put_char(ch3, out); } continue; } if (space_plus && ch == chr('+')) { put_char(chr(' '), out); continue; } if (!ch) break; put_char(ch, out); } return get_string_from_stream(out); } INLINE void col_check(cnum *pcol, cnum wcol, val out) { if (wcol && ++(*pcol) >= wcol) { *pcol = 0; put_char(chr('\n'), out); } } static val base64_stream_enc_impl(val out, val in, val nbytes, val wrap_cols, const char *b64) { val self = lit("base64-stream-enc"); int ulim = nilp(default_null_arg(nbytes)); cnum col = 0; cnum nb = if3(ulim, 0, c_num(nbytes, self)); cnum count = 0; val ret = zero; cnum wcol = c_num(default_arg(wrap_cols, zero), self); for (; ulim || nb > 0; ulim ? --nb : 0) { val bv0 = get_byte(in); val bv1 = if2(bv0 && (ulim || --nb > 0), get_byte(in)); val bv2 = if2(bv1 && (ulim || --nb > 0), get_byte(in)); if (bv2) { cnum b0 = c_num(bv0, self); cnum b1 = c_num(bv1, self); cnum b2 = c_num(bv2, self); cnum word = (b0 << 16) | (b1 << 8) | b2; put_char(chr(b64[(word >> 18) ]), out); col_check(&col, wcol, out); put_char(chr(b64[(word >> 12) & 0x3F]), out); col_check(&col, wcol, out); put_char(chr(b64[(word >> 6) & 0x3F]), out); col_check(&col, wcol, out); put_char(chr(b64[(word ) & 0x3F]), out); col_check(&col, wcol, out); count += 3; } else if (bv1) { cnum b0 = c_num(bv0, self); cnum b1 = c_num(bv1, self); cnum word = (b0 << 16) | (b1 << 8); put_char(chr(b64[(word >> 18) ]), out); col_check(&col, wcol, out); put_char(chr(b64[(word >> 12) & 0x3F]), out); col_check(&col, wcol, out); put_char(chr(b64[(word >> 6) & 0x3F]), out); col_check(&col, wcol, out); put_char(chr('='), out); col_check(&col, wcol, out); count += 2; break; } else if (bv0) { cnum b0 = c_num(bv0, self); cnum word = (b0 << 16); put_char(chr(b64[(word >> 18) ]), out); col_check(&col, wcol, out); put_char(chr(b64[(word >> 12) & 0x3F]), out); col_check(&col, wcol, out); put_char(chr('='), out); col_check(&col, wcol, out); put_char(chr('='), out); col_check(&col, wcol, out); count++; break; } else { break; } if (count > NUM_MAX / 2) { ret = plus(ret, num_fast(count)); count = 0; } } if (wcol && col > 0) put_char(chr('\n'), out); return plus(ret, num_fast(count)); } val base64_stream_enc(val out, val in, val nbytes, val wrap_cols) { static const char *b64 = { "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" }; return base64_stream_enc_impl(out, in, nbytes, wrap_cols, b64); } val base64_encode(val str, val wrap_cols) { val in = make_byte_input_stream(str); val out = make_string_output_stream(); (void) base64_stream_enc(out, in, nil, wrap_cols); return get_string_from_stream(out); } val base64url_stream_enc(val out, val in, val nbytes, val wrap_cols) { static const char *b64 = { "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" }; return base64_stream_enc_impl(out, in, nbytes, wrap_cols, b64); } val base64url_encode(val str, val wrap_cols) { val in = make_byte_input_stream(str); val out = make_string_output_stream(); (void) base64url_stream_enc(out, in, nil, wrap_cols); return get_string_from_stream(out); } INLINE cnum get_base64_char(val in) { for (;;) { val ch = get_char(in); if (!ch) return 0; if (chr_isalnum(ch) || ch == chr('+') || ch == chr('/')) return c_chr(ch); if (!chr_isspace(ch) && ch != chr('=')) { unget_char(ch, in); return 0; } } } INLINE int b64_code(cnum c) { if ('A' <= c && c <= 'Z') return c - 'A'; if ('a' <= c && c <= 'z') return c - 'a' + 26; if ('0' <= c && c <= '9') return c - '0' + 26 + 26; switch (c) { case '+': return 62; case '/': return 63; default: return 0; } } static val base64_stream_dec_impl(val out, val in, cnum (*get_char)(val), int (*get_code)(cnum)) { val ret = zero; cnum count = 0; for (;;) { cnum c0 = get_char(in); cnum c1 = c0 ? get_char(in) : 0; cnum c2 = c1 ? get_char(in) : 0; cnum c3 = c2 ? get_char(in) : 0; if (c3) { long f0 = get_code(c0); long f1 = get_code(c1); long f2 = get_code(c2); long f3 = get_code(c3); long word = (f0 << 18) | (f1 << 12) | (f2 << 6) | f3; put_byte(num_fast((word >> 16) ), out); put_byte(num_fast((word >> 8) & 0xff), out); put_byte(num_fast( word & 0xff), out); count += 3; } else if (c2) { long f0 = get_code(c0); long f1 = get_code(c1); long f2 = get_code(c2); long word = (f0 << 18) | (f1 << 12) | (f2 << 6); put_byte(num_fast((word >> 16) ), out); put_byte(num_fast((word >> 8) & 0xff), out); count += 2; break; } else if (c0 || c1) { long f0 = get_code(c0); long f1 = get_code(c1); long word = (f0 << 18) | (f1 << 12); put_byte(num_fast((word >> 16) ), out); count += 1; break; } else { break; } if (count > NUM_MAX / 2) { ret = plus(ret, num_fast(count)); count = 0; } } return plus(ret, num_fast(count)); } val base64_stream_dec(val out, val in) { return base64_stream_dec_impl(out, in, get_base64_char, b64_code); } val base64_decode(val str) { val in = make_string_input_stream(str); val out = make_string_output_stream(); (void) base64_stream_dec(out, in); return get_string_from_stream(out); } val base64_decode_buf(val str) { val in = make_string_input_stream(str); val out = make_buf_stream(nil); (void) base64_stream_dec(out, in); return get_buf_from_stream(out); } INLINE cnum get_base64url_char(val in) { for (;;) { val ch = get_char(in); if (!ch) return 0; if (chr_isalnum(ch) || ch == chr('-') || ch == chr('_')) return c_chr(ch); if (!chr_isspace(ch) && ch != chr('=')) { unget_char(ch, in); return 0; } } } INLINE int b64url_code(cnum c) { if ('A' <= c && c <= 'Z') return c - 'A'; if ('a' <= c && c <= 'z') return c - 'a' + 26; if ('0' <= c && c <= '9') return c - '0' + 26 + 26; switch (c) { case '-': return 62; case '_': return 63; default: return 0; } } val base64url_stream_dec(val out, val in) { return base64_stream_dec_impl(out, in, get_base64url_char, b64url_code); } val base64url_decode(val str) { val in = make_string_input_stream(str); val out = make_string_output_stream(); (void) base64url_stream_dec(out, in); return get_string_from_stream(out); } val base64url_decode_buf(val str) { val in = make_string_input_stream(str); val out = make_buf_stream(nil); (void) base64url_stream_dec(out, in); return get_buf_from_stream(out); } static val html_encode(val str) { return trie_filter_string(get_filter(tohtml_k), str); } static val html_encode_star(val str) { return trie_filter_string(get_filter(tohtml_star_k), str); } static val html_decode(val str) { return trie_filter_string(get_filter(fromhtml_k), str); } void filter_init(void) { val fh = make_hash(hash_weak_none, nil); filters_s = intern(lit("*filters*"), user_package); filter_k = intern(lit("filter"), keyword_package); lfilt_k = intern(lit("lfilt"), keyword_package); rfilt_k = intern(lit("rfilt"), keyword_package); tohtml_k = intern(lit("tohtml"), keyword_package); tohtml_star_k = intern(lit("tohtml*"), keyword_package); fromhtml_k = intern(lit("fromhtml"), keyword_package); upcase_k = intern(lit("upcase"), keyword_package); downcase_k = intern(lit("downcase"), keyword_package); topercent_k = intern(lit("topercent"), keyword_package); frompercent_k = intern(lit("frompercent"), keyword_package); tourl_k = intern(lit("tourl"), keyword_package); fromurl_k = intern(lit("fromurl"), keyword_package); tonumber_k = intern(lit("tonumber"), keyword_package); tobase64_k = intern(lit("tobase64"), keyword_package); frombase64_k = intern(lit("frombase64"), keyword_package); tobase64url_k = intern(lit("tobase64url"), keyword_package); frombase64url_k = intern(lit("frombase64url"), keyword_package); toint_k = intern(lit("toint"), keyword_package); tofloat_k = intern(lit("tofloat"), keyword_package); hextoint_k = intern(lit("hextoint"), keyword_package); reg_var(filters_s, fh); sethash(fh, tohtml_k, build_filter(tohtml_table, t)); sethash(fh, tohtml_star_k, build_filter(tohtml_star_table, t)); { val trie = build_filter(fromhtml_table, nil); trie_add(trie, lit("&#"), func_n1(html_numeric_handler)); trie_compress(mkcloc(trie)); sethash(fh, fromhtml_k, trie); } sethash(fh, intern(lit("to_html"), keyword_package), get_filter(tohtml_k)); sethash(fh, intern(lit("from_html"), keyword_package), get_filter(fromhtml_k)); sethash(fh, upcase_k, func_n1(upcase_str)); sethash(fh, downcase_k, func_n1(downcase_str)); sethash(fh, topercent_k, pa_12_1(func_n2(url_encode), nil)); sethash(fh, frompercent_k, pa_12_1(func_n2(url_decode), nil)); sethash(fh, tourl_k, pa_12_1(func_n2(url_encode), t)); sethash(fh, fromurl_k, pa_12_1(func_n2(url_decode), t)); sethash(fh, tobase64_k, pa_12_1(func_n2(base64_encode), 0)); sethash(fh, frombase64_k, func_n1(base64_decode)); sethash(fh, tobase64url_k, pa_12_1(func_n2(base64url_encode), 0)); sethash(fh, frombase64url_k, func_n1(base64url_decode)); sethash(fh, tonumber_k, func_n1(num_str)); sethash(fh, toint_k, pa_12_1(func_n2(int_str), nil)); sethash(fh, tofloat_k, func_n1(flo_str)); sethash(fh, hextoint_k, pa_12_1(func_n2(int_str), num_fast(16))); reg_fun(intern(lit("make-trie"), user_package), func_n0(make_trie)); reg_fun(intern(lit("trie-add"), user_package), func_n3(trie_add)); reg_fun(intern(lit("trie-compress"), user_package), func_n1(trie_compress_intrinsic)); reg_fun(intern(lit("regex-from-trie"), user_package), func_n1(regex_from_trie)); reg_fun(intern(lit("trie-lookup-begin"), user_package), func_n1(trie_lookup_begin)); reg_fun(intern(lit("trie-value-at"), user_package), func_n1(trie_value_at)); reg_fun(intern(lit("trie-lookup-feed-char"), user_package), func_n2(trie_lookup_feed_char)); reg_fun(intern(lit("filter-string-tree"), user_package), func_n2(filter_string_tree)); reg_fun(intern(lit("filter-equal"), user_package), func_n4(filter_equal)); reg_fun(intern(lit("url-encode"), user_package), func_n2o(url_encode, 1)); reg_fun(intern(lit("url-decode"), user_package), func_n2o(url_decode, 1)); reg_fun(intern(lit("base64-stream-enc"), user_package), func_n4o(base64_stream_enc, 2)); reg_fun(intern(lit("base64-stream-dec"), user_package), func_n2(base64_stream_dec)); reg_fun(intern(lit("base64-encode"), user_package), func_n2o(base64_encode, 1)); reg_fun(intern(lit("base64-decode"), user_package), func_n1(base64_decode)); reg_fun(intern(lit("base64-decode-buf"), user_package), func_n1(base64_decode_buf)); reg_fun(intern(lit("base64url-stream-enc"), user_package), func_n4o(base64url_stream_enc, 2)); reg_fun(intern(lit("base64url-stream-dec"), user_package), func_n2(base64url_stream_dec)); reg_fun(intern(lit("base64url-encode"), user_package), func_n2o(base64url_encode, 1)); reg_fun(intern(lit("base64url-decode"), user_package), func_n1(base64url_decode)); reg_fun(intern(lit("base64url-decode-buf"), user_package), func_n1(base64url_decode_buf)); reg_fun(intern(lit("html-encode"), user_package), func_n1(html_encode)); reg_fun(intern(lit("html-encode*"), user_package), func_n1(html_encode_star)); reg_fun(intern(lit("html-decode"), user_package), func_n1(html_decode)); }