diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2021-10-09 22:16:58 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2021-10-09 22:16:58 +0300 |
commit | 6db264b1f346896f18fa705023594e5d4d789f04 (patch) | |
tree | 86ebaeb00e5f86d53a00e7ae43873b1f7b877123 | |
parent | 589d91eeb99bf2c7dcd6a4310f5964739b9c8edb (diff) | |
download | egawk-6db264b1f346896f18fa705023594e5d4d789f04.tar.gz egawk-6db264b1f346896f18fa705023594e5d4d789f04.tar.bz2 egawk-6db264b1f346896f18fa705023594e5d4d789f04.zip |
New script in helpers to find UTF in doc files.
-rw-r--r-- | helpers/ChangeLog | 4 | ||||
-rwxr-xr-x | helpers/find-utf.sh | 53 |
2 files changed, 57 insertions, 0 deletions
diff --git a/helpers/ChangeLog b/helpers/ChangeLog
index d8e12b46..6b59f26c 100644
--- a/helpers/ChangeLog
+++ b/helpers/ChangeLog
@@ -1,3 +1,7 @@
+2021-10-09 Arnold D. Robbins <arnold@skeeve.com>
+
+	* find-utf.sh: New script.
+
 2021-07-15 Arnold D. Robbins <arnold@skeeve.com>
 
 	* testdfa.c (main): Bug fix and new -b option.
diff --git a/helpers/find-utf.sh b/helpers/find-utf.sh
new file mode 100755
index 00000000..b608a6fa
--- /dev/null
+++ b/helpers/find-utf.sh
@@ -0,0 +1,53 @@
+#! /bin/sh
+export LC_ALL=C
+gawk '
+# ord.awk --- do ord and chr
+
+# Global identifiers:
+# _ord_: numerical values indexed by characters
+# _ord_init: function to initialize _ord_
+#
+# Arnold Robbins, arnold@skeeve.com, Public Domain
+# 16 January, 1992
+# 20 July, 1992, revised
+
+BEGIN { _ord_init() }
+
+function _ord_init(    low, high, i, t)
+{
+	low = sprintf("%c", 7) # BEL is ascii 7
+	if (low == "\a") { # regular ascii
+		low = 0
+		high = 255
+	} else if (sprintf("%c", 128 + 7) == "\a") {
+		# ascii, mark parity
+		low = 128
+		high = 255
+	} else { # ebcdic(!)
+		low = 0
+		high = 255
+	}
+
+	for (i = low; i <= high; i++) {
+		t = sprintf("%c", i)
+		_ord_[t] = i
+	}
+}
+function ord(str, c)
+{
+	# only first character is of interest
+	c = substr(str, 1, 1)
+	return _ord_[c]
+}
+
+function chr(c)
+{
+	# force c to be numeric by adding 0
+	return sprintf("%c", c + 0)
+}
+{
+	n = split($0, c, "")
+	for (i = 1; i <= n; i++)
+		if (ord(c[i]) > 127)
+			printf("%d: char: \\%o\n", NR, ord(c[i]))
+}' "$@"