aboutsummaryrefslogtreecommitdiffstats
path: root/helpers
diff options
context:
space:
mode:
author: Arnold D. Robbins <arnold@skeeve.com> 2021-10-09 22:18:38 +0300
committer: Arnold D. Robbins <arnold@skeeve.com> 2021-10-09 22:18:38 +0300
commit: 3c374e502995634d4dae8967aa8393a74b748f83 (patch)
tree: 960d2df61f57fd1dca67e80193420bdef1bbfc4a /helpers
parent: b09f86a66cc6cae015a1343c189c84031007de1c (diff)
parent: 6db264b1f346896f18fa705023594e5d4d789f04 (diff)
download: egawk-3c374e502995634d4dae8967aa8393a74b748f83.tar.gz
egawk-3c374e502995634d4dae8967aa8393a74b748f83.tar.bz2
egawk-3c374e502995634d4dae8967aa8393a74b748f83.zip
Merge branch 'gawk-5.1-stable'
Diffstat (limited to 'helpers')
-rw-r--r-- helpers/ChangeLog | 4
-rwxr-xr-x helpers/find-utf.sh | 53
2 files changed, 57 insertions, 0 deletions
diff --git a/helpers/ChangeLog b/helpers/ChangeLog
index d8e12b46..6b59f26c 100644
--- a/helpers/ChangeLog
+++ b/helpers/ChangeLog
@@ -1,3 +1,7 @@
+2021-10-09 Arnold D. Robbins <arnold@skeeve.com>
+
+ * find-utf.sh: New script.
+
2021-07-15 Arnold D. Robbins <arnold@skeeve.com>
* testdfa.c (main): Bug fix and new -b option.
diff --git a/helpers/find-utf.sh b/helpers/find-utf.sh
new file mode 100755
index 00000000..b608a6fa
--- /dev/null
+++ b/helpers/find-utf.sh
@@ -0,0 +1,53 @@
+#! /bin/sh
+export LC_ALL=C    # C locale for the gawk run below; presumably so each byte counts as one character -- confirm
+gawk '
+# ord.awk --- do ord and chr
+
+# Global identifiers:
+# _ord_: numerical values indexed by characters
+# _ord_init: function to initialize _ord_
+#
+# Arnold Robbins, arnold@skeeve.com, Public Domain
+# 16 January, 1992
+# 20 July, 1992, revised
+
+BEGIN { _ord_init() }    # fill the _ord_ table before any input is read
+
+# _ord_init --- fill the global _ord_ table: _ord_[ch] is the code of ch.
+# Takes no arguments; first, last, code and bel are local variables.
+# Called once from the BEGIN rule.
+function _ord_init(    first, last, code, bel)
+{
+    # Plain ascii and the ebcdic fallback both cover codes 0 through 255.
+    first = 0
+    last = 255
+
+    bel = sprintf("%c", 7)    # BEL is ascii 7
+    if (bel != "\a" && sprintf("%c", 128 + 7) == "\a") {
+        # ascii, mark parity: BEL only matches with the high bit set,
+        # so the table starts at 128.
+        first = 128
+    }
+
+    # Key each code by the one-character string that %c produces for it.
+    for (code = first; code <= last; code++)
+        _ord_[sprintf("%c", code)] = code
+}
+# ord --- numeric value of the first character of str, taken from _ord_
+function ord(str, c)
+{
+    c = _ord_[substr(str, 1, 1)]    # characters after the first are ignored
+    return c
+}
+
+function chr(c)    # chr --- return the character whose numeric code is c
+{
+    c += 0    # force c to be numeric, so that %c sees a code and not a string
+    return sprintf("%c", c)
+}
+{    # every input record: report each character whose code is above 127
+    nchars = split($0, chars, "")    # empty separator: one element per character
+    for (pos = 1; pos <= nchars; pos++)
+        if ((code = ord(chars[pos])) > 127)
+            printf("%d: char: \\%o\n", NR, code)
+}' "$@"