aboutsummaryrefslogtreecommitdiffstats
path: root/awklib/eg
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2020-10-01 16:48:51 +0300
committer Arnold D. Robbins <arnold@skeeve.com> 2020-10-01 16:48:51 +0300
commit 6a34364cde8eec7df0dd9f1de005babea18e45ec (patch)
tree 340c1306a41a618af283b6fee0458179b099bf6b /awklib/eg
parent 2ab1c82b4097cff8763d1ed63be6478edf55eb54 (diff)
download egawk-6a34364cde8eec7df0dd9f1de005babea18e45ec.tar.gz
egawk-6a34364cde8eec7df0dd9f1de005babea18e45ec.tar.bz2
egawk-6a34364cde8eec7df0dd9f1de005babea18e45ec.zip
Rewrite split program.
Diffstat (limited to 'awklib/eg')
-rw-r--r-- awklib/eg/prog/split.awk 164
1 files changed, 125 insertions, 39 deletions
diff --git a/awklib/eg/prog/split.awk b/awklib/eg/prog/split.awk
index 9239a6c5..6e0ac16b 100644
--- a/awklib/eg/prog/split.awk
+++ b/awklib/eg/prog/split.awk
@@ -1,56 +1,142 @@
# split.awk --- do split in awk
#
-# Requires ord() and chr() library functions
+# Requires getopt() library function.
#
# Arnold Robbins, arnold@skeeve.com, Public Domain
# May 1993
# Revised slightly, May 2014
+# Rewritten September 2020
-# usage: split [-count] [file] [outname]
-
+# usage --- print the two-line synopsis on standard error and exit
+#
+# Takes no arguments and does not return: it ends the program with
+# exit status 1.
+function usage()
+{
+    print "usage: split [-l count] [-a suffix-len] [file [outname]]" > "/dev/stderr"
+    print " split [-b N[k|m]] [-a suffix-len] [file [outname]]" > "/dev/stderr"
+
+    exit 1
+}
BEGIN { # runs once before any input: set defaults, parse options, pick the first output name
- outfile = "x" # default
- count = 1000
- if (ARGC > 4)
- usage()
-
- i = 1
- if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) {
- count = -ARGV[i]
- ARGV[i] = ""
- i++
+ # Set defaults:
+ Suffix_length = 2 # number of letters in each output-name suffix
+ Line_count = 1000 # lines per output file; a value > 0 enables the line-mode rule
+ Byte_count = 0 # bytes per output file; a value > 0 enables the byte-mode rule
+ Outfile = "x" # output-name prefix
+
+ parse_arguments() # may override the defaults above from ARGV
+
+ init_suffix_data() # called after parsing, so it sees the final Suffix_length
+
+ Output = (Outfile compute_suffix()) # name of the first output file
+}
+function parse_arguments( i, c, l, modifier) # handle -a/-b/-l via getopt(), then the file and outname operands; i, c, l, modifier are locals
+{
+ while ((c = getopt(ARGC, ARGV, "a:b:l:")) != -1) { # the option's argument is read from Optarg below
+ if (c == "a")
+ Suffix_length = Optarg + 0 # `+ 0' forces a numeric value
+ else if (c == "b") {
+ Byte_count = Optarg + 0 # numeric prefix only; a trailing k or m is handled below
+ Line_count = 0 # byte mode turns line mode off
+
+ l = length(Optarg)
+ modifier = substr(Optarg, l, 1) # last character of the argument
+ if (modifier == "k")
+ Byte_count *= 1024
+ else if (modifier == "m")
+ Byte_count *= 1024 * 1024
+ } else if (c == "l") {
+ Line_count = Optarg + 0
+ Byte_count = 0 # line mode turns byte mode off
+ } else
+ usage() # any other value returned by getopt() is treated as an error
 }
- # test argv in case reading from stdin instead of file
- if (i in ARGV)
- i++ # skip datafile name
- if (i in ARGV) {
- outfile = ARGV[i]
+
+ # Clear out options, so awk does not treat them as input files
+ for (i = 1; i < Optind; i++)
 ARGV[i] = ""
+
+ # Check for filename
+ if (ARGV[Optind]) { # NOTE(review): truth test, so an operand such as "0" may be skipped --- confirm
+ Optind++
+
+ # Check for different prefix
+ if (ARGV[Optind]) {
+ Outfile = ARGV[Optind]
+ ARGV[Optind] = "" # blank it so awk does not read it as an input file
+
+ if (++Optind < ARGC) # arguments remain after outname
+ usage()
+ }
 }
- s1 = s2 = "a"
- out = (outfile s1 s2)
}
+function compute_suffix( i, result, letters) # return the current suffix and advance Suffix_ind to the next one; i, result, letters are locals
{
- if (++tcount > count) {
- close(out)
- if (s2 == "z") {
- if (s1 == "z") {
- printf("split: %s is too large to split\n",
- FILENAME) > "/dev/stderr"
- exit 1
- }
- s1 = chr(ord(s1) + 1)
- s2 = "a"
- }
- else
- s2 = chr(ord(s2) + 1)
- out = (outfile s1 s2)
- tcount = 1
+ # Logical step 3: refuse to continue once the last suffix has been used
+ if (Reached_last) { # set by an earlier call that returned the final suffix
+ printf("split: too many files!\n") > "/dev/stderr"
+ exit 1
+ } else if (on_last_file())
+ Reached_last = 1 # fail when wrapping after 'zzz'
+
+ # Logical step 1: build the suffix from the current indices
+ result = ""
+ letters = "abcdefghijklmnopqrstuvwxyz"
+ for (i = 1; i <= Suffix_length; i++)
+ result = result substr(letters, Suffix_ind[i], 1) # index 1..26 selects a..z
+
+ # Logical step 2: advance the indices for the next call, rightmost position first
+ for (i = Suffix_length; i >= 1; i--) {
+ if (++Suffix_ind[i] > 26) {
+ Suffix_ind[i] = 1 # wrap to 'a' and carry into the position to the left
+ } else
+ break # no carry needed
 }
- print > out
+
+ return result
}
-function usage()
+function init_suffix_data( i) # set Suffix_ind[1..Suffix_length] to 1 (the letter 'a') and clear Reached_last; i is a local
{
- print("usage: split [-num] [file] [outname]") > "/dev/stderr"
- exit 1
+ for (i = 1; i <= Suffix_length; i++)
+ Suffix_ind[i] = 1
+
+ Reached_last = 0
+}
+# on_last_file --- report whether the suffix indices are at their final value
+#
+# Returns 1 when each of Suffix_ind[1] .. Suffix_ind[Suffix_length]
+# equals 26 (also when Suffix_length is less than 1, since there is
+# then nothing to check), and 0 otherwise.  `pos' is a local.
+function on_last_file(    pos)
+{
+    for (pos = 1; pos <= Suffix_length; pos++) {
+        # one position that is not at 26 settles the answer
+        if (Suffix_ind[pos] != 26)
+            return 0
+    }
+
+    return 1
+}
+# Line mode: runs for every input record while Line_count is positive.
+# tcount is the number of records sent to the current output file,
+# counting the one being handled now.
+Line_count > 0 {
+    tcount++
+
+    if (tcount > Line_count) {
+        # current file is full: switch to the next name and
+        # count this record as the first one in it
+        close(Output)
+        Output = (Outfile compute_suffix())
+        tcount = 1
+    }
+
+    print $0 > Output
+}
+# Byte mode: runs for every input record while Byte_count is positive.
+# Each record is written followed by a newline; tcount is the number of
+# bytes already written to the current output file.  A record that does
+# not fit is cut where the current file becomes full and continued in
+# the next file.  The cut is repeated as often as needed, so a record
+# longer than Byte_count is spread over several files instead of
+# pushing the following file past the limit.
+# NOTE(review): length() and substr() count characters; these equal
+# bytes only in a single-byte locale or with gawk's -b option --- confirm.
+Byte_count > 0 {
+    # `+ 1' is for the final newline
+    while (tcount + length($0) + 1 > Byte_count) { # would overflow
+        # compute leading bytes: the room left in the current file
+        leading_bytes = Byte_count - tcount
+
+        # write leading bytes
+        printf("%s", substr($0, 1, leading_bytes)) > Output
+
+        # close old file, open new file
+        close(Output)
+        Output = (Outfile compute_suffix())
+
+        # set up first bytes for new file
+        $0 = substr($0, leading_bytes + 1) # trailing bytes
+        tcount = 0
+
+        # a limit below one byte can never hold the newline:
+        # split once, as before, instead of looping
+        if (Byte_count < 1)
+            break
+    }
+
+    # write full record or trailing bytes
+    tcount += length($0) + 1
+    print > Output
+}
+END { # runs after the last input record (and after an `exit' elsewhere in the program)
+ close(Output) # close the current output file
}