diff options
Diffstat (limited to 'awklib')
-rw-r--r-- | awklib/eg/prog/split.awk | 164 |
1 files changed, 125 insertions, 39 deletions
diff --git a/awklib/eg/prog/split.awk b/awklib/eg/prog/split.awk index 9239a6c5..6e0ac16b 100644 --- a/awklib/eg/prog/split.awk +++ b/awklib/eg/prog/split.awk @@ -1,56 +1,142 @@ # split.awk --- do split in awk # -# Requires ord() and chr() library functions +# Requires getopt() library function. # # Arnold Robbins, arnold@skeeve.com, Public Domain # May 1993 # Revised slightly, May 2014 +# Rewritten September 2020 -# usage: split [-count] [file] [outname] - +function usage() +{ + print("usage: split [-l count] [-a suffix-len] [file [outname]]") > "/dev/stderr" + print(" split [-b N[k|m]] [-a suffix-len] [file [outname]]") > "/dev/stderr" + exit 1 +} BEGIN { - outfile = "x" # default - count = 1000 - if (ARGC > 4) - usage() - - i = 1 - if (i in ARGV && ARGV[i] ~ /^-[[:digit:]]+$/) { - count = -ARGV[i] - ARGV[i] = "" - i++ + # Set defaults: + Suffix_length = 2 + Line_count = 1000 + Byte_count = 0 + Outfile = "x" + + parse_arguments() + + init_suffix_data() + + Output = (Outfile compute_suffix()) +} +function parse_arguments( i, c, l, modifier) +{ + while ((c = getopt(ARGC, ARGV, "a:b:l:")) != -1) { + if (c == "a") + Suffix_length = Optarg + 0 + else if (c == "b") { + Byte_count = Optarg + 0 + Line_count = 0 + + l = length(Optarg) + modifier = substr(Optarg, l, 1) + if (modifier == "k") + Byte_count *= 1024 + else if (modifier == "m") + Byte_count *= 1024 * 1024 + } else if (c == "l") { + Line_count = Optarg + 0 + Byte_count = 0 + } else + usage() } - # test argv in case reading from stdin instead of file - if (i in ARGV) - i++ # skip datafile name - if (i in ARGV) { - outfile = ARGV[i] + + # Clear out options + for (i = 1; i < Optind; i++) ARGV[i] = "" + + # Check for filename + if (ARGV[Optind]) { + Optind++ + + # Check for different prefix + if (ARGV[Optind]) { + Outfile = ARGV[Optind] + ARGV[Optind] = "" + + if (++Optind < ARGC) + usage() + } } - s1 = s2 = "a" - out = (outfile s1 s2) } +function compute_suffix( i, result, letters) { - if (++tcount > count) { - close(out) - if (s2 == "z") { - if (s1 == "z") { - printf("split: %s is too large to split\n", - FILENAME) > "/dev/stderr" - exit 1 - } - s1 = chr(ord(s1) + 1) - s2 = "a" - } - else - s2 = chr(ord(s2) + 1) - out = (outfile s1 s2) - tcount = 1 + # Logical step 3 + if (Reached_last) { + printf("split: too many files!\n") > "/dev/stderr" + exit 1 + } else if (on_last_file()) + Reached_last = 1 # fail when wrapping after 'zzz' + + # Logical step 1 + result = "" + letters = "abcdefghijklmnopqrstuvwxyz" + for (i = 1; i <= Suffix_length; i++) + result = result substr(letters, Suffix_ind[i], 1) + + # Logical step 2 + for (i = Suffix_length; i >= 1; i--) { + if (++Suffix_ind[i] > 26) { + Suffix_ind[i] = 1 + } else + break } - print > out + + return result } -function usage() +function init_suffix_data( i) { - print("usage: split [-num] [file] [outname]") > "/dev/stderr" - exit 1 + for (i = 1; i <= Suffix_length; i++) + Suffix_ind[i] = 1 + + Reached_last = 0 +} +function on_last_file( i, on_last) +{ + on_last = 1 + for (i = 1; i <= Suffix_length; i++) { + on_last = on_last && (Suffix_ind[i] == 26) + } + + return on_last +} +Line_count > 0 { + if (++tcount > Line_count) { + close(Output) + Output = (Outfile compute_suffix()) + tcount = 1 + } + print > Output +} +Byte_count > 0 { + # `+ 1' is for the final newline + if (tcount + length($0) + 1 > Byte_count) { # would overflow + # compute leading bytes + leading_bytes = Byte_count - tcount + + # write leading bytes + printf("%s", substr($0, 1, leading_bytes)) > Output + + # close old file, open new file + close(Output) + Output = (Outfile compute_suffix()) + + # set up first bytes for new file + $0 = substr($0, leading_bytes + 1) # trailing bytes + tcount = 0 + } + + # write full record or trailing bytes + tcount += length($0) + 1 + print > Output +} +END { + close(Output) } |