diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-16 12:41:09 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-16 12:41:09 +0300 |
commit | 8c042f99cc7465c86351d21331a129111b75345d (patch) | |
tree | 9656e653be0e42e5469cec77635c20356de152c2 /awklib/eg/prog/uniq.awk | |
parent | 8ceb5f934787eb7be5fb452fb39179df66119954 (diff) | |
download | egawk-8c042f99cc7465c86351d21331a129111b75345d.tar.gz egawk-8c042f99cc7465c86351d21331a129111b75345d.tar.bz2 egawk-8c042f99cc7465c86351d21331a129111b75345d.zip |
Move to gawk-3.0.0.
Diffstat (limited to 'awklib/eg/prog/uniq.awk')
-rw-r--r-- | awklib/eg/prog/uniq.awk | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/awklib/eg/prog/uniq.awk b/awklib/eg/prog/uniq.awk new file mode 100644 index 00000000..5f63ef0f --- /dev/null +++ b/awklib/eg/prog/uniq.awk @@ -0,0 +1,116 @@ +# uniq.awk --- do uniq in awk +# Arnold Robbins, arnold@gnu.ai.mit.edu, Public Domain +# May 1993 + +function usage( e) +{ + e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]" + print e > "/dev/stderr" + exit 1 +} + +# -c count lines. overrides -d and -u +# -d only repeated lines +# -u only non-repeated lines +# -n skip n fields +# +n skip n characters, skip fields first + +BEGIN \ +{ + count = 1 + outputfile = "/dev/stdout" + opts = "udc0:1:2:3:4:5:6:7:8:9:" + while ((c = getopt(ARGC, ARGV, opts)) != -1) { + if (c == "u") + non_repeated_only++ + else if (c == "d") + repeated_only++ + else if (c == "c") + do_count++ + else if (index("0123456789", c) != 0) { + # getopt requires args to options + # this messes us up for things like -5 + if (Optarg ~ /^[0-9]+$/) + fcount = (c Optarg) + 0 + else { + fcount = c + 0 + Optind-- + } + } else + usage() + } + + if (ARGV[Optind] ~ /^\+[0-9]+$/) { + charcount = substr(ARGV[Optind], 2) + 0 + Optind++ + } + + for (i = 1; i < Optind; i++) + ARGV[i] = "" + + if (repeated_only == 0 && non_repeated_only == 0) + repeated_only = non_repeated_only = 1 + + if (ARGC - Optind == 2) { + outputfile = ARGV[ARGC - 1] + ARGV[ARGC - 1] = "" + } +} +function are_equal( n, m, clast, cline, alast, aline) +{ + if (fcount == 0 && charcount == 0) + return (last == $0) + + if (fcount > 0) { + n = split(last, alast) + m = split($0, aline) + clast = join(alast, fcount+1, n) + cline = join(aline, fcount+1, m) + } else { + clast = last + cline = $0 + } + if (charcount) { + clast = substr(clast, charcount + 1) + cline = substr(cline, charcount + 1) + } + + return (clast == cline) +} +NR == 1 { + last = $0 + next +} + +{ + equal = are_equal() + + if (do_count) { # overrides -d and -u + if (equal) + count++ + else { + printf("%4d %s\n", count, last) > outputfile + last = $0 + count = 1 # reset + } + next + } + + if (equal) + count++ + else { + if ((repeated_only && count > 1) || + (non_repeated_only && count == 1)) + print last > outputfile + last = $0 + count = 1 + } +} + +END { + if (do_count) + printf("%4d %s\n", count, last) > outputfile + else if ((repeated_only && count > 1) || + (non_repeated_only && count == 1)) + print last > outputfile +} |