diff options
author | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-16 13:09:56 +0300 |
---|---|---|
committer | Arnold D. Robbins <arnold@skeeve.com> | 2010-07-16 13:09:56 +0300 |
commit | bc70de7b3302d5a81515b901cae376b8b51d2004 (patch) | |
tree | d36d6743e65697f6923b79d0ea8f9f9bf4ef7398 /awklib/eg/prog/wordfreq.awk | |
parent | b9e4a1fd4c8c8753ab8a9887bab55f03efe1e3e2 (diff) | |
download | egawk-bc70de7b3302d5a81515b901cae376b8b51d2004.tar.gz egawk-bc70de7b3302d5a81515b901cae376b8b51d2004.tar.bz2 egawk-bc70de7b3302d5a81515b901cae376b8b51d2004.zip |
Move to gawk-3.1.0.
Diffstat (limited to 'awklib/eg/prog/wordfreq.awk')
-rw-r--r-- | awklib/eg/prog/wordfreq.awk | 11 |
1 files changed, 9 insertions, 2 deletions
```diff
diff --git a/awklib/eg/prog/wordfreq.awk b/awklib/eg/prog/wordfreq.awk
index b67fed47..62db5cfa 100644
--- a/awklib/eg/prog/wordfreq.awk
+++ b/awklib/eg/prog/wordfreq.awk
@@ -1,10 +1,17 @@
-# Print list of word frequencies
+# wordfreq.awk --- print list of word frequencies
+
 {
     $0 = tolower($0)    # remove case distinctions
-    gsub(/[^a-z0-9_ \t]/, "", $0)  # remove punctuation
+    # remove punctuation
+    gsub(/[^[:alnum:]_[:blank:]]/, "", $0)
     for (i = 1; i <= NF; i++)
         freq[$i]++
 }
+
+END {
+    for (word in freq)
+        printf "%s\t%d\n", word, freq[word]
+}
 END {
     sort = "sort +1 -nr"
     for (word in freq)
```