diff options
Diffstat (limited to 'awklib/eg/prog/wordfreq.awk')
-rw-r--r-- | awklib/eg/prog/wordfreq.awk | 11 |
1 file changed, 9 insertions, 2 deletions
diff --git a/awklib/eg/prog/wordfreq.awk b/awklib/eg/prog/wordfreq.awk
index b67fed47..62db5cfa 100644
--- a/awklib/eg/prog/wordfreq.awk
+++ b/awklib/eg/prog/wordfreq.awk
@@ -1,10 +1,17 @@
-# Print list of word frequencies
+# wordfreq.awk --- print list of word frequencies
+
 {
     $0 = tolower($0)    # remove case distinctions
-    gsub(/[^a-z0-9_ \t]/, "", $0)  # remove punctuation
+    # remove punctuation
+    gsub(/[^[:alnum:]_[:blank:]]/, "", $0)
     for (i = 1; i <= NF; i++)
         freq[$i]++
 }
+
+END {
+    for (word in freq)
+        printf "%s\t%d\n", word, freq[word]
+}
 END {
     sort = "sort +1 -nr"
     for (word in freq)