aboutsummaryrefslogtreecommitdiffstats
path: root/awklib/eg/prog/wordfreq.awk
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2010-07-16 13:09:56 +0300
committer Arnold D. Robbins <arnold@skeeve.com> 2010-07-16 13:09:56 +0300
commit bc70de7b3302d5a81515b901cae376b8b51d2004 (patch)
tree d36d6743e65697f6923b79d0ea8f9f9bf4ef7398 /awklib/eg/prog/wordfreq.awk
parent b9e4a1fd4c8c8753ab8a9887bab55f03efe1e3e2 (diff)
download egawk-bc70de7b3302d5a81515b901cae376b8b51d2004.tar.gz
egawk-bc70de7b3302d5a81515b901cae376b8b51d2004.tar.bz2
egawk-bc70de7b3302d5a81515b901cae376b8b51d2004.zip
Move to gawk-3.1.0.
Diffstat (limited to 'awklib/eg/prog/wordfreq.awk')
-rw-r--r-- awklib/eg/prog/wordfreq.awk 11
1 files changed, 9 insertions, 2 deletions
diff --git a/awklib/eg/prog/wordfreq.awk b/awklib/eg/prog/wordfreq.awk
index b67fed47..62db5cfa 100644
--- a/awklib/eg/prog/wordfreq.awk
+++ b/awklib/eg/prog/wordfreq.awk
@@ -1,10 +1,17 @@
-# Print list of word frequencies
+# wordfreq.awk --- print list of word frequencies
+
{
$0 = tolower($0) # remove case distinctions
- gsub(/[^a-z0-9_ \t]/, "", $0) # remove punctuation
+ # remove punctuation
+ gsub(/[^[:alnum:]_[:blank:]]/, "", $0)
for (i = 1; i <= NF; i++)
freq[$i]++
}
+
+END {
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+}
END {
sort = "sort +1 -nr"
for (word in freq)