about summary refs log tree commit diff stats
path: root/awklib/eg/prog/wordfreq.awk
diff options
context:
space:
mode:
Diffstat (limited to 'awklib/eg/prog/wordfreq.awk')
-rw-r--r-- awklib/eg/prog/wordfreq.awk 11
1 files changed, 9 insertions, 2 deletions
diff --git a/awklib/eg/prog/wordfreq.awk b/awklib/eg/prog/wordfreq.awk
index b67fed47..62db5cfa 100644
--- a/awklib/eg/prog/wordfreq.awk
+++ b/awklib/eg/prog/wordfreq.awk
@@ -1,10 +1,17 @@
-# Print list of word frequencies
+# wordfreq.awk --- print list of word frequencies
+
{
$0 = tolower($0) # remove case distinctions
- gsub(/[^a-z0-9_ \t]/, "", $0) # remove punctuation
+ # remove punctuation
+ gsub(/[^[:alnum:]_[:blank:]]/, "", $0)
for (i = 1; i <= NF; i++)
freq[$i]++
}
+
+END {
+ for (word in freq)
+ printf "%s\t%d\n", word, freq[word]
+}
END {
sort = "sort +1 -nr"
for (word in freq)