From bc70de7b3302d5a81515b901cae376b8b51d2004 Mon Sep 17 00:00:00 2001
From: "Arnold D. Robbins"
Date: Fri, 16 Jul 2010 13:09:56 +0300
Subject: Move to gawk-3.1.0.

---
 awklib/eg/prog/wordfreq.awk | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'awklib/eg/prog/wordfreq.awk')

diff --git a/awklib/eg/prog/wordfreq.awk b/awklib/eg/prog/wordfreq.awk
index b67fed47..62db5cfa 100644
--- a/awklib/eg/prog/wordfreq.awk
+++ b/awklib/eg/prog/wordfreq.awk
@@ -1,10 +1,17 @@
-# Print list of word frequencies
+# wordfreq.awk --- print list of word frequencies
+
 {
     $0 = tolower($0)    # remove case distinctions
-    gsub(/[^a-z0-9_ \t]/, "", $0)  # remove punctuation
+    # remove punctuation
+    gsub(/[^[:alnum:]_[:blank:]]/, "", $0)
     for (i = 1; i <= NF; i++)
         freq[$i]++
 }
+
+END {
+    for (word in freq)
+        printf "%s\t%d\n", word, freq[word]
+}
 END {
     sort = "sort +1 -nr"
     for (word in freq)
-- 
cgit v1.2.3