Arm: Change CRC from fpu feature to architectural extension
[deliverable/binutils-gdb.git] / gdb / contrib / words.sh
CommitLineData
496af5c8
TV
#!/bin/sh

# Copyright (C) 2019 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# This script intends to facilitate spell checking of comments in C sources.
# It:
# - extracts comments from C files
# - transforms the comments into a list of lowercase words
# - prefixes each word with the frequency
# - filters out words within a frequency range
# - sorts the words, longest first
#
# For:
# ...
# $ ./gdb/contrib/words.sh $(find gdb -type f -name "*.c" -o -name "*.h")
# ...
# it generates a list of ~15000 words prefixed with frequency.
#
# This could be used to generate a dictionary that is kept as part of the
# sources, against which new code can be checked, generating a warning or
# error.  The hope is that misspellings would trigger this frequently, and rare
# words rarely, otherwise the burden of updating the dictionary would be too
# much.
#
# And for:
# ...
# $ ./gdb/contrib/words.sh -f 1 $(find gdb -type f -name "*.c" -o -name "*.h")
# ...
# it generates a list of ~5000 words with frequency 1.
#
# This can be used to scan for misspellings manually.
#

# Frequency bounds selected on the command line.  Empty means "not given
# yet"; 0 means "no bound on this side".
minfreq=
maxfreq=

# Abort with exit status 2 unless option $1 came with a usable value.
#   $1 - option name as typed (used in the diagnostic)
#   $2 - number of command-line words left, the option itself included
#   $3 - candidate value (the word following the option, may be empty)
# The value has to be a non-negative decimal integer: the filter at the end
# of the script compares it numerically with word counts.
check_freq_arg() {
  if [ "$2" -lt 2 ]; then
    printf '%s\n' "words.sh: option $1 requires an argument" >&2
    exit 2
  fi
  case "$3" in
    '' | *[!0-9]*)
      printf '%s\n' \
        "words.sh: option $1 expects a non-negative integer, got '$3'" >&2
      exit 2
      ;;
  esac
}

# Consume leading options.  The first word that is not a known option ends
# option parsing; it and everything after it stay in "$@".
while [ $# -gt 0 ]; do
  case "$1" in
    --freq | -f)
      # Exact frequency: lower and upper bound coincide.
      # Checking first also guarantees that "shift 2" cannot fail; a failed
      # shift leaves "$@" untouched in bash and would loop forever here.
      check_freq_arg "$1" "$#" "${2-}"
      minfreq=$2
      maxfreq=$2
      shift 2
      ;;
    --min)
      check_freq_arg "$1" "$#" "${2-}"
      minfreq=$2
      # Leave an upper bound given earlier alone; otherwise unbounded.
      if [ "$maxfreq" = "" ]; then
        maxfreq=0
      fi
      shift 2
      ;;
    --max)
      check_freq_arg "$1" "$#" "${2-}"
      maxfreq=$2
      # Leave a lower bound given earlier alone; otherwise unbounded.
      if [ "$minfreq" = "" ]; then
        minfreq=0
      fi
      shift 2
      ;;
    *)
      break
      ;;
  esac
done

# No frequency option at all: disable both bounds.
if [ "$minfreq" = "" ] && [ "$maxfreq" = "" ]; then
  minfreq=0
  maxfreq=0
fi

# Temporary file holding the awk program that extracts comment text; it is
# removed again when the script exits.
awkfile=$(mktemp) || exit 1
trap 'rm -f "$awkfile"' EXIT

# The quoted delimiter writes the program out verbatim, so awk's "$0" needs
# no escaping from the shell.
cat > "$awkfile" <<'EOF'
# Print the text found inside C block comments (/* ... */).  One output
# line is produced for every input line that lies in, opens or closes a
# comment; several comments on one input line are joined with a space.
# "//" comments are not recognized, and string literals are not parsed.

BEGIN {
  in_comment = 0
}

# A comment left open at the end of one file must not leak into the next.
FNR == 1 {
  in_comment = 0
}

{
  rest = $0
  text = ""
  sep = ""
  # A line that starts inside a comment is printed even when it is empty.
  emit = in_comment

  # Scan left to right, alternating between "looking for the opener" and
  # "collecting text up to the closer".
  while (rest != "") {
    if (in_comment) {
      close_at = index(rest, "*/")
      if (close_at == 0) {
        # The comment continues on the next line.
        text = text sep rest
        rest = ""
      } else {
        text = text sep substr(rest, 1, close_at - 1)
        rest = substr(rest, close_at + 2)
        in_comment = 0
      }
      sep = " "
    } else {
      open_at = index(rest, "/*")
      if (open_at == 0) {
        # Only code left on this line.
        rest = ""
      } else {
        rest = substr(rest, open_at + 2)
        in_comment = 1
        emit = 1
      }
    }
  }

  if (emit) {
    print text
  }
}
EOF

# Stabilize sort: byte-wise collation and case mapping in every locale.
export LC_ALL=C

# Read text on stdin and write its words to stdout, lowercased, one per
# line.  Punctuation listed below, blanks, tabs, backslashes and digits all
# act as separators; any other character (letters, apostrophes, colons, ...)
# stays part of a word.  Empty results are dropped so that the empty string
# is never counted as a word.
split_into_words() {
  # "[\n*]" is tr's repeat notation: map every separator to a newline.
  tr '%^$~#{}`&=@,. \t/_()|<>+*\\[]0-9-' '[\n*]' \
    | tr '[:upper:]' '[:lower:]' \
    | sed '/^$/d'
}

# Filter "COUNT WORD" lines (the format "uniq -c" writes) by COUNT.
#   $1 - lowest count to keep, 0 for no lower bound
#   $2 - highest count to keep, 0 for no upper bound
# The bounds are handed to awk as variables rather than pasted into the
# program text, and are forced to numbers so an empty value acts like 0.
filter_by_frequency() {
  awk -v lo="$1" -v hi="$2" '
    BEGIN { lo += 0; hi += 0 }
    (lo == 0 || lo <= $1) && (hi == 0 || $1 <= hi)
  '
}

# Reorder the lines on stdin by their total length, longest first.  The
# length is prepended as a sort key and cut off again afterwards.
longest_first() {
  awk '{ print length($0) " " $0 }' \
    | sort -n -r \
    | cut -d ' ' -f 2-
}

# Comment text of all files named on the command line -> counted words,
# filtered by frequency, longest first.
awk -f "$awkfile" -- "$@" \
  | split_into_words \
  | sort \
  | uniq -c \
  | filter_by_frequency "$minfreq" "$maxfreq" \
  | longest_first
This page took 0.029494 seconds and 4 git commands to generate.