forked from ls1248659692/python_guide
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtf_10.java
More file actions
180 lines (147 loc) · 4.75 KB
/
tf_10.java
File metadata and controls
180 lines (147 loc) · 4.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import java.io.*;
import java.util.*;
/**
 * Entry point of the word-frequency program.
 */
public class tf_10 {
    /**
     * Runs the word-frequency report for the file named by the first argument.
     *
     * @param args command-line arguments; {@code args[0]} is the path of the file to analyse
     * @throws IOException propagated from the {@code WordFrequencyController} constructor
     */
    public static void main(String[] args) throws IOException {
        // Report a missing argument with a readable message instead of letting
        // args[0] fail with an ArrayIndexOutOfBoundsException.
        if (args.length < 1) {
            System.err.println("Usage: java tf_10 <path-to-file>");
            System.exit(1);
        }
        new WordFrequencyController(args[0]).run();
    }
}
/*
* The classes
*/
/**
 * Base class of the program's components; gives each of them a self-description.
 */
abstract class TFExercise {
    /**
     * Describes this object.
     *
     * @return the name of this object's runtime class
     */
    public String getInfo() {
        Class<?> runtimeType = getClass();
        return runtimeType.getName();
    }
}
/**
 * Ties the three managers together: takes the words of the input file, skips the
 * stop words, counts the remaining ones and prints the head of the ranking.
 */
class WordFrequencyController extends TFExercise {
    private DataStorageManager storageManager;
    private StopWordManager stopWordManager;
    private WordFrequencyManager wordFreqManager;

    /**
     * Creates the collaborating managers.
     *
     * @param pathToFile path of the text file whose words are counted
     * @throws IOException propagated from the manager constructors that read files
     */
    public WordFrequencyController(String pathToFile) throws IOException {
        storageManager = new DataStorageManager(pathToFile);
        stopWordManager = new StopWordManager();
        wordFreqManager = new WordFrequencyManager();
    }

    /**
     * Counts every word that is not a stop word, then prints at most 25
     * "word - frequency" lines in the order given by the frequency manager's
     * {@code sorted()} list.
     */
    public void run() {
        for (String candidate : storageManager.getWords()) {
            if (stopWordManager.isStopWord(candidate)) {
                continue;
            }
            wordFreqManager.incrementCount(candidate);
        }

        List<WordFrequencyPair> ranking = wordFreqManager.sorted();
        int limit = Math.min(25, ranking.size());
        for (int i = 0; i < limit; i++) {
            WordFrequencyPair entry = ranking.get(i);
            System.out.println(entry.getWord() + " - " + entry.getFrequency());
        }
    }
}
/** Models the contents of the file. */
class DataStorageManager extends TFExercise {
private List<String> words;
public DataStorageManager(String pathToFile) throws IOException {
this.words = new ArrayList<String>();
Scanner f = new Scanner(new File(pathToFile), "UTF-8");
try {
f.useDelimiter("[\\W_]+");
while (f.hasNext()) {
this.words.add(f.next().toLowerCase());
}
} finally {
f.close();
}
}
public List<String> getWords() {
return this.words;
}
public String getInfo() {
return super.getInfo() + ": My major data structure is a " + this.words.getClass().getName();
}
}
/** Models the stop word filter. */
class StopWordManager extends TFExercise {
private Set<String> stopWords;
public StopWordManager() throws IOException {
this.stopWords = new HashSet<String>();
Scanner f = new Scanner(new File("../stop_words.txt"), "UTF-8");
try {
f.useDelimiter(",");
while (f.hasNext()) {
this.stopWords.add(f.next());
}
} finally {
f.close();
}
// Add single-letter words
for (char c = 'a'; c <= 'z'; c++) {
this.stopWords.add("" + c);
}
}
public boolean isStopWord(String word) {
return this.stopWords.contains(word);
}
public String getInfo() {
return super.getInfo() + ": My major data structure is a " + this.stopWords.getClass().getName();
}
}
/** Keeps the word frequency data. */
class WordFrequencyManager extends TFExercise {
    /** Maps each counted word to its running count. */
    private Map<String, MutableInteger> wordFreqs;

    /** Creates a manager with no words counted yet. */
    public WordFrequencyManager() {
        wordFreqs = new HashMap<String, MutableInteger>();
    }

    /**
     * Adds one occurrence of a word.
     *
     * @param word the word that was seen
     */
    public void incrementCount(String word) {
        MutableInteger tally = wordFreqs.get(word);
        if (tally != null) {
            tally.setValue(tally.getValue() + 1);
            return;
        }
        // First occurrence of this word.
        wordFreqs.put(word, new MutableInteger(1));
    }

    /**
     * Builds the ranking of all counted words.
     *
     * @return a new list of word/frequency pairs, sorted by the pairs' natural
     *         ordering and then reversed
     */
    public List<WordFrequencyPair> sorted() {
        List<WordFrequencyPair> ranking = new ArrayList<WordFrequencyPair>();
        Iterator<Map.Entry<String, MutableInteger>> cursor = wordFreqs.entrySet().iterator();
        while (cursor.hasNext()) {
            Map.Entry<String, MutableInteger> counted = cursor.next();
            String word = counted.getKey();
            int occurrences = counted.getValue().getValue();
            ranking.add(new WordFrequencyPair(word, occurrences));
        }
        Collections.sort(ranking);
        Collections.reverse(ranking);
        return ranking;
    }

    /**
     * Describes this object and its main data structure.
     *
     * @return the inherited description followed by the runtime class name of the frequency map
     */
    public String getInfo() {
        String mapType = wordFreqs.getClass().getName();
        return super.getInfo() + ": My major data structure is a " + mapType;
    }
}
/**
 * A mutable holder for a single {@code int}, so a stored count can be updated in place.
 */
class MutableInteger {
    /** The number currently held. */
    private int value;

    /**
     * Creates a holder with an initial number.
     *
     * @param value the number to start with
     */
    public MutableInteger(int value) {
        this.value = value;
    }

    /**
     * Reads the held number.
     *
     * @return the current value
     */
    public int getValue() {
        return this.value;
    }

    /**
     * Replaces the held number.
     *
     * @param value the new value
     */
    public void setValue(int value) {
        this.value = value;
    }
}
/**
 * An immutable pairing of a word with its frequency, ordered by frequency.
 */
class WordFrequencyPair implements Comparable<WordFrequencyPair> {
    private final String word;
    private final int frequency;

    /**
     * Creates a pair.
     *
     * @param word the word
     * @param frequency how often the word occurred
     */
    public WordFrequencyPair(String word, int frequency) {
        this.word = word;
        this.frequency = frequency;
    }

    /**
     * @return the word of this pair
     */
    public String getWord() {
        return word;
    }

    /**
     * @return the frequency of this pair
     */
    public int getFrequency() {
        return frequency;
    }

    /**
     * Orders pairs by ascending frequency; the word is not taken into account.
     *
     * @param other the pair to compare with
     * @return a negative number, zero or a positive number when this pair's frequency
     *         is lower than, equal to or higher than the other pair's
     */
    public int compareTo(WordFrequencyPair other) {
        // Compare explicitly rather than subtracting: a difference of two ints can
        // overflow and flip the sign of the result.
        if (this.frequency < other.frequency) {
            return -1;
        }
        return (this.frequency == other.frequency) ? 0 : 1;
    }
}