forked from vector4wang/spring-boot-quick
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNews.java
More file actions
78 lines (67 loc) · 1.51 KB
/
News.java
File metadata and controls
78 lines (67 loc) · 1.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
package com.quick.simhash;
import java.util.HashMap;
import java.util.Map;
/**
 * A news article whose text has already been word-segmented, together with
 * per-word statistics derived from that text.
 *
 * <p>{@link #statWords()} treats {@code content} as space-separated tokens of
 * the form {@code word/tag}. After it has run, {@code word2Count} maps each
 * retained word to its relative frequency among the retained words.
 */
public class News {
    // Segmented text of the news article (space-separated "word/tag" tokens).
    String content;
    // Per-word statistic; after statWords() it holds each word's relative frequency.
    HashMap<String, Double> word2Count;

    /**
     * Creates a news item with an empty statistics table.
     *
     * @param content
     *            segmented article text; may be {@code null}, which is
     *            treated as having no words
     */
    public News(String content) {
        this.content = content;
        this.word2Count = new HashMap<String, Double>();
    }

    /**
     * Computes the relative frequency of every retained word in the content
     * and stores the result in {@code word2Count}, replacing earlier results.
     *
     * <p>A token is retained when it contains a '/' separator and its tag
     * (the text after the first '/') does not contain "wn" or "wd" - the
     * punctuation/comma tags the original author chose to filter out. The
     * frequency of a word is its occurrence count divided by the number of
     * retained tokens, so the stored frequencies sum to 1.
     */
    public void statWords() {
        // Count into a scratch map so that calling this method again starts
        // from zero instead of adding to previously stored frequencies.
        HashMap<String, Double> counts = new HashMap<String, Double>();
        int validCount = 0;

        if (this.content != null) {
            for (String token : this.content.split(" ")) {
                int index = token.indexOf('/');
                if (index == -1) {
                    // No tag separator (e.g. the empty string produced by
                    // consecutive spaces): not a word, so not counted at all.
                    continue;
                }

                // Inspect only the tag; testing the whole token would also
                // discard real words whose text contains "wn"/"wd".
                String tag = token.substring(index + 1);
                if (tag.contains("wn") || tag.contains("wd")) {
                    continue;
                }

                String w = token.substring(0, index);
                Double previous = counts.get(w);
                counts.put(w, previous == null ? 1.0 : previous + 1.0);
                validCount++;
            }
        }

        // Reuse the existing map instance so external references stay valid.
        this.word2Count.clear();
        for (Map.Entry<String, Double> entry : counts.entrySet()) {
            // validCount is at least 1 whenever counts has an entry.
            this.word2Count.put(entry.getKey(), entry.getValue() / validCount);
        }
    }

    /**
     * Looks up the stored frequency of a word.
     *
     * @param word
     *            the word to look up
     * @return the value stored for {@code word}, or -1 if the word is not
     *         present in the statistics table
     */
    public double getWordFrequentValue(String word) {
        Double frequency = this.word2Count.get(word);
        if (frequency == null) {
            return -1;
        }
        return frequency;
    }
}