|
|
@@ -1,9 +1,5 @@
|
|
|
package com.uas.search;
|
|
|
|
|
|
-import com.hankcs.hanlp.HanLP;
|
|
|
-import com.hankcs.hanlp.seg.Segment;
|
|
|
-import com.hankcs.hanlp.seg.common.Term;
|
|
|
-import com.hankcs.hanlp.suggest.Suggester;
|
|
|
import com.uas.search.util.CollectionUtils;
|
|
|
import com.uas.search.util.StringUtils;
|
|
|
import org.apache.lucene.analysis.Analyzer;
|
|
|
@@ -38,8 +34,7 @@ public class DictionaryHelper {
|
|
|
// "编剧邵钧林和稽道青说",
|
|
|
// "这里有关天培的有关事迹",
|
|
|
// "龚学平等领导,邓颖超生前");
|
|
|
- print(hanlp(words));
|
|
|
-// print(ik(words));
|
|
|
+ print(ik(words));
|
|
|
// suggest();
|
|
|
}
|
|
|
|
|
|
@@ -80,24 +75,6 @@ public class DictionaryHelper {
|
|
|
return words;
|
|
|
}
|
|
|
|
|
|
- private static List<String> hanlp(List<String> words) throws IOException {
|
|
|
- List<String> result = new ArrayList<>();
|
|
|
- Segment segment = HanLP.newSegment().enableNameRecognize(true);
|
|
|
- for (String sentence : words) {
|
|
|
- List<Term> termList = segment.seg(sentence);
|
|
|
- for (Term term : termList) {
|
|
|
- result.add(term.word);
|
|
|
- }
|
|
|
-// List<String> keywords = HanLP.extractKeyword(sentence, 5);
|
|
|
- }
|
|
|
- for(int i = result.size() - 1; i >= 0; i--){
|
|
|
- if(result.get(i).matches("[`~!@#$^&*()=|{}':;',\\[\\].<>/?~!@#¥……&*()——|{}【】‘;:”“'。,、?\\s]+")){
|
|
|
- result.remove(i);
|
|
|
- }
|
|
|
- }
|
|
|
- return result;
|
|
|
- }
|
|
|
-
|
|
|
private static List<String> ik(List<String> words) throws IOException {
|
|
|
List<String> result = new ArrayList<>();
|
|
|
for (String sentence : words) {
|
|
|
@@ -113,23 +90,4 @@ public class DictionaryHelper {
|
|
|
}
|
|
|
return result;
|
|
|
}
|
|
|
-
|
|
|
- private static void suggest() {
|
|
|
- Suggester suggester = new Suggester();
|
|
|
- String[] titleArray =
|
|
|
- (
|
|
|
- "威廉王子发表演说 呼吁保护野生动物\n" +
|
|
|
- "《时代》年度人物最终入围名单出炉 普京马云入选\n" +
|
|
|
- "“黑格比”横扫菲:菲吸取“海燕”经验及早疏散\n" +
|
|
|
- "日本保密法将正式生效 日媒指其损害国民知情权\n" +
|
|
|
- "英报告说空气污染带来“公共健康危机”"
|
|
|
- ).split("\\n");
|
|
|
- for (String title : titleArray) {
|
|
|
- suggester.addSentence(title);
|
|
|
- }
|
|
|
-
|
|
|
- System.out.println(suggester.suggest("发言", 2)); // 语义
|
|
|
- System.out.println(suggester.suggest("危机公共", 2)); // 字符
|
|
|
- System.out.println(suggester.suggest("mayun", 1)); // 拼音
|
|
|
- }
|
|
|
}
|