|
|
@@ -1,18 +1,10 @@
|
|
|
package com.uas.search.sort;
|
|
|
|
|
|
-import com.uas.search.analyzer.IKAnalyzer;
|
|
|
-import org.apache.lucene.analysis.Analyzer;
|
|
|
-import org.apache.lucene.analysis.TokenStream;
|
|
|
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
|
import org.apache.lucene.search.FieldComparator;
|
|
|
import org.apache.lucene.search.FieldComparatorSource;
|
|
|
import org.springframework.util.StringUtils;
|
|
|
|
|
|
import java.io.IOException;
|
|
|
-import java.util.ArrayList;
|
|
|
-import java.util.List;
|
|
|
-import java.util.regex.Matcher;
|
|
|
-import java.util.regex.Pattern;
|
|
|
|
|
|
/**
|
|
|
* 用于 StringField 的排序
|
|
|
@@ -43,95 +35,51 @@ public class StringFieldComparatorSource extends FieldComparatorSource {
|
|
|
str2 = str2.toLowerCase();
|
|
|
keyword = keyword.toLowerCase();
|
|
|
|
|
|
+ // indexOf returns -1 when the keyword is absent and 0 when the string starts with it
|
|
|
+ int index1 = str1.indexOf(keyword);
|
|
|
+ int index2 = str2.indexOf(keyword);
|
|
|
// 一方包含关键词,另一方不包含时,该方排序靠前
|
|
|
- if (str1.contains(keyword)) {
|
|
|
- if (str2.contains(keyword)) {
|
|
|
+ if (index1 >= 0) {
|
|
|
+ if (index2 >= 0) {
|
|
|
// 均包含关键词时,比较关键词在字符串中的位置,位置越靠前,最终排序越靠前
|
|
|
- int index1 = str1.indexOf(keyword);
|
|
|
- int index2 = str2.indexOf(keyword);
|
|
|
if (index1 < index2) {
|
|
|
return AHEAD;
|
|
|
} else if (index1 > index2) {
|
|
|
return BEHIND;
|
|
|
} else {
|
|
|
// 关键词在字符串中的位置相同时,比较字符串长度,长度较短,则排序靠前
|
|
|
- int length1 = str1.length();
|
|
|
- int length2 = str2.length();
|
|
|
- if (length1 < length2) {
|
|
|
- return AHEAD;
|
|
|
- } else if (length1 > length2) {
|
|
|
- return BEHIND;
|
|
|
- } else {
|
|
|
- // 字符串长度也相同时,截取含有的第一个关键词后,再递归比较
|
|
|
- return compare(str1.substring(index1 + keyword.length()), str2.substring(index2 + keyword.length()));
|
|
|
- }
|
|
|
+ return compareLength(str1, str2);
|
|
|
}
|
|
|
} else {
|
|
|
return AHEAD;
|
|
|
}
|
|
|
} else {
|
|
|
- if (str2.contains(keyword)) {
|
|
|
+ if (index2 >= 0) {
|
|
|
return BEHIND;
|
|
|
} else {
|
|
|
- // 均不包含关键词时,统计分词后的词在字符串中的分布情况,比例较高,则排序靠前
|
|
|
- List<String> tokenizedWords = tokenize();
|
|
|
- int count1 = 0;
|
|
|
- int count2 = 0;
|
|
|
- for (String tokenizedWord : tokenizedWords) {
|
|
|
- count1 += count(str1, tokenizedWord);
|
|
|
- count2 += count(str2, tokenizedWord);
|
|
|
- }
|
|
|
- double percent1 = count1 / (1.0 * str1.length());
|
|
|
- double percent2 = count2 / (1.0 * str2.length());
|
|
|
- if (percent1 > percent2) {
|
|
|
- return AHEAD;
|
|
|
- } else if (percent1 < percent2) {
|
|
|
- return BEHIND;
|
|
|
- } else {
|
|
|
- // 分词后的词在字符串中的比例相同时,比较字符串长度,长度较短,则排序靠前
|
|
|
- int length1 = str1.length();
|
|
|
- int length2 = str2.length();
|
|
|
- if (length1 < length2) {
|
|
|
- return AHEAD;
|
|
|
- } else if (length1 > length2) {
|
|
|
- return BEHIND;
|
|
|
- } else {
|
|
|
- return PARALLEL;
|
|
|
- }
|
|
|
- }
|
|
|
+ // Neither string contains the keyword: compare lengths, the shorter string sorts first
|
|
|
+ return compareLength(str1, str2);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /**
|
|
|
- * @return 对关键词进行分词
|
|
|
- */
|
|
|
- private List<String> tokenize() {
|
|
|
- List<String> keywords = new ArrayList<>();
|
|
|
- try (Analyzer analyzer = new IKAnalyzer(true);
|
|
|
- TokenStream tokenStream = analyzer.tokenStream(fieldname, keyword)) {
|
|
|
- tokenStream.reset();
|
|
|
- CharTermAttribute cta = tokenStream.addAttribute(CharTermAttribute.class);
|
|
|
- while (tokenStream.incrementToken()) {
|
|
|
- keywords.add(cta.toString());
|
|
|
- }
|
|
|
- } catch (IOException e) {
|
|
|
- throw new IllegalStateException("排序时分词错误:fieldname=" + fieldname + ", keyword=" + keyword, e);
|
|
|
- }
|
|
|
- return keywords;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 统计 str 中 sub 的数目
|
|
|
- */
|
|
|
- private int count(String str, String sub) {
|
|
|
- int count = 0;
|
|
|
- Pattern pattern = Pattern.compile("[\\s\\S]*?" + sub + "[\\s\\S]*?");
|
|
|
- Matcher matcher = pattern.matcher(str);
|
|
|
- while (matcher.find()) {
|
|
|
- count++;
|
|
|
|
|
|
+ /**
|
|
|
+ * Orders two strings by length: the shorter one sorts first.
|
|
|
+ *
|
|
|
+ * @param str1 first string to compare
|
|
|
+ * @param str2 second string to compare
|
|
|
+ * @return AHEAD if str1 is shorter, BEHIND if str1 is longer, PARALLEL if the lengths are equal
|
|
|
+ */
|
|
|
+ private int compareLength(String str1, String str2) {
|
|
|
+ int length1 = str1.length();
|
|
|
+ int length2 = str2.length();
|
|
|
+ if (length1 < length2) {
|
|
|
+ return AHEAD;
|
|
|
+ } else if (length1 > length2) {
|
|
|
+ return BEHIND;
|
|
|
+ } else {
|
|
|
+ return PARALLEL;
|
|
|
}
|
|
|
- return count;
|
|
|
}
|
|
|
};
|
|
|
}
|