Browse Source

支持联想词

sunyj 8 years ago
parent
commit
ee8a0a219f

+ 25 - 7
search-api-b2b/src/main/java/com/uas/search/b2b/service/SearchService.java

@@ -198,20 +198,38 @@ public interface SearchService {
 	 * 根据关键词、单据类型、状态码搜索单据id
 	 * 
 	 * @param keyword
-	 *            为空,模糊搜索关键词,可以是:单据编号、供应商uu(采购)、供应商名称(采购)、客户uu(销售)、客户名称(销售)、
+	 *            为空,模糊搜索关键词,可以是:单据编号、供应商uu(采购)、供应商名称(采购)、客户uu(销售)、客户名称(销售)、
 	 *            物料编号、 物料名称、物料规格
 	 * @param tableName
 	 *            不为空,单据类型
 	 * @param pageParams
 	 *            可为空,可能含有翻页信息,filters中可能有过滤信息,包括:1.状态、所属企业uu、其他状态(如已采纳、未采纳等),
-	 *            该部分参数的键为数据库表中相应的字段名称,值为字段对应的值,若值有多个,则使用com.uas.search.b2b.
-	 *            model.MultiValue,则;2.开始时间(Long)、截止时间(Long),
-	 *            这两个参数用于对时间范围进行筛选(包含开始和截止时间),键为com.uas.search.b2b.util.
-	 *            SearchConstants中的常量;3.排序方式(ArrayList(Sort)),键为com.uas.search.
-	 *            b2b.util.SearchConstants中的常量,值为List(com.uas.search.b2b.model.
-	 *            Sort)
+	 *            该部分参数的键为数据库表中相应的字段名称,值为字段对应的值,若值有多个,则使用
+	 *            {@link com.uas.search.b2b.model.MultiValue}
+	 *            ;2.开始时间(Long)、截止时间(Long), 这两个参数用于对时间范围进行筛选(包含开始和截止时间),键为
+	 *            {@link com.uas.search.b2b.util.SearchConstants}
+	 *            中的常量;3.排序方式(ArrayList(Sort)),键为
+	 *            {@link com.uas.search.b2b.util.SearchConstants}中的常量,值为List(
+	 *            {@link com.uas.search.b2b.model.Sort})
 	 * @return 单据id
 	 * @throws SearchException
 	 */
 	public SPage<Long> searchIds(String keyword, Table_name tableName, PageParams pageParams) throws SearchException;
+
+	/**
+	 * Returns suggestion ("similar") words for a keyword, document type and the specified fields.
+	 * 
+	 * @param keyword
+	 *            may be null; the keyword
+	 * @param tableName
+	 *            must not be null; the document type
+	 * @param size
+	 *            may be null; the number of suggestion words
+	 * @param fields
+	 *            the fields to use (names of the corresponding database table columns)
+	 * @return the suggestion words
+	 * @throws SearchException
+	 */
+	public SPage<String> similar(String keyword, Table_name tableName, Integer size, String... fields)
+			throws SearchException;
 }

+ 10 - 0
search-console-b2b/src/main/java/com/uas/search/console/b2b/controller/SearchController.java

@@ -77,6 +77,16 @@ public class SearchController {
 		return searchService.searchIds(keyword, tbName, pageParams);
 	}
 
+	/** Suggestion endpoint: resolves the table name to its enum constant and delegates to the search service. */
+	@RequestMapping("/similar")
+	@ResponseBody
+	public SPage<String> similar(String keyword, String tableName, Integer size, String... fields) {
+		// An empty or missing table name is forwarded to the service as null.
+		boolean tableNameGiven = !StringUtils.isEmpty(tableName);
+		Table_name tbName = tableNameGiven ? Table_name.valueOf(tableName.toUpperCase()) : null;
+		return searchService.similar(keyword, tbName, size, fields);
+	}
+
 	@RequestMapping("/object")
 	@ResponseBody
 	public Object getObjectById(Long id, String tableName) {

+ 65 - 8
search-console-b2b/src/main/java/com/uas/search/console/b2b/service/impl/SearchServiceImpl.java

@@ -10,6 +10,7 @@ import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
 
+import org.apache.commons.lang3.ArrayUtils;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanClause.Occur;
@@ -63,7 +64,9 @@ public class SearchServiceImpl implements SearchService, InnerSearchService {
 		// 获取单据的id
 		List<Long> content = new ArrayList<>();
 		try {
-			SPage<ScoreDoc> scoreDocPage = search(indexSearcher, keyword, tableName, pageParams);
+			// 获取该表keyword可以搜索的域
+			List<String> keywordFields = ClassAndTableNameUtils.getKeywordFields(tableName);
+			SPage<ScoreDoc> scoreDocPage = search(indexSearcher, keyword, tableName, keywordFields, true, pageParams);
 			SPage<Long> sPage = convertSPage(scoreDocPage, Long.class);
 			for (ScoreDoc scoreDoc : scoreDocPage.getContent()) {
 				Document document = indexSearcher.doc(scoreDoc.doc);
@@ -78,6 +81,53 @@ public class SearchServiceImpl implements SearchService, InnerSearchService {
 		}
 	}
 
+	@Override
+	public SPage<String> similar(String keyword, Table_name tableName, Integer size, String... fields)
+			throws SearchException, IllegalArgumentException {
+		if (ArrayUtils.isEmpty(fields)) {
+			throw new IllegalArgumentException("fields不可为空");
+		}
+		// A null keyword is treated as the empty string; upper-case it once for the comparisons below.
+		String word = keyword == null ? "" : keyword;
+		String upperWord = word.toUpperCase();
+
+		// Always request page PAGE_INDEX; a missing or non-positive size falls back to PAGE_SIZE.
+		PageParams paging = new PageParams();
+		paging.setPage(PAGE_INDEX);
+		paging.setSize((size == null || size < 1) ? PAGE_SIZE : size);
+		IndexSearcher indexSearcher = SearchUtils.getIndexSearcher(tableName);
+		try {
+			List<String> searchFields = new ArrayList<>();
+			for (String column : fields) {
+				searchFields.add(ClassAndTableNameUtils.combineField(tableName, column));
+			}
+			// The keyword is not tokenized when looking up suggestions.
+			SPage<ScoreDoc> hits = search(indexSearcher, word, tableName, searchFields, false, paging);
+			SPage<String> result = convertSPage(hits, String.class);
+			List<String> suggestions = new ArrayList<>();
+			for (ScoreDoc hit : hits.getContent()) {
+				Document document = indexSearcher.doc(hit.doc);
+				// Keep the first field value containing the keyword as one contiguous, case-insensitive substring.
+				String suggestion = null;
+				for (int i = 0; suggestion == null && i < searchFields.size(); i++) {
+					String value = document.get(searchFields.get(i));
+					if (value != null && value.toUpperCase().contains(upperWord)) {
+						suggestion = value;
+					}
+				}
+				if (suggestion != null) {
+					suggestions.add(suggestion);
+				}
+			}
+			result.setContent(suggestions);
+			return result;
+		} catch (IOException e) {
+			throw new SearchException(e).setDetailedMessage(e);
+		} finally {
+			SearchUtils.releaseIndexSearcher(indexSearcher);
+		}
+	}
+
 	/**
 	 * 转换SPage
 	 * 
@@ -100,6 +150,10 @@ public class SearchServiceImpl implements SearchService, InnerSearchService {
 	 *            物料编号、 物料名称、物料规格
 	 * @param tableName
 	 *            不为空,单据类型
+	 * @param keywordFields
+	 *            不为空,模糊搜索的域,多个域之间取或操作
+	 * @param tokenized
+	 *            可为空,模糊搜索时,是否对关键词进行分词,为空时,不分词
 	 * @param pageParams
 	 *            可为空,可能含有翻页信息,filters中可能有过滤信息,包括:1.状态、所属企业uu、其他状态(如已采纳、未采纳等),
 	 *            该部分参数的键为数据库表中相应的字段名称,值为字段对应的值,若值有多个,则使用com.uas.search.b2b.
@@ -113,10 +167,11 @@ public class SearchServiceImpl implements SearchService, InnerSearchService {
 	 */
 	@SuppressWarnings("unchecked")
 	private SPage<ScoreDoc> search(IndexSearcher indexSearcher, String keyword, Table_name tableName,
-			PageParams pageParams) throws IOException {
+			List<String> keywordFields, Boolean tokenized, PageParams pageParams) throws IOException {
 
-		// 获取该表keyword可以搜索的域
-		List<String> keywordFields = ClassAndTableNameUtils.getKeywordFields(tableName);
+		if (CollectionUtils.isEmpty(keywordFields)) {
+			throw new IllegalArgumentException("keywordFields不可为空");
+		}
 
 		SPage<ScoreDoc> sPage = new SPage<>();
 		BooleanQuery booleanQuery = new BooleanQuery();
@@ -132,7 +187,7 @@ public class SearchServiceImpl implements SearchService, InnerSearchService {
 			// keyword可能是哪些域,域之间进行或操作
 			BooleanQuery booleanQuery2 = new BooleanQuery();
 			for (String keywordField : keywordFields) {
-				booleanQuery2.add(SearchUtils.getBooleanQuery(keywordField, str), BooleanClause.Occur.SHOULD);
+				booleanQuery2.add(SearchUtils.regexpQuery(keywordField, str, tokenized), BooleanClause.Occur.SHOULD);
 			}
 			booleanQuery.add(booleanQuery2, BooleanClause.Occur.MUST);
 		}
@@ -221,12 +276,12 @@ public class SearchServiceImpl implements SearchService, InnerSearchService {
 						Occur occur = multiValue.isOr() ? Occur.SHOULD : Occur.MUST;
 						BooleanQuery booleanQuery2 = new BooleanQuery();
 						for (Object object : values) {
-							booleanQuery2.add(SearchUtils.getRegexpQuery(field, String.valueOf(object)), occur);
+							booleanQuery2.add(SearchUtils.regexpQuery(field, String.valueOf(object), false), occur);
 						}
 						booleanQuery.add(booleanQuery2, Occur.MUST);
 					} else {
 						if (value != null) {
-							booleanQuery.add(SearchUtils.getRegexpQuery(field, String.valueOf(value)), Occur.MUST);
+							booleanQuery.add(SearchUtils.regexpQuery(field, String.valueOf(value), false), Occur.MUST);
 						}
 					}
 
@@ -304,7 +359,9 @@ public class SearchServiceImpl implements SearchService, InnerSearchService {
 			PageParams pageParams = new PageParams();
 			pageParams.setPage(page);
 			pageParams.setSize(size);
-			SPage<ScoreDoc> scoreDocPage = search(indexSearcher, null, tableName, pageParams);
+			// 获取该表keyword可以搜索的域
+			List<String> keywordFields = ClassAndTableNameUtils.getKeywordFields(tableName);
+			SPage<ScoreDoc> scoreDocPage = search(indexSearcher, null, tableName, keywordFields, null, pageParams);
 			SPage<T> sPage = convertSPage(scoreDocPage, clazz);
 			for (ScoreDoc scoreDoc : scoreDocPage.getContent()) {
 				Document document = indexSearcher.doc(scoreDoc.doc);

+ 32 - 12
search-console-b2b/src/main/java/com/uas/search/console/b2b/util/SearchUtils.java

@@ -13,6 +13,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.RegexpQuery;
 import org.apache.lucene.search.ScoreDoc;
@@ -68,7 +69,7 @@ public class SearchUtils {
 	}
 
 	/**
-	 * 对搜索词进行分词后组合得到BooleanQuery
+	 * 对搜索词进行分词后组合得到BooleanQuery(内部条件为PrefixQuery)
 	 * 
 	 * @param field
 	 *            搜索的域名
@@ -76,23 +77,18 @@ public class SearchUtils {
 	 *            搜索关键词
 	 * @return
 	 */
-	public static BooleanQuery getBooleanQuery(String field, String keyword) {
-		if (StringUtils.isEmpty(field)) {
+	public static BooleanQuery booleanQuery(String field, String keyword) {
+		if (StringUtils.isEmpty(field) || StringUtils.isEmpty(keyword)) {
 			return null;
 		}
 		BooleanQuery booleanQuery = new BooleanQuery();
-		if (StringUtils.isEmpty(keyword)) {
-			booleanQuery.add(new RegexpQuery(new Term(field, ".*.*")), BooleanClause.Occur.MUST);
-			return booleanQuery;
-		}
 		Analyzer analyzer = new IKAnalyzer(true);
 		try {
 			TokenStream tokenStream = analyzer.tokenStream(field, keyword);
 			tokenStream.reset();
 			CharTermAttribute cta = tokenStream.addAttribute(CharTermAttribute.class);
 			while (tokenStream.incrementToken()) {
-				booleanQuery.add(new RegexpQuery(new Term(field, ".*" + cta.toString() + ".*")),
-						BooleanClause.Occur.MUST);
+				booleanQuery.add(new PrefixQuery(new Term(field, cta.toString())), BooleanClause.Occur.MUST);
 			}
 			tokenStream.close();
 			analyzer.close();
@@ -109,14 +105,38 @@ public class SearchUtils {
 	 *            搜索的域名
 	 * @param keyword
 	 *            搜索关键词
+	 * @param tokenized
+	 *            可为空,模糊搜索时,是否对关键词进行分词,为空时,不分词
 	 * @return
 	 */
-	// TODO 如果用正则表达式进行搜索,就不需要再分词(分词的字段可能会导致边界问题)
-	public static RegexpQuery getRegexpQuery(String field, String keyword) {
+	// TODO 如果用正则表达式进行搜索,建索引时就不需要再分词(分词的字段可能会导致边界问题) <- 可以分词
+	public static Query regexpQuery(String field, String keyword, Boolean tokenized) {
 		if (StringUtils.isEmpty(field)) {
 			return null;
 		}
-		return new RegexpQuery(new Term(field, ".*" + keyword.toLowerCase() + ".*"));
+		// A null keyword must match everything; concatenating it directly would build the pattern ".*null.*".
+		if (keyword == null) {
+			keyword = "";
+		}
+		// NOTE(review): the replaced getRegexpQuery lower-cased the keyword; this method does not - confirm intended.
+		// Empty keyword, or tokenizing not requested: return a single RegexpQuery over the whole keyword.
+		if (StringUtils.isEmpty(keyword) || tokenized == null || !tokenized.booleanValue()) {
+			return new RegexpQuery(new Term(field, ".*" + keyword + ".*"));
+		}
+		BooleanQuery booleanQuery = new BooleanQuery();
+		// try-with-resources closes the token stream, then the analyzer, even when tokenizing fails.
+		try (Analyzer analyzer = new IKAnalyzer(true);
+				TokenStream tokenStream = analyzer.tokenStream(field, keyword)) {
+			tokenStream.reset();
+			CharTermAttribute cta = tokenStream.addAttribute(CharTermAttribute.class);
+			while (tokenStream.incrementToken()) {
+				booleanQuery.add(new RegexpQuery(new Term(field, ".*" + cta.toString() + ".*")),
+						BooleanClause.Occur.MUST);
+			}
+		} catch (IOException e) {
+			logger.error("", e);
+		}
+		return booleanQuery;
 	}
 
 	/**

+ 6 - 0
search-console-b2b/src/main/webapp/WEB-INF/views/console.html

@@ -15,6 +15,12 @@
 				<li><a target="_blank">search?keyword=MSQ150800&tableName=MAKE$ORDERS</a></li>
 			</ol>
 
+			<strong><li class="title">联想词</li></strong>
+			<ol>
+				<li>search/similar?keyword=P&tableName=PURC$ORDERS&size=10&fields=pu_code&fields=pu_venduu</li>
+				<li><a target="_blank">search/similar?keyword=a&tableName=PURC$ORDERS&fields=pu_code</a></li>
+			</ol>
+
 			<strong><li class="title">获取索引中的数据</li></strong>
 			<ol>
 				<li><a target="_blank">search/object?id=115940&tableName=PURC$ORDERS</a></li>