目標
1、使用ngram分詞器在Elasticsearch中實現模糊搜索
2、keyword忽略大小寫
一、代碼
Controller
/**
 * Endpoint that asks {@code esMemberService} to create the {@code member} index.
 *
 * @return the service's outcome wrapped with {@code Result.success}
 * @throws Exception as declared by the endpoint signature
 */
@PostMapping("createIndex")
@ApiOperation(value="創(chuàng)建索引")
public Result<Boolean> createIndex() throws Exception {
    // The index name is fixed; the service reports whether creation was acknowledged.
    return Result.success(esMemberService.createIndex("member"));
}
Service
/**
 * Creates the index named {@code index} with the settings from
 * {@code packageSetting()} and the mapping from {@code packageMapping()}.
 *
 * @param index name of the index to create
 * @return the value returned by {@code createIndexSetting}
 */
@Override
public Boolean createIndex(String index) {
    // Arguments are evaluated left to right: settings first, then mapping.
    return createIndexSetting(index, packageSetting(), packageMapping());
}
/**
 * Builds the field mapping for the index.
 *
 * <p>{@code markId} is a keyword using the {@code lowercase} normalizer and
 * {@code nickName} is a text field using the {@code ngram} analyzer; both
 * names must be defined in the index settings.
 *
 * @return the mapping as a JSON content builder; never {@code null}
 * @throws java.io.UncheckedIOException if the JSON builder fails, so that the
 *         index is not silently created without its explicit mapping
 */
private XContentBuilder packageMapping() {
    // Shared by both date fields.
    final String dateFormat = "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis";
    try {
        // Build the index mapping.
        return XContentFactory.jsonBuilder()
            .startObject()
                .field("dynamic", true)
                .startObject("properties")
                    // id: stored but not indexed
                    .startObject("id")
                        .field("type", "long")
                        .field("index", false)
                    .endObject()
                    // account: keyword, case-insensitive via the normalizer
                    .startObject("markId")
                        .field("type", "keyword")
                        .field("normalizer", "lowercase")
                    .endObject()
                    // nickname: fuzzy search, case-insensitive
                    .startObject("nickName")
                        .field("type", "text")
                        .field("analyzer", "ngram")
                    .endObject()
                    // avatar: not indexed
                    .startObject("iconUrl")
                        .field("type", "text")
                        .field("index", false)
                    .endObject()
                    // gender
                    .startObject("sex")
                        .field("type", "keyword")
                    .endObject()
                    .startObject("mobile")
                        .field("type", "keyword")
                    .endObject()
                    // latitude / longitude
                    .startObject("location")
                        .field("type", "geo_point")
                    .endObject()
                    // address
                    .startObject("address")
                        .field("type", "text")
                    .endObject()
                    .startObject("openMobile")
                        .field("type", "keyword")
                    .endObject()
                    .startObject("birthday")
                        .field("type", "date")
                        .field("format", dateFormat)
                    .endObject()
                    .startObject("createTime")
                        .field("type", "date")
                        .field("format", dateFormat)
                    .endObject()
                .endObject()
            .endObject();
    } catch (java.io.IOException e) {
        // Fail fast with the cause preserved instead of printing the stack trace and returning null.
        throw new java.io.UncheckedIOException("Failed to build index mapping", e);
    }
}
/**
 * Builds the index settings: the ngram analyzer and the lowercase normalizer.
 *
 * <ul>
 *   <li>ngram: splits English words into letter-level grams.</li>
 *   <li>{@code field("filter","lowercase")}: makes searches case-insensitive.</li>
 *   <li>{@code index.max_ngram_diff}: the allowed difference between
 *       {@code min_gram} and {@code max_gram}.
 *       See https://www.elastic.co/guide/en/elasticsearch/reference/6.8/analysis-ngram-tokenizer.html</li>
 *   <li>normalizer: removes case sensitivity from keyword fields.
 *       See https://www.elastic.co/guide/en/elasticsearch/reference/6.0/normalizer.html</li>
 * </ul>
 *
 * @return the settings as a JSON content builder; never {@code null}
 * @throws java.io.UncheckedIOException if the JSON builder fails, so that the
 *         index is not silently created without its analysis settings
 */
private XContentBuilder packageSetting() {
    try {
        // Build the index settings.
        return XContentFactory.jsonBuilder()
            .startObject()
                .field("index.max_ngram_diff","5")
                .startObject("analysis")
                    .startObject("analyzer")
                        // Custom analyzer named "ngram": ngram tokenizer + lowercase filter.
                        .startObject("ngram")
                            .field("tokenizer","my_tokenizer")
                            .field("filter","lowercase")
                        .endObject()
                    .endObject()
                    .startObject("tokenizer")
                        .startObject("my_tokenizer")
                            .field("type","ngram")
                            .field("min_gram","1")
                            .field("max_gram","3")
                        .endObject()
                    .endObject()
                    .startObject("normalizer")
                        // Normalizer named "lowercase" for keyword fields.
                        .startObject("lowercase")
                            .field("type","custom")
                            .field("filter","lowercase")
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();
    } catch (java.io.IOException e) {
        // Fail fast with the cause preserved instead of printing the stack trace and returning null.
        throw new java.io.UncheckedIOException("Failed to build index settings", e);
    }
}
/**
 * Creates an index with optional settings and mapping.
 *
 * @param indexName name of the index to create
 * @param settings  index settings; skipped when {@code null}
 * @param mapping   index mapping; skipped when {@code null}
 * @return whether the create request was acknowledged; {@code false} when
 *         any exception occurs (the failure is logged, not rethrown)
 */
protected Boolean createIndexSetting(String indexName, XContentBuilder settings,XContentBuilder mapping) {
    try {
        CreateIndexRequest request = buildCreateIndexRequest(indexName);
        if (settings != null) {
            request.settings(settings);
        }
        if (mapping != null) {
            request.mapping(mapping);
        }
        // Obtain the indices client and create the index.
        IndicesClient indices = client.indices();
        CreateIndexResponse response = indices.create(request, COMMON_OPTIONS);
        log.info("是否所有節(jié)點都已確認請求: " + response.isAcknowledged());
        log.info("指示是否在超時之前為索引中的每個分片啟動了必要數(shù)量的分片副本: " + response.isShardsAcknowledged());
        return response.isAcknowledged();
    } catch (Exception e) {
        // Best-effort contract kept (returns false), but the failure goes to the
        // logger with the index name and cause instead of printStackTrace().
        log.error("Failed to create index " + indexName, e);
        return false;
    }
}
二、JSON格式
PUT member
{
"settings": {
"index.max_ngram_diff":"5",
"analysis": {
"analyzer": {
"ngram": {
"tokenizer": "my_tokenizer",
"filter": "lowercase"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "ngram",
"min_gram": 1,
"max_gram": 3
}
}
}
}
}
三、參數說明
3.1 filter:lowercase
大小寫兼容搜索,即字段內容為alan時,搜索alan、ALAN、Alan都可以搜索出來。
3.2 min_gram、max_gram
根據min_gram以及max_gram指定切分時最小幾個字符、最大幾個字符。長度越短,切分出來的詞元越少,能匹配到的文檔更多,但匹配質量也越差;長度越長,切分出來的詞元越多,匹配越精確。
如min_gram為1,max_gram為1,對于Quick這個單詞,就會變成[ Q, u, i, c, k ]。按關鍵字Qui搜索,關鍵字就會被拆分成Q、u、i三個字母去搜索,可能就會搜索出:Quick、Query、your、like等單詞。
如min_gram為1,max_gram為3,對于Quick這個單詞,就會變成[ Q, Qu, Qui, u, ui, uic, i, ic, ick, c, ck, k ]。按關鍵字Qui搜索,只會去匹配包含Qui的單詞,因此搜索結果只有Quick。
3.3 index.max_ngram_diff
min_gram默認值為1,max_gram默認值為2,min_gram與max_gram的差值默認最大為1,如果設置值時差值大于1,需要先設置index.max_ngram_diff參數。