一.下載elasticsearch-analysis-ik源碼

二.解壓修改源碼文件
我這裡使用IDEA開發
修改maven依賴es版本號
<elasticsearch.version>7.1.0</elasticsearch.version>
引入oracle驅動
<!-- Oracle JDBC driver -->
<dependency>
<groupId>com.oracle</groupId>
<artifactId>ojdbc7</artifactId>
<version>12.1.0.2</version>
</dependency>
創建循環線程類
package org.wltea.analyzer.dic;
import org.apache.logging.log4j.Logger;
import org.wltea.analyzer.help.ESPluginLoggerFactory;
/**
 * Background task that repeatedly reloads the IK dictionaries.
 *
 * <p>{@link #initial()} loops, fetching the {@code Dictionary} singleton and calling its
 * {@code reLoadMainDict()} method on every pass. This class does not pause between successful
 * passes itself.
 * NOTE(review): pacing is presumably provided by a sleep inside the reload path - confirm before
 * relying on this loop not to spin.
 *
 * @author HuGang 1042258937@qq.com
 * @since 2022/7/19
 */
public class HotDictReloadThread {
    private static final Logger log = ESPluginLoggerFactory.getLogger(HotDictReloadThread.class.getName());

    /** Pause after a failed reload so that a persistent failure cannot turn the loop into a busy spin. */
    private static final long FAILURE_BACKOFF_MILLIS = 1000L;

    /**
     * Runs the reload loop on the calling thread until that thread is interrupted.
     *
     * <p>An unchecked exception thrown by a reload is logged and the loop continues after a short
     * back-off, so a single bad cycle does not silently end hot reloading.
     */
    public void initial() {
        // Checking the interrupt flag lets the owning executor stop the task on shutdown.
        while (!Thread.currentThread().isInterrupted()) {
            log.info("正在調(diào)用HotDictReloadThread...");
            try {
                Dictionary.getSingleton().reLoadMainDict();
            } catch (RuntimeException e) {
                log.error("hot dictionary reload failed; will retry", e);
                try {
                    Thread.sleep(FAILURE_BACKOFF_MILLIS);
                } catch (InterruptedException interrupted) {
                    // Restore the flag so the loop condition sees the interruption and exits.
                    Thread.currentThread().interrupt();
                }
            }
        }
    }
}

在項目中找到Dictionary類,修改initial方法
// Start the custom reload task: hand HotDictReloadThread.initial() to the pool for execution.
pool.execute(() -> new HotDictReloadThread().initial());

準備數據庫文件
-- Create table: one row per custom IK dictionary entry (extension word or stop word).
-- NOTE(review): VARCHAR2(15) is presumably sized in bytes under default length semantics,
-- which limits multi-byte words - confirm the intended maximum word length.
create table T_DSJFX_IK_LEXICON
(
  TEXT VARCHAR2(15) not null,
  TYPE INTEGER default 0,
  STATUS INTEGER default 0,
  SEARCH_COUNT INTEGER default 0,
  LAST_SEARCH_TIME DATE default sysdate
);
-- The terminator above is required: without it the next statement is parsed as part of
-- "create table" and the script fails.
-- Add comments to the table
comment on table T_DSJFX_IK_LEXICON
  is 'ES ik分詞器自定義詞條';
-- Add comments to the columns
comment on column T_DSJFX_IK_LEXICON.TEXT
  is '詞條';
comment on column T_DSJFX_IK_LEXICON.TYPE
  is '0擴(kuò)展詞庫 1停用詞庫';
comment on column T_DSJFX_IK_LEXICON.STATUS
  is '詞條狀態(tài) 0正常 1暫停使用';
comment on column T_DSJFX_IK_LEXICON.SEARCH_COUNT
  is '搜索次數(shù)';
comment on column T_DSJFX_IK_LEXICON.LAST_SEARCH_TIME
  is '最后搜索時(shí)間';
-- Create/Recreate primary, unique and foreign key constraints
-- NOTE(review): tablespace TS_JCJMR_DATA looks environment-specific - adjust to your database.
alter table T_DSJFX_IK_LEXICON
  add constraint PK_LEXICON_TEXT primary key (TEXT)
  using index
  tablespace TS_JCJMR_DATA
  pctfree 10
  initrans 2
  maxtrans 255
  storage
  (
    initial 64K
    next 1M
    minextents 1
    maxextents unlimited
  );
然後我們在項目根路徑的config目錄下新建配置文件jdbc-reload.properties,內容如下
# Company environment
# Database address (JDBC URL)
jdbc.url=jdbc:oracle:thin:@127.0.0.1:1521/hnkcdb
# Database user name
jdbc.user=user
# Database password
jdbc.password=password
# SQL statement that selects the extension-dictionary words (the column must be aliased as "word")
jdbc.reload.sql=select text as word from t_dsjfx_ik_lexicon t where t.type = '0' and t.status = '0'
# SQL statement that selects the stop words (the column must be aliased as "word")
jdbc.reload.stopword.sql=select text as word from t_dsjfx_ik_lexicon t where t.type = '1' and t.status = '0'
# Polling interval in seconds: the database is queried once every 60 seconds
jdbc.reload.interval=60
在Dictionary類中創建獲取數據庫連接的方法
/**
 * Opens a new Oracle JDBC connection using the settings in {@code jdbc-reload.properties}.
 *
 * <p>The file is resolved against {@code getDictRoot()} and loaded into {@code props} on every
 * call. Each loaded property is logged, with the value of {@code jdbc.password} masked so the
 * credential does not end up in the log.
 *
 * @return a new connection, or {@code null} if the configuration could not be read, the driver
 *         class could not be loaded, or the connection attempt failed (the failure is logged)
 */
private Connection getConn() {
    Path file = PathUtils.get(getDictRoot(), "jdbc-reload.properties");
    try {
        // try-with-resources closes the stream; this method runs on every reload cycle,
        // so an unclosed stream here would leak a file handle each time.
        try (FileInputStream in = new FileInputStream(file.toFile())) {
            props.load(in);
        }
        logger.info("[==========]jdbc-reload.properties");
        for (Object key : props.keySet()) {
            String name = String.valueOf(key);
            // Never write the database password to the log.
            String shown = "jdbc.password".equals(name) ? "******" : props.getProperty(name);
            logger.info("[==========]" + name + "=" + shown);
        }
        // Load the driver class explicitly before asking DriverManager for a connection.
        Class.forName("oracle.jdbc.driver.OracleDriver");
        return DriverManager.getConnection(
                props.getProperty("jdbc.url"),
                props.getProperty("jdbc.user"),
                props.getProperty("jdbc.password"));
    } catch (Exception e) {
        // Best-effort by design: callers receive null instead of an exception.
        logger.error("[==========]failed to open JDBC connection using " + file, e);
        return null;
    }
}
找到Dictionary類的reLoadMainDict方法,可以看到在方法裡面有2個方法tmpDict.loadMainDict()和tmpDict.loadStopWordDict(),分別維護的是擴展詞庫和停用詞庫,我們先看一下對擴展詞庫的維護;
在方法tmpDict.loadMainDict()中,我們在最後一行加載遠程自定義詞庫的後面新增一個方法調用this.loadMyDbExtDict(),用於加載oracle詞庫;在加載oracle詞庫之前,需先準備好oracle相關的配置以及sql語句,並在數據庫中新建一張表,用於維護擴展詞和停用詞(表結構見前文的建表語句)
從oracle中加載動態詞庫
/**
 * Loads extension words from the database into {@code _MainDict}, then pauses for the
 * configured reload interval.
 *
 * <p>The query comes from the {@code jdbc.reload.sql} property and must expose the word in a
 * column named {@code word}. Any failure is logged and not rethrown. The pause always runs,
 * including after a failure, and only after the JDBC resources have been closed, so a broken
 * database cannot make a polling caller spin and no connection is held while sleeping.
 *
 * <p>NOTE(review): the pause runs on whichever thread calls this method - confirm that no
 * latency-sensitive caller (for example plugin start-up) goes through here.
 */
private void loadMyDbExtDict() {
    try {
        loadExtWordsFromDb();
    } catch (Exception e) {
        logger.error("[==========]failed to load IK extension words from database", e);
    } finally {
        pauseForReloadInterval();
    }
}

/** Runs the configured query once and feeds every non-blank word into {@code _MainDict}. */
private void loadExtWordsFromDb() throws SQLException {
    // A null resource is permitted in try-with-resources; it is simply not closed.
    try (Connection conn = getConn()) {
        if (conn == null) {
            logger.error("[==========]no database connection available; skipping extension word reload");
            return;
        }
        // Read the SQL only after getConn(), which is what (re)loads the properties file.
        String sql = props.getProperty("jdbc.reload.sql");
        try (Statement st = conn.createStatement();
             ResultSet rs = st.executeQuery(sql)) {
            while (rs.next()) {
                String theWord = rs.getString("word");
                if (theWord == null) {
                    continue;
                }
                String trimmed = theWord.trim();
                if (trimmed.isEmpty()) {
                    // Nothing to add for a blank entry.
                    continue;
                }
                logger.info("[==========]正在加載自定義IK擴(kuò)展詞庫詞條: " + theWord);
                _MainDict.fillSegment(trimmed.toCharArray());
            }
        }
    }
}

/**
 * Sleeps for {@code jdbc.reload.interval} seconds, falling back to 60 seconds when the
 * property is missing, not a number, or less than one.
 */
private void pauseForReloadInterval() {
    final long defaultSeconds = 60L;
    long seconds = defaultSeconds;
    String configured = props.getProperty("jdbc.reload.interval");
    if (configured != null) {
        try {
            seconds = Long.parseLong(configured.trim());
        } catch (NumberFormatException e) {
            logger.error("[==========]invalid jdbc.reload.interval '" + configured
                    + "', using " + defaultSeconds + " seconds", e);
        }
    }
    if (seconds < 1) {
        seconds = defaultSeconds;
    }
    try {
        // long arithmetic: an int multiplication would overflow for very large intervals.
        Thread.sleep(seconds * 1000L);
    } catch (InterruptedException e) {
        // Preserve the interruption for the caller instead of swallowing it.
        Thread.currentThread().interrupt();
    }
}
// Load extension words from the database; the loader method is declared as loadMyDbExtDict().
this.loadMyDbExtDict();

停用詞類似,依樣畫葫蘆即可:在tmpDict.loadStopWordDict()中用同樣的方式執行jdbc.reload.stopword.sql
oracle依賴包直接添加到打包配置中(不添加的話,也可自行拷貝jar到相應目錄)
<!-- Selects the com.oracle:ojdbc7 artifact (the Oracle JDBC driver) for inclusion in the packaged plugin. -->
<dependencySet>
<outputDirectory/>
<useProjectArtifact>true</useProjectArtifact>
<useTransitiveFiltering>true</useTransitiveFiltering>
<includes>
<include>com.oracle:ojdbc7</include>
</includes>
</dependencySet>

安裝IK分詞器插件
完成上述步驟並打包後,拿到插件zip包(本文為elasticsearch-analysis-ik-7.8.0.zip,文件名以實際打包結果為準),在ES安裝目錄下的plugins目錄下新建一個ik文件夾,將zip包解壓到ik文件夾下

三.常見問題
異常1
java.sql.SQLException: Column 'word' not found.
此異常是因為編寫sql時,查詢的數據庫字段需要起別名為 word,修改一下sql即可解決這個問題;
異常2(困擾了我一個星期,找了各種答案都沒能解決,重點記錄一下)
java.security.AccessControlException: access denied ("java.lang.management.ManagementPermission" "control")
[2022-07-22T13:42:21,186][ERROR][o.e.b.ElasticsearchUncaughtExceptionHandler] [node-1] fatal error in thread [elasticsearch[node-1][clusterApplierService#updateTask][T#1]], exiting
java.lang.ExceptionInInitializerError: null
at oracle.jdbc.driver.BlockSource$ThreadedCachingBlockSource.<clinit>(BlockSource.java:402) ~[?:?]
at oracle.jdbc.driver.BlockSource.createBlockSource(BlockSource.java:80) ~[?:?]
at oracle.jdbc.driver.BlockSource.createBlockSource(BlockSource.java:70) ~[?:?]
at oracle.jdbc.driver.PhysicalConnection.setBlockSource(PhysicalConnection.java:593) ~[?:?]
at oracle.jdbc.driver.PhysicalConnection.<init>(PhysicalConnection.java:631) ~[?:?]
at oracle.jdbc.driver.T4CConnection.<init>(T4CConnection.java:398) ~[?:?]
at oracle.jdbc.driver.T4CDriverExtension.getConnection(T4CDriverExtension.java:31) ~[?:?]
at oracle.jdbc.driver.OracleDriver.connect(OracleDriver.java:566) ~[?:?]
at java.sql.DriverManager.getConnection(DriverManager.java:677) ~[java.sql:?]
at java.sql.DriverManager.getConnection(DriverManager.java:228) ~[java.sql:?]
at org.wltea.analyzer.dic.Dictionary.getConn(Dictionary.java:686) ~[?:?]
at org.wltea.analyzer.dic.Dictionary.loadMyDbExtDict(Dictionary.java:589) ~[?:?]
at org.wltea.analyzer.dic.Dictionary.loadMainDict(Dictionary.java:398) ~[?:?]
at org.wltea.analyzer.dic.Dictionary.initial(Dictionary.java:151) ~[?:?]
at org.wltea.analyzer.cfg.Configuration.<init>(Configuration.java:40) ~[?:?]
at org.elasticsearch.index.analysis.IkTokenizerFactory.<init>(IkTokenizerFactory.java:15) ~[?:?]
at org.elasticsearch.index.analysis.IkTokenizerFactory.getIkSmartTokenizerFactory(IkTokenizerFactory.java:23) ~[?:?]
at org.elasticsearch.index.analysis.AnalysisRegistry.buildMapping(AnalysisRegistry.java:338) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.index.analysis.AnalysisRegistry.buildTokenizerFactories(AnalysisRegistry.java:174) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.index.analysis.AnalysisRegistry.build(AnalysisRegistry.java:159) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.index.IndexService.<init>(IndexService.java:165) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.index.IndexModule.newIndexService(IndexModule.java:398) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.indices.IndicesService.createIndexService(IndicesService.java:544) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.indices.IndicesService.createIndex(IndicesService.java:493) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.indices.IndicesService.createIndex(IndicesService.java:161) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.indices.cluster.IndicesClusterStateService.createIndices(IndicesClusterStateService.java:498) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.indices.cluster.IndicesClusterStateService.applyClusterState(IndicesClusterStateService.java:268) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.cluster.service.ClusterApplierService.lambda$callClusterStateAppliers$5(ClusterApplierService.java:478) ~[elasticsearch-7.1.0.jar:7.1.0]
at java.lang.Iterable.forEach(Iterable.java:75) ~[?:?]
at org.elasticsearch.cluster.service.ClusterApplierService.callClusterStateAppliers(ClusterApplierService.java:476) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.cluster.service.ClusterApplierService.applyChanges(ClusterApplierService.java:459) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.cluster.service.ClusterApplierService.runTask(ClusterApplierService.java:413) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.cluster.service.ClusterApplierService$UpdateTask.run(ClusterApplierService.java:164) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.common.util.concurrent.ThreadContext$ContextPreservingRunnable.run(ThreadContext.java:681) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.runAndClean(PrioritizedEsThreadPoolExecutor.java:252) ~[elasticsearch-7.1.0.jar:7.1.0]
at org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor$TieBreakingPrioritizedRunnable.run(PrioritizedEsThreadPoolExecutor.java:215) ~[elasticsearch-7.1.0.jar:7.1.0]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) ~[?:?]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) ~[?:?]
at java.lang.Thread.run(Thread.java:835) [?:?]
Caused by: java.security.AccessControlException: access denied ("java.lang.management.ManagementPermission" "control")
at java.security.AccessControlContext.checkPermission(AccessControlContext.java:472) ~[?:?]
at java.security.AccessController.checkPermission(AccessController.java:1042) ~[?:?]
at java.lang.SecurityManager.checkPermission(SecurityManager.java:408) ~[?:?]
at sun.management.Util.checkAccess(Util.java:77) ~[?:?]
at sun.management.Util.checkControlAccess(Util.java:85) ~[?:?]
at sun.management.MemoryPoolImpl.setCollectionUsageThreshold(MemoryPoolImpl.java:213) ~[?:?]
at oracle.jdbc.driver.BlockSource$ThreadedCachingBlockSource$BlockReleaserListener$1.run(BlockSource.java:376) ~[?:?]
at java.security.AccessController.doPrivileged(AccessController.java:310) ~[?:?]
at oracle.jdbc.driver.BlockSource$ThreadedCachingBlockSource$BlockReleaserListener.<init>(BlockSource.java:374) ~[?:?]
at oracle.jdbc.driver.BlockSource$ThreadedCachingBlockSource$BlockReleaserListener.<clinit>(BlockSource.java:348) ~[?:?]
... 39 more
解決方法:在plugin-security.policy文件中添加
permission java.lang.management.ManagementPermission "control";

異常3:AccessControlException: access denied ("java.net.SocketPermission" "127.0.0.1:3306" "connect,resolve")
在D:\tool\ELK\elasticsearch-7.1.0\jdk\conf\security\java.policy文件中添加
permission java.net.SocketPermission "*", "connect,resolve";
// 出現相應報錯時再按需添加
permission java.lang.RuntimePermission "accessClassInPackage.sun.security.krb5";
permission java.lang.RuntimePermission "accessDeclaredMembers";
permission javax.management.MBeanServerPermission "createMBeanServer";

四、測試
GET _analyze
{
"analyzer": "ik_max_word",
"text": "微信"
}

此時無法分詞;在數據庫中添加“微信”後等待60秒,再執行一次上面的請求

成功!!!