歡迎來到Linux教程網
Linux教程網
Linux教程網
Linux教程網
Linux教程網 >> Linux編程 >> Linux編程 >> sphinx增量索引

sphinx增量索引

日期:2017/3/1 9:10:17   编辑:Linux編程

首先建立一個計數表,保存數據表的最新記錄ID

-- Counter table used by the Sphinx source queries in this article:
-- the row with id = 1 stores the highest `test`.id captured when the main index is built.
CREATE TABLE `sph_counter` (
    `id`     INT(11) UNSIGNED NOT NULL,  -- counter slot; the Sphinx sources read/write id = 1
    `max_id` INT(11) UNSIGNED NOT NULL,  -- highest document id recorded for that slot
    PRIMARY KEY (`id`)
)
    ENGINE = InnoDB
    DEFAULT CHARSET = utf8
    COMMENT = 'sphinx增量表最大記錄數';

# Main index source: snapshot of every active row in `test` up to the recorded max id
source test
{
    type     = mysql
    sql_host = localhost
    sql_user = root
    # NOTE(review): sample credentials only; use a dedicated account and a real password in production
    sql_pass = 8888
    sql_db   = test
    sql_port = 3306

    sql_query_pre = SET NAMES utf8
    # Record the highest active id before indexing starts.
    # COALESCE keeps the NOT NULL max_id column valid when no row has status = 1.
    sql_query_pre = REPLACE INTO sph_counter SELECT 1, COALESCE(MAX(id), 0) FROM test WHERE status = 1

    # Index only rows up to the recorded id, so rows inserted while this query runs
    # are left to the delta source instead of landing in both indexes.
    # The column list must match the delta source's, otherwise `indexer --merge` cannot
    # combine the two indexes. NOTE(review): replace * with an explicit column list
    # (document id first) once the schema of `test` is settled.
    sql_query      = SELECT * FROM test WHERE id <= (SELECT max_id FROM sph_counter WHERE id = 1) AND status = 1

    # Row lookup used by the command-line `search` utility
    sql_query_info = SELECT * FROM test WHERE id = $id
}

# Delta index source: inherits the connection settings from source `test`
source test_delta : test
{
    # Override the inherited pre-queries so that building the delta does NOT re-run
    # the REPLACE INTO sph_counter statement and move the recorded max id.
    sql_query_pre = SET NAMES utf8

    # Only rows added after the main index was built. Strictly greater than max_id:
    # the row whose id equals max_id is already in the main index.
    sql_query      = SELECT * FROM test WHERE id > (SELECT max_id FROM sph_counter WHERE id = 1) AND status = 1

    # Row lookup used by the command-line `search` utility
    sql_query_info = SELECT * FROM test WHERE id = $id
}

# Main index definition
index test
{
    # Name of the source block that feeds this index
    source       = test
    # Change to the absolute path actually in use, e.g. /usr/local/coreseek/var/...
    path         = /usr/local/sphinx/var/data/test

    docinfo      = extern
    mlock        = 0
    morphology   = none
    min_word_len = 2
    html_strip   = 1

    # Chinese word-segmentation settings; for details see
    # http://www.coreseek.cn/products-install/coreseek_mmseg/
    # BSD/Linux: dictionary directory, must end with "/"
    charset_dictpath = /usr/local/mmseg/etc/
    # Windows: must end with "/"; an absolute path is preferred, e.g. C:/usr/local/coreseek/etc/...
    #charset_dictpath = etc/
    charset_type     = zh_cn.utf-8
}
# Delta index definition.
# Declared as a child of index `test`, so docinfo, mlock, morphology, min_word_len,
# html_strip, charset_dictpath and charset_type are inherited from the parent with the
# same values; only the two settings that differ are overridden here.
index test_delta : test
{
    # Name of the source block that feeds this index
    source = test_delta
    # Change to the absolute path actually in use, e.g. /usr/local/coreseek/var/...
    path   = /usr/local/sphinx/var/data/test_delta
}

# Global settings for the indexer program
indexer
{
    mem_limit = 128M
}

# searchd daemon settings
searchd
{
    listen          = 9312
    read_timeout    = 5
    max_children    = 30
    max_matches     = 1000

    seamless_rotate = 0
    preopen_indexes = 0
    unlink_old      = 1

    # Change the three paths below to the absolute paths actually in use,
    # e.g. /usr/local/coreseek/var/...
    pid_file  = /usr/local/sphinx/var/log/searchd_mysql.pid
    log       = /usr/local/sphinx/var/log/searchd_mysql.log
    query_log = /usr/local/sphinx/var/log/query_mysql.log

    binlog_path = # empty value: binlog disabled
}

保存配置文件後退出,先停止searchd進程再啟動,然後重新生成索引。

停止進程
/usr/local/sphinx/bin/searchd -c /usr/local/sphinx/etc/csft.conf --stop

啟動進程
/usr/local/sphinx/bin/searchd -c /usr/local/sphinx/etc/csft.conf

重新生成所有索引
/usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/csft.conf --all --rotate
增量索引
/usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/csft.conf test_delta --rotate
合並索引
/usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/csft.conf --merge test test_delta --rotate

如果合並索引時出現下面問題:

FATAL: failed to merge index 'test_delta' into index 'test': source index preload failed: failed to open /usr/local/sphinx/var/data/test_delta.sph: No such file or directory

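這通常表示 test_delta 的索引文件尚未就位:以 --rotate 生成的新索引會先寫成 test_delta.new.* 文件,要等 searchd 完成輪換後才會變成 test_delta.sph。請先確認增量索引已經生成,然後停止searchd進程,重新啟動searchd進程,再執行合並。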

增量索引可以放在crontab裡根據需要設置幾分鐘運行一次,然後執行索引合並,至於主索引重建可以選擇在訪問量不大或者半夜運行。

## Rebuild the delta index every 5 minutes
## (a crontab entry needs five time fields: minute hour day-of-month month day-of-week)

*/5 * * * * /usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/csft.conf test_delta --rotate > /dev/null 2>&1

## Merge the delta index into the main index every 10 minutes

*/10 * * * * /usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/csft.conf --merge test test_delta --rotate > /dev/null 2>&1

## Rebuild all indexes at 00:05 every day

5 0 * * * /usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/csft.conf --all --rotate > /dev/null 2>&1

Copyright © Linux教程網 All Rights Reserved