首頁 > 後端開發 > php教程 > php sphinx 高效率搜尋引擎配置教程

php sphinx 高效率搜尋引擎配置教程

WBOY
發布: 2016-07-25 09:03:00
原創
1288 人瀏覽過
  1. tar -xvzf sphinx-2.0.1-beta.tar.gz
  2. cd sphinx-2.0.1-beta
  3. ./configure --prefix=/usr/local/sphinx --with-mysql --with-iconv
複製程式碼

備註:64位元增加參數--enable-id64

  1. make && make install
  2. cd /usr/local/sphinx/etc/
  3. cp sphinx.conf.dist sphinx.conf
複製程式碼

配置: # # Sphinx設定檔範例 # # 警告!雖然此範例文件提到了所有可用選項, # 它僅包含(非常)簡短的幫助器描述。請參考 # doc/sphinx.html 了解詳細資訊。 #

############################################ ## 資料來源定義 ############################################ source src1 { # 資料來源類型。強制,無預設值 # 已知類型有 mysql、pgsql、mssql、xmlpipe、xmlpipe2、odbc type = mysql

################################################################################# # ### ## SQL 設定(針對「mysql」和「pgsql」類型) ################################################### ##

# SQL 來源類型的一些簡單參數 sql_host = localhost sql_user = root sql_pass = ****** sql_db = ****** sql_port = 3306 # 可選,預設為 3306

# UNIX 套接字名稱 # 可選,預設為空(重複使用客戶端庫預設值) # Linux 上通常為“/var/lib/mysql/mysql.sock” # 在 FreeBSD 上通常是“/tmp/mysql.sock” # sql_sock = /tmp/mysql.sock

# MySQL 特定客戶端連線標誌 # 可選,預設為0 # 資料傳輸方式 # mysql_connect_flags = 32 # 啟用壓縮

# MySQL 特定的 SSL 憑證設置 # 可選,預設為空 # SSL連結 # mysql_ssl_cert = /etc/ssl/client-cert.pem # mysql_ssl_key = /etc/ssl/client-key.pem # mysql_ssl_ca = /etc/ssl/cacert.pem

# MS SQL 特定的 Windows 驗證模式標誌 # 必須與 charset_type 索引等級設定同步 # 可選,預設為0 # # mssql_winauth = 1 # 使用目前登入的使用者憑證

# MS SQL 特定的 Unicode 索引標誌 # 可選,預設為0(請求SBCS資料) # # mssql_unicode = 1 # 從伺服器請求 Unicode 資料

# ODBC 特定 DSN(資料來源名稱) # odbc來源類型強制,無預設值 # # odbc_dsn = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)}; # sql_query = SELECT id, data FROM documents.csv

# ODBC 和 MS SQL 特定的每列緩衝區大小 # 可選,預設自動偵測 # # sql_column_buffers = content=12M, comments=1M

# 預查詢,在主取得查詢前執行 # 多值,可選,預設為空查詢列表 # 發送SQL語句前發送 sql_query_pre = SET NAMES utf8 sql_query_pre = SET SESSION query_cache_type=OFF

# 主文檔取得查詢 # 強制、整數文件 ID 欄位必須是第一個選定的列 # 需要查詢的表建構查詢 sql_query = SELECT id, target_type, genre, stars, sub_title, sports_team, music_band, music_album FROM ko_link #如果多個資料來源並且要在一個索引中,必須要保持欄位的順序數量跟資料要一致,否則將出錯

# 加入/有效負載欄位取得查詢 # 連線欄位讓您避免(緩慢)JOIN 和 GROUP_CONCAT # 有效負載欄位可讓您附加自訂的每個關鍵字值(例如用於排名) # # 文法為 FIELD-NAME 'from' ( 'query' | 'payload-query' ); QUERY # 連接欄位 QUERY 應傳回 2 列(docid、文字) # 負載欄位 QUERY 應傳回 3 列(docid、關鍵字、權重) # # 要求查詢結果按文件 ID 升序排列! # 多值,可選,預設為空查詢列表 #新增欄位,來源與表格自動連接 # 欄位結果集保留為 # (1,標籤1) # (1,標籤2) # (2,標籤3) # (2,標籤4) #新增字段將用於搜索,結果為第3個字段,第3個字段表示該記錄的權重,權重為大於1的值 # sql_joined_field = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC # sql_joined_field = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC

# 基於文件的欄位聲明 # # 該欄位的內容被視為檔案名 # 文件被載入並索引以代替字段 # # 最大檔案大小受 max_file_field_buffer 索引器設定的限制 # 文件 IO 錯誤不是致命的,會被報告為警告 #放置欄位聲明文件 # sql_file_field = content_file_path

# 範圍查詢設置,查詢必須傳回最小和最大ID值 # 可選,預設為空 # # sql_query 將需要引用 $start 和 $end 邊界 # 如果使用範圍查詢: # 分區查詢,防止MYSQL死鎖 # sql_query = # SELECT doc.id, doc.id AS group, doc.title, doc.data # FROM documents doc # WHERE id>=$start AND id<=$end # # sql_query_range = SELECT MIN(id),MAX(id) FROM documents

# 範圍查詢步驟 # 可選,預設為1024 # 分區查詢跳步 # sql_range_step = 1000

# 無符號整數屬性聲明 # 多值(允許任意數量的屬性),可選 # 可以指定可選的位元大小,預設為32 #宣告無符號數字段 #sql_attr_uint = target_type # sql_attr_uint = forum_id:9 # forum_id 的 9 位 #sql_attr_uint = group_id #聲明BOOL字段 # 布林屬性聲明 # 多值(允許任意數量的屬性),可選 # 相當於 1 位元大小的 sql_attr_uint # # sql_attr_bool = is_deleted

# bigint 屬性聲明 # 多值(允許任意數量的屬性),可選 # 宣告一個有符號(與 uint 不同!)64 位元屬性 # 聲明長整字段 # sql_attr_bigint = my_bigint_id

# UNIX 時間戳屬性聲明 # 多值(允許任意數量的屬性),可選 # 類似整數,但也可以用在日期函數中 #聲明時間字段 # sql_attr_timestamp = posts_ts # sql_attr_timestamp = last_edited_ts #sql_attr_timestamp = date_added # 字串序數屬性聲明 # 多值(允許任意數量的屬性),可選 # 對字串進行排序(按位元組),並將它們的索引儲存在排序清單中 # 按此attr排序相當於按原始字串排序 # 聲明字串欄位用於排序等,但此欄位不會被存儲 # sql_attr_str2ordinal = author_name

# 浮點屬性聲明 # 多值(允許任意數量的屬性),可選 # 值以單精度、32 位元 IEEE 754 格式存儲 #聲明浮點字段 # sql_attr_float = lat_radians # sql_attr_float = long_radians

# 多值屬性 (MVA) 屬性聲明 # 多值(允許任意數量的屬性),可選 # MVA 值是無符號 32 位元整數的可變長度列表 # # 語法為 ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY] # ATTR-TYPE 是 'uint' 或 'timestamp' # SOURCE-TYPE 是 'field'、'query' 或 'ranged-query' # QUERY 是用來取得所有 ( docid, attrvalue ) 對的 SQL 查詢 # RANGE-QUERY 是用來取得最小和最大 ID 值的 SQL 查詢,類似於 'sql_query_range' # 聲明複合字段 # sql_attr_multi = uint tag from query; SELECT docid, tagid FROM tags # sql_attr_multi = uint tag from ranged-query; # SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; # SELECT MIN(docid), MAX(docid) FROM tags

# 字串屬性聲明 # 多值(允許任意數量),可選 # 允許您儲存和檢索字串 # 只是儲存數據,但不會改變索引字段 # sql_attr_string = stitle

# wordcount 屬性聲明 # 多值(允許任意數量),可選 # 讓你在索引時計算單字數 # 將轉換成關鍵字的字段,用於提高匹配率 # sql_attr_str2wordcount = stitle

# 組合欄位加屬性宣告(來自單一欄位) # 將列儲存為屬性,但也將其索引為全文字段 # 跟sql_attr_string不同的是該屬性加入索引 # sql_field_string = author # sql_field_str2wordcount = title # 後查詢,在 sql_query 完成時執行 # 可選,預設為空 # 取後查詢 # sql_query_post = # 索引後查詢,在索引成功完成時執行 # 可選,預設為空 # $maxid 擴展為實際從資料庫取得的最大文件 ID # 索引後查詢 # sql_query_post_index = REPLACE INTO counters ( id, val ) # VALUES ( 'max_indexed_id', $maxid )

# 範圍查詢限制,以毫秒為單位 # 可選,預設為0,表示不延遲 # 在每個查詢步驟之前強制執行給定的延遲 #分區查詢的時間間隔 sql_ranged_throttle = 0

# 文件資訊查詢,僅用於 CLI 搜尋(即測試和偵錯) # 可選,預設為空 # 必須包含 $id 巨集並且必須透過該 id 取得文檔 #命令列調試查詢結果用 sql_query_info = SELECT * FROM ko_link WHERE id=$id

#kill-list 查詢,取得kill-list 的文檔ID # k-list 將抑制同一查詢中先前索引的匹配 # 可選,預設為空 ##清理指定查詢ID列表,用於資料的更改 # sql_query_killlist = SELECT id FROM documents WHERE edited>=@last_reindex

# columns to unpack on indexer side when indexing # multi-value, optional, default is empty list # 啟用ZIP壓縮 可以降低系統負載 但必須保證zlib庫zlib-dev庫可用 # unpack_zlib = zlib_column # unpack_mysqlcompress = compressed_column # unpack_mysqlcompress = compressed_column_2

# maximum unpacked length allowed in MySQL COMPRESS() unpacker # optional, default is 16M # 壓縮快取區大小 不能小於欄位儲存值 # unpack_mysqlcompress_maxsize = 16M

########################## ## xmlpipe2 配置 ########################## # type = xmlpipe

# shell command to invoke xmlpipe stream producer # mandatory # # xmlpipe_command = cat /usr/local/sphinx/var/test.xml

# xmlpipe2 field declaration # multi-value, optional, default is empty # # xmlpipe_field = subject # xmlpipe_field = content

# xmlpipe2 attribute declaration # multi-value, optional, default is empty # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX # # xmlpipe_attr_timestamp = published # xmlpipe_attr_uint = author_id

# perform UTF-8 validation, and filter out incorrect codes # avoids XML parser choking on non-UTF-8 documents # optional, default is 0 # # xmlpipe_fixup_utf8 = 1 }

# inherited source example # 繼承資料來源 # all the parameters are copied from the parent source, # and may then be overridden in this source definition #source src1throttled : src1 #{ # sql_ranged_throttle = 100 #}

##################### ## index definition ##################### # local index example # # this is an index which is stored locally in the filesystem # # all indexing-time options (such as morphology and charsets) # are configured per local index index test1 { # index type # optional, default is 'plain' # known values are 'plain', 'distributed', and 'rt' (see samples below) #索引類型 本機 分散式 # type = plain

# document source(s) to index # multi-value, mandatory # document IDs must be globally unique across all sources #資料來源,可以多個資料來源 source = src1

# index files path and file name, without extension # mandatory, path must be writable, extensions will be auto-appended # 索引儲存路徑 path = /usr/local/sphinx/var/data/test1

# document attribute values (docinfo) storage mode # optional, default is 'extern' # known values are 'none', 'extern' and 'inline' #索引儲存方式 docinfo = extern

# memory locking for cached data (.spa and .spi), to prevent swapping # optional, default is 0 (do not mlock) # requires searchd to be run from root #記憶體鎖定 需要保證足夠權限 mlock = 0

# a list of morphology preprocessors to apply # optional, default is empty # # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru', # 'soundex', and 'metaphone'; additional preprocessors available from # libstemmer are 'libstemmer_XXX', 其中 XXX is algorithm code # (see libstemmer_c/libstemmer/modules.txt) # 字詞擷取器 # morphology = stem_en, stem_ru, soundex # morphology = libstemmer_german # morphology = libstemmer_sv morphology = stem_en

# minimum word length at which to enable stemming # optional, default is 1 (stem everything) # 字幹化的最小字長 # min_stemming_len = 1

# stopword files list (space separated) # optional, default is empty # contents are plain text, charset_table and stemming are both applied # 停用搜尋字詞 # stopwords = /usr/local/sphinx/var/data/stopwords.txt

# wordforms file, in "mapfrom > mapto" plain text format # optional, default is empty # 詞型字典可用spelldump工俱生成 # wordforms = /usr/local/sphinx/var/data/wordforms.txt

# tokenizing exceptions file # optional, default is empty #Token特例文件,就是有些詞是完整詞意,不能拆分索引如a&t 跟a & t # plain text, case sensitive, space insensitive in map-from part # one "Map Several Words => ToASingleOne" entry per line # # exceptions = /usr/local/sphinx/var/data/exceptions.txt

# minimum indexed word length # default is 1 (index everything) # 最小索引長度,就是小於指定長度的字不被索引 min_word_len = 1

# charset encoding type # optional, default is 'sbcs' # known types are 'sbcs' (Single Byte CharSet) and 'utf-8' # 字元編碼 charset_type = utf-8

# charset definition and case folding rules "table" # optional, default value depends on charset_type # # defaults are configured to include English and Russian characters only # you need to change the table to include additional ones # this behavior MAY change in future versions # # 'sbcs' default value is # charset_table = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF # 轉換字元表 # 'utf-8' default value is # charset_table = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F

# ignored characters list # optional, default value is empty # 忽略字元表 # ignore_chars = U+00AD

# minimum word prefix length to index # optional, default is 0 (do not index prefixes) #索引的最小前綴長度,小心使用,索引和搜尋的時間都會惡化 # min_prefix_len = 0

# minimum word infix length to index # optional, default is 0 (do not index infixes) #索引的最小中綴長度 小心使用,索引和搜尋的時間都會惡化 # min_infix_len = 0

# list of fields to limit prefix/infix indexing to # optional, default value is empty (index all fields in prefix/infix mode) # 未知 # prefix_fields = filename # infix_fields = url, domain

# enable star-syntax (wildcards) when searching prefix/infix indexes # search-time only, does not affect indexing, can be 0 or 1 # optional, default is 0 (do not use wildcard syntax) # 啟用星號語法 # enable_star = 1

# expand keywords with exact forms and/or stars when searching fit indexes # search-time only, does not affect indexing, can be 0 or 1 # optional, default is 0 (do not expand keywords) # 擴大搜尋關鍵字 形式如: running -> ( running | *running* | =running ) # expand_keywords = 1

# n-gram length to index, for CJK indexing # only supports 0 and 1 for now, other lengths to be implemented # optional, default is 0 (disable n-grams) # 其他語言如中文的基本支持 # ngram_len = 1

# n-gram characters list, for CJK indexing # optional, default is empty #中文或其他語言的值範圍 # ngram_chars = U+3000..U+2FA1F

# phrase boundary characters list # optional, default is empty # 邊界符 # phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis

# phrase boundary word position increment # optional, default is 0 # 邊界符增量 # phrase_boundary_step = 100

# blended characters list # blended chars are indexed both as separators and valid characters # for instance, AT&T will results in 3 tokens ("at", "t", and "at&t") # optional, default is empty # 混合字元列表 # blend_chars = +, &, U+23

# blended token indexing mode # a comma separated list of blended token indexing variants # known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure # optional, default is trim_none #未知 # blend_mode = trim_tail, skip_pure

# whether to strip HTML tags from incoming documents # known values are 0 (do not strip) and 1 (do strip) # optional, default is 0 # 刪除html標籤 (小心文字被刪除) html_strip = 0

# what HTML attributes to index if stripping HTML # optional, default is empty (do not index anything) # 保留的HTML標籤 # html_index_attrs = img=alt,title; a=title;

# what HTML elements contents to strip # optional, default is empty (do not strip element contents) # 不但刪除標籤,其包含的文字也將刪除 # html_remove_elements = style, script

# whether to preopen index data files on startup # optional, default is 0 (do not preopen), searchd-only # 預先開啟索引還是每次查詢的時候在開啟索引 # preopen = 1

# whether to keep dictionary (.spi) on disk, or cache it in RAM # optional, default is 0 (cache in RAM), searchd-only # 將字典檔案是否儲存在記憶體中 # ondisk_dict = 1

# whether to enable in-place inversion (2x less disk, 90-95% speed) # optional, default is 0 (use separate temporary files), indexer-only # 是否啟用原地索引倒轉 將少磁碟使用 效能會有一點損失 # inplace_enable = 1

# in-place fine-tuning options # optional, defaults are listed below #微調原地倒轉 # inplace_hit_gap = 0 # preallocated hitlist gap size # inplace_docinfo_gap = 0 # preallocated docinfo gap size # inplace_reloc_factor = 0.1 # relocation buffer size within arena # inplace_write_factor = 0.1 # write buffer size within arena

# whether to index original keywords along with stemmed versions # enables "=exactform" operator to work # optional, default is 0 # 是否在索引原關鍵字的字幹化/重映射後的形式的同時也索引原詞 # index_exact_words = 1

# position increment on overshort (less that min_word_len) words # optional, allowed values are 0 and 1, default is 1 #在經過過短的字(比 min_word_len短的字)處後增加位置值 # overshort_step = 1

# position increment on stopword # optional, allowed values are 0 and 1, default is 1 #在經過 停用詞 處後增加位置值可選選項 # stopword_step = 1

# hitless words list # positions for these keywords will not be stored in the index # optional, allowed values are 'all', or a list file name # 不能中斷的字元列表 # hitless_words = all # hitless_words = hitless.txt #字元檔

# detect and index sentence and paragraph boundaries # required for the SENTENCE and PARAGRAPH operators to work # optional, allowed values are 0 and 1, default is 0 # 是否檢查標籤合併 針對HTML # index_sp = 1

# index zones, delimited by HTML/XML tags # a comma separated list of tags and wildcards # required for the ZONE operator to work # optional, default is empty string (do not index zones) # 對HTML標籤的權重 # index_zones = title, h*, th }

# inherited index example # 索引繼承 # all the parameters are copied from the parent index, # and may then be overridden in this index definition #index test1stemmed : test1 #{ # path = /usr/local/sphinx/var/data/test1stemmed # morphology = stem_en #}

# distributed index example # # this is a virtual index which can NOT be directly indexed, # and only contains references to other local and/or remote indexes #index dist1 #{ #分散式索引配置 # 'distributed' index type MUST be specified # type = distributed

# local index to be searched # there can be many local indexes configured # local = test1 # local = test1stemmed

# remote agent # multiple remote agents may be specified # syntax for TCP connections is 'hostname:port:index1,[index2[,...]]' # syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]' # agent = localhost:9313:remote1 # agent = localhost:9314:remote2,remote3 # agent = /var/run/searchd.sock:remote4

# blackhole remote agent, for debugging/testing # network errors and search results will be ignored # # agent_blackhole = testbox:9312:testindex1,testindex2

# remote agent connection timeout, milliseconds # optional, default is 1000 ms, ie. 1 sec # agent_connect_timeout = 1000

# remote agent query timeout, milliseconds # optional, default is 3000 ms, ie. 3 sec # agent_query_timeout = 3000 #}

# realtime index example # # you can run INSERT, REPLACE, and DELETE on this index on the fly # using MySQL protocol (see 'listen' directive below) #index rt #{ # 'rt' index type must be specified to use RT index # type = rt

# index files path and file name, without extension # mandatory, path must be writable, extensions will be auto-appended

# path = /usr/local/sphinx/var/data/rt

# RAM chunk size limit # RT index will keep at most this much data in RAM, then flush to disk # optional, default is 32M # # rt_mem_limit = 512M

# full-text field declaration # multi-value, mandatory # rt_field = title # rt_field = content

# unsigned integer attribute declaration # multi-value (an arbitrary number of attributes is allowed), optional # declares an unsigned 32-bit attribute # rt_attr_uint = gid

# RT indexes currently support the following attribute types: # uint, bigint, float, timestamp, string # # rt_attr_bigint = guid # rt_attr_float = gpa # rt_attr_timestamp = ts_added # rt_attr_string = author #}

###################### ## indexer settings ######################

indexer { #索引過程記憶體使用限制。可選選項,預設32M。 # memory limit, in bytes, kiloytes (16384K) or megabytes (256M) # optional, default is 32M, max is 2047M, recommended is 256M to 1024M mem_limit = 32M

# maximum IO calls per second (for I/O throttling) # optional, default is 0 (unlimited) # 每秒最大I/O操作次數,用於限制I/O操作。可選選項,預設為0(無限制)。 # max_iops = 40

# maximum IO call size, bytes (for I/O throttling) # optional, default is 0 (unlimited) # 最大允許的I/O操作大小,以位元組為單位,用於I/O節流。可選選項,預設為0(不限制)。 # max_iosize = 1048576

# maximum xmlpipe2 field length, bytes # optional, default is 2M # 對於XMLLpipe2資料來源允許的最大的域大小 # max_xmlpipe2_field = 4M

# write buffer size, bytes # several (currently up to 4) buffers will be allocated # write buffers are allocated in addition to mem_limit # optional, default is 1M # 寫緩衝區的大小,單位是位元組。可選選項,預設值是1MB。 # write_buffer = 1M

# maximum file field adaptive buffer size # optional, default is 8M, minimum is 1M # # max_file_field_buffer = 32M }

####################### ## searchd settings #######################

searchd { # [hostname:]port[:protocol], or /unix/socket/path to listen on # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL) # # multi-value, multiple listen points are allowed # optional, defaults are 9312:sphinx and 9306:mysql41, as below # # listen = 127.0.0.1 # listen = 192.168.0.1:9312 # listen = 9312 # listen = /var/run/searchd.sock listen = 9312 #listen = 9306:mysql41

# log file, searchd run info is logged here # optional, default is 'searchd.log' # 全部searchd運行時事件會被記錄在這個日誌檔案中。 log = /usr/local/sphinx/var/log/searchd.log

# query log file, all search queries are logged here # optional, default is empty (do not log queries) # 全部搜尋查詢會被記錄在此文件中。 query_log = /usr/local/sphinx/var/log/query.log

# client read timeout, seconds # optional, default is 5 #網路客戶端請求的讀取逾時時間,單位是秒。 read_timeout = 5

# request timeout, seconds # optional, default is 5 minutes #在使用持久連接時,兩次查詢之間等待的最長時間(單位是秒)。 client_timeout = 300

# maximum amount of children to fork (concurrent searches to run) # optional, default is 0 (unlimited) #子進程的最大數量 ,用來控制伺服器負載。任何時候不可能有比此設定值更多的搜尋同時運行。當達到限制時,新的輸入客戶端會被臨時失敗(SEARCH_RETRY)狀態碼駁回,同時給予一個聲明伺服器已到最大連線限制的訊息。 max_children = 30

# PID file, searchd process ID file name # mandatory #進程ID文件 pid_file = /usr/local/sphinx/var/log/searchd.pid

# max amount of matches the daemon ever keeps in RAM, per-index # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL # default is 1000 (just like Google) #守護程式在記憶體中為每個索引所保持並傳回給客戶端的匹配數目的最大值。 max_matches = 1000

# seamless rotate, prevents rotate stalls if precaching huge datasets # optional, default is 1 #防止 searchd 輪換在需要預取大量資料的索引時停止回應。可選選項,預設為1(啟用無縫(seamless)輪換)。 seamless_rotate = 1

# whether to forcibly preopen all indexes on startup # optional, default is 1 (preopen everything) #是否在啟動時強制預先開啟所有索引檔案。可選選項,預設為1(全部預先開啟)。 preopen_indexes = 1

# whether to unlink .old index copies on succesful rotation. # optional, default is 1 (do unlink) #索引輪替成功之後,是否刪除以.old為副檔名的索引拷貝。可選選項,預設為1(刪除這些索引拷貝)。 unlink_old = 1

# attribute updates periodic flush timeout, seconds # updates will be automatically dumped to disk this frequently # optional, default is 0 (disable periodic flush) # UpdateAttributes() 呼叫時候更新是否隔一段時間寫入磁碟 # attr_flush_period = 900

# instance-wide ondisk_dict defaults (per-index value take precedence) # optional, default is 0 (precache all dictionaries in RAM) #對 ondisk_dict 指令的全域的預設值。 可選選項,預設值是0(將字典預先緩衝到記憶體)。 # ondisk_dict_default = 1

# MVA updates pool size # shared between all instances of searchd, disables attr flushes! # optional, default size is 1M #用於多值屬性MVA更新的儲存空間的共用池大小。 mva_updates_pool = 1M

# max allowed network packet size # limits both query packets from clients, and responses from agents # optional, default size is 8M #網路通訊時允許的最大的封包的大小。 max_packet_size = 8M

# crash log path # searchd will (try to) log crashed query to 'crash_log_path.PID' file # optional, default is empty (do not create crash logs) #崩潰日誌檔的路徑 # crash_log_path = /usr/local/sphinx/var/log/crash

# max allowed per-query filter count # optional, default is 256 #每次查詢允許設定的過濾器的最大個數。只用於內部檢查,不直接影響記憶體使用或效能。 max_filters = 256

# max allowed per-filter values count # optional, default is 4096 #單一過濾器允許的值的最大個數。只用於內部檢查,不直接影響記憶體使用或效能。 max_filter_values = 4096

# socket listen queue length # optional, default is 5 #TCP監聽積壓列表長度。無法如對的請求立即失敗並收到「連線被拒」錯誤訊息 # listen_backlog = 5

# per-keyword read buffer size # optional, default is 256K #每個關鍵字的讀取緩衝區的大小。可選選項,預設值是256K。 # read_buffer = 256K

# unhinted read size (currently used when reading hits) # optional, default is 32K #無提示時讀取操作的大小。可選選項,預設值是32K。 # read_unhinted = 32K

# max allowed per-batch query count (aka multi-query count) # optional, default is 32 #限制每批次的查詢量。一個OPEN之後的查詢量 max_batch_queries = 32

# max common subtree document cache size, per-query # optional, default is 0 (disable subtree optimization) # # subtree_docs_cache = 4M

# max common subtree hit cache size, per-query # optional, default is 0 (disable subtree optimization) # 限制RAM使用一個共同的子樹優化 預設不優化 # subtree_hits_cache = 8M

# multi-processing mode (MPM) # known values are none, fork, prefork, and threads # optional, default is fork # 工作方式 workers = threads # for RT to work

# max threads to create for searching local parts of a distributed index # optional, default is 0, which means disable multi-threaded searching # should work with all MPMs (ie. does NOT require workers=threads) # # dist_threads = 4

# binlog files path; use empty string to disable binlog # optional, default is build-time configured data directory # 二進位日誌路徑 # binlog_path = # disable logging # binlog_path = /usr/local/sphinx/var/data # binlog.001 etc will be created there

# binlog flush/sync mode # 0 means flush and sync every second # 1 means flush and sync every transaction # 2 means flush every transaction, sync every second # optional, default is 2 # 日誌刷新模式 # binlog_flush = 2

# binlog per-file size limit # optional, default is 128M, 0 means no limit #最大日誌大小 # binlog_max_log_size = 256M

# per-thread stack size, only affects workers=threads mode # optional, default is 64K #每個執行緒的堆疊大小。 # thread_stack = 128K

# per-keyword expansion limit (for dict=keywords prefix searches) # optional, default is 0 (no limit) # 擴大為一個關鍵字的最大數目 # expansion_limit = 1000

# RT RAM chunks flush period # optional, default is 0 (no periodic flush) #RT索引在記憶體中檢查的時間 # rt_flush_period = 900

# query log file format # optional, known values are plain and sphinxql, default is plain # 查詢日誌格式 # query_log_format = sphinxql

# version string returned to MySQL network protocol clients # optional, default is empty (use Sphinx version) # MYSQL版本 # mysql_version_string = 5.0.37

# trusted plugin directory # optional, default is empty (disable UDFs) # 插件目錄 # plugin_dir = /usr/local/sphinx/lib

# default server-wide collation # optional, default is libc_ci # 連結字元集 # collation_server = utf8_general_ci

# server-wide locale for libc based collations # optional, default is C # collation 選項 # collation_libc_locale = ru_RU.UTF-8

# threaded server watchdog (only used in workers=threads mode) # optional, values are 0 and 1, default is 1 (watchdog on) # 是否啟用伺服器監控進程 # watchdog = 1

# SphinxQL compatibility mode (legacy columns and their names) # optional, default is 0 (SQL compliant syntax and result sets) #sphinxql 相容模式 # compat_sphinxql_magics = 1 }

# --eof--

建立索引: /usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf test1

不停服務下索引: /usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf --all --rotate

啟動索引服務,使PHP的客戶端可用 /usr/local/sphinx/bin/searchd --config /usr/local/sphinx/etc/sphinx.conf

建立PHP測試檔

  1. <?php
  2. include_once 'sphinxapi.php';
  3. $s = new SphinxClient;
  4. $s->setServer("localhost", 9312);
  5. $s->setMatchMode(SPH_MATCH_ANY);
  6. $s->setMaxQueryTime(3);
  7. $result = $s->query("test");#查詢
  8. print_r ($result);
?>
複製程式碼

運行即可

searchd 指令: 強行停止: searchd --config /home/myuser/sphinx.conf --stop

安靜停止: searchd --config /home/myuser/sphinx.conf --stopwait

狀態: searchd --config /home/myuser/sphinx.conf --status

指定PID文件 searchd --config /home/myuser/sphinx.conf --pidfile /home/myuser/sphinx.pid

啟動的為控制台模式: searchd --config /home/myuser/sphinx.conf --console

只啟動指定索引 searchd --index myindex

產生一些字典的工具軟體: spelldump

indextool 一些轉移等工具的軟體

搜尋字串規則: * operator OR: hello | world

* operator NOT: hello -world hello !world

* field search operator: @title hello @body world

* field position limit modifier (introduced in version 0.9.9-rc1): @body[50] hello

* multiple-field search operator: @(title,body) hello world

* all-field search operator: @* hello

* phrase search operator: "hello world"

* proximity search operator: "hello world"~10

* quorum matching operator: "the world is a wonderful place"/3

* strict order operator (aka operator "before"): aaa << bbb << ccc

* exact form modifier (introduced in version 0.9.9-rc1): raining =cats and =dogs

* field-start and field-end modifier (introduced in version 0.9.9-rc2): ^hello world$

* NEAR, generalized proximity operator (introduced in version 2.0.1-beta): hello NEAR/3 world NEAR/4 "my test"

* SENTENCE operator (introduced in version 2.0.1-beta): all SENTENCE words SENTENCE "in one sentence"

* PARAGRAPH operator (introduced in version 2.0.1-beta): "Bill Gates" PARAGRAPH "Steve Jobs"

* zone limit operator: ZONE:(h3,h4) only in these titles

表達式,支援函數等 日期用的是時間戳 (好像不是作為MYSQL儲存引擎的時候用不到) * Arithmetic operators: +, -, *, /, %, DIV, MOD * Comparison operators: <, >, <=, >=, =, <> * Boolean operators: AND, OR, NOT * Bitwise operators: &, | * ABS() * BIGINT() * CEIL() * COS() * CRC32() * DAY() * EXP() * FLOOR() * GEODIST() * IDIV() * IF() * IN() * INTERVAL() * LN() * LOG10() * LOG2() * MAX() * MIN() * MONTH() * NOW() * POW() * SIN() * SINT() * SQRT() * YEAR() * YEARMONTH() * YEARMONTHDAY()

客戶端方法介紹:

  1. include_once 'sphinxapi.php';

  2. $s = new SphinxClient();
  3. $s->setServer("localhost", 9312);
  4. $s->SetConnectTimeout ( 1 );//設定連結逾時
  5. /*

  6. $s->AddQuery() ;//清單查詢
  7. $s->RunQueries ();//執行清單查詢
  8. $s->ResetFilters();//清除篩選條件
  9. $s->BuildExcerpts($docs, $index, $words);//產生摘要
  10. $s->BuildKeywords($query, $index, $hits);//產生關鍵字
  11. $s->GetLastError();//錯誤
  12. $s->GetLastWarning();//警告
  13. $s->FlushAttributes();//索引刷入硬碟
  14. $s->IsConnectError();//連結錯誤
  15. $s->ResetGroupBy();//重設分組
  16. $s->SetFieldWeights(array('sub_title'=>1));//加權最小為1

  17. $s->SetIDRange($min, $max);//ID範圍
  18. $s->SetIndexWeights(array('test1'=>1));//索引權重
  19. $s->Status();//服務是否可用
  20. $s->UpdateAttributes($index, $attrs, $values);//更新硬碟索引
  21. */
  22. /*
  23. SPH_MATCH_ALL, matches all query words (default mode) ;
  24. SPH_MATCH_ANY, matches any of the query words;
  25. SPH_MATCH_PHRASE, matches query as a phrase, requiring perfect match;
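  26. SPH_MATCH_BOOLEAN, matches query as a boolean expression (see Section 5.2, “Boolean query syntax”);
  27. SPH_MATCH_EXTENDED, matches query as an expression in Sphinx internal query language (see Section 5.3, “Extended query syntax”). As of 0.9.9, this has been superseded by SPH_MATCH_EXTENDED2, providing additional functionality and better performance. The ident is retained for legacy application code that will continue to be compatible once Sphinx and its components, including the API, are upgraded.
  28. SPH_MATCH_EXTENDED2, matches query using the second version of the Extended matching mode;
  29. SPH_MATCH_FULLSCAN, matches query, forcibly using the "full scan" mode.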
  30. */
  31. $s->setMatchMode(SPH_MATCH_ANY);//匹配模式
  32. $s->setMaxQueryTime(3);//查詢逾時
  33. //$s->SetSelect ( $select ); //設定傳回的欄位
  34. /*
  35. $cl->SetSelect ( "*, @weight+(user_karma+ln(pageviews))*0.1 AS myweight" );
  36. $cl->SetSelect ( " exp_years, salary_gbp*{$gbp_usd_rate} AS salary_usd,
  37. IF(age>40,1,0) AS over40" );
  38. $cl->SetSelect ( "*, AVG(price) AS avgprice" );
  39. */
  40. /*

  41. $cl->SetGroupBy ( "category", SPH_GROUPBY_ATTR, "@count desc" );
  42. $cl->SetGroupDistinct ( "vendor" );
  43. ==
  44. SELECT id, weight, all-attributes,
  45. COUNT(DISTINCT vendor) AS @distinct,
  46. COUNT(*) AS @count
  47. FROM products
  48. GROUP BY category
  49. ORDER BY @count DESC
  59. */
  60. //$s->SetGroupBy ( $groupby, SPH_GROUPBY_ATTR, $groupsort );//匯總
  61. //$s->SetGroupDistinct ( $distinct );//設定不重複欄位
  62. $s->SetArrayResult ( true );//結果是否有ID

  63. /*

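  64. SPH_SORT_RELEVANCE mode, that sorts by relevance in descending order (best matches first); SPH_SORT_ATTR_DESC mode, that sorts by an attribute in descending order (bigger attribute values first); SPH_SORT_ATTR_ASC mode, that sorts by an attribute in ascending order (smaller attribute values first);
  65. SPH_SORT_TIME_SEGMENTS mode, that sorts by time segments (last hour/day/week/month) in descending order, and then by relevance in descending order; SPH_SORT_EXTENDED mode, that sorts by SQL-like combination of columns in ASC/DESC order;
  66. SPH_SORT_EXPR mode, that sorts by an arithmetic expression.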
  67. */
  68. //$s->SetSortMode ( SPH_SORT_EXTENDED, $sortby );//排序模式
  69. /*

  70. $s->SetOverride($attrname, $attrtype, $values);
  71. $s->ResetOverrides();*/
  72. /*
  73. $s->SetRetries($count);//設定失敗重試
  74. $s->SetRankingMode($ranker);//設定排名模式皆適用於SPH_MATCH_EXTENDED2搜尋
  75. //第3個參數當為true時,相當於$attribute!=$value,預設值是false
  76. $s->SetFilter ( 'target_type', $filtervals );//設定過濾,值列表
  77. $s->SetFilterFloatRange($attribute, $min, $max);//浮動範圍
  78. $s->SetFilterRange($attribute, $min, $max);//指定範圍
  79. $s->SetGeoAnchor($attrlat, $attrlong, $lat, $long);
  80. */
//link
//$s->SetFilter ( 'target_type', array(1),true );

$s->SetLimits ( 0, 10 );//顯示數量:參數依序為起始偏移量、回傳筆數

$result = $s->query("good ","team");//查詢

print_r($result);

複製程式碼


來源:php.cn
本網站聲明
本文內容由網友自願投稿,版權歸原作者所有。本站不承擔相應的法律責任。如發現涉嫌抄襲或侵權的內容,請聯絡admin@php.cn
熱門教學
更多>
最新下載
更多>
網站特效
網站源碼
網站素材
前端模板