Skip to content

Commit c553f74

Browse files
committed
[ticket/16234] Fix & enhance Sphinx search backend functionality
PHPBB3-16234 PHPBB3-16233 PHPBB3-15367 PHPBB3-13958
1 parent aae8637 commit c553f74

4 files changed

Lines changed: 1924 additions & 13 deletions

File tree

docs/sphinx.sample.conf

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,16 @@ index index_phpbb_{SPHINX_ID}_main
7070
docinfo = extern
7171
morphology = none
7272
stopwords =
73+
wordforms = # optional, specify path to wordforms file. See ./docs/sphinx_wordforms.txt for example
74+
exceptions = # optional, specify path to exceptions file. See ./docs/sphinx_exceptions.txt for example
7375
min_word_len = 2
7476
charset_table = U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z, A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101, U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109, U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F, U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117, U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D, U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135, U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, U+013C, U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, U+0143->U+0144, U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, U+014B, U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, U+0152->U+0153, U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159, U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161, U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167, U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F, U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175, U+0175, U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C, U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F, U+4E00..U+9FFF
75-
min_prefix_len = 0
76-
min_infix_len = 0
77+
ignore_chars = U+0027, U+002C
78+
min_prefix_len = 3 # Minimum number of characters for wildcard searches by prefix (min 1). Default is 3. If set to a non-zero value, min_infix_len must be 0
79+
min_infix_len = 0 # Minimum number of characters for wildcard searches by infix (min 2). 0 disables infix searching. If set to a non-zero value, min_prefix_len must be 0
80+
html_strip = 1
81+
index_exact_words = 0 # Set to 1 to enable exact search operator. Requires wordforms or morphology
82+
blend_chars = U+23, U+24, U+25, U+26, U+40
7783
}
7884
index index_phpbb_{SPHINX_ID}_delta : index_phpbb_{SPHINX_ID}_main
7985
{

docs/sphinx_exceptions.txt

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Sample tokenised exception file for phpBB using Sphinx search engine
2+
# Copyright (c) 2020 -- KYPREO
3+
#
4+
# Exceptions allow one or more tokens to be mapped to a single keyword.
5+
# Exceptions are applied BEFORE tokenisation; they are therefore case-sensitive and may contain characters that would normally be excluded, such as punctuation.
6+
# Exceptions are applied to both indexing and searching, such that search queries for one mapped variation will find all others.
7+
# List needs to be customised according to board and language requirements.
8+
# Please remove all comment lines (lines beginning with #) before use.
9+
#
10+
# See Sphinx documentation for further details: http://sphinxsearch.com/docs/current/conf-exceptions.html
11+
#
12+
# Examples:
13+
#
14+
# Acronyms and initialisms
15+
U.S.A. => usa
16+
u.s.a. => usa
17+
U.S.A => usa
18+
u.s.a => usa
19+
USA => usa
20+
U.S. => usa
21+
u.s. => usa
22+
U.S => usa
23+
u.s => usa
24+
US => usa
25+
# Abbreviations using ampersand
26+
profit & loss => p&l
27+
profit and loss => p&l
28+
P & L => p&l
29+
P and L => p&l
30+
p & l => p&l
31+
p and l => p&l
32+
# Ampersands
33+
& => and
34+
# Ordinals
35+
1st => first
36+
1ST => first
37+
2nd => second
38+
2ND => second
39+
3rd => third
40+
3RD => third
41+
4th => fourth
42+
4TH => fourth
43+
5th => fifth
44+
5TH => fifth
45+
6th => sixth
46+
6TH => sixth
47+
7th => seventh
48+
7TH => seventh
49+
8th => eighth
50+
8TH => eighth
51+
9th => ninth
52+
9TH => ninth
53+
10th => tenth
54+
10TH => tenth
55+
# Numerals
56+
1 => one
57+
2 => two
58+
3 => three
59+
4 => four
60+
5 => five
61+
6 => six
62+
7 => seven
63+
8 => eight
64+
9 => nine
65+
10 => ten
66+
11 => eleven
67+
12 => twelve
68+
13 => thirteen
69+
14 => fourteen
70+
15 => fifteen
71+
16 => sixteen
72+
17 => seventeen
73+
18 => eighteen
74+
19 => nineteen
75+
20 => twenty
76+
30 => thirty
77+
40 => forty
78+
50 => fifty
79+
60 => sixty
80+
70 => seventy
81+
80 => eighty
82+
90 => ninety
83+
100 => one hundred
84+
1000 => one thousand
85+
10000 => ten thousand
86+
100000 => one hundred thousand
87+
1000000 => one million
88+
1000000000 => one billion
89+
# Numbered groupings / decades
90+
10s => tens
91+
20s => twenties
92+
30s => thirties
93+
40s => forties
94+
50s => fifties
95+
60s => sixties
96+
70s => seventies
97+
80s => eighties
98+
90s => nineties

0 commit comments

Comments
 (0)