Skip to content

Commit c5e0ce5

Browse files
committed
Pass active backend to index queryset calls (closes django-haystack#534)
Now the Index index_queryset() and read_queryset() methods will be called with the active backend name so they can optionally perform backend-specific filtering. This is extremely useful when using something like Solr cores to maintain language-specific backends, allowing an Index to select the appropriate documents for each language::

    def index_queryset(self, using=None):
        return Post.objects.filter(language=using)

Changes:

* clear_index, update_index and rebuild_index all default to processing *every* backend. ``--using`` may now be provided multiple times to select a subset of the configured backends.
* Added examples to the Multiple Index documentation page
1 parent e0fd6ab commit c5e0ce5

17 files changed

Lines changed: 194 additions & 54 deletions

docs/autocomplete.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ Example (continuing from the tutorial)::
4242
def get_model(self):
4343
return Note
4444

45-
def index_queryset(self):
45+
def index_queryset(self, using=None):
4646
"""Used when the entire index for model is updated."""
4747
return Note.objects.filter(pub_date__lte=datetime.datetime.now())
4848

docs/migration_from_1_to_2.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ A converted Haystack 2.X index should look like::
155155
def get_model(self):
156156
return Note
157157

158-
def index_queryset(self):
158+
def index_queryset(self, using=None):
159159
"""Used when the entire index for model is updated."""
160160
return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now())
161161

docs/multiple_index.rst

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,3 +163,39 @@ via the ``SearchQuerySet.using`` method::
163163
Note that the models a ``SearchQuerySet`` is trying to pull from must all come
164164
from the same index. Haystack is not able to combine search queries against
165165
different indexes.
166+
167+
168+
Custom Index Selection
169+
======================
170+
171+
If a specific backend has been selected, the ``SearchIndex.index_queryset`` and
172+
``SearchIndex.read_queryset`` methods will receive the backend name, giving indexes the
173+
opportunity to customize the returned queryset.
174+
175+
For example, a site which uses separate indexes for recent items and older
176+
content might define ``index_queryset`` to filter the items based on date::
177+
178+
def index_queryset(self, using=None):
179+
qs = Note.objects.all()
180+
archive_limit = datetime.datetime.now() - datetime.timedelta(days=90)
181+
182+
if using == "archive":
183+
return qs.filter(pub_date__lte=archive_limit)
184+
else:
185+
return qs.filter(pub_date__gte=archive_limit)
186+
187+
188+
Multi-lingual Content
189+
---------------------
190+
191+
Most search engines require you to set the language at the index level. For
192+
example, a multi-lingual site using Solr can use `multiple cores <http://wiki.apache.org/solr/CoreAdmin>`_ with corresponding Haystack
193+
backends named after each language. Under this scenario, queries are simple::
194+
195+
sqs = SearchQuerySet().using(lang).auto_query(…)
196+
197+
During index updates, the Index's ``index_queryset`` method will need to filter
198+
the items to avoid sending the wrong content to the search engine::
199+
200+
def index_queryset(self, using=None):
201+
return Post.objects.filter(language=using)

docs/searchindex_api.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ For the impatient::
3434
def get_model(self):
3535
return Note
3636

37-
def index_queryset(self):
37+
def index_queryset(self, using=None):
3838
"Used when the entire index for model is updated."
3939
return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now())
4040

@@ -386,7 +386,7 @@ This method is required & you must override it to return the correct class.
386386
``index_queryset``
387387
------------------
388388

389-
.. method:: SearchIndex.index_queryset(self)
389+
.. method:: SearchIndex.index_queryset(self, using=None)
390390

391391
Get the default QuerySet to index when doing a full update.
392392

@@ -395,7 +395,7 @@ Subclasses can override this method to avoid indexing certain objects.
395395
``read_queryset``
396396
-----------------
397397

398-
.. method:: SearchIndex.read_queryset(self)
398+
.. method:: SearchIndex.read_queryset(self, using=None)
399399

400400
Get the default QuerySet for read actions.
401401

@@ -609,7 +609,7 @@ For the impatient::
609609
fields = ['user', 'pub_date']
610610

611611
# Note that regular ``SearchIndex`` methods apply.
612-
def index_queryset(self):
612+
def index_queryset(self, using=None):
613613
"Used when the entire index for model is updated."
614614
return Note.objects.filter(pub_date__lte=datetime.datetime.now())
615615

docs/tutorial.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ Haystack to automatically pick it up. The ``NoteIndex`` should look like::
221221
def get_model(self):
222222
return Note
223223

224-
def index_queryset(self):
224+
def index_queryset(self, using=None):
225225
"""Used when the entire index for model is updated."""
226226
return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now())
227227

example_project/regular_app/search_indexes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class DogIndex(indexes.SearchIndex, indexes.Indexable):
1919
def get_model(self):
2020
return Dog
2121

22-
def index_queryset(self):
22+
def index_queryset(self, using=None):
2323
return self.get_model().objects.filter(public=True)
2424

2525
def prepare_toys(self, obj):

haystack/indexes.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ class NoteIndex(indexes.SearchIndex, indexes.Indexable):
7676
def get_model(self):
7777
return Note
7878
79-
def index_queryset(self):
79+
def index_queryset(self, using=None):
8080
return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now())
8181
8282
"""
@@ -102,24 +102,24 @@ def get_model(self):
102102
"""
103103
raise NotImplementedError("You must provide a 'model' method for the '%r' index." % self)
104104

105-
def index_queryset(self):
105+
def index_queryset(self, using=None):
106106
"""
107107
Get the default QuerySet to index when doing a full update.
108108
109109
Subclasses can override this method to avoid indexing certain objects.
110110
"""
111111
return self.get_model()._default_manager.all()
112112

113-
def read_queryset(self):
113+
def read_queryset(self, using=None):
114114
"""
115115
Get the default QuerySet for read actions.
116116
117117
Subclasses can override this method to work with other managers.
118118
Useful when working with default managers that filter some objects.
119119
"""
120-
return self.index_queryset()
120+
return self.index_queryset(using=using)
121121

122-
def build_queryset(self, start_date=None, end_date=None):
122+
def build_queryset(self, using=None, start_date=None, end_date=None):
123123
"""
124124
Get the default QuerySet to index when doing an index update.
125125
@@ -154,7 +154,7 @@ def build_queryset(self, start_date=None, end_date=None):
154154
warnings.warn("'SearchIndex.get_queryset' was deprecated in Haystack v2. Please rename the method 'index_queryset'.")
155155
index_qs = self.get_queryset()
156156
else:
157-
index_qs = self.index_queryset()
157+
index_qs = self.index_queryset(using=using)
158158

159159
if not hasattr(index_qs, 'filter'):
160160
raise ImproperlyConfigured("The '%r' class must return a 'QuerySet' in the 'index_queryset' method." % self)
Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from optparse import make_option
22
import sys
3+
34
from django.core.management.base import BaseCommand
4-
from haystack.constants import DEFAULT_ALIAS
55

66

77
class Command(BaseCommand):
@@ -10,35 +10,41 @@ class Command(BaseCommand):
1010
make_option('--noinput', action='store_false', dest='interactive', default=True,
1111
help='If provided, no prompts will be issued to the user and the data will be wiped out.'
1212
),
13-
make_option("-u", "--using", action="store", type="string", dest="using", default=DEFAULT_ALIAS,
14-
help='If provided, chooses a connection to work with.'
13+
make_option("-u", "--using", action="append", dest="using",
14+
default=[],
15+
help='Update only the named backend (can be used multiple times). '
16+
'By default all backends will be updated.'
1517
),
1618
)
1719
option_list = BaseCommand.option_list + base_options
18-
20+
1921
def handle(self, **options):
2022
"""Clears out the search index completely."""
2123
from haystack import connections
2224
self.verbosity = int(options.get('verbosity', 1))
23-
self.using = options.get('using')
24-
25+
26+
using = options.get('using')
27+
if not using:
28+
using = connections.connections_info.keys()
29+
2530
if options.get('interactive', True):
2631
print
27-
print "WARNING: This will irreparably remove EVERYTHING from your search index in connection '%s'." % self.using
32+
print "WARNING: This will irreparably remove EVERYTHING from your search index in connection '%s'." % "', '".join(using)
2833
print "Your choices after this are to restore from backups or rebuild via the `rebuild_index` command."
29-
34+
3035
yes_or_no = raw_input("Are you sure you wish to continue? [y/N] ")
3136
print
32-
37+
3338
if not yes_or_no.lower().startswith('y'):
3439
print "No action taken."
3540
sys.exit()
36-
41+
3742
if self.verbosity >= 1:
3843
print "Removing all documents from your index because you said so."
39-
40-
backend = connections[self.using].get_backend()
41-
backend.clear()
42-
44+
45+
for backend_name in using:
46+
backend = connections[backend_name].get_backend()
47+
backend.clear()
48+
4349
if self.verbosity >= 1:
4450
print "All documents removed."

haystack/management/commands/update_index.py

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from datetime import timedelta
22
from optparse import make_option
3+
import logging
34
import os
4-
import warnings
55

66
from django import db
77
from django.conf import settings
@@ -11,7 +11,6 @@
1111
from django.utils.encoding import smart_str
1212

1313
from haystack import connections as haystack_connections
14-
from haystack.constants import DEFAULT_ALIAS
1514
from haystack.query import SearchQuerySet
1615

1716
try:
@@ -71,9 +70,9 @@ def do_update(backend, index, qs, start, end, total, verbosity=1):
7170

7271
if verbosity >= 2:
7372
if hasattr(os, 'getppid') and os.getpid() == os.getppid():
74-
print " indexed %s - %d of %d." % (start+1, end, total)
73+
print " indexed %s - %d of %d." % (start + 1, end, total)
7574
else:
76-
print " indexed %s - %d of %d (by %s)." % (start+1, end, total, os.getpid())
75+
print " indexed %s - %d of %d (by %s)." % (start + 1, end, total, os.getpid())
7776

7877
# FIXME: Get the right backend.
7978
backend.update(index, current_qs)
@@ -121,8 +120,10 @@ class Command(LabelCommand):
121120
make_option('-r', '--remove', action='store_true', dest='remove',
122121
default=False, help='Remove objects from the index that are no longer present in the database.'
123122
),
124-
make_option("-u", "--using", action="store", type="string", dest="using", default=DEFAULT_ALIAS,
125-
help='If provided, chooses a connection to work with.'
123+
make_option("-u", "--using", action="append", dest="using",
124+
default=[],
125+
help='Update only the named backend (can be used multiple times). '
126+
'By default all backends will be updated.'
126127
),
127128
make_option('-k', '--workers', action='store', dest='workers',
128129
default=0, type='int',
@@ -137,9 +138,11 @@ def handle(self, *items, **options):
137138
self.start_date = None
138139
self.end_date = None
139140
self.remove = options.get('remove', False)
140-
self.using = options.get('using')
141141
self.workers = int(options.get('workers', 0))
142-
self.backend = haystack_connections[self.using].get_backend()
142+
143+
self.backends = options.get('using')
144+
if not self.backends:
145+
self.backends = haystack_connections.connections_info.keys()
143146

144147
age = options.get('age', DEFAULT_AGE)
145148
start_date = options.get('start_date')
@@ -202,9 +205,18 @@ def get_models(self, label):
202205
return [get_model(app_label, model_name)]
203206

204207
def handle_label(self, label, **options):
208+
for using in self.backends:
209+
try:
210+
self.update_backend(label, using)
211+
except:
212+
logging.exception("Error updating %s using %s ", label, using)
213+
raise
214+
215+
def update_backend(self, label, using):
205216
from haystack.exceptions import NotHandled
206217

207-
unified_index = haystack_connections[self.using].get_unified_index()
218+
backend = haystack_connections[using].get_backend()
219+
unified_index = haystack_connections[using].get_unified_index()
208220

209221
if self.workers > 0:
210222
import multiprocessing
@@ -218,17 +230,21 @@ def handle_label(self, label, **options):
218230
continue
219231

220232
if self.workers > 0:
221-
# workers resetting connections leads to references to models / connections getting stale and having their connection disconnected from under them. Resetting before the loop continues and it accesses the ORM makes it better.
233+
# workers resetting connections leads to references to models / connections getting
234+
# stale and having their connection disconnected from under them. Resetting before
235+
# the loop continues and it accesses the ORM makes it better.
222236
db.close_connection()
223237

224-
qs = index.build_queryset(start_date=self.start_date, end_date=self.end_date)
238+
qs = index.build_queryset(using=using, start_date=self.start_date,
239+
end_date=self.end_date)
240+
225241
total = qs.count()
226242

227243
if self.verbosity >= 1:
228244
print "Indexing %d %s." % (total, smart_str(model._meta.verbose_name_plural))
229245

230246
pks_seen = set([smart_str(pk) for pk in qs.values_list('pk', flat=True)])
231-
batch_size = self.batchsize or self.backend.batch_size
247+
batch_size = self.batchsize or backend.batch_size
232248

233249
if self.workers > 0:
234250
ghetto_queue = []
@@ -237,9 +253,9 @@ def handle_label(self, label, **options):
237253
end = min(start + batch_size, total)
238254

239255
if self.workers == 0:
240-
do_update(self.backend, index, qs, start, end, total, self.verbosity)
256+
do_update(backend, index, qs, start, end, total, self.verbosity)
241257
else:
242-
ghetto_queue.append(('do_update', model, start, end, total, self.using, self.start_date, self.end_date, self.verbosity))
258+
ghetto_queue.append(('do_update', model, start, end, total, using, self.start_date, self.end_date, self.verbosity))
243259

244260
if self.workers > 0:
245261
pool = multiprocessing.Pool(self.workers)
@@ -261,9 +277,9 @@ def handle_label(self, label, **options):
261277
upper_bound = start + batch_size
262278

263279
if self.workers == 0:
264-
do_remove(self.backend, index, model, pks_seen, start, upper_bound)
280+
do_remove(backend, index, model, pks_seen, start, upper_bound)
265281
else:
266-
ghetto_queue.append(('do_remove', model, pks_seen, start, upper_bound, self.using, self.verbosity))
282+
ghetto_queue.append(('do_remove', model, pks_seen, start, upper_bound, using, self.verbosity))
267283

268284
if self.workers > 0:
269285
pool = multiprocessing.Pool(self.workers)

haystack/query.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ def post_process_results(self, results):
205205
try:
206206
ui = connections[self.query._using].get_unified_index()
207207
index = ui.get_index(model)
208-
objects = index.read_queryset()
208+
objects = index.read_queryset(using=self.query._using)
209209
loaded_objects[model] = objects.in_bulk(models_pks[model])
210210
except NotHandled:
211211
self.log.warning("Model '%s.%s' not handled by the routers.", self.app_label, self.model_name)

0 commit comments

Comments
 (0)