Skip to content

Commit

Permalink
Merge pull request #221 from safarijv/commitByDefault
Browse files Browse the repository at this point in the history
Add ability to declare Solr object with default commit policy
  • Loading branch information
acdha authored Sep 5, 2018
2 parents fcbf73e + f8080ec commit d0d482b
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 27 deletions.
18 changes: 18 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,24 @@ If your Solr servers run off https
solr = pysolr.SolrCloud(zookeeper, "collection", verify=path/to/cert.perm)
Custom Commit Policy
~~~~~~~~~~~~~~~~~~~~

.. code-block:: python
# Setup a Solr instance. The trailing slash is optional.
# All request to solr will result in a commit
solr = pysolr.Solr('http://localhost:8983/solr/core_0/', search_handler='/autocomplete', always_commit=True)
``always_commit`` signals to the Solr object to either commit or not commit by default for any solr request.
Be sure to change this to True if you are upgrading from a version where the default policy was alway commit by default.

Functions like ``add`` and ``delete`` also still provide a way to override the default by passing the ``commit`` kwarg.

It is generally good practice to limit the amount of commits to solr.
Excessive commits risk opening too many searcher or using too many system resources.



LICENSE
=======
Expand Down
15 changes: 10 additions & 5 deletions pysolr.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,8 @@ class Solr(object):
solr = pysolr.Solr('http://localhost:8983/solr', results_cls=dict)
"""
def __init__(self, url, decoder=None, timeout=60, results_cls=Results, search_handler='select', use_qt_param=False,

def __init__(self, url, decoder=None, timeout=60, results_cls=Results, search_handler='select', use_qt_param=False, always_commit=False,
auth=None, verify=True):
self.decoder = decoder or json.JSONDecoder()
self.url = url
Expand All @@ -330,6 +331,7 @@ def __init__(self, url, decoder=None, timeout=60, results_cls=Results, search_ha
self.use_qt_param = use_qt_param
self.auth = auth
self.verify = verify
self.always_commit = always_commit

def get_session(self):
if self.session is None:
Expand Down Expand Up @@ -447,7 +449,7 @@ def _mlt(self, params, handler='mlt'):
def _suggest_terms(self, params, handler='terms'):
return self._select(params, handler)

def _update(self, message, clean_ctrl_chars=True, commit=True, softCommit=False, waitFlush=None, waitSearcher=None,
def _update(self, message, clean_ctrl_chars=True, commit=None, softCommit=False, waitFlush=None, waitSearcher=None,
overwrite=None, handler='update'):
"""
Posts the given xml message to http://<self.url>/update and
Expand All @@ -471,6 +473,9 @@ def _update(self, message, clean_ctrl_chars=True, commit=True, softCommit=False,

path = '%s/' % path_handler

if commit is None:
commit = self.always_commit

if commit:
query_vars.append('commit=%s' % str(bool(commit)).lower())
elif softCommit:
Expand Down Expand Up @@ -854,15 +859,15 @@ def _build_doc(self, doc, boost=None, fieldUpdates=None):

return doc_elem

def add(self, docs, boost=None, fieldUpdates=None, commit=True, softCommit=False, commitWithin=None, waitFlush=None,
def add(self, docs, boost=None, fieldUpdates=None, commit=None, softCommit=False, commitWithin=None, waitFlush=None,
waitSearcher=None, overwrite=None, handler='update'):
"""
Adds or updates documents.
Requires ``docs``, which is a list of dictionaries. Each key is the
field name and each value is the value to index.
Optionally accepts ``commit``. Default is ``True``.
Optionally accepts ``commit``. Default is ``None``. None signals to use default
Optionally accepts ``softCommit``. Default is ``False``.
Expand Down Expand Up @@ -912,7 +917,7 @@ def add(self, docs, boost=None, fieldUpdates=None, commit=True, softCommit=False
return self._update(m, commit=commit, softCommit=softCommit, waitFlush=waitFlush, waitSearcher=waitSearcher,
overwrite=overwrite, handler=handler)

def delete(self, id=None, q=None, commit=True, softCommit=False, waitFlush=None, waitSearcher=None, handler='update'):
def delete(self, id=None, q=None, commit=None, softCommit=False, waitFlush=None, waitSearcher=None, handler='update'):
"""
Deletes documents.
Expand Down
112 changes: 90 additions & 22 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import unittest
from io import StringIO
from xml.etree import ElementTree
import random

from pysolr import (NESTED_DOC_KEY, Results, Solr, SolrError, clean_xml_string,
force_bytes, force_unicode, json, safe_urlencode, sanitize,
Expand Down Expand Up @@ -137,7 +138,12 @@ def test_iter(self):
self.assertEqual(to_iter[2], {'id': 3})


class SolrTestCase(unittest.TestCase):
class SolrTestCaseMixin(object):
def get_solr(self, collection, timeout=60, always_commit=False):
return Solr('http://localhost:8983/solr/%s' % collection, timeout=timeout, always_commit=always_commit)


class SolrTestCase(unittest.TestCase, SolrTestCaseMixin):
def setUp(self):
super(SolrTestCase, self).setUp()
self.solr = self.get_solr("core0")
Expand Down Expand Up @@ -219,12 +225,12 @@ def setUp(self):
]

# Clear it.
self.solr.delete(q='*:*')
self.solr.delete(q='*:*', commit=True)

# Index our docs. Yes, this leans on functionality we're going to test
# later & if it's broken, everything will catastrophically fail.
# Such is life.
self.solr.add(self.docs)
self.solr.add(self.docs, commit=True)

# Mock the _send_request method on the solr instance so that we can
# test that custom handlers are called correctly.
Expand All @@ -236,16 +242,17 @@ def assertURLStartsWith(self, URL, path):
# slash handling are caught quickly:
return self.assertEqual(URL, '%s/%s' % (self.solr.url.replace('/core0', ''), path))

def get_solr(self, collection, timeout=60):
return Solr('http://localhost:8983/solr/%s' % collection, timeout=timeout)
def get_solr(self, collection, timeout=60, always_commit=False):
return Solr('http://localhost:8983/solr/%s' % collection, timeout=timeout, always_commit=always_commit)

def test_init(self):
self.assertEqual(self.solr.url, 'http://localhost:8983/solr/core0')
self.assertTrue(isinstance(self.solr.decoder, json.JSONDecoder))
self.assertEqual(self.solr.timeout, 60)

custom_solr = self.get_solr("core0", timeout=17)
custom_solr = self.get_solr("core0", timeout=17, always_commit=True)
self.assertEqual(custom_solr.timeout, 17)
self.assertEqual(custom_solr.always_commit, True)

def test_custom_results_class(self):
solr = Solr('http://localhost:8983/solr/core0', results_cls=dict)
Expand Down Expand Up @@ -607,7 +614,7 @@ def test_add(self):
'id': 'doc_7',
'title': 'Another example doc',
},
])
], commit=True)
# add should default to 'update' handler
args, kwargs = self.solr._send_request.call_args
self.assertTrue(args[1].startswith('update/?'))
Expand All @@ -617,7 +624,7 @@ def test_add(self):

# add should support custom handlers
with self.assertRaises(SolrError):
self.solr.add([], handler='fakehandler')
self.solr.add([], handler='fakehandler', commit=True)
args, kwargs = self.solr._send_request.call_args
self.assertTrue(args[1].startswith('fakehandler'))

Expand All @@ -628,7 +635,7 @@ def test_add_with_boost(self):
boost={'title': 10.0})

self.solr.add([{'id': 'doc_7', 'title': 'Spam doc doc'}],
boost={'title': 0})
boost={'title': 0}, commit=True)

res = self.solr.search('doc')
self.assertEqual(len(res), 5)
Expand All @@ -640,7 +647,7 @@ def test_field_update_inc(self):
updateList = []
for i, doc in enumerate(originalDocs):
updateList.append({'id': doc['id'], 'popularity': 5})
self.solr.add(updateList, fieldUpdates={'popularity': 'inc'})
self.solr.add(updateList, fieldUpdates={'popularity': 'inc'}, commit=True)

updatedDocs = self.solr.search('doc')
self.assertEqual(len(updatedDocs), 3)
Expand All @@ -658,7 +665,7 @@ def test_field_update_set(self):
updateList = []
for i, doc in enumerate(originalDocs):
updateList.append({'id': doc['id'], 'popularity': updated_popularity})
self.solr.add(updateList, fieldUpdates={'popularity': 'set'})
self.solr.add(updateList, fieldUpdates={'popularity': 'set'}, commit=True)

updatedDocs = self.solr.search('doc')
self.assertEqual(len(updatedDocs), 3)
Expand All @@ -681,14 +688,14 @@ def test_field_update_add(self):
'title': 'Multivalued doc 2',
'word_ss': ['charlie', 'delta'],
},
])
], commit=True)

originalDocs = self.solr.search('multivalued')
self.assertEqual(len(originalDocs), 2)
updateList = []
for i, doc in enumerate(originalDocs):
updateList.append({'id': doc['id'], 'word_ss': ['epsilon', 'gamma']})
self.solr.add(updateList, fieldUpdates={'word_ss': 'add'})
self.solr.add(updateList, fieldUpdates={'word_ss': 'add'}, commit=True)

updatedDocs = self.solr.search('multivalued')
self.assertEqual(len(updatedDocs), 2)
Expand All @@ -701,7 +708,7 @@ def test_field_update_add(self):

def test_delete(self):
self.assertEqual(len(self.solr.search('doc')), 3)
self.solr.delete(id='doc_1')
self.solr.delete(id='doc_1', commit=True)
# delete should default to 'update' handler
args, kwargs = self.solr._send_request.call_args
self.assertTrue(args[1].startswith('update/?'))
Expand All @@ -711,16 +718,16 @@ def test_delete(self):
self.assertEqual(len(self.solr.search('type_s:child')), 3)
self.assertEqual(len(self.solr.search('type_s:grandchild')), 1)
self.solr.delete(q='price:[0 TO 15]')
self.solr.delete(q='type_s:parent')
self.solr.delete(q='type_s:parent', commit=True)
# one simple doc should remain
# parent documents were also deleted but children remain as orphans
self.assertEqual(len(self.solr.search('doc')), 1)
self.assertEqual(len(self.solr.search('type_s:parent')), 0)
self.assertEqual(len(self.solr.search('type_s:child')), 3)
self.solr.delete(q='type_s:child OR type_s:grandchild')
self.solr.delete(q='type_s:child OR type_s:grandchild', commit=True)

self.assertEqual(len(self.solr.search('*:*')), 1)
self.solr.delete(q='*:*')
self.solr.delete(q='*:*', commit=True)
self.assertEqual(len(self.solr.search('*:*')), 0)

# Test delete() with `id' being a list.
Expand All @@ -739,12 +746,12 @@ def leaf_doc(doc):
self.assertEqual(len(self.solr.search(leaf_q)), len(to_delete_docs))
# Extract a random doc from the list, to later check it wasn't deleted.
graced_doc_id = to_delete_ids.pop(random.randint(0, len(to_delete_ids) - 1))
self.solr.delete(id=to_delete_ids)
self.solr.delete(id=to_delete_ids, commit=True)
# There should be only one left, our graced id
self.assertEqual(len(self.solr.search(leaf_q)), 1)
self.assertEqual(len(self.solr.search('id:%s' % graced_doc_id)), 1)
# Now we can wipe the graced document too. None should be left.
self.solr.delete(id=graced_doc_id)
self.solr.delete(id=graced_doc_id, commit=True)
self.assertEqual(len(self.solr.search(leaf_q)), 0)

# Can't delete when the list of documents is empty
Expand All @@ -758,7 +765,7 @@ def leaf_doc(doc):

# delete should support custom handlers
with self.assertRaises(SolrError):
self.solr.delete(id='doc_1', handler='fakehandler')
self.solr.delete(id='doc_1', handler='fakehandler', commit=True)
args, kwargs = self.solr._send_request.call_args
self.assertTrue(args[1].startswith('fakehandler'))

Expand All @@ -769,14 +776,29 @@ def test_commit(self):
'id': 'doc_6',
'title': 'Newly added doc',
}
], commit=False)
])
self.assertEqual(len(self.solr.search('doc')), 3)
self.solr.commit()
# commit should default to 'update' handler
args, kwargs = self.solr._send_request.call_args
self.assertTrue(args[1].startswith('update/?'))
self.assertEqual(len(self.solr.search('doc')), 4)

def test_can_handles_default_commit_policy(self):
expected_commits = [False, True, False]
commit_arg = [False, True, None]

for expected_commit, arg in zip(expected_commits, commit_arg):
self.solr.add([
{
'id': 'doc_6',
'title': 'Newly added doc',
}
], commit=arg)
args, _ = self.solr._send_request.call_args
committing_in_url = 'commit' in args[1]
self.assertEqual(expected_commit, committing_in_url)

def test_overwrite(self):
self.assertEqual(len(self.solr.search('id:doc_overwrite_1')), 0)
self.solr.add([
Expand All @@ -788,7 +810,7 @@ def test_overwrite(self):
'id': 'doc_overwrite_1',
'title': 'Kim is more awesome.',
}
], overwrite=False)
], overwrite=False, commit=True)
self.assertEqual(len(self.solr.search('id:doc_overwrite_1')), 2)

# commit should support custom handlers
Expand Down Expand Up @@ -941,3 +963,49 @@ def test_request_handler(self):
# reset the values to what they were before the test
self.solr.use_qt_param = before_test_use_qt_param
self.solr.search_handler = before_test_search_handler


class SolrCommitByDefaultTestCase(unittest.TestCase, SolrTestCaseMixin):

def setUp(self):
super(SolrCommitByDefaultTestCase, self).setUp()
self.solr = self.get_solr("core0", always_commit=True)
self.docs = [
{
'id': 'doc_1',
'title': 'Newly added doc',
},
{
'id': 'doc_2',
'title': 'Another example doc',
},
]

def test_does_not_require_commit(self):
# add should not require commit arg
self.solr.add(self.docs)

self.assertEqual(len(self.solr.search('doc')), 2)
self.assertEqual(len(self.solr.search('example')), 1)

# update should not require commit arg
self.docs[0]['title'] = "Updated Doc"
self.docs[1]['title'] = "Another example updated doc"
self.solr.add(self.docs, fieldUpdates={'title': 'set'})
self.assertEqual(len(self.solr.search('updated')), 2)
self.assertEqual(len(self.solr.search('example')), 1)

# delete should not require commit arg
self.solr.delete(q='*:*')
self.assertEqual(len(self.solr.search('*')), 0)

def test_can_handles_default_commit_policy(self):
self.solr._send_request = Mock(wraps=self.solr._send_request)
expected_commits = [False, True, True]
commit_arg = [False, True, None]

for expected_commit, arg in zip(expected_commits, commit_arg):
self.solr.add(self.docs, commit=arg)
args, _ = self.solr._send_request.call_args
committing_in_url = 'commit' in args[1]
self.assertEqual(expected_commit, committing_in_url)

0 comments on commit d0d482b

Please sign in to comment.