Skip to content

Commit

Permalink
Merge pull request #3315 from freelawproject/3309-es-bulk-indexing-in…
Browse files Browse the repository at this point in the history
…-batches

3309 Performs ES bulk indexing in batches to avoid timeouts.
  • Loading branch information
mlissner authored Oct 28, 2023
2 parents 38e3702 + 4714f25 commit f429231
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 4 deletions.
16 changes: 12 additions & 4 deletions cl/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,9 @@ def update_children_docs_by_query(
ubq = (
UpdateByQuery(using=client, index=es_document._index._name)
.query(s.to_dict()["query"])
.params(slices="auto")
.params(
slices=es_document._index._settings["number_of_shards"]
) # Set slices equal to the number of shards.
)

script_lines = []
Expand Down Expand Up @@ -842,8 +844,9 @@ def index_parent_and_child_docs(
"_op_type": "index",
"_index": parent_es_document._index._name,
}

child_docs_to_index = []
for child in child_docs.iterator():
for i, child in enumerate(child_docs.iterator()):
child_doc = child_es_document().prepare(child)
child_params = {
"_id": getattr(ES_CHILD_ID(child.pk), child_id_property),
Expand All @@ -853,8 +856,13 @@ def index_parent_and_child_docs(
child_doc.update(child_params)
child_docs_to_index.append(child_doc)

# Perform bulk indexing for child documents
bulk(client, child_docs_to_index)
if i % settings.ELASTICSEARCH_BULK_BATCH_SIZE == 0:
bulk(client, child_docs_to_index)
child_docs_to_index.clear()

# Index the last batch
if child_docs_to_index:
bulk(client, child_docs_to_index)

if settings.ELASTICSEARCH_DSL_AUTO_REFRESH:
# Set auto-refresh, used for testing.
Expand Down
7 changes: 7 additions & 0 deletions cl/settings/third_party/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,10 @@
# Task rate for throttled Elasticsearch work, overridable through the
# ELASTICSEARCH_THROTTLING_TASK_RATE environment variable.
# NOTE(review): "30/m" looks like a "<count>/<period>" rate string (30 per
# minute) -- confirm the format against the task that consumes it.
ELASTICSEARCH_THROTTLING_TASK_RATE = env(
    "ELASTICSEARCH_THROTTLING_TASK_RATE", default="30/m"
)

################################
# ES bulk indexing batch size #
################################
# Batch size for ES bulk indexing, overridable through the
# ELASTICSEARCH_BULK_BATCH_SIZE environment variable (default 200).
#
# The value is coerced to int because it is used in integer arithmetic
# (`i % settings.ELASTICSEARCH_BULK_BATCH_SIZE`); an uncast value read from
# the environment would be a string and make that expression raise TypeError.
# NOTE(review): assumes `env` returns the raw environment string when no cast
# is given -- confirm against the `env` helper defined earlier in this module.
ELASTICSEARCH_BULK_BATCH_SIZE = int(
    env("ELASTICSEARCH_BULK_BATCH_SIZE", default=200)
)

0 comments on commit f429231

Please sign in to comment.