Skip to content

Commit

Permalink
Merge pull request #71 from dlcs/bugfix/memory_leak
Browse files Browse the repository at this point in the history
Close PIL Image after use
  • Loading branch information
donaldgray authored Jul 17, 2024
2 parents c807c4b + 3b7ac0d commit ed07588
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 14 deletions.
7 changes: 4 additions & 3 deletions src/app/engine/rasterizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def rasterize_pdf(self, subfolder_path):
pdf_source = os.path.join(subfolder_path, "source.pdf")
images = self.__rasterize(pdf_source, subfolder_path, dpi=self._dpi)
images = self.__validate_rasterized_images(images, pdf_source, subfolder_path)
return images
return [i.filename for i in images]

def __rasterize(
self, pdf_source, subfolder_path, start_page=None, last_page=None, dpi=None
Expand Down Expand Up @@ -65,6 +65,7 @@ def __validate_rasterized_images(self, images, pdf_source, subfolder_path):
if res == ResizeResult.SINGLE_PIXEL:
single_pixel_pages.append(idx + 1)
idx += 1
im.close()

if single_pixel_pages:
return self.__rescale_single_page_default_dpi(
Expand All @@ -89,8 +90,8 @@ def __ensure_image_size(self, idx, im: Image):
logger.info(
f"resizing image index {idx} from {w},{h} to {scale_w},{scale_h}"
)
resized = im.resize((scale_w, scale_h), resample=Image.LANCZOS)
resized.save(filename)
with im.resize((scale_w, scale_h), resample=Image.LANCZOS) as resized:
resized.save(filename)
return ResizeResult.RESIZED

return ResizeResult.NOOP
Expand Down
14 changes: 8 additions & 6 deletions src/app/engine/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def __build_bucket_base_url(self):
else:
return f"https://s3.amazonaws.com/{self._bucket_name}"

def put_images(self, images, submission_id, composite_id, customer_id, space_id):
def put_images(
self, image_paths, submission_id, composite_id, customer_id, space_id
):
s3_uris = []

key_prefix = self.__get_key_prefix(
Expand All @@ -36,14 +38,14 @@ def put_images(self, images, submission_id, composite_id, customer_id, space_id)
with tqdm.tqdm(
desc=f"[{submission_id}] Upload images to S3",
unit=" image",
total=len(images),
total=len(image_paths),
) as progress_bar:
with ThreadPoolExecutor(max_workers=self._upload_threads) as executor:
# It's critical that the list of S3 URIs returned by this method is in the
# same order as the list of image paths provided to it. '.map(...)' gives us that,
# whilst '.submit(...)' does not.
for s3_uri in executor.map(
self.__put_image, repeat(key_prefix), images
self.__put_image, repeat(key_prefix), image_paths
):
s3_uris.append(s3_uri)
progress_bar.update(1)
Expand All @@ -52,8 +54,8 @@ def put_images(self, images, submission_id, composite_id, customer_id, space_id)
def __get_key_prefix(self, submission_id, composite_id, customer, space):
return f"{self._object_key_prefix}/{customer}/{space}/{composite_id or submission_id}"

def __put_image(self, key_prefix, image):
object_key = f"{key_prefix}/{os.path.basename(image.filename)}"
with open(image.filename, "rb") as file:
def __put_image(self, key_prefix, image_path):
object_key = f"{key_prefix}/{os.path.basename(image_path)}"
with open(image_path, "rb") as file:
self._client.put_object(Bucket=self._bucket_name, Key=object_key, Body=file)
return f"{self._bucket_base_url}/{object_key}"
10 changes: 5 additions & 5 deletions src/app/engine/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ def process_member(args):
folder_path = None
try:
folder_path = __fetch_origin(member, member.json_data["origin"])
images = __rasterize_composite(member, folder_path)
s3_urls = __push_images_to_dlcs(member, images)
image_paths = __rasterize_composite(member, folder_path)
s3_urls = __push_images_to_dlcs(member, image_paths)
dlcs_requests = __build_dlcs_requests(member, s3_urls)
dlcs_responses = __initiate_dlcs_ingest(member, dlcs_requests, args["auth"])
return __build_result(member, dlcs_responses)
Expand All @@ -49,12 +49,12 @@ def __rasterize_composite(member, pdf_path):
return pdf_rasterizer.rasterize_pdf(pdf_path)


def __push_images_to_dlcs(member, images):
__update_status(member, "PUSHING_TO_DLCS", image_count=len(images))
def __push_images_to_dlcs(member, image_paths):
__update_status(member, "PUSHING_TO_DLCS", image_count=len(image_paths))
composite_id = member.json_data.get("compositeId")
customer = member.collection.customer
space = member.json_data["space"]
return s3_client.put_images(images, member.id, composite_id, customer, space)
return s3_client.put_images(image_paths, member.id, composite_id, customer, space)


def __build_dlcs_requests(member, dlcs_uris):
Expand Down

0 comments on commit ed07588

Please sign in to comment.