Skip to content

Commit

Permalink
Profile and optimize the permessage-deflate extension.
Browse files Browse the repository at this point in the history
dataclasses.replace is surprisingly expensive.

zlib functions make up the bulk of the cost now.
  • Loading branch information
aaugustin committed Sep 26, 2024
1 parent a0b20f0 commit baadc33
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 8 deletions.
2 changes: 1 addition & 1 deletion experiments/compression/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@ def main(corpus):

if __name__ == "__main__":
if len(sys.argv) < 2:
print(f"Usage: {sys.argv[0]} [directory]")
print(f"Usage: {sys.argv[0]} <directory>")
sys.exit(2)
main(pathlib.Path(sys.argv[1]))
45 changes: 45 additions & 0 deletions experiments/profiling/compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/usr/bin/env python

"""
Profile the permessage-deflate extension.
Usage::
$ pip install line_profiler
$ python experiments/compression/corpus.py experiments/compression/corpus
$ PYTHONPATH=src python -m kernprof \
--line-by-line \
--prof-mod src/websockets/extensions/permessage_deflate.py \
--view \
experiments/profiling/compression.py experiments/compression/corpus 12 5 6
"""

import pathlib
import sys

from websockets.extensions.permessage_deflate import PerMessageDeflate
from websockets.frames import OP_TEXT, Frame


def compress_and_decompress(corpus, max_window_bits, memory_level, level):
    """
    Round-trip every item of ``corpus`` through one permessage-deflate extension.

    A single ``PerMessageDeflate`` instance is built with context takeover
    left enabled in both directions and the same ``max_window_bits`` applied
    to the local and remote sides. ``memory_level`` and ``level`` are passed
    through as the ``memLevel`` and ``level`` compression settings.

    Each item is wrapped in a text frame, encoded, then decoded. The decoded
    frames are discarded: this function exists to exercise the extension
    under a profiler, not to produce a result.

    """
    zlib_settings = {"memLevel": memory_level, "level": level}
    codec = PerMessageDeflate(
        remote_no_context_takeover=False,
        local_no_context_takeover=False,
        remote_max_window_bits=max_window_bits,
        local_max_window_bits=max_window_bits,
        compress_settings=zlib_settings,
    )
    for payload in corpus:
        # Same instance for every message, so all of them share its state.
        compressed = codec.encode(Frame(OP_TEXT, payload))
        codec.decode(compressed)


if __name__ == "__main__":
    # The first argument is mandatory and must name an existing directory
    # containing the corpus files to compress.
    if len(sys.argv) < 2 or not pathlib.Path(sys.argv[1]).is_dir():
        print(
            f"Usage: {sys.argv[0]} <directory> "
            "[<max_window_bits>] [<mem_level>] [<level>]"
        )
        # Exit after showing usage; without this, the code below would go on
        # to index sys.argv[1] or iterate a non-directory and crash.
        sys.exit(2)
    # Load the whole corpus up front so file I/O stays out of the profiled loop.
    corpus = [file.read_bytes() for file in pathlib.Path(sys.argv[1]).iterdir()]
    # Optional positional tuning parameters, each with a default.
    max_window_bits = int(sys.argv[2]) if len(sys.argv) > 2 else 12
    memory_level = int(sys.argv[3]) if len(sys.argv) > 3 else 5
    level = int(sys.argv[4]) if len(sys.argv) > 4 else 6
    compress_and_decompress(corpus, max_window_bits, memory_level, level)
29 changes: 22 additions & 7 deletions src/websockets/extensions/permessage_deflate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import dataclasses
import zlib
from collections.abc import Sequence
from typing import Any
Expand Down Expand Up @@ -120,7 +119,6 @@ def decode(
else:
if not frame.rsv1:
return frame
frame = dataclasses.replace(frame, rsv1=False)
if not frame.fin:
self.decode_cont_data = True

Expand All @@ -146,7 +144,15 @@ def decode(
if frame.fin and self.remote_no_context_takeover:
del self.decoder

return dataclasses.replace(frame, data=data)
return frames.Frame(
frame.opcode,
data,
frame.fin,
# Unset the rsv1 flag on the first frame of a compressed message.
False,
frame.rsv2,
frame.rsv3,
)

def encode(self, frame: frames.Frame) -> frames.Frame:
"""
Expand All @@ -161,8 +167,6 @@ def encode(self, frame: frames.Frame) -> frames.Frame:
# data" flag similar to "decode continuation data" at this time.

if frame.opcode is not frames.OP_CONT:
# Set the rsv1 flag on the first frame of a compressed message.
frame = dataclasses.replace(frame, rsv1=True)
# Re-initialize per-message encoder.
if self.local_no_context_takeover:
self.encoder = zlib.compressobj(
Expand All @@ -172,14 +176,25 @@ def encode(self, frame: frames.Frame) -> frames.Frame:

# Compress data.
data = self.encoder.compress(frame.data) + self.encoder.flush(zlib.Z_SYNC_FLUSH)
if frame.fin and data.endswith(_EMPTY_UNCOMPRESSED_BLOCK):
if frame.fin and data[-4:] == _EMPTY_UNCOMPRESSED_BLOCK:
# Making a copy is faster than memoryview(a)[:-4] until about 2kB.
# On larger messages, it's slower but profiling shows that it's
# marginal compared to compress() and flush(). Keep it simple.
data = data[:-4]

# Allow garbage collection of the encoder if it won't be reused.
if frame.fin and self.local_no_context_takeover:
del self.encoder

return dataclasses.replace(frame, data=data)
return frames.Frame(
frame.opcode,
data,
frame.fin,
# Set the rsv1 flag on the first frame of a compressed message.
frame.opcode is not frames.OP_CONT,
frame.rsv2,
frame.rsv3,
)


def _build_parameters(
Expand Down

0 comments on commit baadc33

Please sign in to comment.