diff --git a/.gitignore b/.gitignore index 324e7706..d8e6697a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,7 @@ .tox build/ compliance/reports/ -experiments/compression/corpus.pkl +experiments/compression/corpus/ dist/ docs/_build/ htmlcov/ diff --git a/docs/topics/compression.rst b/docs/topics/compression.rst index eaf99070..be263e56 100644 --- a/docs/topics/compression.rst +++ b/docs/topics/compression.rst @@ -7,37 +7,36 @@ Most WebSocket servers exchange JSON messages because they're convenient to parse and serialize in a browser. These messages contain text data and tend to be repetitive. -This makes the stream of messages highly compressible. Enabling compression +This makes the stream of messages highly compressible. Compressing messages can reduce network traffic by more than 80%. -There's a standard for compressing messages. :rfc:`7692` defines WebSocket -Per-Message Deflate, a compression extension based on the Deflate_ algorithm. +websockets implements WebSocket Per-Message Deflate, a compression extension +based on the Deflate_ algorithm specified in :rfc:`7692`. .. _Deflate: https://en.wikipedia.org/wiki/Deflate -Configuring compression ------------------------ +:func:`~websockets.asyncio.client.connect` and +:func:`~websockets.asyncio.server.serve` enable compression by default because +the reduction in network bandwidth is usually worth the additional memory and +CPU cost. -:func:`~websockets.client.connect` and :func:`~websockets.server.serve` enable -compression by default because the reduction in network bandwidth is usually -worth the additional memory and CPU cost. -If you want to disable compression, set ``compression=None``:: +Configuring compression +----------------------- - import websockets +To disable compression, set ``compression=None``:: - websockets.connect(..., compression=None) + connect(..., compression=None, ...) - websockets.serve(..., compression=None) + serve(..., compression=None, ...) -If you want to customize compression settings, you can enable the Per-Message -Deflate extension explicitly with :class:`ClientPerMessageDeflateFactory` or +To customize compression settings, enable the Per-Message Deflate extension +explicitly with :class:`ClientPerMessageDeflateFactory` or :class:`ServerPerMessageDeflateFactory`:: - import websockets from websockets.extensions import permessage_deflate - websockets.connect( + connect( ..., extensions=[ permessage_deflate.ClientPerMessageDeflateFactory( @@ -46,9 +45,10 @@ Deflate extension explicitly with :class:`ClientPerMessageDeflateFactory` or compress_settings={"memLevel": 4}, ), ], + ..., ) - websockets.serve( + serve( ..., extensions=[ permessage_deflate.ServerPerMessageDeflateFactory( @@ -57,13 +57,14 @@ Deflate extension explicitly with :class:`ClientPerMessageDeflateFactory` or compress_settings={"memLevel": 4}, ), ], + ..., ) The Window Bits and Memory Level values in these examples reduce memory usage at the expense of compression rate. -Compression settings --------------------- +Compression parameters +---------------------- When a client and a server enable the Per-Message Deflate extension, they negotiate two parameters to guarantee compatibility between compression and @@ -81,9 +82,9 @@ and memory usage for both sides. This requires retaining the compression context and state between messages, which increases the memory footprint of a connection. -* **Window Bits** controls the size of the compression context. It must be - an integer between 9 (lowest memory usage) and 15 (best compression). 
- Setting it to 8 is possible but rejected by some versions of zlib. +* **Window Bits** controls the size of the compression context. It must be an + integer between 9 (lowest memory usage) and 15 (best compression). Setting it + to 8 is possible but rejected by some versions of zlib and not very useful. On the server side, websockets defaults to 12. Specifically, the compression window size (server to client) is always 12 while the decompression window @@ -94,9 +95,8 @@ and memory usage for both sides. has the same effect as defaulting to 15. :mod:`zlib` offers additional parameters for tuning compression. They control -the trade-off between compression rate, memory usage, and CPU usage only for -compressing. They're transparent for decompressing. Unless mentioned -otherwise, websockets inherits defaults of :func:`~zlib.compressobj`. +the trade-off between compression rate, memory usage, and CPU usage for +compressing. They're transparent for decompressing. * **Memory Level** controls the size of the compression state. It must be an integer between 1 (lowest memory usage) and 9 (best compression). @@ -108,87 +108,82 @@ otherwise, websockets inherits defaults of :func:`~zlib.compressobj`. * **Compression Level** controls the effort to optimize compression. It must be an integer between 1 (lowest CPU usage) and 9 (best compression). + websockets relies on the default value chosen by :func:`~zlib.compressobj`, + ``Z_DEFAULT_COMPRESSION``. + * **Strategy** selects the compression strategy. The best choice depends on the type of data being compressed. + websockets relies on the default value chosen by :func:`~zlib.compressobj`, + ``Z_DEFAULT_STRATEGY``. -Tuning compression ------------------- +To customize these parameters, add keyword arguments for +:func:`~zlib.compressobj` in ``compress_settings``. -For servers -........... +Default settings for servers +---------------------------- By default, websockets enables compression with conservative settings that optimize memory usage at the cost of a slightly worse compression rate: -Window Bits = 12 and Memory Level = 5. This strikes a good balance for small +Window Bits = 12 and Memory Level = 5. This strikes a good balance for small messages that are typical of WebSocket servers. -Here's how various compression settings affect memory usage of a single -connection on a 64-bit system, as well a benchmark of compressed size and -compression time for a corpus of small JSON documents. +Here's an example of how compression settings affect memory usage per +connection, compressed size, and compression time for a corpus of JSON +documents. =========== ============ ============ ================ ================ Window Bits Memory Level Memory usage Size vs. default Time vs. default =========== ============ ============ ================ ================ -15 8 322 KiB -4.0% +15% -14 7 178 KiB -2.6% +10% -13 6 106 KiB -1.4% +5% -**12** **5** **70 KiB** **=** **=** -11 4 52 KiB +3.7% -5% -10 3 43 KiB +90% +50% -9 2 39 KiB +160% +100% -— — 19 KiB +452% — +15 8 316 KiB -10% +10% +14 7 172 KiB -7% +5% +13 6 100 KiB -3% +2% +**12** **5** **64 KiB** **=** **=** +11 4 46 KiB +10% +4% +10 3 37 KiB +70% +40% +9 2 33 KiB +130% +90% +— — 14 KiB +350% — =========== ============ ============ ================ ================ Window Bits and Memory Level don't have to move in lockstep. However, other combinations don't yield significantly better results than those shown above. 
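+
+As a quick sanity check — an illustrative sketch, not part of websockets, and
+``deflate_memory`` below is a throwaway helper rather than a real API — you can
+evaluate the theoretical memory formulas given in the Decompression section
+below for the combinations in this table::
+
+    def deflate_memory(wbits, mem_level):
+        # Compression context and state, plus the decompression window.
+        # Small allocations are ignored.
+        return (1 << (wbits + 2)) + (1 << (mem_level + 9)) + (1 << wbits)
+
+    for wbits, mem_level in [(15, 8), (12, 5), (9, 2)]:
+        print(wbits, mem_level, deflate_memory(wbits, mem_level) // 1024, "KiB")
+
+These estimates fall short of the measured column by a roughly constant amount
+of per-connection overhead that doesn't depend on compression settings.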
-Compressed size and compression time depend heavily on the kind of messages
-exchanged by the application so this example may not apply to your use case.
-
-You can adapt `compression/benchmark.py`_ by creating a list of typical
-messages and passing it to the ``_run`` function.
-
-Window Bits = 11 and Memory Level = 4 looks like the sweet spot in this table.
-
-websockets defaults to Window Bits = 12 and Memory Level = 5 to stay away from
-Window Bits = 10 or Memory Level = 3 where performance craters, raising doubts
-on what could happen at Window Bits = 11 and Memory Level = 4 on a different
+websockets defaults to Window Bits = 12 and Memory Level = 5 to stay away from
+Window Bits = 10 or Memory Level = 3 where performance craters, raising doubts
+about what could happen at Window Bits = 11 and Memory Level = 4 on a different
 corpus. Defaults must be safe for all applications, hence a more conservative
 choice.

-.. _compression/benchmark.py: https://github.com/python-websockets/websockets/blob/main/experiments/compression/benchmark.py
+Optimizing settings
+-------------------

-The benchmark focuses on compression because it's more expensive than
-decompression. Indeed, leaving aside small allocations, theoretical memory
-usage is:
+Compressed size and compression time depend on the structure of messages
+exchanged by your application. As a consequence, default settings may not be
+optimal for your use case.

-* ``(1 << (windowBits + 2)) + (1 << (memLevel + 9))`` for compression;
-* ``1 << windowBits`` for decompression.
+To compare how various compression settings perform for your use case:

-CPU usage is also higher for compression than decompression.
+1. Create a corpus of typical messages in a directory, one message per file.
+2. Run the `compression/benchmark.py`_ script, passing the directory as an
+   argument.

-While it's always possible for a server to use a smaller window size for
-compressing outgoing messages, using a smaller window size for decompressing
-incoming messages requires collaboration from clients.
+The script measures compressed size and compression time for all combinations of
+Window Bits and Memory Level. It outputs two tables with absolute values and two
+tables with values relative to websockets' default settings.

-When a client doesn't support configuring the size of its compression window,
-websockets enables compression with the largest possible decompression window.
-In most use cases, this is more efficient than disabling compression both ways.
+Pick your favorite settings in these tables and configure them as shown above.

-If you are very sensitive to memory usage, you can reverse this behavior by
-setting the ``require_client_max_window_bits`` parameter of
-:class:`ServerPerMessageDeflateFactory` to ``True``.
+.. _compression/benchmark.py: https://github.com/python-websockets/websockets/blob/main/experiments/compression/benchmark.py

-For clients
-...........
+Default settings for clients
+----------------------------

-By default, websockets enables compression with Memory Level = 5 but leaves
+By default, websockets enables compression with Memory Level = 5 but leaves
 the Window Bits setting up to the server.

-There's two good reasons and one bad reason for not optimizing the client side
-like the server side:
+There are two good reasons and one bad reason for not optimizing Window Bits
+on the client side as it is on the server side:

 1. If the maintainers of a server configured some optimized settings, we don't
    want to override them with more restrictive settings.
@@ -196,8 +191,9 @@ like the server side: 2. Optimizing memory usage doesn't matter very much for clients because it's uncommon to open thousands of client connections in a program. -3. On a more pragmatic note, some servers misbehave badly when a client - configures compression settings. `AWS API Gateway`_ is the worst offender. +3. On a more pragmatic and annoying note, some servers misbehave badly when a + client configures compression settings. `AWS API Gateway`_ is the worst + offender. .. _AWS API Gateway: https://github.com/python-websockets/websockets/issues/1065 @@ -207,6 +203,29 @@ like the server side: Until the ecosystem levels up, interoperability with buggy servers seems more valuable than optimizing memory usage. +Decompression +------------- + +The discussion above focuses on compression because it's more expensive than +decompression. Indeed, leaving aside small allocations, theoretical memory +usage is: + +* ``(1 << (windowBits + 2)) + (1 << (memLevel + 9))`` for compression; +* ``1 << windowBits`` for decompression. + +CPU usage is also higher for compression than decompression. + +While it's always possible for a server to use a smaller window size for +compressing outgoing messages, using a smaller window size for decompressing +incoming messages requires collaboration from clients. + +When a client doesn't support configuring the size of its compression window, +websockets enables compression with the largest possible decompression window. +In most use cases, this is more efficient than disabling compression both ways. + +If you are very sensitive to memory usage, you can reverse this behavior by +setting the ``require_client_max_window_bits`` parameter of +:class:`ServerPerMessageDeflateFactory` to ``True``. Further reading --------------- @@ -216,7 +235,7 @@ settings affect memory usage and how to optimize them. .. _blog post by Ilya Grigorik: https://www.igvita.com/2013/11/27/configuring-and-optimizing-websocket-compression/ -This `experiment by Peter Thorson`_ recommends Window Bits = 11 and Memory -Level = 4 for optimizing memory usage. +This `experiment by Peter Thorson`_ recommends Window Bits = 11 and Memory +Level = 4 for optimizing memory usage. .. _experiment by Peter Thorson: https://mailarchive.ietf.org/arch/msg/hybi/F9t4uPufVEy8KBLuL36cZjCmM_Y/ diff --git a/docs/topics/design.rst b/docs/topics/design.rst index f164d299..cc65e6a7 100644 --- a/docs/topics/design.rst +++ b/docs/topics/design.rst @@ -488,55 +488,6 @@ they're drained. That's why all APIs that write frames are asynchronous. Of course, it's still possible for an application to create its own unbounded buffers and break the backpressure. Be careful with queues. - -.. _buffers: - -Buffers -------- - -.. note:: - - This section discusses buffers from the perspective of a server but it - applies to clients as well. - -An asynchronous systems works best when its buffers are almost always empty. - -For example, if a client sends data too fast for a server, the queue of -incoming messages will be constantly full. The server will always be 32 -messages (by default) behind the client. This consumes memory and increases -latency for no good reason. The problem is called bufferbloat. - -If buffers are almost always full and that problem cannot be solved by adding -capacity — typically because the system is bottlenecked by the output and -constantly regulated by backpressure — reducing the size of buffers minimizes -negative consequences. - -By default websockets has rather high limits. 
You can decrease them according -to your application's characteristics. - -Bufferbloat can happen at every level in the stack where there is a buffer. -For each connection, the receiving side contains these buffers: - -- OS buffers: tuning them is an advanced optimization. -- :class:`~asyncio.StreamReader` bytes buffer: the default limit is 64 KiB. - You can set another limit by passing a ``read_limit`` keyword argument to - :func:`~client.connect()` or :func:`~server.serve`. -- Incoming messages :class:`~collections.deque`: its size depends both on - the size and the number of messages it contains. By default the maximum - UTF-8 encoded size is 1 MiB and the maximum number is 32. In the worst case, - after UTF-8 decoding, a single message could take up to 4 MiB of memory and - the overall memory consumption could reach 128 MiB. You should adjust these - limits by setting the ``max_size`` and ``max_queue`` keyword arguments of - :func:`~client.connect()` or :func:`~server.serve` according to your - application's requirements. - -For each connection, the sending side contains these buffers: - -- :class:`~asyncio.StreamWriter` bytes buffer: the default size is 64 KiB. - You can set another limit by passing a ``write_limit`` keyword argument to - :func:`~client.connect()` or :func:`~server.serve`. -- OS buffers: tuning them is an advanced optimization. - Concurrency ----------- diff --git a/docs/topics/memory.rst b/docs/topics/memory.rst index e44247a7..efbcbb83 100644 --- a/docs/topics/memory.rst +++ b/docs/topics/memory.rst @@ -1,5 +1,5 @@ -Memory usage -============ +Memory and buffers +================== .. currentmodule:: websockets @@ -9,40 +9,148 @@ memory usage can become a bottleneck. Memory usage of a single connection is the sum of: -1. the baseline amount of memory websockets requires for each connection, -2. the amount of data held in buffers before the application processes it, -3. any additional memory allocated by the application itself. +1. the baseline amount of memory that websockets uses for each connection; +2. the amount of memory needed by your application code; +3. the amount of data held in buffers. -Baseline --------- +Connection +---------- -Compression settings are the main factor affecting the baseline amount of -memory used by each connection. +Compression settings are the primary factor affecting how much memory each +connection uses. -With websockets' defaults, on the server side, a single connections uses -70 KiB of memory. +The :mod:`asyncio` implementation with default settings uses 64 KiB of memory +for each connection. + +You can reduce memory usage to 14 KiB per connection if you disable compression +entirely. Refer to the :doc:`topic guide on compression <../topics/compression>` to learn more about tuning compression settings. +Application +----------- + +Your application will allocate memory for its data structures. Memory usage +depends on your use case and your implementation. + +Make sure that you don't keep references to data that you don't need anymore +because this prevents garbage collection. + Buffers ------- -Under normal circumstances, buffers are almost always empty. +Typical WebSocket applications exchange small messages at a rate that doesn't +saturate the CPU or the network. Buffers are almost always empty. This is the +optimal situation. Buffers absorb bursts of incoming or outgoing messages +without having to pause reading or writing. 
+
+If the application receives messages faster than it can process them, receive
+buffers will fill up. If the application sends messages faster than the
+network can transmit them, send buffers will fill up.
+
+When buffers are almost always full, not only does the additional memory usage
+fail to bring any benefit, but latency degrades as well. This problem is called
+bufferbloat_. If it cannot be resolved by adding capacity, typically because the
+system is bottlenecked by its output and constantly regulated by
+:ref:`backpressure <backpressure>`, then buffers should be kept small to ensure
+that backpressure kicks in quickly.
+
+.. _bufferbloat: https://en.wikipedia.org/wiki/Bufferbloat
+
+To sum up, buffers should be sized to absorb bursts of messages. Making them
+larger than necessary often causes more harm than good.
+
+There are three levels of buffering in an application built with websockets.
+
+TCP buffers
+...........
+
+The operating system allocates buffers for each TCP connection. The receive
+buffer stores data received from the network until the application reads it.
+The send buffer stores data written by the application until it's sent to
+the network and acknowledged by the recipient.
+
+Modern operating systems adjust the size of TCP buffers automatically to match
+network conditions. Overall, you shouldn't worry about TCP buffers. Just be
+aware that they exist.
+
+In very high throughput scenarios, TCP buffers may grow to several megabytes
+to store the data in flight. Then, they can make up the bulk of the memory
+usage of a connection.
+
+I/O library buffers
+...................
+
+I/O libraries like :mod:`asyncio` may provide read and write buffers to reduce
+the frequency of system calls or the need to pause reading or writing.
+
+You should keep these buffers small. Increasing them can help with spiky
+workloads but it can also backfire because it delays backpressure.
+
+* In the new :mod:`asyncio` implementation, there is no library-level read
+  buffer.
+
+  There is a write buffer. The ``write_limit`` argument of
+  :func:`~asyncio.client.connect` and :func:`~asyncio.server.serve` controls its
+  size. When the write buffer grows above the high-water mark,
+  :meth:`~asyncio.connection.Connection.send` waits until it drains below the
+  low-water mark before returning. This creates backpressure on coroutines that
+  send messages.
+
+* In the legacy :mod:`asyncio` implementation, there is a library-level read
+  buffer. The ``read_limit`` argument of :func:`~client.connect` and
+  :func:`~server.serve` controls its size. When the read buffer grows above the
+  high-water mark, the connection stops reading from the network until it drains
+  below the low-water mark. This creates backpressure on the TCP connection.
+
+  There is a write buffer. It is controlled by ``write_limit``. It behaves like
+  the write buffer in the new :mod:`asyncio` implementation described above.
+
+* In the :mod:`threading` implementation, there are no library-level buffers.
+  All I/O operations are performed directly on the :class:`~socket.socket`.
+
+websockets' buffers
+...................
+
+Incoming messages are queued in a buffer after they have been received from the
+network and parsed. A larger buffer may help a slow application handle bursts
+of messages while remaining responsive to control frames.
+
+The memory footprint of this buffer is bounded by the product of ``max_size``,
+which controls the size of items in the queue, and ``max_queue``, which controls
+the number of items.
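+
+For instance — an illustrative sketch, assuming a ``handler`` coroutine and
+messages known to stay under 64 KiB — both limits can be tightened, together
+with the write buffer, when starting a server::
+
+    from websockets.asyncio.server import serve
+
+    async with serve(
+        handler,
+        "localhost",
+        8765,
+        max_size=64 * 1024,     # largest accepted incoming message
+        max_queue=8,            # high-water mark of the incoming frame queue
+        write_limit=16 * 1024,  # high-water mark of the write buffer
+    ) as server:
+        await server.serve_forever()
+
+With these values, the incoming queue is bounded by 64 KiB × 8 = 512 KiB.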
+ +The ``max_size`` argument of :func:`~asyncio.client.connect` and +:func:`~asyncio.server.serve` defaults to 1 MiB. Most applications never receive +such large messages. Configuring a smaller value puts a tighter boundary on +memory usage. This can make your application more resilient to denial of service +attacks. + +The behavior of the ``max_queue`` argument of :func:`~asyncio.client.connect` +and :func:`~asyncio.server.serve` varies across implementations. -Under high load, if a server receives more messages than it can process, -bufferbloat can result in excessive memory usage. +* In the new :mod:`asyncio` implementation, ``max_queue`` is the high-water mark + of a queue of incoming frames. It defaults to 16 frames. If the queue grows + larger, the connection stops reading from the network until the application + consumes messages and the queue goes below the low-water mark. This creates + backpressure on the TCP connection. -By default websockets has generous limits. It is strongly recommended to adapt -them to your application. When you call :func:`~server.serve`: + Each item in the queue is a frame. A frame can be a message or a message + fragment. Either way, it must be smaller than ``max_size``, the maximum size + of a message. The queue may use up to ``max_size * max_queue`` bytes of + memory. By default, this is 16 MiB. -- Set ``max_size`` (default: 1 MiB, UTF-8 encoded) to the maximum size of - messages your application generates. -- Set ``max_queue`` (default: 32) to the maximum number of messages your - application expects to receive faster than it can process them. The queue - provides burst tolerance without slowing down the TCP connection. +* In the legacy :mod:`asyncio` implementation, ``max_queue`` is the maximum + size of a queue of incoming messages. It defaults to 32 messages. If the queue + fills up, the connection stops reading from the library-level read buffer + described above. If that buffer fills up as well, it will create backpressure + on the TCP connection. -Furthermore, you can lower ``read_limit`` and ``write_limit`` (default: -64 KiB) to reduce the size of buffers for incoming and outgoing data. + Text messages are decoded before they're added to the queue. Since Python can + use up to 4 bytes of memory per character, the queue may use up to ``4 * + max_size * max_queue`` bytes of memory. By default, this is 128 MiB. -The design document provides :ref:`more details about buffers `. +* In the :mod:`threading` implementation, there is no queue of incoming + messages. The ``max_queue`` argument doesn't exist. The connection keeps at + most one message in memory at a time. diff --git a/experiments/compression/benchmark.py b/experiments/compression/benchmark.py index 4fbdf622..86ebece3 100644 --- a/experiments/compression/benchmark.py +++ b/experiments/compression/benchmark.py @@ -1,72 +1,32 @@ #!/usr/bin/env python -import getpass -import json -import pickle -import subprocess +import collections +import pathlib import sys import time import zlib -CORPUS_FILE = "corpus.pkl" - REPEAT = 10 WB, ML = 12, 5 # defaults used as a reference -def _corpus(): - OAUTH_TOKEN = getpass.getpass("OAuth Token? 
") - COMMIT_API = ( - f'curl -H "Authorization: token {OAUTH_TOKEN}" ' - f"https://api.github.com/repos/python-websockets/websockets/git/commits/:sha" - ) - - commits = [] - - head = subprocess.check_output("git rev-parse HEAD", shell=True).decode().strip() - todo = [head] - seen = set() - - while todo: - sha = todo.pop(0) - commit = subprocess.check_output(COMMIT_API.replace(":sha", sha), shell=True) - commits.append(commit) - seen.add(sha) - for parent in json.loads(commit)["parents"]: - sha = parent["sha"] - if sha not in seen and sha not in todo: - todo.append(sha) - time.sleep(1) # rate throttling - - return commits - - -def corpus(): - data = _corpus() - with open(CORPUS_FILE, "wb") as handle: - pickle.dump(data, handle) - - -def _run(data): - size = {} - duration = {} +def benchmark(data): + size = collections.defaultdict(dict) + duration = collections.defaultdict(dict) for wbits in range(9, 16): - size[wbits] = {} - duration[wbits] = {} - for memLevel in range(1, 10): encoder = zlib.compressobj(wbits=-wbits, memLevel=memLevel) encoded = [] + print(f"Compressing {REPEAT} times with {wbits=} and {memLevel=}") + t0 = time.perf_counter() for _ in range(REPEAT): for item in data: - if isinstance(item, str): - item = item.encode() # Taken from PerMessageDeflate.encode item = encoder.compress(item) + encoder.flush(zlib.Z_SYNC_FLUSH) if item.endswith(b"\x00\x00\xff\xff"): @@ -75,7 +35,7 @@ def _run(data): t1 = time.perf_counter() - size[wbits][memLevel] = sum(len(item) for item in encoded) + size[wbits][memLevel] = sum(len(item) for item in encoded) / REPEAT duration[wbits][memLevel] = (t1 - t0) / REPEAT raw_size = sum(len(item) for item in data) @@ -149,15 +109,13 @@ def _run(data): print() -def run(): - with open(CORPUS_FILE, "rb") as handle: - data = pickle.load(handle) - _run(data) +def main(corpus): + data = [file.read_bytes() for file in corpus.iterdir()] + benchmark(data) -try: - run = globals()[sys.argv[1]] -except (KeyError, IndexError): - print(f"Usage: {sys.argv[0]} [corpus|run]") -else: - run() +if __name__ == "__main__": + if len(sys.argv) < 2: + print(f"Usage: {sys.argv[0]} [directory]") + sys.exit(2) + main(pathlib.Path(sys.argv[1])) diff --git a/experiments/compression/client.py b/experiments/compression/client.py index 3ee19ddc..69bfd5e7 100644 --- a/experiments/compression/client.py +++ b/experiments/compression/client.py @@ -4,8 +4,8 @@ import statistics import tracemalloc -import websockets -from websockets.extensions import permessage_deflate +from websockets.asyncio.client import connect +from websockets.extensions.permessage_deflate import ClientPerMessageDeflateFactory CLIENTS = 20 @@ -16,16 +16,16 @@ MEM_SIZE = [] -async def client(client): +async def client(num): # Space out connections to make them sequential. - await asyncio.sleep(client * INTERVAL) + await asyncio.sleep(num * INTERVAL) tracemalloc.start() - async with websockets.connect( + async with connect( "ws://localhost:8765", extensions=[ - permessage_deflate.ClientPerMessageDeflateFactory( + ClientPerMessageDeflateFactory( server_max_window_bits=WB, client_max_window_bits=WB, compress_settings={"memLevel": ML}, @@ -42,11 +42,13 @@ async def client(client): tracemalloc.stop() # Hold connection open until the end of the test. 
- await asyncio.sleep(CLIENTS * INTERVAL) + await asyncio.sleep((CLIENTS + 1 - num) * INTERVAL) async def clients(): - await asyncio.gather(*[client(client) for client in range(CLIENTS + 1)]) + # Start one more client than necessary because we will ignore + # non-representative results from the first connection. + await asyncio.gather(*[client(num) for num in range(CLIENTS + 1)]) asyncio.run(clients()) diff --git a/experiments/compression/corpus.py b/experiments/compression/corpus.py new file mode 100644 index 00000000..da5661df --- /dev/null +++ b/experiments/compression/corpus.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python + +import getpass +import json +import pathlib +import subprocess +import sys +import time + + +def github_commits(): + OAUTH_TOKEN = getpass.getpass("OAuth Token? ") + COMMIT_API = ( + f'curl -H "Authorization: token {OAUTH_TOKEN}" ' + f"https://api.github.com/repos/python-websockets/websockets/git/commits/:sha" + ) + + commits = [] + + head = subprocess.check_output( + "git rev-parse origin/main", + shell=True, + text=True, + ).strip() + todo = [head] + seen = set() + + while todo: + sha = todo.pop(0) + commit = subprocess.check_output(COMMIT_API.replace(":sha", sha), shell=True) + commits.append(commit) + seen.add(sha) + for parent in json.loads(commit)["parents"]: + sha = parent["sha"] + if sha not in seen and sha not in todo: + todo.append(sha) + time.sleep(1) # rate throttling + + return commits + + +def main(corpus): + data = github_commits() + for num, content in enumerate(reversed(data)): + (corpus / f"{num:04d}.json").write_bytes(content) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print(f"Usage: {sys.argv[0]} [directory]") + sys.exit(2) + main(pathlib.Path(sys.argv[1])) diff --git a/experiments/compression/server.py b/experiments/compression/server.py index 8d1ee3cd..1c28f735 100644 --- a/experiments/compression/server.py +++ b/experiments/compression/server.py @@ -6,8 +6,8 @@ import statistics import tracemalloc -import websockets -from websockets.extensions import permessage_deflate +from websockets.asyncio.server import serve +from websockets.extensions.permessage_deflate import ServerPerMessageDeflateFactory CLIENTS = 20 @@ -44,12 +44,12 @@ async def server(): print() loop.add_signal_handler(signal.SIGTERM, stop.set_result, None) - async with websockets.serve( + async with serve( handler, "localhost", 8765, extensions=[ - permessage_deflate.ServerPerMessageDeflateFactory( + ServerPerMessageDeflateFactory( server_max_window_bits=WB, client_max_window_bits=WB, compress_settings={"memLevel": ML}, @@ -63,7 +63,7 @@ async def server(): asyncio.run(server()) -# First connection may incur non-representative setup costs. +# First connection incurs non-representative setup costs. del MEM_SIZE[0] print(f"µ = {statistics.mean(MEM_SIZE) / 1024:.1f} KiB") diff --git a/src/websockets/extensions/permessage_deflate.py b/src/websockets/extensions/permessage_deflate.py index fea14131..5b907b79 100644 --- a/src/websockets/extensions/permessage_deflate.py +++ b/src/websockets/extensions/permessage_deflate.py @@ -62,7 +62,8 @@ def __init__( if not self.local_no_context_takeover: self.encoder = zlib.compressobj( - wbits=-self.local_max_window_bits, **self.compress_settings + wbits=-self.local_max_window_bits, + **self.compress_settings, ) # To handle continuation frames properly, we must keep track of @@ -156,7 +157,8 @@ def encode(self, frame: frames.Frame) -> frames.Frame: # Re-initialize per-message decoder. 
if self.local_no_context_takeover: self.encoder = zlib.compressobj( - wbits=-self.local_max_window_bits, **self.compress_settings + wbits=-self.local_max_window_bits, + **self.compress_settings, ) # Compress data.
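
The hunk above re-creates ``self.encoder`` for each message when
``local_no_context_takeover`` is set, so no compression context carries over
between messages. The following standalone sketch — hypothetical messages,
standard-library ``zlib`` only, mirroring the re-initialization pattern rather
than reusing websockets' code — shows what context takeover buys on a
repetitive stream:

    import zlib

    # Hypothetical corpus: repetitive JSON-like messages.
    messages = [b'{"event": "tick", "seq": %d}' % i for i in range(1000)]

    def total_compressed_size(context_takeover):
        encoder = zlib.compressobj(wbits=-12, memLevel=5)
        total = 0
        for message in messages:
            if not context_takeover:
                # Fresh compressor per message, like no_context_takeover.
                encoder = zlib.compressobj(wbits=-12, memLevel=5)
            total += len(encoder.compress(message) + encoder.flush(zlib.Z_SYNC_FLUSH))
        return total

    print("with context takeover:   ", total_compressed_size(True))
    print("without context takeover:", total_compressed_size(False))

Sharing the compression context lets later messages back-reference earlier
ones, which is why context takeover compresses repetitive message streams
significantly better, at the cost of keeping the context in memory between
messages.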