From 673324fce65dbe34e5496709a77271d45aa1988b Mon Sep 17 00:00:00 2001 From: Plamen Dimitrov Date: Thu, 14 Mar 2024 11:40:50 +0800 Subject: [PATCH] Handle connection resets in the status server more gracefully MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Connection resets due to network instability can lead to the status server not catching a test status, to an asyncio error like full.log│2024-03-08 03:08:44,053 asyncio base_events L1744 ERROR| Task exception was never retrieved │ full.log│future: exception=ConnectionResetError(104, 'Connection reset by peer')> ├ full.log│Traceback (most recent call last): │ full.log│ File "/usr/lib/python3.10/site-packages/avocado/core/status/server.py", line 53, in cb │ full.log│ raw_message = await reader.readline() │ full.log│ File "/usr/lib64/python3.10/asyncio/streams.py", line 525, in readline │ full.log│ line = await self.readuntil(sep) │ full.log│ File "/usr/lib64/python3.10/asyncio/streams.py", line 617, in readuntil │ full.log│ await self._wait_for_data('readuntil') │ full.log│ File "/usr/lib64/python3.10/asyncio/streams.py", line 502, in _wait_for_data │ full.log│ await self._waiter │ full.log│ File "/usr/lib64/python3.10/asyncio/selector_events.py", line 854, in _read_ready__data_received │ full.log│ data = self._sock.recv(self.max_size) │ full.log│ConnectionResetError: [Errno 104] Connection reset by peer and, worse yet, to test tasks hanging indefinitely without the job ever completing properly. This was mostly observed in cases of LXC and remote spawner isolation where the isolated task process completes but the task on the side of the task machine remains unfinished. 
Signed-off-by: Plamen Dimitrov --- avocado/core/status/server.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/avocado/core/status/server.py b/avocado/core/status/server.py index 046dbd6873..d2ccf9d725 100644 --- a/avocado/core/status/server.py +++ b/avocado/core/status/server.py @@ -50,7 +50,10 @@ def close(self): async def cb(self, reader, _): while True: - raw_message = await reader.readline() + try: + raw_message = await reader.readline() + except ConnectionResetError: + continue if not raw_message: return self._repo.process_raw_message(raw_message)