From ab3da3cac3eb80541d5f581474adc62a134fd3e0 Mon Sep 17 00:00:00 2001 From: Peter Saveliev Date: Wed, 20 Mar 2024 22:15:11 +0100 Subject: [PATCH] decoder: fix hex data support, add docs --- docs/debug.rst | 158 +++------------------------------ pyroute2/decoder/args.py | 27 +++--- pyroute2/decoder/loader.py | 89 ++++++++++++++++++- pyroute2/decoder/main.py | 174 ++++++++++++++++++++++++++++++++++++- 4 files changed, 282 insertions(+), 166 deletions(-) diff --git a/docs/debug.rst b/docs/debug.rst index 756087174..c3254a1e2 100644 --- a/docs/debug.rst +++ b/docs/debug.rst @@ -1,153 +1,15 @@ .. debug: -Netlink debug howto -------------------- +Netlink debugging howto +----------------------- -Dump data -========= +pyroute2-decoder +================ -Either run the required command via `strace`, or attach to the running -process with `strace -p`. Use `-s {int}` argument to make sure that all -the messages are dumped. The `-x` argument instructs `strace` to produce -output in the hex format that can be passed to the pyroute2 decoder:: +.. 
automodule:: pyroute2.decoder.main + :members: - $ strace -e trace=network -x -s 16384 ip ro - socket(PF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3 - setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0 - setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0 - bind(3, {sa_family=AF_NETLINK, pid=0, groups=00000000}, 12) = 0 - getsockname(3, {sa_family=AF_NETLINK, pid=28616, groups=00000000}, [12]) = 0 - sendto(3, "\x28\x00\x00\x00\x1a\x00\x01\x03 [skip] ", 40, 0, NULL, 0) = 40 - recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, - msg_iov(1)=[{"\x3c\x00\x00\x00\x18 [skip]", 16384}], - msg_controllen=0, msg_flags=0}, 0) = 480 - socket(PF_LOCAL, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 4 - 192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 - recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, - msg_iov(1)=[{"\x14\x00\x00\x00\x03 [skip]", 16384}], - msg_controllen=0, msg_flags=0}, 0) = 20 - +++ exited with 0 +++ - -Now you can copy `send…()` and `recv…()` buffer strings to a file. - -Strace compatibility note -========================= - -Starting with version 4.13, `strace` parses Netlink message headers and -displays them in their parsed form instead of displaying the whole buffer in -its raw form. The rest of the buffer is still shown, but due to it being -incomplete, the method mentioned above doesn't work anymore. - -For the time being, the easiest workaround is probably to use an older strace -version as it only depends on libc6. - -Decode data -=========== - -The decoder is not provided with rpm or pip packages, so you should -have a local git repo of the project:: - - $ git clone - $ cd pyroute2 - -Now run the decoder:: - - $ export PYTHONPATH=`pwd` - $ python tests/decoder/decoder.py - -E.g. 
for the route dump in the file `rt.dump` the command line -should be:: - - $ python tests/decoder/decoder.py \ - pyroute2.netlink.rtnl.rtmsg.rtmsg \ - rt.dump - -**Why should I specify the message class?** Why there is no marshalling -in the decoder script? 'Cause it is intended to be used with different -netlink protocols, not only RTNL, but also nl80211, nfnetlink etc. -There is no common marshalling for all the netlink protocols. - -**How to specify the message class?** All the netlink protocols are -defined under `pyroute2/netlink/`, e.g. `rtmsg` module is -`pyroute2/netlink/rtnl/rtmsg.py`. Thereafter you should specify the -class inside the module, since there can be several classes. In the -`rtmsg` case the line will be `pyroute.netlink.rtnl.rtmsg.rtmsg` or, -more friendly to the bash autocomplete, `pyroute2/netlink/rtnl/rtmsg.rtmsg`. -Notice, that the class you have to specify with dot anyways. - -**What is the data file format?** Rules are as follows: - -* The data dump should be in a hex format. Two possible variants are: - `\\x00\\x01\\x02\\x03` or `00:01:02:03`. -* There can be several packets in the same file. They should be of the - same type. -* Spaces and line ends are ignored, so you can format the dump as you - want. -* The `#` symbol starts a comment until the end of the line. -* The `#!` symbols start a comment until the end of the file. - -Example:: - - # ifinfmsg headers - # - # nlmsg header - \x84\x00\x00\x00 # length - \x10\x00 # type - \x05\x06 # flags - \x49\x61\x03\x55 # sequence number - \x00\x00\x00\x00 # pid - # RTNL header - \x00\x00 # ifi_family - \x00\x00 # ifi_type - \x00\x00\x00\x00 # ifi_index - \x00\x00\x00\x00 # ifi_flags - \x00\x00\x00\x00 # ifi_change - # ... - - -Compile data -============ - -Starting with 0.4.1, the library provides `BatchSocket` class, that -only compiles and collects requests instead of sending them to the -kernel. 
E.g., it is used by `IPBatch`, that combines `BatchSocket` -with `IPRouteMixin`, providing RTNL compiler:: - - $ python3 - Python 3.4.3 (default, Mar 31 2016, 20:42:37) - [GCC 5.3.1 20151207 (Red Hat 5.3.1-2)] on linux - Type "help", "copyright", "credits" or "license" for more information. - # import all the stuff - >>> from pyroute2 import IPBatch - >>> from pyroute2.common import hexdump - # create the compiler - >>> ipb = IPBatch() - # compile requests into one buffer - >>> ipb.link("add", index=550, kind="dummy", ifname="test") - >>> ipb.link("set", index=550, state="up") - >>> ipb.addr("add", index=550, address="10.0.0.2", mask=24) - # inspect the buffer - >>> hexdump(ipb.batch) - '3c:00:00:00:10:00:05:06:00:00:00:00:a2:7c:00:00:00:00:00:00: - 26:02:00:00:00:00:00:00:00:00:00:00:09:00:03:00:74:65:73:74: - 00:00:00:00:10:00:12:00:0a:00:01:00:64:75:6d:6d:79:00:00:00: - 20:00:00:00:13:00:05:06:00:00:00:00:a2:7c:00:00:00:00:00:00: - 26:02:00:00:01:00:00:00:01:00:00:00:28:00:00:00:14:00:05:06: - 00:00:00:00:a2:7c:00:00:02:18:00:00:26:02:00:00:08:00:01:00: - 0a:00:00:02:08:00:02:00:0a:00:00:02' - # reset the buffer - >>> ipb.reset() - -Pls notice, that in Python2 you should use `hexdump(str(ipb.batch))` -instead of `hexdump(ipb.batch)`. - -The data, compiled by `IPBatch` can be used either to run batch -requests, when one `send()` call sends several messages at once, or -to produce binary buffers to test your own netlink parsers. Or just -to dump some data to be sent later and probably even on another host:: - - >>> ipr = IPRoute() - >>> ipr.sendto(ipb.batch, (0, 0)) - -The compiler always produces requests with `sequence_number == 0`, -so if there will be any responses, they can be handled as broadcasts. +filter functions +================ +.. 
automodule:: pyroute2.decoder.loader + :members: MatchOps diff --git a/pyroute2/decoder/args.py b/pyroute2/decoder/args.py index 091e1650e..50516ec83 100644 --- a/pyroute2/decoder/args.py +++ b/pyroute2/decoder/args.py @@ -1,15 +1,16 @@ import argparse -argument_parser = argparse.ArgumentParser() -argument_parser.add_argument( - '-c', '--cls', help='message class to use for decoding the data' -) -argument_parser.add_argument('-d', '--data', help='data dump file') -argument_parser.add_argument( - '-f', '--format', default='hex', help='data file format: hex, pcap' -) -argument_parser.add_argument( - '-m', '--match', help='match protocol family (only for pcap data)' -) -args = argument_parser.parse_args() -__all__ = [args] + +def parse_args(): + argument_parser = argparse.ArgumentParser() + argument_parser.add_argument( + '-c', '--cls', help='message class to use for decoding the data' + ) + argument_parser.add_argument('-d', '--data', help='data dump file') + argument_parser.add_argument( + '-f', '--format', default='hex', help='data file format: hex, pcap' + ) + argument_parser.add_argument( + '-m', '--match', help='match protocol family (only for pcap data)' + ) + return argument_parser.parse_args() diff --git a/pyroute2/decoder/loader.py b/pyroute2/decoder/loader.py index 8011a5bb3..5f417faf4 100644 --- a/pyroute2/decoder/loader.py +++ b/pyroute2/decoder/loader.py @@ -5,7 +5,7 @@ from collections import namedtuple from importlib import import_module -from pyroute2.common import hexdump +from pyroute2.common import hexdump, load_dump PcapMetaData = namedtuple( "pCAPMetaData", @@ -70,9 +70,27 @@ def __repr__(self): class MatchOps: + ''' + Functions to match netlink messages. + + The matcher object maintains a stack, where every function + leaves True or False. A message matches only when the stack + contains True. + + Some functions take arguments from the command line, other + like `AND` and `OR` work with the stack. 
+ ''' @staticmethod def AND(): + ''' + Consumes values left on the stack by functions to the + left and to the right in the expression, and leaves + the result of AND operation:: + + func_a{...} AND func_b{...} + ''' + def f(packet_header, ll_header, raw, data_offset, stack): v1 = stack.pop() v2 = stack.pop() @@ -82,6 +100,14 @@ def f(packet_header, ll_header, raw, data_offset, stack): @staticmethod def OR(): + ''' + Consumes values left on the stack by functions to the + left and to the right in the expression, and leaves + the result of OR operation:: + + func_a{...} OR func_b{...} + ''' + def f(packet_header, ll_header, raw, data_offset, stack): v1 = stack.pop() v2 = stack.pop() @@ -91,16 +117,52 @@ def f(packet_header, ll_header, raw, data_offset, stack): @staticmethod def ll_header(family): + ''' + Match link layer header fields. For now only the netlink + family is supported; see `pyroute2.netlink` for netlink + families (`NETLINK_.*`) constants:: + + # match generic netlink messages + ll_header{family=16} + + # match RTNL messages + ll_header{family=0} + ''' if not isinstance(family, int) or family < 0 or family > 0xFFFF: raise TypeError('family must be unsigned short integer') def f(packet_header, ll_header, raw, data_offset, stack): + if ll_header is None: + return False return ll_header.family == family return f @staticmethod def data(fmt, offset, value): + ''' + Match arbitrary data in the message. 
Use `struct` notation + for the format, integers for offset and value:: + + # match four bytes with offset 4 bytes and value 16, + # or 10:00:00:00 in hex: + + data{fmt='I', offset=4, value=16} + + # match one byte with offset 16 and value 1, or 01 in hex + + data{fmt='B', offset=16, value=1} + + More examples:: + + # match: + # * generic netlink protocol, 16 + # * message type 37 -- IPVS protocol for this session + # * message command 1 -- IPVS_CMD_NEW_SERVICE + ll_header{family=16} + AND data{fmt='H', offset=4, value=37} + AND data{fmt='B', offset=16, value=1} + ''' if not isinstance(fmt, str): raise TypeError('format must be string') if not isinstance(offset, int) or not isinstance(value, int): @@ -167,6 +229,24 @@ def match(self, packet_header, ll_header, data, offset): return all(stack) +class LoaderHex: + + def __init__(self, data, cls, script): + with open(data, 'r') as f: + self.raw = load_dump(f) + self.cls = cls + self.offset = 0 + self.matcher = Matcher(script) + + @property + def data(self): + while self.offset < len(self.raw): + msg = Message(None, None, self.cls, self.raw[self.offset:]) + msg.decode() + if self.matcher.match(None, None, self.raw, self.offset): + yield msg + self.offset += msg.msg['header']['length'] + class LoaderPcap: def __init__(self, data, cls, script): @@ -217,4 +297,9 @@ def get_loader(args): module = import_module(module_name) cls = getattr(module, cls_name) - return LoaderPcap(args.data, cls, args.match) + if args.format == 'pcap': + return LoaderPcap(args.data, cls, args.match) + elif args.format == 'hex': + return LoaderHex(args.data, cls, args.match) + else: + raise ValueError('data format not supported') diff --git a/pyroute2/decoder/main.py b/pyroute2/decoder/main.py index 5dc2f37f6..b5008e27c 100644 --- a/pyroute2/decoder/main.py +++ b/pyroute2/decoder/main.py @@ -1,15 +1,183 @@ ''' -A cli tool to decode netlink buffers. 
+This tool is intended to decode existing data dumps produced with +other tools like tcpdump or strace, and print the data out in +JSON format. + +The strace tool is not as convenient since version 4.13, as it +started to parse some of netlink messages at least partly, +rendering them useless for third party decoders. So if you plan +to use strace to obtain messages, be sure it is older than 4.13. +The strace related manual can be found in archive documentation +for older pyroute2 versions. + +This manual is focused on pcap dumps. + +An example session: + +.. code-block:: console + + # set up netlink monitoring interface + sudo ip link add dev nlmon0 type nlmon + sudo ip link set dev nlmon0 up + + # dump the traffic into a pcap file + # run netlink communication to be captured at the same time + sudo tcpdump -i nlmon0 -w nl.pcap + ^C + + # decode RTNL messages from the dump + pyroute2-decoder \\ + -c pyroute2.netlink.rtnl.marshal.MarshalRtnl \\ + -d nl.pcap \\ + -m "ll_header{family=0}" + +The result will be printed out in JSON format, so you can load +it directly from stdout, or use jq tool to navigate: + +.. code-block:: console + + # print only pcap headers information + pyroute2-decoder ... | jq '.[]."pcap header"' + +pcap data dumps +~~~~~~~~~~~~~~~ + +This format is the default for `pyroute2-decoder`. To explicitly instruct +the decoder to use the pcap format, use `-f pcap` or `--format pcap`. + +Ordinary pcap dumps as produced by tcpdump. The format +is described here briefly and only to the extent that is important for +the decoder. Please see other resources for detailed pcap format +descriptions. The pyroute2 decoder expects these headers in the pcap dump: + +* Pcap file header. This header is being decoded, but not used by the + tools for now. +* Packet header. From this header the decoder uses only `incl_len` to + properly read the stored data. +* Link layer header. 
From this header only the family field is used + for now; it can be matched with the `ll_header{family=...}` expression. + +hex data dumps +~~~~~~~~~~~~~~ + +Use `-f hex` or `--format hex`. + +Just a raw data flow with no service headers added. The decoder uses +message headers to calculate the buffer lengths to read. This dump +can be obtained using strace or the IPBatch compiler. + +Data should use hex byte representation either in escaped or in +colon separated format. Equivalent variants: + +* `\\\\x49\\\\x61\\\\x03\\\\x55` +* `49:61:03:55` + +Comment strings start with `#`, comments and whitespace are ignored. +A message example: + +.. code-block:: + + # ifinfmsg headers + # + # nlmsg header + \\x84\\x00\\x00\\x00 # length + \\x10\\x00 # type + \\x05\\x06 # flags + \\x49\\x61\\x03\\x55 # sequence number + \\x00\\x00\\x00\\x00 # pid + # RTNL header + \\x00\\x00 # ifi_family + \\x00\\x00 # ifi_type + \\x00\\x00\\x00\\x00 # ifi_index + \\x00\\x00\\x00\\x00 # ifi_flags + \\x00\\x00\\x00\\x00 # ifi_change + # ... + + +message classes +~~~~~~~~~~~~~~~ + +In order to properly debug the stream, one should specify either +a message class, or a marshal class: + +.. code-block:: console + + # use a message class + pyroute2-decoder \\ + -c pyroute2.netlink.generic.ipvs.ipvsmsg \\ + ... + + # use a marshal class + pyroute2-decoder \\ + -c pyroute2.netlink.rtnl.marshal.MarshalRtnl \\ + ... + +The decoder will try to use the specified class to decode every +matching message. That works well for generic protocols, but for other +protocols like RTNL it's more convenient to use marshal classes +that return corresponding message classes for different message +types. + +generic protocols ids +~~~~~~~~~~~~~~~~~~~~~ + +Generic netlink protocols have dynamic IDs, so the first operation is to +get the ID. The message class used for that is `pyroute2.netlink.ctrlmsg`, +the request is `CTRL_CMD_GETFAMILY == 3`, and the response is +`CTRL_CMD_NEWFAMILY == 1`. 
The command is one byte right after the netlink +header, so the filters are: + +* `ll_header{family=16}` match family 16, NETLINK_GENERIC +* `data{fmt='B', offset=16, value=1}` match one byte with + value 1 by offset 16 + +Here is the code to get the family ID: + +.. code-block:: console + + pyroute2-decoder \\ + -c pyroute2.netlink.ctrlmsg \\ + -d nl.pcap \\ + -m "ll_header{family=16} AND data{fmt='B', offset=16, value=1}" | \\ + jq \\ + '.[0].data.attrs[] | select(.[0] | contains("FAMILY"))' + + [ + "CTRL_ATTR_FAMILY_NAME", + "IPVS" + ] + [ + "CTRL_ATTR_FAMILY_ID", + 37 + ] + +Having the family ID you can filter out relevant messages. The filters: + +* `ll_header{family=16}` match family 16, NETLINK_GENERIC +* `data{fmt='H', offset=4, value=37}` match IPVS family ID in + the message header +* `data{fmt='B', offset=16, value=1}` match IPVS_CMD_NEW_SERVICE + +.. code-block:: console + + pyroute2-decoder \\ + -c pyroute2.netlink.generic.ipvs.ipvsmsg \\ + -d nl0.pcap \\ + -m "ll_header{family=16} \\ + AND data{fmt='H', offset=4, value=37} \\ + AND data{fmt='B', offset=16, value=1}" + + ''' import json -from pyroute2.decoder.args import args +from pyroute2.decoder.args import parse_args from pyroute2.decoder.loader import get_loader def run(): - loader = get_loader(args) + loader = get_loader(parse_args()) ret = [] for message in loader.data: ret.append(message.dump())