diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..0237da2c --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,29 @@ +name: pyelftools-tests +on: + push: + branches: + - master + pull_request: + branches: + - master + +jobs: + build: + + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [2.7, 3.6, 3.7, 3.8] + os: [ubuntu-latest] + + steps: + + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Test + run: | + python test/all_tests.py + diff --git a/.gitignore b/.gitignore index 603cba77..30534490 100644 --- a/.gitignore +++ b/.gitignore @@ -7,5 +7,8 @@ build dist MANIFEST *.sublime-workspace +*.egg-info + + diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 37b42522..00000000 --- a/.travis.yml +++ /dev/null @@ -1,8 +0,0 @@ -language: python -python: - - "2.7" - - "3.2" - - "3.3" - - "3.4" - - "3.5" -script: python test/all_tests.py diff --git a/CHANGES b/CHANGES index b00fe3ff..42d42978 100644 --- a/CHANGES +++ b/CHANGES @@ -1,19 +1,68 @@ Changelog ========= -+ Version 0.25 (??) 
++ Version 0.28 (2022.02.03) + + - Added a method for returning the index of a section by name (#331) + - Allow filtering by section types in iter_sections (#345) + - Support Android compressed rel/rela sections (#357) + - Initial support for PPC64LE (#360) + - Initial DWARF v5 support (#363 with several follow-ups) + - Fixed parsing for structures containing uids or gids in core + dumps (#354) + - Allow filtering by segment types in iter_segments (#375) + - Add support for .note.gnu.property (#386) + - Update readelf tests to work with more recent version of + readelf (#387) + - Add support for note GNU_PROPERTY_X86_FEATURE_1_AND (#388) + ++ Version 0.27 (2020.10.27) + + - Print addend for RELA relocations without symbol (#292) + - Implement symbol lookup for {GNU,}HashSection (#290) + - Major rewrite of expression parsing + - Cached random access to CUs and DIEs (#264) + - GNU expressions (#303) + - Support parsing LSDA pointers from FDEs (#308) + - Add support for DW_OP_GNU_push_tls_address in expressions (#315) + - Some initial support for AArch64 little-endian (#318) + - Support for ELF files with a large number of sections (#333) + - Some minimal support for DWARFv1 (#335) + - Many small bug fixes; see git log. 
+ ++ Version 0.26 (2019.12.05) + + - Call relocation for ARM v3 (#194) + - More complete architecture coverage for ENUM_E_MACHINE (#206) + - Support for .debug_pubtypes and .debug_pubnames sections (#208) + - Support for DWARF v4 location lists (#214) + - Decode strings in dynamic string tables (#217) + - Improve symbol table handling in dynamic segments (#219) + - Improved handling of location information (#225) + - Avoid deprecation warnings in Python 3.7+ + - Add DWARF v5 OPs (#240) + - Handle many new translation forms and constants + - Lazy DIE parsing to speed up partial parsing of DWARF info (#249) + ++ Version 0.25 (2018.09.01) - Make parsing of SH_TYPE and PT_TYPE fields dependent on the machine (e_machine header field), making it possible to support conflicting type enums between different machines (#71 and #121). + - Add parsing and readelf dumping for .eh_frame (#155) + - Support compressed sections (#152) + - Better support for parsing core dumps (#147) + - More comprehensive handling of ARM relocations (#121) + - Convert all ascii encoding to utf-8 encoding (#182) - Don't attempt to hex/string dump SHT_NOBITS sections in readelf (#119). - - Add Python 3.5 testing to the tox file. + - Test with Python 3.6 - Minor bugfixes (#118) - Cleanup: Use argparse instead of optparse - Make readelf comparison tests run in parallel using multiprocessing; cuts testing time 3-5x + - Improvements in MIPS flags handling (#165) -+ Version 0.24 (04.08.2016) ++ Version 0.24 (2016.08.04) - Retrieve symbols by name - get_symbol_by_name (#58). - Symbol/section names are strings internally now, not bytestrings (this may @@ -29,7 +78,7 @@ Changelog - Support for zlib-compressed debug sections (#102) - Support for DWARF v4 line programs (#82) -+ Version 0.23 (08.11.2014) ++ Version 0.23 (2014.11.08) - Minimal Python 2.x version raised to 2.7 - Basic support for MIPS (contributed by Karl Vogel). 
@@ -37,7 +86,7 @@ Changelog - Support for parsing symbol table in dynamic segment (contributed by Nam T. Nguyen). -+ Version 0.22 (30.03.2014) ++ Version 0.22 (2014.03.30) - pyelftools repository moved to https://github.com/eliben/pyelftools - Support for version sections - contributed by Yann Rouillard. @@ -50,7 +99,7 @@ Changelog file name / path (based on pull request #16 by Shaheed Haque). - Set up Travis CI integration. -+ Version 0.21 (17.04.2013) ++ Version 0.21 (2013.04.17) - Added new example: dwarf_decode_address - decode function name and file & line information from an address. @@ -63,11 +112,11 @@ Changelog - Output of scripts/readelf.py now matches that of binutils 2.23.52. - Added more machine EM_ values to ENUM_E_TYPE. -+ Version 0.20 (27.01.2012) ++ Version 0.20 (2012.01.27) - Python 3 support - Fixed some problems with running tests - Issue #2: made all examples run (and test/run_examples_test.py pass) on Windows. -+ Version 0.10 - Initial public release (06.01.2012) ++ Version 0.10 - Initial public release (2012.01.06) diff --git a/MANIFEST.in b/MANIFEST.in index 4a861381..e2c7667f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,8 +1,8 @@ recursive-include elftools *.py recursive-include scripts *.py recursive-include examples *.py *.elf *.out -recursive-include test *.py *.elf *.arm *.mips -include README +recursive-include test *.py *.elf *.arm *.mips *.o +include README.rst include LICENSE include CHANGES include tox.ini diff --git a/README.rst b/README.rst index 4ed20a1e..6b59c0ff 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,10 @@ -Introduction: what is pyelftools? ---------------------------------- +========== +pyelftools +========== + +.. image:: https://github.com/eliben/pyelftools/workflows/pyelftools-tests/badge.svg + :align: center + :target: https://github.com/eliben/pyelftools/actions **pyelftools** is a pure-Python library for parsing and analyzing ELF files and DWARF debugging information. 
See the @@ -10,7 +15,7 @@ Pre-requisites -------------- As a user of **pyelftools**, one only needs Python to run. It works with -Python versions 2.7 and 3.x (x >= 2). For hacking on **pyelftools** the +Python versions 2.7 and 3.x (x >= 5). For hacking on **pyelftools** the requirements are a bit more strict, please see the `hacking guide `_. @@ -33,6 +38,9 @@ recent version of the code. This can be done by downloading the `master zip file `_ or just cloning the Git repository. +Since **pyelftools** has no external dependencies, it's also easy to use it +without installing, by locally adjusting ``PYTHONPATH``. + How to use it? -------------- @@ -45,13 +53,3 @@ License **pyelftools** is open source software. Its code is in the public domain. See the ``LICENSE`` file for more details. - -CI Status ---------- - -**pyelftools** has automatic testing enabled through the convenient -`Travis CI project `_. Here is the latest build status: - -.. image:: https://travis-ci.org/eliben/pyelftools.png?branch=master - :align: center - :target: https://travis-ci.org/eliben/pyelftools diff --git a/TODO b/TODO index 11b0adab..3cc524c8 100755 --- a/TODO +++ b/TODO @@ -9,15 +9,10 @@ New version construct --------- -The construct seems to be maintained again - they also backported my Python 3 -fixes. Theoretically, I can remove construct from pyelftools and use it as a -dependency instead. I don't really have time to play with this now, but may -do so in the future. - -Distribution ------------- - -python setup.py build sdist bdist_wheel upload +construct seems to be maintained again - they also backported my Python 3 fixes. +Theoretically, I can remove construct from pyelftools and use it as a dependency +instead. I don't really have time to play with this now, but may do so in the +future. 
Preparing a new release ----------------------- @@ -28,4 +23,14 @@ Preparing a new release * Untar the created ``dist/pyelftools-x.y.tar.gz`` and make sure everything looks ok * Now build with upload to send it to PyPi +* Tag new version in git * Test with pip install from some new virtualenv + +Distribution +------------ + +1. First install Twine (https://packaging.python.org/tutorials/packaging-projects/) +2. python3 -m twine upload dist/*, but make sure ``setup.py`` was already run + and the updated whl and tarball are in dist/. + +Credentials for PyPI are stored in ~/.pypirc diff --git a/elftools/__init__.py b/elftools/__init__.py index d30e19e5..9eb4bfb1 100644 --- a/elftools/__init__.py +++ b/elftools/__init__.py @@ -4,4 +4,4 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -__version__ = '0.24' +__version__ = '0.28' diff --git a/elftools/common/construct_utils.py b/elftools/common/construct_utils.py index 36332497..4b4a3920 100644 --- a/elftools/common/construct_utils.py +++ b/elftools/common/construct_utils.py @@ -8,7 +8,7 @@ #------------------------------------------------------------------------------- from ..construct import ( Subconstruct, ConstructError, ArrayError, Adapter, Field, RepeatUntil, - Rename + Rename, SizeofError ) diff --git a/elftools/common/exceptions.py b/elftools/common/exceptions.py index 5e409cf1..eb759bba 100644 --- a/elftools/common/exceptions.py +++ b/elftools/common/exceptions.py @@ -6,12 +6,12 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -class ELFError(Exception): +class ELFError(Exception): pass class ELFRelocationError(ELFError): pass - + class ELFParseError(ELFError): pass diff --git a/elftools/common/py3compat.py b/elftools/common/py3compat.py index b901e078..25992644 100644 --- a/elftools/common/py3compat.py 
+++ b/elftools/common/py3compat.py @@ -21,6 +21,11 @@ # and strings are different types and bytes hold numeric values when # iterated over. + def bytes2hex(b, sep=''): + if not sep: + return b.hex() + return sep.join(map('{:02x}'.format, b)) + def bytes2str(b): return b.decode('latin-1') def str2bytes(s): return s.encode('latin-1') def int2byte(i): return bytes((i,)) @@ -41,6 +46,12 @@ def iterbytes(b): import cStringIO StringIO = BytesIO = cStringIO.StringIO + def bytes2hex(b, sep=''): + res = b.encode('hex') + if not sep: + return res + return sep.join(res[i:i+2] for i in range(0, len(res), 2)) + def bytes2str(b): return b def str2bytes(s): return s int2byte = chr @@ -64,3 +75,8 @@ def itervalues(d): def iteritems(d): """Return an iterator over the items of a dictionary.""" return getattr(d, 'items' if PY3 else 'iteritems')() + +try: + from collections.abc import Mapping # python >= 3.3 +except ImportError: + from collections import Mapping # python < 3.3 diff --git a/elftools/common/utils.py b/elftools/common/utils.py index 4e80e188..d1fde2ca 100644 --- a/elftools/common/utils.py +++ b/elftools/common/utils.py @@ -9,7 +9,7 @@ from contextlib import contextmanager from .exceptions import ELFParseError, ELFError, DWARFError from .py3compat import int2byte -from ..construct import ConstructError +from ..construct import ConstructError, ULInt8 def merge_dicts(*dicts): @@ -102,6 +102,11 @@ def roundup(num, bits): """ return (num - 1 | (1 << bits) - 1) + 1 +def read_blob(stream, length): + """Read length bytes from stream, return a list of ints + """ + return [struct_parse(ULInt8(''), stream) for i in range(length)] + #------------------------- PRIVATE ------------------------- def _assert_with_exception(cond, msg, exception_type): diff --git a/elftools/construct/adapters.py b/elftools/construct/adapters.py index 54fcc08c..545dbac1 100644 --- a/elftools/construct/adapters.py +++ b/elftools/construct/adapters.py @@ -279,7 +279,7 @@ class ExprAdapter(Adapter): * subcon 
- the subcon to adapt * encoder - a function that takes (obj, context) and returns an encoded version of obj - * decoder - a function that takes (obj, context) and returns an decoded + * decoder - a function that takes (obj, context) and returns a decoded version of obj Example: diff --git a/elftools/construct/core.py b/elftools/construct/core.py index 214c58fc..14a50f88 100644 --- a/elftools/construct/core.py +++ b/elftools/construct/core.py @@ -345,7 +345,7 @@ def __getstate__(self): return attrs def __setstate__(self, attrs): attrs["packer"] = Packer(attrs["packer"]) - return StaticField.__setstate__(attrs) + return StaticField.__setstate__(self, attrs) def _parse(self, stream, context): try: return self.packer.unpack(_read_stream(stream, self.length))[0] @@ -1297,6 +1297,8 @@ def _build(self, obj, stream, context): assert obj is None def _sizeof(self, context): return 0 + def __reduce__(self): + return self.__class__.__name__ Pass = Pass(None) class Terminator(Construct): diff --git a/elftools/construct/debug.py b/elftools/construct/debug.py index 6023df92..846daf89 100644 --- a/elftools/construct/debug.py +++ b/elftools/construct/debug.py @@ -15,17 +15,17 @@ class Probe(Construct): A probe: dumps the context, stack frames, and stream content to the screen to aid the debugging process. See also Debugger. - + Parameters: * name - the display name - * show_stream - whether or not to show stream contents. default is True. + * show_stream - whether or not to show stream contents. default is True. the stream must be seekable. * show_context - whether or not to show the context. default is True. - * show_stack - whether or not to show the upper stack frames. default + * show_stack - whether or not to show the upper stack frames. default is True. * stream_lookahead - the number of bytes to dump when show_stack is set. default is 100. 
- + Example: Struct("foo", UBInt8("a"), @@ -34,13 +34,13 @@ class Probe(Construct): ) """ __slots__ = [ - "printname", "show_stream", "show_context", "show_stack", + "printname", "show_stream", "show_context", "show_stack", "stream_lookahead" ] counter = 0 - - def __init__(self, name = None, show_stream = True, - show_context = True, show_stack = True, + + def __init__(self, name = None, show_stream = True, + show_context = True, show_stack = True, stream_lookahead = 100): Construct.__init__(self, None) if name is None: @@ -59,7 +59,7 @@ def _build(self, obj, stream, context): self.printout(stream, context) def _sizeof(self, context): return 0 - + def printout(self, stream, context): obj = Container() if self.show_stream: @@ -71,10 +71,10 @@ def printout(self, stream, context): stream.seek(-len(follows), 1) obj.following_stream_data = HexString(follows) print - + if self.show_context: obj.context = context - + if self.show_stack: obj.stack = ListContainer() frames = [s[0] for s in inspect.stack()][1:-1] @@ -83,7 +83,7 @@ def printout(self, stream, context): a = Container() a.__update__(f.f_locals) obj.stack.append(a) - + print("=" * 80) print("Probe", self.printname) print(obj) @@ -93,10 +93,10 @@ class Debugger(Subconstruct): """ A pdb-based debugger. When an exception occurs in the subcon, a debugger will appear and allow you to debug the error (and even fix on-the-fly). 
- + Parameters: * subcon - the subcon to debug - + Example: Debugger( Enum(UBInt8("foo"), @@ -131,4 +131,3 @@ def handle_exc(self, msg = None): print(msg) pdb.post_mortem(sys.exc_info()[2]) print("=" * 80) - diff --git a/elftools/construct/lib/binary.py b/elftools/construct/lib/binary.py index c73b887b..3efef0d7 100644 --- a/elftools/construct/lib/binary.py +++ b/elftools/construct/lib/binary.py @@ -28,8 +28,8 @@ def int_to_bin(number, width=32): _bit_values = { - 0: 0, - 1: 1, + 0: 0, + 1: 1, 48: 0, # '0' 49: 1, # '1' @@ -90,7 +90,7 @@ def swap_bytes(bits, bytesize=8): def encode_bin(data): - """ + """ Create a binary representation of the given b'' object. Assume 8-bit ASCII. Example: @@ -101,7 +101,7 @@ def encode_bin(data): def decode_bin(data): - """ + """ Locical opposite of decode_bin. """ if len(data) & 7: @@ -115,4 +115,3 @@ def decode_bin(data): i += 8 j += 1 return b"".join(chars) - diff --git a/elftools/construct/lib/container.py b/elftools/construct/lib/container.py index 2f89b2dc..5a580fac 100644 --- a/elftools/construct/lib/container.py +++ b/elftools/construct/lib/container.py @@ -2,8 +2,8 @@ Various containers. 
""" -from collections import MutableMapping from pprint import pformat +from .py3compat import MutableMapping def recursion_lock(retval, lock_name = "__recursion_lock__"): def decorator(func): diff --git a/elftools/construct/lib/hex.py b/elftools/construct/lib/hex.py index e378e228..b830644a 100644 --- a/elftools/construct/lib/hex.py +++ b/elftools/construct/lib/hex.py @@ -34,11 +34,10 @@ def __init__(self, data, linesize = 16): def __new__(cls, data, *args, **kwargs): return bytes.__new__(cls, data) - + def __str__(self): if not self: return "''" sep = "\n" return sep + sep.join( hexdump(self, self.linesize)) - diff --git a/elftools/construct/lib/py3compat.py b/elftools/construct/lib/py3compat.py index 4a52c293..16e12979 100644 --- a/elftools/construct/lib/py3compat.py +++ b/elftools/construct/lib/py3compat.py @@ -6,6 +6,11 @@ import sys PY3 = sys.version_info[0] == 3 +try: + from collections.abc import MutableMapping # python >= 3.3 +except ImportError: + from collections import MutableMapping # python < 3.3 + if PY3: import io @@ -41,7 +46,7 @@ def decodebytes(b, encoding): return bytes(b, encoding) advance_iterator = next - + else: import cStringIO StringIO = BytesIO = cStringIO.StringIO @@ -67,4 +72,3 @@ def decodebytes(b, encoding): def advance_iterator(it): return it.next() - diff --git a/elftools/dwarf/abbrevtable.py b/elftools/dwarf/abbrevtable.py index 36f6d2a0..6d29d5cf 100644 --- a/elftools/dwarf/abbrevtable.py +++ b/elftools/dwarf/abbrevtable.py @@ -33,7 +33,7 @@ def get_abbrev(self, code): """ Get the AbbrevDecl for a given code. Raise KeyError if no declaration for this code exists. 
""" - return AbbrevDecl(code, self._abbrev_map[code]) + return self._abbrev_map[code] def _parse_abbrev_table(self): """ Parse the abbrev table from the stream @@ -49,7 +49,7 @@ def _parse_abbrev_table(self): declaration = struct_parse( struct=self.structs.Dwarf_abbrev_declaration, stream=self.stream) - map[decl_code] = declaration + map[decl_code] = AbbrevDecl(decl_code, declaration) return map diff --git a/elftools/dwarf/aranges.py b/elftools/dwarf/aranges.py index ad5d48ab..c8bc8604 100644 --- a/elftools/dwarf/aranges.py +++ b/elftools/dwarf/aranges.py @@ -49,7 +49,10 @@ def cu_offset_at_addr(self, addr): 'offset' refers to the offset in the .debug_info section. """ tup = self.entries[bisect_right(self.keys, addr) - 1] - return tup.info_offset + if tup.begin_addr <= addr < tup.begin_addr + tup.length: + return tup.info_offset + else: + return None #------ PRIVATE ------# @@ -64,11 +67,11 @@ def _get_entries(self): while offset < self.size : aranges_header = struct_parse(self.structs.Dwarf_aranges_header, self.stream, offset) - + # End of useful data in the section if not aranges_header["address_size"]: break - + addr_size = self._get_addr_size_struct(aranges_header["address_size"]) # No segmentation diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py index bcef78d7..8b3ec5c7 100644 --- a/elftools/dwarf/callframe.py +++ b/elftools/dwarf/callframe.py @@ -141,6 +141,14 @@ def _parse_entry_at(self, offset): else: cie = self._parse_cie_for_fde(offset, header, entry_structs) aug_bytes = self._read_augmentation_data(entry_structs) + lsda_encoding = cie.augmentation_dict.get('LSDA_encoding', DW_EH_encoding_flags['DW_EH_PE_omit']) + if lsda_encoding != DW_EH_encoding_flags['DW_EH_PE_omit']: + # parse LSDA pointer + lsda_pointer = self._parse_lsda_pointer(entry_structs, + self.stream.tell() - len(aug_bytes), + lsda_encoding) + else: + lsda_pointer = None # For convenience, compute the end offset for this entry end_offset = ( @@ -163,8 +171,10 @@ def 
_parse_entry_at(self, offset): cie = self._parse_cie_for_fde(offset, header, entry_structs) self._entry_cache[offset] = FDE( header=header, instructions=instructions, offset=offset, + structs=entry_structs, cie=cie, augmentation_bytes=aug_bytes, - structs=entry_structs, cie=cie) + lsda_pointer=lsda_pointer, + ) return self._entry_cache[offset] def _parse_instructions(self, structs, offset, end_offset): @@ -224,6 +234,8 @@ def _parse_instructions(self, structs, offset, end_offset): args = [ struct_parse(structs.Dwarf_uleb128(''), self.stream), struct_parse(structs.Dwarf_sleb128(''), self.stream)] + elif opcode == DW_CFA_GNU_args_size: + args = [struct_parse(structs.Dwarf_uleb128(''), self.stream)] else: dwarf_assert(False, 'Unknown CFI opcode: 0x%x' % opcode) @@ -321,6 +333,37 @@ def _read_augmentation_data(self, entry_structs): self.stream)['length'] return self.stream.read(augmentation_data_length) + def _parse_lsda_pointer(self, structs, stream_offset, encoding): + """ Parse bytes to get an LSDA pointer. + + The basic encoding (lower four bits of the encoding) describes how the values are encoded in a CIE or an FDE. + The modifier (upper four bits of the encoding) describes how the raw values, after decoded using a basic + encoding, should be modified before using. 
+ + Ref: https://www.airs.com/blog/archives/460 + """ + assert encoding != DW_EH_encoding_flags['DW_EH_PE_omit'] + basic_encoding = encoding & 0x0f + modifier = encoding & 0xf0 + + formats = self._eh_encoding_to_field(structs) + + ptr = struct_parse( + Struct('Augmentation_Data', + formats[basic_encoding]('LSDA_pointer')), + self.stream, stream_pos=stream_offset)['LSDA_pointer'] + + if modifier == DW_EH_encoding_flags['DW_EH_PE_absptr']: + pass + + elif modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']: + ptr += self.address + stream_offset + + else: + assert False, 'Unsupported encoding modifier for LSDA pointer: {:#x}'.format(modifier) + + return ptr + def _parse_fde_header(self, entry_structs, offset): """ Compute a struct to parse the header of the current FDE. """ @@ -367,7 +410,8 @@ def _parse_fde_header(self, entry_structs, offset): return result - def _eh_encoding_to_field(self, entry_structs): + @staticmethod + def _eh_encoding_to_field(entry_structs): """ Return a mapping from basic encodings (DW_EH_encoding_flags) the corresponding field constructors (for instance @@ -375,9 +419,7 @@ def _eh_encoding_to_field(self, entry_structs): """ return { DW_EH_encoding_flags['DW_EH_PE_absptr']: - entry_structs.Dwarf_uint32 - if entry_structs.dwarf_format == 32 else - entry_structs.Dwarf_uint64, + entry_structs.Dwarf_target_addr, DW_EH_encoding_flags['DW_EH_PE_uleb128']: entry_structs.Dwarf_uleb128, DW_EH_encoding_flags['DW_EH_PE_udata2']: @@ -436,14 +478,14 @@ class CFIEntry(object): http://www.airs.com/blog/archives/460. 
""" def __init__(self, header, structs, instructions, offset, - augmentation_dict={}, augmentation_bytes=b'', cie=None): + augmentation_dict=None, augmentation_bytes=b'', cie=None): self.header = header self.structs = structs self.instructions = instructions self.offset = offset self.cie = cie self._decoded_table = None - self.augmentation_dict = augmentation_dict + self.augmentation_dict = augmentation_dict if augmentation_dict else {} self.augmentation_bytes = augmentation_bytes def get_decoded(self): @@ -467,7 +509,7 @@ def _decode_CFI_table(self): if isinstance(self, CIE): # For a CIE, initialize cur_line to an "empty" line cie = self - cur_line = dict(pc=0, cfa=None) + cur_line = dict(pc=0, cfa=CFARule(reg=None, offset=0)) reg_order = [] else: # FDE # For a FDE, we need to decode the attached CIE first, because its @@ -479,7 +521,7 @@ def _decode_CFI_table(self): last_line_in_CIE = copy.copy(cie_decoded_table.table[-1]) cur_line = copy.copy(last_line_in_CIE) else: - cur_line = dict(cfa=None) + cur_line = dict(cfa=CFARule(reg=None, offset=0)) cur_line['pc'] = self['initial_location'] reg_order = copy.copy(cie_decoded_table.reg_order) @@ -490,7 +532,9 @@ def _decode_CFI_table(self): line_stack = [] def _add_to_order(regnum): - if regnum not in cur_line: + # DW_CFA_restore and others remove registers from cur_line, + # but they stay in reg_order. Avoid duplicates. + if regnum not in reg_order: reg_order.append(regnum) for instr in self.instructions: @@ -575,7 +619,7 @@ def _add_to_order(regnum): # The current line is appended to the table after all instructions # have ended, if there were instructions. 
- if cur_line['cfa'] is not None or len(cur_line) > 2: + if cur_line['cfa'].reg is not None or len(cur_line) > 2: table.append(cur_line) return DecodedCallFrameTable(table=table, reg_order=reg_order) @@ -591,7 +635,9 @@ class CIE(CFIEntry): class FDE(CFIEntry): - pass + def __init__(self, header, structs, instructions, offset, augmentation_bytes=None, cie=None, lsda_pointer=None): + super(FDE, self).__init__(header, structs, instructions, offset, augmentation_bytes=augmentation_bytes, cie=cie) + self.lsda_pointer = lsda_pointer class ZERO(object): diff --git a/elftools/dwarf/compileunit.py b/elftools/dwarf/compileunit.py index 8b4030f4..eb66c571 100644 --- a/elftools/dwarf/compileunit.py +++ b/elftools/dwarf/compileunit.py @@ -6,7 +6,9 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from bisect import bisect_right from .die import DIE +from ..common.utils import dwarf_assert class CompileUnit(object): @@ -53,8 +55,16 @@ def __init__(self, header, dwarfinfo, structs, cu_offset, cu_die_offset): # requested. self._abbrev_table = None - # A list of DIEs belonging to this CU. Lazily parsed. + # A list of DIEs belonging to this CU. + # This list is lazily constructed as DIEs are iterated over. self._dielist = [] + # A list of file offsets, corresponding (by index) to the DIEs + # in `self._dielist`. This list exists separately from + # `self._dielist` to make it binary searchable, enabling the + # DIE population strategy used in `iter_DIE_children`. + # Like `self._dielist`, this list is lazily constructed + # as DIEs are iterated over. 
+ self._diemap = [] def dwarf_format(self): """ Get the DWARF format (32 or 64) for this CU @@ -73,14 +83,94 @@ def get_top_DIE(self): """ Get the top DIE (which is either a DW_TAG_compile_unit or DW_TAG_partial_unit) of this CU """ - return self._get_DIE(0) + + # Note that a top DIE always has minimal offset and is therefore + # at the beginning of our lists, so no bisect is required. + if len(self._diemap) > 0: + return self._dielist[0] + + top = DIE( + cu=self, + stream=self.dwarfinfo.debug_info_sec.stream, + offset=self.cu_die_offset) + + self._dielist.insert(0, top) + self._diemap.insert(0, self.cu_die_offset) + + return top + + @property + def size(self): + return self['unit_length'] + self.structs.initial_length_field_size() + + def get_DIE_from_refaddr(self, refaddr): + """ Obtain a DIE contained in this CU from a reference. + + refaddr: + The offset into the .debug_info section, which must be + contained in this CU or a DWARFError will be raised. + + When using a reference class attribute with a form that is + relative to the compile unit, add the compile unit's + .cu_offset before calling this function. + """ + # All DIEs are after the cu header and within the unit + dwarf_assert( + self.cu_die_offset <= refaddr < self.cu_offset + self.size, + 'refaddr %s not in DIE range of CU %s' % (refaddr, self.cu_offset)) + + return self._get_cached_DIE(refaddr) def iter_DIEs(self): """ Iterate over all the DIEs in the CU, in order of their appearance. Note that null DIEs will also be returned. """ - self._parse_DIEs() - return iter(self._dielist) + return self._iter_DIE_subtree(self.get_top_DIE()) + + def iter_DIE_children(self, die): + """ Given a DIE, yields either its children, without null DIE list + terminator, or nothing, if that DIE has no children. + + The null DIE terminator is saved in that DIE when iteration ended. 
+ """ + if not die.has_children: + return + + # `cur_offset` tracks the stream offset of the next DIE to yield + # as we iterate over our children. + cur_offset = die.offset + die.size + + while True: + child = self._get_cached_DIE(cur_offset) + + child.set_parent(die) + + if child.is_null(): + die._terminator = child + return + + yield child + + if not child.has_children: + cur_offset += child.size + elif "DW_AT_sibling" in child.attributes: + sibling = child.attributes["DW_AT_sibling"] + cur_offset = sibling.value + self.cu_offset + else: + # If no DW_AT_sibling attribute is provided by the producer + # then the whole child subtree must be parsed to find its next + # sibling. A single zero byte, representing the null DIE, + # terminates the children list. It is used to locate the child + # subtree bounds. + + # If the children are not parsed yet, the loop below makes a + # recursive call of this function, which results in the + # `_terminator` attribute of `child` being set. + if child._terminator is None: + for _ in self.iter_DIE_children(child): + pass + + cur_offset = child._terminator.offset + child._terminator.size #------ PRIVATE ------# @@ -89,64 +179,48 @@ def __getitem__(self, name): """ return self.header[name] - def _get_DIE(self, index): - """ Get the DIE at the given index - """ - self._parse_DIEs() - return self._dielist[index] - - def _parse_DIEs(self): - """ Parse all the DIEs pertaining to this CU from the stream and shove - them sequentially into self._dielist. - Also set the child/sibling/parent links in the DIEs according - (unflattening the prefix-order of the DIE tree). + def _iter_DIE_subtree(self, die): + """ Given a DIE, this yields it with its subtree including null DIEs + (child list terminators). 
""" - if len(self._dielist) > 0: - return + yield die + if die.has_children: + for c in die.iter_children(): + for d in self._iter_DIE_subtree(c): + yield d + yield die._terminator + + def _get_cached_DIE(self, offset): + """ Given a DIE offset, look it up in the cache. If not present, + parse the DIE and insert it into the cache. - # Compute the boundary (one byte past the bounds) of this CU in the - # stream - cu_boundary = ( self.cu_offset + - self['unit_length'] + - self.structs.initial_length_field_size()) - - # First pass: parse all DIEs and place them into self._dielist - die_offset = self.cu_die_offset - while die_offset < cu_boundary: - die = DIE( - cu=self, - stream=self.dwarfinfo.debug_info_sec.stream, - offset=die_offset) - self._dielist.append(die) - die_offset += die.size - - # Second pass - unflatten the DIE tree - self._unflatten_tree() - - def _unflatten_tree(self): - """ "Unflatten" the DIE tree from it serial representation, by setting - the child/sibling/parent links of DIEs. - - Assumes self._dielist was already populated by a linear list of DIEs - read from the stream section + offset: + The offset of the DIE in the debug_info section to retrieve. + + The stream reference is copied from the top DIE. The top die will + also be parsed and cached if needed. + + See also get_DIE_from_refaddr(self, refaddr). """ - # the first DIE in the list is the root node - root = self._dielist[0] - parentstack = [root] - - for die in self._dielist[1:]: - if not die.is_null(): - cur_parent = parentstack[-1] - # This DIE is a child of the current parent - cur_parent.add_child(die) - die.set_parent(cur_parent) - if die.has_children: - parentstack.append(die) - else: - # parentstack should not be really empty here. However, some - # compilers generate DWARF that has extra NULLs in the end and - # we don't want pyelftools to fail parsing them just because of - # this. 
- if len(parentstack) > 0: - # end of children for the current parent - parentstack.pop() + # The top die must be in the cache if any DIE is in the cache. + # The stream is the same for all DIEs in this CU, so populate + # the top DIE and obtain a reference to its stream. + top_die_stream = self.get_top_DIE().stream + + # `offset` is the offset in the stream of the DIE we want to return. + # The map is maintained as a parallel array to the list. We call + # bisect each time to ensure new DIEs are inserted in the correct + # order within both `self._dielist` and `self._diemap`. + i = bisect_right(self._diemap, offset) + + # Note that `self._diemap` cannot be empty because the top DIE + # was inserted by the call to .get_top_DIE(). Also it has the minimal + # offset, so the bisect_right insert point will always be at least 1. + if offset == self._diemap[i - 1]: + die = self._dielist[i - 1] + else: + die = DIE(cu=self, stream=top_die_stream, offset=offset) + self._dielist.insert(i, die) + self._diemap.insert(i, offset) + + return die diff --git a/elftools/dwarf/constants.py b/elftools/dwarf/constants.py index e2072b0f..65420958 100644 --- a/elftools/dwarf/constants.py +++ b/elftools/dwarf/constants.py @@ -37,6 +37,23 @@ DW_LANG_UPC = 0x0012 DW_LANG_D = 0x0013 DW_LANG_Python = 0x0014 +DW_LANG_OpenCL = 0x0015 +DW_LANG_Go = 0x0016 +DW_LANG_Modula3 = 0x0017 +DW_LANG_Haskell = 0x0018 +DW_LANG_C_plus_plus_03 = 0x0019 +DW_LANG_C_plus_plus_11 = 0x001a +DW_LANG_OCaml = 0x001b +DW_LANG_Rust = 0x001c +DW_LANG_C11 = 0x001d +DW_LANG_Swift = 0x001e +DW_LANG_Julia = 0x001f +DW_LANG_Dylan = 0x0020 +DW_LANG_C_plus_plus_14 = 0x0021 +DW_LANG_Fortran03 = 0x0022 +DW_LANG_Fortran08 = 0x0023 +DW_LANG_RenderScript = 0x0024 +DW_LANG_BLISS = 0x0025 DW_LANG_Mips_Assembler = 0x8001 DW_LANG_Upc = 0x8765 DW_LANG_HP_Bliss = 0x8003 @@ -44,6 +61,8 @@ DW_LANG_HP_Pascal91 = 0x8005 DW_LANG_HP_IMacro = 0x8006 DW_LANG_HP_Assembler = 0x8007 +DW_LANG_GOOGLE_RenderScript = 0x8e57 +DW_LANG_BORLAND_Delphi = 
0xb000 # Encoding @@ -65,6 +84,8 @@ DW_ATE_unsigned_fixed = 0xe DW_ATE_decimal_float = 0xf DW_ATE_UTF = 0x10 +DW_ATE_UCS = 0x11 +DW_ATE_ASCII = 0x12 DW_ATE_lo_user = 0x80 DW_ATE_hi_user = 0xff DW_ATE_HP_float80 = 0x80 @@ -135,7 +156,19 @@ DW_LNE_end_sequence = 0x01 DW_LNE_set_address = 0x02 DW_LNE_define_file = 0x03 +DW_LNE_set_discriminator = 0x04 +DW_LNE_lo_user = 0x80 +DW_LNE_hi_user = 0xff +# Line program header content types +# +DW_LNCT_path = 0x01 +DW_LNCT_directory_index = 0x02 +DW_LNCT_timestamp = 0x03 +DW_LNCT_size = 0x04 +DW_LNCT_MD5 = 0x05 +DW_LNCT_lo_user = 0x2000 +DW_LNCT_hi_user = 0x3fff # Call frame instructions # @@ -173,3 +206,19 @@ DW_CFA_val_offset = 0x14 DW_CFA_val_offset_sf = 0x15 DW_CFA_val_expression = 0x16 +DW_CFA_GNU_args_size = 0x2e + + +# Compilation unit types +# +# DWARFv5 introduces the "unit_type" field to each CU header, allowing +# individual CUs to indicate whether they're complete, partial, and so forth. +# See DWARFv5 3.1 ("Unit Entries") and 7.5.1 ("Unit Headers"). 
+DW_UT_compile = 0x01 +DW_UT_type = 0x02 +DW_UT_partial = 0x03 +DW_UT_skeleton = 0x04 +DW_UT_split_compile = 0x05 +DW_UT_split_type = 0x06 +DW_UT_lo_user = 0x80 +DW_UT_hi_user = 0xff diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py index eb20333f..ef6ac19f 100644 --- a/elftools/dwarf/descriptions.py +++ b/elftools/dwarf/descriptions.py @@ -9,7 +9,7 @@ from collections import defaultdict from .constants import * -from .dwarf_expr import GenericExprVisitor +from .dwarf_expr import DWARFExprParser from .die import DIE from ..common.utils import preserve_stream_pos, dwarf_assert from ..common.py3compat import bytes2str @@ -99,18 +99,17 @@ def _full_reg_name(regnum): s += ' %s: %s ofs %s\n' % ( name, _full_reg_name(instr.args[0]), instr.args[1] * cie['data_alignment_factor']) - elif name == 'DW_CFA_def_cfa_offset': + elif name in ('DW_CFA_def_cfa_offset', 'DW_CFA_GNU_args_size'): s += ' %s: %s\n' % (name, instr.args[0]) elif name == 'DW_CFA_def_cfa_expression': expr_dumper = ExprDumper(entry.structs) - expr_dumper.process_expr(instr.args[0]) # readelf output is missing a colon for DW_CFA_def_cfa_expression - s += ' %s (%s)\n' % (name, expr_dumper.get_str()) + s += ' %s (%s)\n' % (name, expr_dumper.dump_expr(instr.args[0])) elif name == 'DW_CFA_expression': expr_dumper = ExprDumper(entry.structs) - expr_dumper.process_expr(instr.args[1]) s += ' %s: %s (%s)\n' % ( - name, _full_reg_name(instr.args[0]), expr_dumper.get_str()) + name, _full_reg_name(instr.args[0]), + expr_dumper.dump_expr(instr.args[1])) else: s += ' %s: \n' % name @@ -133,7 +132,7 @@ def describe_CFI_CFA_rule(rule): return '%s%+d' % (describe_reg_name(rule.reg), rule.offset) -def describe_DWARF_expr(expr, structs): +def describe_DWARF_expr(expr, structs, cu_offset=None): """ Textual description of a DWARF expression encoded in 'expr'. structs should come from the entity encompassing the expression - it's needed to be able to parse it correctly. 
@@ -146,9 +145,7 @@ def describe_DWARF_expr(expr, structs): _DWARF_EXPR_DUMPER_CACHE[cache_key] = \ ExprDumper(structs) dwarf_expr_dumper = _DWARF_EXPR_DUMPER_CACHE[cache_key] - dwarf_expr_dumper.clear() - dwarf_expr_dumper.process_expr(expr) - return '(' + dwarf_expr_dumper.get_str() + ')' + return '(' + dwarf_expr_dumper.dump_expr(expr, cu_offset) + ')' def describe_reg_name(regnum, machine_arch=None, default=True): @@ -162,6 +159,8 @@ def describe_reg_name(regnum, machine_arch=None, default=True): return _REG_NAMES_x86[regnum] elif machine_arch == 'x64': return _REG_NAMES_x64[regnum] + elif machine_arch == 'AArch64': + return _REG_NAMES_AArch64[regnum] elif default: return 'r%s' % regnum else: @@ -314,7 +313,6 @@ def _describe_attr_block(attr, die, section_offset): DW_LANG_D: '(D)', DW_LANG_Python: '(Python)', DW_LANG_Mips_Assembler: '(MIPS assembler)', - DW_LANG_Upc: '(nified Parallel C)', DW_LANG_HP_Bliss: '(HP Bliss)', DW_LANG_HP_Basic91: '(HP Basic 91)', DW_LANG_HP_Pascal91: '(HP Pascal 91)', @@ -339,6 +337,7 @@ def _describe_attr_block(attr, die, section_offset): DW_ATE_edited: '(edited)', DW_ATE_signed_fixed: '(signed_fixed)', DW_ATE_unsigned_fixed: '(unsigned_fixed)', + DW_ATE_UTF: '(unicode string)', DW_ATE_HP_float80: '(HP_float80)', DW_ATE_HP_complex_float80: '(HP_complex_float80)', DW_ATE_HP_float128: '(HP_float128)', @@ -425,7 +424,7 @@ def _location_list_extra(attr, die, section_offset): if attr.form in ('DW_FORM_data4', 'DW_FORM_data8', 'DW_FORM_sec_offset'): return '(location list)' else: - return describe_DWARF_expr(attr.value, die.cu.structs) + return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset) def _data_member_location_extra(attr, die, section_offset): @@ -438,7 +437,7 @@ def _data_member_location_extra(attr, die, section_offset): elif attr.form == 'DW_FORM_sdata': return str(attr.value) else: - return describe_DWARF_expr(attr.value, die.cu.structs) + return describe_DWARF_expr(attr.value, die.cu.structs, die.cu.cu_offset) 
def _import_extra(attr, die, section_offset): @@ -531,46 +530,78 @@ def _import_extra(attr, die, section_offset): 'mxcsr', 'fcw', 'fsw' ] +# https://developer.arm.com/documentation/ihi0057/e/?lang=en#dwarf-register-names +_REG_NAMES_AArch64 = [ + 'x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', + 'x8', 'x9', 'x10', 'x11', 'x12', 'x13', 'x14', 'x15', + 'x16', 'x17', 'x18', 'x19', 'x20', 'x21', 'x22', 'x23', + 'x24', 'x25', 'x26', 'x27', 'x28', 'x29', 'x30', 'sp', + '', 'ELR_mode', 'RA_SIGN_STATE', '', '', '', '', '', + '', '', '', '', '', '', 'VG', 'FFR', + 'p0', 'p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', + 'p8', 'p9', 'p10', 'p11', 'p12', 'p13', 'p14', 'p15', + 'v0', 'v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7', + 'v8', 'v9', 'v10', 'v11', 'v12', 'v13', 'v14', 'v15', + 'v16', 'v17', 'v18', 'v19', 'v20', 'v21', 'v22', 'v23', + 'v24', 'v25', 'v26', 'v27', 'v28', 'v29', 'v30', 'v31', + 'z0', 'z1', 'z2', 'z3', 'z4', 'z5', 'z6', 'z7', + 'z8', 'z9', 'z10', 'z11', 'z12', 'z13', 'z14', 'z15', + 'z16', 'z17', 'z18', 'z19', 'z20', 'z21', 'z22', 'z23', + 'z24', 'z25', 'z26', 'z27', 'z28', 'z29', 'z30', 'z31' +] + -class ExprDumper(GenericExprVisitor): - """ A concrete visitor for DWARF expressions that dumps a textual +class ExprDumper(object): + """ A dumper for DWARF expressions that dumps a textual representation of the complete expression. - Usage: after creation, call process_expr, and then get_str for a - semicolon-delimited string representation of the decoded expression. + Usage: after creation, call dump_expr repeatedly - it's stateless. """ def __init__(self, structs): - super(ExprDumper, self).__init__(structs) + self.structs = structs + self.expr_parser = DWARFExprParser(self.structs) self._init_lookups() - self._str_parts = [] - - def clear(self): - self._str_parts = [] - def get_str(self): - return '; '.join(self._str_parts) + def dump_expr(self, expr, cu_offset=None): + """ Parse and dump a DWARF expression. expr should be a list of + (integer) byte values. 
cu_offset is the cu_offset + value from the CU object where the expression resides. + Only affects a handful of GNU opcodes, if None is provided, + that's not a crash condition, only the expression dump will + not be consistent of that of readelf. + + Returns a string representing the expression. + """ + parsed = self.expr_parser.parse_expr(expr) + s = [] + for deo in parsed: + s.append(self._dump_to_string(deo.op, deo.op_name, deo.args, cu_offset)) + return '; '.join(s) def _init_lookups(self): self._ops_with_decimal_arg = set([ 'DW_OP_const1u', 'DW_OP_const1s', 'DW_OP_const2u', 'DW_OP_const2s', - 'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_constu', 'DW_OP_consts', - 'DW_OP_pick', 'DW_OP_plus_uconst', 'DW_OP_bra', 'DW_OP_skip', - 'DW_OP_fbreg', 'DW_OP_piece', 'DW_OP_deref_size', - 'DW_OP_xderef_size', 'DW_OP_regx',]) + 'DW_OP_const4u', 'DW_OP_const4s', 'DW_OP_const8u', 'DW_OP_const8s', + 'DW_OP_constu', 'DW_OP_consts', 'DW_OP_pick', 'DW_OP_plus_uconst', + 'DW_OP_bra', 'DW_OP_skip', 'DW_OP_fbreg', 'DW_OP_piece', + 'DW_OP_deref_size', 'DW_OP_xderef_size', 'DW_OP_regx',]) for n in range(0, 32): self._ops_with_decimal_arg.add('DW_OP_breg%s' % n) - self._ops_with_two_decimal_args = set([ - 'DW_OP_const8u', 'DW_OP_const8s', 'DW_OP_bregx', 'DW_OP_bit_piece']) + self._ops_with_two_decimal_args = set(['DW_OP_bregx', 'DW_OP_bit_piece']) self._ops_with_hex_arg = set( ['DW_OP_addr', 'DW_OP_call2', 'DW_OP_call4', 'DW_OP_call_ref']) - def _after_visit(self, opcode, opcode_name, args): - self._str_parts.append(self._dump_to_string(opcode, opcode_name, args)) + def _dump_to_string(self, opcode, opcode_name, args, cu_offset=None): + # Some GNU ops contain an offset from the current CU as an argument, + # but readelf emits those ops with offset from the info section + # so we need the base offset of the parent CU. + # If omitted, arguments on some GNU opcodes will be off. 
+ if cu_offset is None: + cu_offset = 0 - def _dump_to_string(self, opcode, opcode_name, args): if len(args) == 0: if opcode_name.startswith('DW_OP_reg'): regnum = int(opcode_name[9:]) @@ -598,5 +629,21 @@ def _dump_to_string(self, opcode, opcode_name, args): return '%s: %x' % (opcode_name, args[0]) elif opcode_name in self._ops_with_two_decimal_args: return '%s: %s %s' % (opcode_name, args[0], args[1]) + elif opcode_name == 'DW_OP_GNU_entry_value': + return '%s: (%s)' % (opcode_name, ','.join([self._dump_to_string(deo.op, deo.op_name, deo.args) for deo in args[0]])) + elif opcode_name == 'DW_OP_implicit_value': + return "%s %s byte block: %s" % (opcode_name, len(args[0]), ''.join(["%x " % b for b in args[0]])) + elif opcode_name == 'DW_OP_GNU_parameter_ref': + return "%s: <0x%x>" % (opcode_name, args[0] + cu_offset) + elif opcode_name == 'DW_OP_GNU_implicit_pointer': + return "%s: <0x%x> %d" % (opcode_name, args[0], args[1]) + elif opcode_name == 'DW_OP_GNU_convert': + return "%s <0x%x>" % (opcode_name, args[0] + cu_offset) + elif opcode_name == 'DW_OP_GNU_deref_type': + return "%s: %d <0x%x>" % (opcode_name, args[0], args[1] + cu_offset) + elif opcode_name == 'DW_OP_GNU_const_type': + return "%s: <0x%x> %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" % b for b in args[1])) + elif opcode_name == 'DW_OP_GNU_regval_type': + return "%s: %d (%s) <0x%x>" % (opcode_name, args[0], describe_reg_name(args[0], _MACHINE_ARCH), args[1] + cu_offset) else: return '' % opcode_name diff --git a/elftools/dwarf/die.py b/elftools/dwarf/die.py index 184ff8cc..810cef49 100755 --- a/elftools/dwarf/die.py +++ b/elftools/dwarf/die.py @@ -86,7 +86,9 @@ def __init__(self, cu, stream, offset): self.has_children = None self.abbrev_code = None self.size = 0 - self._children = [] + # Null DIE terminator. It can be used to obtain offset range occupied + # by this DIE including its whole subtree. 
+ self._terminator = None self._parent = None self._parse_DIE() @@ -96,10 +98,34 @@ def is_null(self): """ return self.tag is None + def get_DIE_from_attribute(self, name): + """ Return the DIE referenced by the named attribute of this DIE. + The attribute must be in the reference attribute class. + + name: + The name of the attribute in the reference class. + """ + attr = self.attributes[name] + if attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4', + 'DW_FORM_ref8', 'DW_FORM_ref'): + refaddr = self.cu.cu_offset + attr.raw_value + return self.cu.get_DIE_from_refaddr(refaddr) + elif attr.form in ('DW_FORM_ref_addr'): + return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value) + elif attr.form in ('DW_FORM_ref_sig8'): + # Implement search type units for matching signature + raise NotImplementedError('%s (type unit by signature)' % attr.form) + elif attr.form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8'): + raise NotImplementedError('%s to dwo' % attr.form) + else: + raise DWARFError('%s is not a reference class form attribute' % attr) + def get_parent(self): - """ The parent DIE of this DIE. None if the DIE has no parent (i.e. a - top-level DIE). + """ Return the parent DIE of this DIE, or None if the DIE has no + parent (i.e. is a top-level DIE). 
""" + if self._parent is None: + self._search_ancestor_offspring() return self._parent def get_full_path(self): @@ -117,15 +143,16 @@ def get_full_path(self): return os.path.join(comp_dir, fname) def iter_children(self): - """ Yield all children of this DIE + """ Iterates all children of this DIE """ - return iter(self._children) + return self.cu.iter_DIE_children(self) def iter_siblings(self): """ Yield all siblings of this DIE """ - if self._parent: - for sibling in self._parent.iter_children(): + parent = self.get_parent() + if parent: + for sibling in parent.iter_children(): if sibling is not self: yield sibling else: @@ -134,14 +161,48 @@ def iter_siblings(self): # The following methods are used while creating the DIE and should not be # interesting to consumers # - def add_child(self, die): - self._children.append(die) def set_parent(self, die): self._parent = die #------ PRIVATE ------# + def _search_ancestor_offspring(self): + """ Search our ancestors identifying their offspring to find our parent. + + DIEs are stored as a flattened tree. The top DIE is the ancestor + of all DIEs in the unit. Each parent is guaranteed to be at + an offset less than their children. In each generation of children + the sibling with the closest offset not greater than our offset is + our ancestor. + """ + # This code is called when get_parent notices that the _parent has + # not been identified. To avoid execution for each sibling record all + # the children of any parent iterated. Assuming get_parent will also be + # called for siblings, it is more efficient if siblings references are + # provided and no worse than a single walk if they are missing, while + # stopping iteration early could result in O(n^2) walks. 
+ search = self.cu.get_top_DIE() + while search.offset < self.offset: + prev = search + for child in search.iter_children(): + child.set_parent(search) + if child.offset <= self.offset: + prev = child + + # We also need to check the offset of the terminator DIE + if search.has_children and search._terminator.offset <= self.offset: + prev = search._terminator + + # If we didn't find a closer parent, give up, don't loop. + # Either we mis-parsed an ancestor or someone created a DIE + # by an offset that was not actually the start of a DIE. + if prev is search: + raise ValueError("offset %s not in CU %s DIE tree" % + (self.offset, self.cu.cu_offset)) + + search = prev + def __repr__(self): s = 'DIE %s, size=%s, has_children=%s\n' % ( self.tag, self.size, self.has_children) @@ -170,19 +231,24 @@ def _parse_DIE(self): self.size = self.stream.tell() - self.offset return - with preserve_stream_pos(self.stream): - abbrev_decl = self.cu.get_abbrev_table().get_abbrev( - self.abbrev_code) + abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code) self.tag = abbrev_decl['tag'] self.has_children = abbrev_decl.has_children() # Guided by the attributes listed in the abbreviation declaration, parse # values from the stream. - for name, form in abbrev_decl.iter_attr_specs(): + for spec in abbrev_decl['attr_spec']: + form = spec.form + name = spec.name attr_offset = self.stream.tell() - raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream) - - value = self._translate_attr_value(form, raw_value) + # Special case here: the attribute value is stored in the attribute + # definition in the abbreviation spec, not in the DIE itself. 
+ if form == 'DW_FORM_implicit_const': + value = spec.value + raw_value = value + else: + raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream) + value = self._translate_attr_value(form, raw_value) self.attributes[name] = AttributeValue( name=name, form=form, @@ -190,19 +256,6 @@ def _parse_DIE(self): raw_value=raw_value, offset=attr_offset) - # Count and then consume any null termination bytes to avoid wrong die - # size calculation. - num_zero_terminators = 0 - with preserve_stream_pos(self.stream): - while True: - if self.stream.read(1) == 0: - num_zero_terminators += 1 - else: - break - if num_zero_terminators > 0: - # There was at least one zero termination -> consume all of them. - self.stream.read(num_zero_terminators) - self.size = self.stream.tell() - self.offset def _translate_attr_value(self, form, raw_value): @@ -212,8 +265,13 @@ def _translate_attr_value(self, form, raw_value): if form == 'DW_FORM_strp': with preserve_stream_pos(self.stream): value = self.dwarfinfo.get_string_from_table(raw_value) + elif form == 'DW_FORM_line_strp': + with preserve_stream_pos(self.stream): + value = self.dwarfinfo.get_string_from_linetable(raw_value) elif form == 'DW_FORM_flag': value = not raw_value == 0 + elif form == 'DW_FORM_flag_present': + value = True elif form == 'DW_FORM_indirect': try: form = DW_FORM_raw2name[raw_value] diff --git a/elftools/dwarf/dwarf_expr.py b/elftools/dwarf/dwarf_expr.py index 270a7816..07c6fa10 100644 --- a/elftools/dwarf/dwarf_expr.py +++ b/elftools/dwarf/dwarf_expr.py @@ -6,8 +6,10 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- +from collections import namedtuple + from ..common.py3compat import BytesIO, iteritems -from ..common.utils import struct_parse, bytelist2string +from ..common.utils import struct_parse, bytelist2string, read_blob # DWARF expression opcodes. 
name -> opcode mapping @@ -68,6 +70,28 @@ DW_OP_form_tls_address=0x9b, DW_OP_call_frame_cfa=0x9c, DW_OP_bit_piece=0x9d, + DW_OP_implicit_value=0x9e, + DW_OP_stack_value=0x9f, + DW_OP_implicit_pointer=0xa0, + DW_OP_addrx=0xa1, + DW_OP_constx=0xa2, + DW_OP_entry_value=0xa3, + DW_OP_const_type=0xa4, + DW_OP_regval_type=0xa5, + DW_OP_deref_type=0xa6, + DW_OP_xderef_type=0xa7, + DW_OP_convert=0xa8, + DW_OP_reinterpret=0xa9, + DW_OP_lo_user=0xe0, + DW_OP_GNU_push_tls_address=0xe0, + DW_OP_GNU_implicit_pointer=0xf2, + DW_OP_GNU_entry_value=0xf3, + DW_OP_GNU_const_type=0xf4, + DW_OP_GNU_regval_type=0xf5, + DW_OP_GNU_deref_type=0xf6, + DW_OP_GNU_convert=0xf7, + DW_OP_GNU_parameter_ref=0xfa, + DW_OP_hi_user=0xff, ) def _generate_dynamic_values(map, prefix, index_start, index_end, value_start): @@ -88,170 +112,146 @@ def _generate_dynamic_values(map, prefix, index_start, index_end, value_start): DW_OP_opcode2name = dict((v, k) for k, v in iteritems(DW_OP_name2opcode)) -class GenericExprVisitor(object): - """ A DWARF expression is a sequence of instructions encoded in a block - of bytes. This class decodes the sequence into discrete instructions - with their arguments and allows generic "visiting" to process them. +# Each parsed DWARF expression is returned as this type with its numeric opcode, +# op name (as a string) and a list of arguments. +DWARFExprOp = namedtuple('DWARFExprOp', 'op op_name args') + - Usage: subclass this class, and override the needed methods. The - easiest way would be to just override _after_visit, which gets passed - each decoded instruction (with its arguments) in order. Clients of - the visitor then just execute process_expr. The subclass can keep - its own internal information updated in _after_visit and provide - methods to extract it. For a good example of this usage, see the - ExprDumper class in the descriptions module. +class DWARFExprParser(object): + """DWARF expression parser. 
- A more complex usage could be to override visiting methods for - specific instructions, by placing them into the dispatch table. + When initialized, requires structs to cache a dispatch table. After that, + parse_expr can be called repeatedly - it's stateless. """ + def __init__(self, structs): - self.structs = structs - self._init_dispatch_table() - self.stream = None - self._cur_opcode = None - self._cur_opcode_name = None - self._cur_args = [] - - def process_expr(self, expr): - """ Process (visit) a DWARF expression. expr should be a list of - (integer) byte values. + self._dispatch_table = _init_dispatch_table(structs) + + def parse_expr(self, expr): + """ Parses expr (a list of integers) into a list of DWARFExprOp. + + The list can potentially be nested. """ - self.stream = BytesIO(bytelist2string(expr)) + stream = BytesIO(bytelist2string(expr)) + parsed = [] while True: # Get the next opcode from the stream. If nothing is left in the # stream, we're done. - byte = self.stream.read(1) + byte = stream.read(1) if len(byte) == 0: break - # Decode the opcode and its name - self._cur_opcode = ord(byte) - self._cur_opcode_name = DW_OP_opcode2name.get( - self._cur_opcode, 'OP:0x%x' % self._cur_opcode) - # Will be filled in by visitors - self._cur_args = [] - - # Dispatch to a visitor function - visitor = self._dispatch_table.get( - self._cur_opcode, - self._default_visitor) - visitor(self._cur_opcode, self._cur_opcode_name) - - # Finally call the post-visit function - self._after_visit( - self._cur_opcode, self._cur_opcode_name, self._cur_args) - - def _after_visit(self, opcode, opcode_name, args): - pass - - def _default_visitor(self, opcode, opcode_name): - pass - - def _visit_OP_with_no_args(self, opcode, opcode_name): - self._cur_args = [] - - def _visit_OP_addr(self, opcode, opcode_name): - self._cur_args = [ - struct_parse(self.structs.Dwarf_target_addr(''), self.stream)] - - def _make_visitor_arg_struct(self, struct_arg): - """ Create a visitor method for 
an opcode that that accepts a single - argument, specified by a struct. - """ - def visitor(opcode, opcode_name): - self._cur_args = [struct_parse(struct_arg, self.stream)] - return visitor + # Decode the opcode and its name. + op = ord(byte) + op_name = DW_OP_opcode2name.get(op, 'OP:0x%x' % op) - def _make_visitor_arg_struct2(self, struct_arg1, struct_arg2): - """ Create a visitor method for an opcode that that accepts two - arguments, specified by structs. - """ - def visitor(opcode, opcode_name): - self._cur_args = [ - struct_parse(struct_arg1, self.stream), - struct_parse(struct_arg2, self.stream)] - return visitor - - def _init_dispatch_table(self): - self._dispatch_table = {} - def add(opcode_name, func): - self._dispatch_table[DW_OP_name2opcode[opcode_name]] = func - - add('DW_OP_addr', self._visit_OP_addr) - add('DW_OP_const1u', - self._make_visitor_arg_struct(self.structs.Dwarf_uint8(''))) - add('DW_OP_const1s', - self._make_visitor_arg_struct(self.structs.Dwarf_int8(''))) - add('DW_OP_const2u', - self._make_visitor_arg_struct(self.structs.Dwarf_uint16(''))) - add('DW_OP_const2s', - self._make_visitor_arg_struct(self.structs.Dwarf_int16(''))) - add('DW_OP_const4u', - self._make_visitor_arg_struct(self.structs.Dwarf_uint32(''))) - add('DW_OP_const4s', - self._make_visitor_arg_struct(self.structs.Dwarf_int32(''))) - add('DW_OP_const8u', - self._make_visitor_arg_struct2( - self.structs.Dwarf_uint32(''), - self.structs.Dwarf_uint32(''))) - add('DW_OP_const8s', - self._make_visitor_arg_struct2( - self.structs.Dwarf_int32(''), - self.structs.Dwarf_int32(''))) - add('DW_OP_constu', - self._make_visitor_arg_struct(self.structs.Dwarf_uleb128(''))) - add('DW_OP_consts', - self._make_visitor_arg_struct(self.structs.Dwarf_sleb128(''))) - add('DW_OP_pick', - self._make_visitor_arg_struct(self.structs.Dwarf_uint8(''))) - add('DW_OP_plus_uconst', - self._make_visitor_arg_struct(self.structs.Dwarf_uleb128(''))) - add('DW_OP_bra', - 
self._make_visitor_arg_struct(self.structs.Dwarf_int16(''))) - add('DW_OP_skip', - self._make_visitor_arg_struct(self.structs.Dwarf_int16(''))) - - for opname in [ 'DW_OP_deref', 'DW_OP_dup', 'DW_OP_drop', 'DW_OP_over', - 'DW_OP_swap', 'DW_OP_swap', 'DW_OP_rot', 'DW_OP_xderef', - 'DW_OP_abs', 'DW_OP_and', 'DW_OP_div', 'DW_OP_minus', - 'DW_OP_mod', 'DW_OP_mul', 'DW_OP_neg', 'DW_OP_not', - 'DW_OP_plus', 'DW_OP_shl', 'DW_OP_shr', 'DW_OP_shra', - 'DW_OP_xor', 'DW_OP_eq', 'DW_OP_ge', 'DW_OP_gt', - 'DW_OP_le', 'DW_OP_lt', 'DW_OP_ne', 'DW_OP_nop', - 'DW_OP_push_object_address', 'DW_OP_form_tls_address', - 'DW_OP_call_frame_cfa']: - add(opname, self._visit_OP_with_no_args) - - for n in range(0, 32): - add('DW_OP_lit%s' % n, self._visit_OP_with_no_args) - add('DW_OP_reg%s' % n, self._visit_OP_with_no_args) - add('DW_OP_breg%s' % n, - self._make_visitor_arg_struct(self.structs.Dwarf_sleb128(''))) - - add('DW_OP_fbreg', - self._make_visitor_arg_struct(self.structs.Dwarf_sleb128(''))) - add('DW_OP_regx', - self._make_visitor_arg_struct(self.structs.Dwarf_uleb128(''))) - add('DW_OP_bregx', - self._make_visitor_arg_struct2( - self.structs.Dwarf_uleb128(''), - self.structs.Dwarf_sleb128(''))) - add('DW_OP_piece', - self._make_visitor_arg_struct(self.structs.Dwarf_uleb128(''))) - add('DW_OP_bit_piece', - self._make_visitor_arg_struct2( - self.structs.Dwarf_uleb128(''), - self.structs.Dwarf_uleb128(''))) - add('DW_OP_deref_size', - self._make_visitor_arg_struct(self.structs.Dwarf_int8(''))) - add('DW_OP_xderef_size', - self._make_visitor_arg_struct(self.structs.Dwarf_int8(''))) - add('DW_OP_call2', - self._make_visitor_arg_struct(self.structs.Dwarf_uint16(''))) - add('DW_OP_call4', - self._make_visitor_arg_struct(self.structs.Dwarf_uint32(''))) - add('DW_OP_call_ref', - self._make_visitor_arg_struct(self.structs.Dwarf_offset(''))) + # Use dispatch table to parse args. 
+ arg_parser = self._dispatch_table[op] + args = arg_parser(stream) + + parsed.append(DWARFExprOp(op=op, op_name=op_name, args=args)) + + return parsed + + +def _init_dispatch_table(structs): + """Creates a dispatch table for parsing args of an op. + + Returns a dict mapping opcode to a function. The function accepts a stream + and return a list of parsed arguments for the opcode from the stream; + the stream is advanced by the function as needed. + """ + table = {} + def add(opcode_name, func): + table[DW_OP_name2opcode[opcode_name]] = func + + def parse_noargs(): + return lambda stream: [] + + def parse_op_addr(): + return lambda stream: [struct_parse(structs.Dwarf_target_addr(''), + stream)] + + def parse_arg_struct(arg_struct): + return lambda stream: [struct_parse(arg_struct, stream)] + + def parse_arg_struct2(arg1_struct, arg2_struct): + return lambda stream: [struct_parse(arg1_struct, stream), + struct_parse(arg2_struct, stream)] + + # ULEB128, then an expression of that length + def parse_nestedexpr(): + def parse(stream): + size = struct_parse(structs.Dwarf_uleb128(''), stream) + nested_expr_blob = read_blob(stream, size) + return [DWARFExprParser(structs).parse_expr(nested_expr_blob)] + return parse + + # ULEB128, then a blob of that size + def parse_blob(): + return lambda stream: [read_blob(stream, struct_parse(structs.Dwarf_uleb128(''), stream))] + + # ULEB128 with datatype DIE offset, then byte, then a blob of that size + def parse_typedblob(): + return lambda stream: [struct_parse(structs.Dwarf_uleb128(''), stream), read_blob(stream, struct_parse(structs.Dwarf_uint8(''), stream))] + + add('DW_OP_addr', parse_op_addr()) + add('DW_OP_addrx', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8(''))) + add('DW_OP_const1s', parse_arg_struct(structs.Dwarf_int8(''))) + add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16(''))) + add('DW_OP_const2s', parse_arg_struct(structs.Dwarf_int16(''))) + 
add('DW_OP_const4u', parse_arg_struct(structs.Dwarf_uint32(''))) + add('DW_OP_const4s', parse_arg_struct(structs.Dwarf_int32(''))) + add('DW_OP_const8u', parse_arg_struct(structs.Dwarf_uint64(''))) + add('DW_OP_const8s', parse_arg_struct(structs.Dwarf_int64(''))) + add('DW_OP_constu', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_consts', parse_arg_struct(structs.Dwarf_sleb128(''))) + add('DW_OP_pick', parse_arg_struct(structs.Dwarf_uint8(''))) + add('DW_OP_plus_uconst', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_bra', parse_arg_struct(structs.Dwarf_int16(''))) + add('DW_OP_skip', parse_arg_struct(structs.Dwarf_int16(''))) + + for opname in [ 'DW_OP_deref', 'DW_OP_dup', 'DW_OP_drop', 'DW_OP_over', + 'DW_OP_swap', 'DW_OP_swap', 'DW_OP_rot', 'DW_OP_xderef', + 'DW_OP_abs', 'DW_OP_and', 'DW_OP_div', 'DW_OP_minus', + 'DW_OP_mod', 'DW_OP_mul', 'DW_OP_neg', 'DW_OP_not', + 'DW_OP_or', 'DW_OP_plus', 'DW_OP_shl', 'DW_OP_shr', + 'DW_OP_shra', 'DW_OP_xor', 'DW_OP_eq', 'DW_OP_ge', + 'DW_OP_gt', 'DW_OP_le', 'DW_OP_lt', 'DW_OP_ne', 'DW_OP_nop', + 'DW_OP_push_object_address', 'DW_OP_form_tls_address', + 'DW_OP_call_frame_cfa', 'DW_OP_stack_value', + 'DW_OP_GNU_push_tls_address']: + add(opname, parse_noargs()) + + for n in range(0, 32): + add('DW_OP_lit%s' % n, parse_noargs()) + add('DW_OP_reg%s' % n, parse_noargs()) + add('DW_OP_breg%s' % n, parse_arg_struct(structs.Dwarf_sleb128(''))) + add('DW_OP_fbreg', parse_arg_struct(structs.Dwarf_sleb128(''))) + add('DW_OP_regx', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_bregx', parse_arg_struct2(structs.Dwarf_uleb128(''), + structs.Dwarf_sleb128(''))) + add('DW_OP_piece', parse_arg_struct(structs.Dwarf_uleb128(''))) + add('DW_OP_bit_piece', parse_arg_struct2(structs.Dwarf_uleb128(''), + structs.Dwarf_uleb128(''))) + add('DW_OP_deref_size', parse_arg_struct(structs.Dwarf_int8(''))) + add('DW_OP_xderef_size', parse_arg_struct(structs.Dwarf_int8(''))) + add('DW_OP_call2', 
parse_arg_struct(structs.Dwarf_uint16(''))) + add('DW_OP_call4', parse_arg_struct(structs.Dwarf_uint32(''))) + add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset(''))) + add('DW_OP_implicit_value', parse_blob()) + add('DW_OP_GNU_entry_value', parse_nestedexpr()) + add('DW_OP_GNU_const_type', parse_typedblob()) + add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''), + structs.Dwarf_uleb128(''))) + add('DW_OP_GNU_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''), + structs.Dwarf_uleb128(''))) + add('DW_OP_GNU_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''), + structs.Dwarf_sleb128(''))) + add('DW_OP_GNU_parameter_ref', parse_arg_struct(structs.Dwarf_offset(''))) + add('DW_OP_GNU_convert', parse_arg_struct(structs.Dwarf_uleb128(''))) + return table diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index c5d0e1b1..2fc12f4e 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -7,6 +7,7 @@ # This code is in the public domain #------------------------------------------------------------------------------- from collections import namedtuple +from bisect import bisect_right from ..common.exceptions import DWARFError from ..common.utils import (struct_parse, dwarf_assert, @@ -19,6 +20,7 @@ from .locationlists import LocationLists from .ranges import RangeLists from .aranges import ARanges +from .namelut import NameLUT # Describes a debug section @@ -67,7 +69,12 @@ def __init__(self, debug_str_sec, debug_loc_sec, debug_ranges_sec, - debug_line_sec): + debug_line_sec, + debug_pubtypes_sec, + debug_pubnames_sec, + debug_addr_sec, + debug_str_offsets_sec, + debug_line_str_sec): """ config: A DwarfConfig object @@ -86,6 +93,9 @@ def __init__(self, self.debug_loc_sec = debug_loc_sec self.debug_ranges_sec = debug_ranges_sec self.debug_line_sec = debug_line_sec + self.debug_line_str_sec = debug_line_str_sec + self.debug_pubtypes_sec = debug_pubtypes_sec + self.debug_pubnames_sec = 
debug_pubnames_sec # This is the DWARFStructs the context uses, so it doesn't depend on # DWARF format and address_size (these are determined per CU) - set them @@ -98,6 +108,11 @@ def __init__(self, # Cache for abbrev tables: a dict keyed by offset self._abbrevtable_cache = {} + # Cache of compile units and map of their offsets for bisect lookup. + # Access with .iter_CUs(), .get_CU_containing(), and/or .get_CU_at(). + self._cu_cache = [] + self._cu_offsets_map = [] + @property def has_debug_info(self): """ Return whether this contains debug information. @@ -107,6 +122,84 @@ def has_debug_info(self): """ return bool(self.debug_info_sec) + def get_DIE_from_lut_entry(self, lut_entry): + """ Get the DIE from the pubnames or putbtypes lookup table entry. + + lut_entry: + A NameLUTEntry object from a NameLUT instance (see + .get_pubmames and .get_pubtypes methods). + """ + cu = self.get_CU_at(lut_entry.cu_ofs) + return self.get_DIE_from_refaddr(lut_entry.die_ofs, cu) + + def get_DIE_from_refaddr(self, refaddr, cu=None): + """ Given a .debug_info section offset of a DIE, return the DIE. + + refaddr: + The refaddr may come from a DW_FORM_ref_addr attribute. + + cu: + The compile unit object, if known. If None a search + from the closest offset less than refaddr will be performed. + """ + if cu is None: + cu = self.get_CU_containing(refaddr) + return cu.get_DIE_from_refaddr(refaddr) + + def get_CU_containing(self, refaddr): + """ Find the CU that includes the given reference address in the + .debug_info section. + + refaddr: + Either a refaddr of a DIE (possibly from a DW_FORM_ref_addr + attribute) or the section offset of a CU (possibly from an + aranges table). + + This function will parse and cache CUs until the search criteria + is met, starting from the closest known offset lessthan or equal + to the given address. 
+ """ + dwarf_assert( + self.has_debug_info, + 'CU lookup but no debug info section') + dwarf_assert( + 0 <= refaddr < self.debug_info_sec.size, + "refaddr %s beyond .debug_info size" % refaddr) + + # The CU containing the DIE we desire will be to the right of the + # DIE insert point. If we have a CU address, then it will be a + # match but the right insert minus one will still be the item. + # The first CU starts at offset 0, so start there if cache is empty. + i = bisect_right(self._cu_offsets_map, refaddr) + start = self._cu_offsets_map[i - 1] if i > 0 else 0 + + # parse CUs until we find one containing the desired address + for cu in self._parse_CUs_iter(start): + if cu.cu_offset <= refaddr < cu.cu_offset + cu.size: + return cu + + raise ValueError("CU for reference address %s not found" % refaddr) + + def get_CU_at(self, offset): + """ Given a CU header offset, return the parsed CU. + + offset: + The offset may be from an accelerated access table such as + the public names, public types, address range table, or + prior use. + + This function will directly parse the CU doing no validation of + the offset beyond checking the size of the .debug_info section. + """ + dwarf_assert( + self.has_debug_info, + 'CU lookup but no debug info section') + dwarf_assert( + 0 <= offset < self.debug_info_sec.size, + "offset %s beyond .debug_info size" % offset) + + return self._cached_CU_at_offset(offset) + def iter_CUs(self): """ Yield all the compile units (CompileUnit objects) in the debug info """ @@ -140,6 +233,12 @@ def get_string_from_table(self, offset): """ return parse_cstring_from_stream(self.debug_str_sec.stream, offset) + def get_string_from_linetable(self, offset): + """ Obtain a string from the string table section, given an offset + relative to the section. + """ + return parse_cstring_from_stream(self.debug_line_str_sec.stream, offset) + def line_program_for_CU(self, CU): """ Given a CU object, fetch the line program it points to from the .debug_line section. 
@@ -185,6 +284,38 @@ def EH_CFI_entries(self): for_eh_frame=True) return cfi.get_entries() + def get_pubtypes(self): + """ + Returns a NameLUT object that contains information read from the + .debug_pubtypes section in the ELF file. + + NameLUT is essentially a dictionary containing the CU/DIE offsets of + each symbol. See the NameLUT doc string for more details. + """ + + if self.debug_pubtypes_sec: + return NameLUT(self.debug_pubtypes_sec.stream, + self.debug_pubtypes_sec.size, + self.structs) + else: + return None + + def get_pubnames(self): + """ + Returns a NameLUT object that contains information read from the + .debug_pubnames section in the ELF file. + + NameLUT is essentially a dictionary containing the CU/DIE offsets of + each symbol. See the NameLUT doc string for more details. + """ + + if self.debug_pubnames_sec: + return NameLUT(self.debug_pubnames_sec.stream, + self.debug_pubnames_sec.size, + self.structs) + else: + return None + def get_aranges(self): """ Get an ARanges object representing the .debug_aranges section of the DWARF data, or None if the section doesn't exist @@ -216,15 +347,21 @@ def range_lists(self): #------ PRIVATE ------# - def _parse_CUs_iter(self): - """ Parse CU entries from debug_info. Yield CUs in order of appearance. + def _parse_CUs_iter(self, offset=0): + """ Iterate CU objects in order of appearance in the debug_info section. + + offset: + The offset of the first CU to yield. Additional iterations + will return the sequential unit objects. + + See .iter_CUs(), .get_CU_containing(), and .get_CU_at(). 
""" if self.debug_info_sec is None: return - offset = 0 while offset < self.debug_info_sec.size: - cu = self._parse_CU_at_offset(offset) + cu = self._cached_CU_at_offset(offset) + if not cu: return @@ -236,6 +373,32 @@ def _parse_CUs_iter(self): cu.structs.initial_length_field_size()) yield cu + def _cached_CU_at_offset(self, offset): + """ Return the CU with unit header at the given offset into the + debug_info section from the cache. If not present, the unit is + header is parsed and the object is installed in the cache. + + offset: + The offset of the unit header in the .debug_info section + to of the unit to fetch from the cache. + + See get_CU_at(). + """ + # Find the insert point for the requested offset. With bisect_right, + # if this entry is present in the cache it will be the prior entry. + i = bisect_right(self._cu_offsets_map, offset) + if i >= 1 and offset == self._cu_offsets_map[i - 1]: + return self._cu_cache[i - 1] + + # Parse the CU and insert the offset and object into the cache. + # The ._cu_offsets_map[] contains just the numeric offsets for the + # bisect_right search while the parallel indexed ._cu_cache[] holds + # the object references. + cu = self._parse_CU_at_offset(offset) + self._cu_offsets_map.insert(i, offset) + self._cu_cache.insert(i, cu) + return cu + def _parse_CU_at_offset(self, offset): """ Parse and return a CU at the given offset in the debug_info stream. """ @@ -251,30 +414,31 @@ def _parse_CU_at_offset(self, offset): self.structs.Dwarf_uint32(''), self.debug_info_sec.stream, offset) dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32 - # At this point we still haven't read the whole header, so we don't - # know the address_size. Therefore, we're going to create structs - # with a default address_size=4. If, after parsing the header, we - # find out address_size is actually 8, we just create a new structs - # object for this CU. 
+ + # Temporary structs for parsing the header + # The structs for the rest of the CU depend on the header data. # cu_structs = DWARFStructs( little_endian=self.config.little_endian, dwarf_format=dwarf_format, - address_size=4) + address_size=4, + dwarf_version=2) cu_header = struct_parse( cu_structs.Dwarf_CU_header, self.debug_info_sec.stream, offset) - if cu_header['address_size'] == 8: - cu_structs = DWARFStructs( - little_endian=self.config.little_endian, - dwarf_format=dwarf_format, - address_size=8) + + # structs for the rest of the CU, taking into account bitness and DWARF version + cu_structs = DWARFStructs( + little_endian=self.config.little_endian, + dwarf_format=dwarf_format, + address_size=cu_header['address_size'], + dwarf_version=cu_header['version']) # PE file must have a section size with a given multiple, so the unused portion # may be filled with zeros. if cu_header['version'] == 0: - return None - + return None + cu_die_offset = self.debug_info_sec.stream.tell() dwarf_assert( self._is_supported_version(cu_header['version']), @@ -289,7 +453,7 @@ def _parse_CU_at_offset(self, offset): def _is_supported_version(self, version): """ DWARF version supported by this parser """ - return 2 <= version <= 4 + return 2 <= version <= 5 def _parse_line_program_at_offset(self, debug_line_offset, structs): """ Given an offset to the .debug_line section, parse the line program @@ -311,4 +475,3 @@ def _parse_line_program_at_offset(self, debug_line_offset, structs): structs=structs, program_start_offset=self.debug_line_sec.stream.tell(), program_end_offset=end_offset) - diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py index 903e7d51..bfeca590 100644 --- a/elftools/dwarf/enums.py +++ b/elftools/dwarf/enums.py @@ -11,79 +11,96 @@ ENUM_DW_TAG = dict( - DW_TAG_null = 0x00, - DW_TAG_array_type = 0x01, - DW_TAG_class_type = 0x02, - DW_TAG_entry_point = 0x03, - DW_TAG_enumeration_type = 0x04, - DW_TAG_formal_parameter = 0x05, - DW_TAG_imported_declaration = 
0x08, - DW_TAG_label = 0x0a, - DW_TAG_lexical_block = 0x0b, - DW_TAG_member = 0x0d, - DW_TAG_pointer_type = 0x0f, - DW_TAG_reference_type = 0x10, - DW_TAG_compile_unit = 0x11, - DW_TAG_string_type = 0x12, - DW_TAG_structure_type = 0x13, - DW_TAG_subroutine_type = 0x15, - DW_TAG_typedef = 0x16, - DW_TAG_union_type = 0x17, - DW_TAG_unspecified_parameters = 0x18, - DW_TAG_variant = 0x19, - DW_TAG_common_block = 0x1a, - DW_TAG_common_inclusion = 0x1b, - DW_TAG_inheritance = 0x1c, - DW_TAG_inlined_subroutine = 0x1d, - DW_TAG_module = 0x1e, - DW_TAG_ptr_to_member_type = 0x1f, - DW_TAG_set_type = 0x20, - DW_TAG_subrange_type = 0x21, - DW_TAG_with_stmt = 0x22, - DW_TAG_access_declaration = 0x23, - DW_TAG_base_type = 0x24, - DW_TAG_catch_block = 0x25, - DW_TAG_const_type = 0x26, - DW_TAG_constant = 0x27, - DW_TAG_enumerator = 0x28, - DW_TAG_file_type = 0x29, - DW_TAG_friend = 0x2a, - DW_TAG_namelist = 0x2b, - DW_TAG_namelist_item = 0x2c, - DW_TAG_namelist_items = 0x2c, - DW_TAG_packed_type = 0x2d, - DW_TAG_subprogram = 0x2e, + DW_TAG_null = 0x00, + DW_TAG_array_type = 0x01, + DW_TAG_class_type = 0x02, + DW_TAG_entry_point = 0x03, + DW_TAG_enumeration_type = 0x04, + DW_TAG_formal_parameter = 0x05, + DW_TAG_global_subroutine = 0x06, + DW_TAG_global_variable = 0x07, + DW_TAG_imported_declaration = 0x08, + DW_TAG_label = 0x0a, + DW_TAG_lexical_block = 0x0b, + DW_TAG_local_variable = 0x0c, + DW_TAG_member = 0x0d, + DW_TAG_pointer_type = 0x0f, + DW_TAG_reference_type = 0x10, + DW_TAG_compile_unit = 0x11, + DW_TAG_string_type = 0x12, + DW_TAG_structure_type = 0x13, + DW_TAG_subroutine = 0x14, + DW_TAG_subroutine_type = 0x15, + DW_TAG_typedef = 0x16, + DW_TAG_union_type = 0x17, + DW_TAG_unspecified_parameters = 0x18, + DW_TAG_variant = 0x19, + DW_TAG_common_block = 0x1a, + DW_TAG_common_inclusion = 0x1b, + DW_TAG_inheritance = 0x1c, + DW_TAG_inlined_subroutine = 0x1d, + DW_TAG_module = 0x1e, + DW_TAG_ptr_to_member_type = 0x1f, + DW_TAG_set_type = 0x20, + DW_TAG_subrange_type = 
0x21, + DW_TAG_with_stmt = 0x22, + DW_TAG_access_declaration = 0x23, + DW_TAG_base_type = 0x24, + DW_TAG_catch_block = 0x25, + DW_TAG_const_type = 0x26, + DW_TAG_constant = 0x27, + DW_TAG_enumerator = 0x28, + DW_TAG_file_type = 0x29, + DW_TAG_friend = 0x2a, + DW_TAG_namelist = 0x2b, + DW_TAG_namelist_item = 0x2c, + DW_TAG_namelist_items = 0x2c, + DW_TAG_packed_type = 0x2d, + DW_TAG_subprogram = 0x2e, # The DWARF standard defines these as _parameter, not _param, but we # maintain compatibility with readelf. - DW_TAG_template_type_param = 0x2f, - DW_TAG_template_value_param = 0x30, + DW_TAG_template_type_param = 0x2f, + DW_TAG_template_value_param = 0x30, - DW_TAG_thrown_type = 0x31, - DW_TAG_try_block = 0x32, - DW_TAG_variant_part = 0x33, - DW_TAG_variable = 0x34, - DW_TAG_volatile_type = 0x35, - DW_TAG_dwarf_procedure = 0x36, - DW_TAG_restrict_type = 0x37, - DW_TAG_interface_type = 0x38, - DW_TAG_namespace = 0x39, - DW_TAG_imported_module = 0x3a, - DW_TAG_unspecified_type = 0x3b, - DW_TAG_partial_unit = 0x3c, - DW_TAG_imported_unit = 0x3d, - DW_TAG_mutable_type = 0x3e, - DW_TAG_condition = 0x3f, - DW_TAG_shared_type = 0x40, - DW_TAG_type_unit = 0x41, - DW_TAG_rvalue_reference_type = 0x42, + DW_TAG_thrown_type = 0x31, + DW_TAG_try_block = 0x32, + DW_TAG_variant_part = 0x33, + DW_TAG_variable = 0x34, + DW_TAG_volatile_type = 0x35, + DW_TAG_dwarf_procedure = 0x36, + DW_TAG_restrict_type = 0x37, + DW_TAG_interface_type = 0x38, + DW_TAG_namespace = 0x39, + DW_TAG_imported_module = 0x3a, + DW_TAG_unspecified_type = 0x3b, + DW_TAG_partial_unit = 0x3c, + DW_TAG_imported_unit = 0x3d, + DW_TAG_mutable_type = 0x3e, + DW_TAG_condition = 0x3f, + DW_TAG_shared_type = 0x40, + DW_TAG_type_unit = 0x41, + DW_TAG_rvalue_reference_type = 0x42, + DW_TAG_atomic_type = 0x47, + DW_TAG_call_site = 0x48, + DW_TAG_call_site_parameter = 0x49, + DW_TAG_skeleton_unit = 0x4a, + DW_TAG_immutable_type = 0x4b, - DW_TAG_lo_user = 0x4080, - DW_TAG_GNU_call_site = 0x4109, - 
DW_TAG_GNU_call_site_parameter = 0x410a, - DW_TAG_hi_user = 0xffff, - _default_ = Pass, + + DW_TAG_lo_user = 0x4080, + DW_TAG_GNU_template_template_param = 0x4106, + DW_TAG_GNU_template_parameter_pack = 0x4107, + DW_TAG_GNU_formal_parameter_pack = 0x4108, + DW_TAG_GNU_call_site = 0x4109, + DW_TAG_GNU_call_site_parameter = 0x410a, + + DW_TAG_APPLE_property = 0x4200, + + DW_TAG_hi_user = 0xffff, + + _default_ = Pass, ) @@ -94,104 +111,142 @@ ENUM_DW_AT = dict( - DW_AT_null = 0x00, - DW_AT_sibling = 0x01, - DW_AT_location = 0x02, - DW_AT_name = 0x03, - DW_AT_ordering = 0x09, - DW_AT_subscr_data = 0x0a, - DW_AT_byte_size = 0x0b, - DW_AT_bit_offset = 0x0c, - DW_AT_bit_size = 0x0d, - DW_AT_element_list = 0x0f, - DW_AT_stmt_list = 0x10, - DW_AT_low_pc = 0x11, - DW_AT_high_pc = 0x12, - DW_AT_language = 0x13, - DW_AT_member = 0x14, - DW_AT_discr = 0x15, - DW_AT_discr_value = 0x16, - DW_AT_visibility = 0x17, - DW_AT_import = 0x18, - DW_AT_string_length = 0x19, - DW_AT_common_reference = 0x1a, - DW_AT_comp_dir = 0x1b, - DW_AT_const_value = 0x1c, - DW_AT_containing_type = 0x1d, - DW_AT_default_value = 0x1e, - DW_AT_inline = 0x20, - DW_AT_is_optional = 0x21, - DW_AT_lower_bound = 0x22, - DW_AT_producer = 0x25, - DW_AT_prototyped = 0x27, - DW_AT_return_addr = 0x2a, - DW_AT_start_scope = 0x2c, - DW_AT_bit_stride = 0x2e, - DW_AT_stride_size = 0x2e, - DW_AT_upper_bound = 0x2f, - DW_AT_abstract_origin = 0x31, - DW_AT_accessibility = 0x32, - DW_AT_address_class = 0x33, - DW_AT_artificial = 0x34, - DW_AT_base_types = 0x35, - DW_AT_calling_convention = 0x36, - DW_AT_count = 0x37, - DW_AT_data_member_location = 0x38, - DW_AT_decl_column = 0x39, - DW_AT_decl_file = 0x3a, - DW_AT_decl_line = 0x3b, - DW_AT_declaration = 0x3c, - DW_AT_discr_list = 0x3d, - DW_AT_encoding = 0x3e, - DW_AT_external = 0x3f, - DW_AT_frame_base = 0x40, - DW_AT_friend = 0x41, - DW_AT_identifier_case = 0x42, - DW_AT_macro_info = 0x43, - DW_AT_namelist_item = 0x44, - DW_AT_priority = 0x45, - DW_AT_segment = 0x46, - 
DW_AT_specification = 0x47, - DW_AT_static_link = 0x48, - DW_AT_type = 0x49, - DW_AT_use_location = 0x4a, - DW_AT_variable_parameter = 0x4b, - DW_AT_virtuality = 0x4c, - DW_AT_vtable_elem_location = 0x4d, - DW_AT_allocated = 0x4e, - DW_AT_associated = 0x4f, - DW_AT_data_location = 0x50, - DW_AT_byte_stride = 0x51, - DW_AT_stride = 0x51, - DW_AT_entry_pc = 0x52, - DW_AT_use_UTF8 = 0x53, - DW_AT_extension = 0x54, - DW_AT_ranges = 0x55, - DW_AT_trampoline = 0x56, - DW_AT_call_column = 0x57, - DW_AT_call_file = 0x58, - DW_AT_call_line = 0x59, - DW_AT_description = 0x5a, - DW_AT_binary_scale = 0x5b, - DW_AT_decimal_scale = 0x5c, - DW_AT_small = 0x5d, - DW_AT_decimal_sign = 0x5e, - DW_AT_digit_count = 0x5f, - DW_AT_picture_string = 0x60, - DW_AT_mutable = 0x61, - DW_AT_threads_scaled = 0x62, - DW_AT_explicit = 0x63, - DW_AT_object_pointer = 0x64, - DW_AT_endianity = 0x65, - DW_AT_elemental = 0x66, - DW_AT_pure = 0x67, - DW_AT_recursive = 0x68, - DW_AT_signature = 0x69, - DW_AT_main_subprogram = 0x6a, - DW_AT_data_bit_offset = 0x6b, - DW_AT_const_expr = 0x6c, - DW_AT_enum_class = 0x6d, - DW_AT_linkage_name = 0x6e, + DW_AT_null = 0x00, + DW_AT_sibling = 0x01, + DW_AT_location = 0x02, + DW_AT_name = 0x03, + DW_AT_fund_type = 0x05, + DW_AT_mod_fund_type = 0x06, + DW_AT_user_def_type = 0x07, + DW_AT_mod_u_d_type = 0x08, + DW_AT_ordering = 0x09, + DW_AT_subscr_data = 0x0a, + DW_AT_byte_size = 0x0b, + DW_AT_bit_offset = 0x0c, + DW_AT_bit_size = 0x0d, + DW_AT_element_list = 0x0f, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12, + DW_AT_language = 0x13, + DW_AT_member = 0x14, + DW_AT_discr = 0x15, + DW_AT_discr_value = 0x16, + DW_AT_visibility = 0x17, + DW_AT_import = 0x18, + DW_AT_string_length = 0x19, + DW_AT_common_reference = 0x1a, + DW_AT_comp_dir = 0x1b, + DW_AT_const_value = 0x1c, + DW_AT_containing_type = 0x1d, + DW_AT_default_value = 0x1e, + DW_AT_inline = 0x20, + DW_AT_is_optional = 0x21, + DW_AT_lower_bound = 0x22, + DW_AT_program = 0x23, + 
DW_AT_private = 0x24, + DW_AT_producer = 0x25, + DW_AT_protected = 0x26, + DW_AT_prototyped = 0x27, + DW_AT_public = 0x28, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, + DW_AT_bit_stride = 0x2e, + DW_AT_stride_size = 0x2e, + DW_AT_upper_bound = 0x2f, + DW_AT_virtual = 0x30, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, + DW_AT_address_class = 0x33, + DW_AT_artificial = 0x34, + DW_AT_base_types = 0x35, + DW_AT_calling_convention = 0x36, + DW_AT_count = 0x37, + DW_AT_data_member_location = 0x38, + DW_AT_decl_column = 0x39, + DW_AT_decl_file = 0x3a, + DW_AT_decl_line = 0x3b, + DW_AT_declaration = 0x3c, + DW_AT_discr_list = 0x3d, + DW_AT_encoding = 0x3e, + DW_AT_external = 0x3f, + DW_AT_frame_base = 0x40, + DW_AT_friend = 0x41, + DW_AT_identifier_case = 0x42, + DW_AT_macro_info = 0x43, + DW_AT_namelist_item = 0x44, + DW_AT_priority = 0x45, + DW_AT_segment = 0x46, + DW_AT_specification = 0x47, + DW_AT_static_link = 0x48, + DW_AT_type = 0x49, + DW_AT_use_location = 0x4a, + DW_AT_variable_parameter = 0x4b, + DW_AT_virtuality = 0x4c, + DW_AT_vtable_elem_location = 0x4d, + DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, + DW_AT_byte_stride = 0x51, + DW_AT_stride = 0x51, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, + DW_AT_ranges = 0x55, + DW_AT_trampoline = 0x56, + DW_AT_call_column = 0x57, + DW_AT_call_file = 0x58, + DW_AT_call_line = 0x59, + DW_AT_description = 0x5a, + DW_AT_binary_scale = 0x5b, + DW_AT_decimal_scale = 0x5c, + DW_AT_small = 0x5d, + DW_AT_decimal_sign = 0x5e, + DW_AT_digit_count = 0x5f, + DW_AT_picture_string = 0x60, + DW_AT_mutable = 0x61, + DW_AT_threads_scaled = 0x62, + DW_AT_explicit = 0x63, + DW_AT_object_pointer = 0x64, + DW_AT_endianity = 0x65, + DW_AT_elemental = 0x66, + DW_AT_pure = 0x67, + DW_AT_recursive = 0x68, + DW_AT_signature = 0x69, + DW_AT_main_subprogram = 0x6a, + DW_AT_data_bit_offset = 0x6b, + DW_AT_const_expr = 0x6c, + DW_AT_enum_class = 0x6d, + 
DW_AT_linkage_name = 0x6e, + DW_AT_string_length_bit_size = 0x6f, + DW_AT_string_length_byte_size = 0x70, + DW_AT_rank = 0x71, + DW_AT_str_offsets_base = 0x72, + DW_AT_addr_base = 0x73, + DW_AT_rnglists_base = 0x74, + DW_AT_dwo_name = 0x76, + DW_AT_reference = 0x77, + DW_AT_rvalue_reference = 0x78, + DW_AT_macros = 0x79, + DW_AT_call_all_calls = 0x7a, + DW_AT_call_all_source_calls = 0x7b, + DW_AT_call_all_tail_calls = 0x7c, + DW_AT_call_return_pc = 0x7d, + DW_AT_call_value = 0x7e, + DW_AT_call_origin = 0x7f, + DW_AT_call_parameter = 0x80, + DW_AT_call_pc = 0x81, + DW_AT_call_tail_call = 0x82, + DW_AT_call_target = 0x83, + DW_AT_call_target_clobbered = 0x84, + DW_AT_call_data_location = 0x85, + DW_AT_call_data_value = 0x86, + DW_AT_noreturn = 0x87, + DW_AT_alignment = 0x88, + DW_AT_export_symbols = 0x89, + DW_AT_deleted = 0x8a, + DW_AT_defaulted = 0x8b, + DW_AT_loclists_base = 0x8c, DW_AT_MIPS_fde = 0x2001, DW_AT_MIPS_loop_begin = 0x2002, @@ -219,6 +274,7 @@ DW_AT_body_end = 0x2106, DW_AT_GNU_vector = 0x2107, DW_AT_GNU_template_name = 0x2110, + DW_AT_GNU_odr_signature = 0x210f, DW_AT_GNU_call_site_value = 0x2111, DW_AT_GNU_call_site_data_value = 0x2112, @@ -228,6 +284,17 @@ DW_AT_GNU_all_tail_call_sites = 0x2116, DW_AT_GNU_all_call_sites = 0x2117, DW_AT_GNU_all_source_call_sites = 0x2118, + DW_AT_GNU_macros = 0x2119, + DW_AT_GNU_deleted = 0x211a, + DW_AT_GNU_dwo_id = 0x2131, + DW_AT_GNU_pubnames = 0x2134, + DW_AT_GNU_pubtypes = 0x2135, + DW_AT_GNU_discriminator = 0x2136, + + DW_AT_LLVM_include_path = 0x3e00, + DW_AT_LLVM_config_macros = 0x3e01, + DW_AT_LLVM_isysroot = 0x3e02, + DW_AT_LLVM_tag_offset = 0x3e03, DW_AT_APPLE_optimized = 0x3fe1, DW_AT_APPLE_flags = 0x3fe2, @@ -248,36 +315,57 @@ ENUM_DW_FORM = dict( - DW_FORM_null = 0x00, - DW_FORM_addr = 0x01, - DW_FORM_block2 = 0x03, - DW_FORM_block4 = 0x04, - DW_FORM_data2 = 0x05, - DW_FORM_data4 = 0x06, - DW_FORM_data8 = 0x07, - DW_FORM_string = 0x08, - DW_FORM_block = 0x09, - DW_FORM_block1 = 0x0a, - DW_FORM_data1 = 
0x0b, - DW_FORM_flag = 0x0c, - DW_FORM_sdata = 0x0d, - DW_FORM_strp = 0x0e, - DW_FORM_udata = 0x0f, - DW_FORM_ref_addr = 0x10, - DW_FORM_ref1 = 0x11, - DW_FORM_ref2 = 0x12, - DW_FORM_ref4 = 0x13, - DW_FORM_ref8 = 0x14, - DW_FORM_ref_udata = 0x15, - DW_FORM_indirect = 0x16, - DW_FORM_sec_offset = 0x17, - DW_FORM_exprloc = 0x18, - DW_FORM_flag_present = 0x19, - DW_FORM_ref_sig8 = 0x20, + DW_FORM_null = 0x00, + DW_FORM_addr = 0x01, + DW_FORM_ref = 0x02, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, + DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16, + DW_FORM_sec_offset = 0x17, + DW_FORM_exprloc = 0x18, + DW_FORM_flag_present = 0x19, + DW_FORM_strx = 0x1a, + DW_FORM_addrx = 0x1b, + DW_FORM_ref_sup4 = 0x1c, + DW_FORM_strp_sup = 0x1d, + DW_FORM_data16 = 0x1e, + DW_FORM_line_strp = 0x1f, + DW_FORM_ref_sig8 = 0x20, + DW_FORM_implicit_const = 0x21, + DW_FORM_loclistx = 0x22, + DW_FORM_rnglistx = 0x23, + DW_FORM_ref_sup8 = 0x24, + DW_FORM_strx1 = 0x25, + DW_FORM_strx2 = 0x26, + DW_FORM_strx3 = 0x27, + DW_FORM_strx4 = 0x28, + DW_FORM_addrx1 = 0x29, + DW_FORM_addrx2 = 0x2a, + DW_FORM_addrx3 = 0x2b, + DW_FORM_addrx4 = 0x2c, - DW_FORM_GNU_strp_alt = 0x1f21, - DW_FORM_GNU_ref_alt = 0x1f20, - _default_ = Pass, + DW_FORM_GNU_addr_index = 0x1f01, + DW_FORM_GNU_str_index = 0x1f02, + DW_FORM_GNU_ref_alt = 0x1f20, + DW_FORM_GNU_strp_alt = 0x1f21, + _default_ = Pass, ) # Inverse mapping for ENUM_DW_FORM diff --git a/elftools/dwarf/lineprogram.py b/elftools/dwarf/lineprogram.py index 8996b5ca..dbde7baf 100644 --- a/elftools/dwarf/lineprogram.py +++ b/elftools/dwarf/lineprogram.py @@ -10,7 +10,7 @@ import 
copy from collections import namedtuple -from ..common.utils import struct_parse +from ..common.utils import struct_parse, dwarf_assert from .constants import * @@ -58,12 +58,14 @@ def __init__(self, default_is_stmt): self.prologue_end = False self.epilogue_begin = False self.isa = 0 + self.discriminator = 0 def __repr__(self): a = ['\n' @@ -76,7 +78,7 @@ class LineProgram(object): """ def __init__(self, header, stream, structs, program_start_offset, program_end_offset): - """ + """ header: The header of this line program. Note: LineProgram may modify its header by appending file entries if DW_LNE_define_file @@ -115,7 +117,7 @@ def get_entries(self): return self._decoded_entries #------ PRIVATE ------# - + def __getitem__(self, name): """ Implement dict-like access to header entries """ @@ -130,6 +132,7 @@ def add_entry_new_state(cmd, args, is_extended=False): # After adding, clear some state registers. entries.append(LineProgramEntry( cmd, is_extended, args, copy.copy(state))) + state.discriminator = 0 state.basic_block = False state.prologue_end = False state.epilogue_begin = False @@ -141,7 +144,7 @@ def add_entry_old_state(cmd, args, is_extended=False): offset = self.program_start_offset while offset < self.program_end_offset: opcode = struct_parse( - self.structs.Dwarf_uint8(''), + self.structs.Dwarf_uint8(''), self.stream, offset) @@ -156,7 +159,7 @@ def add_entry_old_state(cmd, args, is_extended=False): adjusted_opcode = opcode - self['opcode_base'] operation_advance = adjusted_opcode // self['line_range'] address_addend = ( - self['minimum_instruction_length'] * + self['minimum_instruction_length'] * ((state.op_index + operation_advance) // maximum_operations_per_instruction)) state.address += address_addend @@ -175,9 +178,10 @@ def add_entry_old_state(cmd, args, is_extended=False): if ex_opcode == DW_LNE_end_sequence: state.end_sequence = True + state.is_stmt = 0 add_entry_new_state(ex_opcode, [], is_extended=True) # reset state - state = 
LineState(self.header['default_is_stmt']) + state = LineState(self.header['default_is_stmt']) elif ex_opcode == DW_LNE_set_address: operand = struct_parse(self.structs.Dwarf_target_addr(''), self.stream) @@ -188,6 +192,10 @@ def add_entry_old_state(cmd, args, is_extended=False): self.structs.Dwarf_lineprog_file_entry, self.stream) self['file_entry'].append(operand) add_entry_old_state(ex_opcode, [operand], is_extended=True) + elif ex_opcode == DW_LNE_set_discriminator: + operand = struct_parse(self.structs.Dwarf_uleb128(''), + self.stream) + state.discriminator = operand else: # Unknown, but need to roll forward the stream because the # length is specified. Seek forward inst_len - 1 because @@ -252,4 +260,3 @@ def add_entry_old_state(cmd, args, is_extended=False): opcode,)) offset = self.stream.tell() return entries - diff --git a/elftools/dwarf/locationlists.py b/elftools/dwarf/locationlists.py index 3d97af3c..e6c735f5 100644 --- a/elftools/dwarf/locationlists.py +++ b/elftools/dwarf/locationlists.py @@ -11,10 +11,9 @@ from ..common.utils import struct_parse - -LocationEntry = namedtuple('LocationEntry', 'begin_offset end_offset loc_expr') -BaseAddressEntry = namedtuple('BaseAddressEntry', 'base_address') - +LocationExpr = namedtuple('LocationExpr', 'loc_expr') +LocationEntry = namedtuple('LocationEntry', 'entry_offset begin_offset end_offset loc_expr') +BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset base_address') class LocationLists(object): """ A single location list is a Python list consisting of LocationEntry or @@ -47,6 +46,7 @@ def iter_location_lists(self): def _parse_location_list_from_stream(self): lst = [] while True: + entry_offset = self.stream.tell() begin_offset = struct_parse( self.structs.Dwarf_target_addr(''), self.stream) end_offset = struct_parse( @@ -56,7 +56,7 @@ def _parse_location_list_from_stream(self): break elif begin_offset == self._max_addr: # Base address selection entry - 
lst.append(BaseAddressEntry(base_address=end_offset)) + lst.append(BaseAddressEntry(entry_offset=entry_offset, base_address=end_offset)) else: # Location list entry expr_len = struct_parse( @@ -65,7 +65,66 @@ def _parse_location_list_from_stream(self): self.stream) for i in range(expr_len)] lst.append(LocationEntry( + entry_offset=entry_offset, begin_offset=begin_offset, end_offset=end_offset, loc_expr=loc_expr)) return lst + +class LocationParser(object): + """ A parser for location information in DIEs. + Handles both location information contained within the attribute + itself (represented as a LocationExpr object) and references to + location lists in the .debug_loc section (represented as a + list). + """ + def __init__(self, location_lists): + self.location_lists = location_lists + + @staticmethod + def attribute_has_location(attr, dwarf_version): + """ Checks if a DIE attribute contains location information. + """ + return (LocationParser._attribute_is_loclistptr_class(attr) and + (LocationParser._attribute_has_loc_expr(attr, dwarf_version) or + LocationParser._attribute_has_loc_list(attr, dwarf_version))) + + def parse_from_attribute(self, attr, dwarf_version): + """ Parses a DIE attribute and returns either a LocationExpr or + a list. 
+ """ + if self.attribute_has_location(attr, dwarf_version): + if self._attribute_has_loc_expr(attr, dwarf_version): + return LocationExpr(attr.value) + elif self._attribute_has_loc_list(attr, dwarf_version): + return self.location_lists.get_location_list_at_offset( + attr.value) + else: + raise ValueError("Attribute does not have location information") + + #------ PRIVATE ------# + + @staticmethod + def _attribute_has_loc_expr(attr, dwarf_version): + return ((dwarf_version < 4 and attr.form.startswith('DW_FORM_block') and + not attr.name == 'DW_AT_const_value') or + attr.form == 'DW_FORM_exprloc') + + @staticmethod + def _attribute_has_loc_list(attr, dwarf_version): + return ((dwarf_version < 4 and + attr.form in ('DW_FORM_data4', 'DW_FORM_data8') and + not attr.name == 'DW_AT_const_value') or + attr.form == 'DW_FORM_sec_offset') + + @staticmethod + def _attribute_is_loclistptr_class(attr): + return (attr.name in ( 'DW_AT_location', 'DW_AT_string_length', + 'DW_AT_const_value', 'DW_AT_return_addr', + 'DW_AT_data_member_location', + 'DW_AT_frame_base', 'DW_AT_segment', + 'DW_AT_static_link', 'DW_AT_use_location', + 'DW_AT_vtable_elem_location', + 'DW_AT_GNU_call_site_value', + 'DW_AT_GNU_call_site_target', + 'DW_AT_GNU_call_site_data_value')) diff --git a/elftools/dwarf/namelut.py b/elftools/dwarf/namelut.py new file mode 100755 index 00000000..fd12aad3 --- /dev/null +++ b/elftools/dwarf/namelut.py @@ -0,0 +1,198 @@ +#------------------------------------------------------------------------------- +# elftools: dwarf/namelut.py +# +# DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames) +# +# Vijay Ramasami (rvijayc@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +import collections +from collections import OrderedDict +from ..common.utils import struct_parse +from ..common.py3compat import Mapping +from bisect import bisect_right +import math +from 
..construct import CString, Struct, If + +NameLUTEntry = collections.namedtuple('NameLUTEntry', 'cu_ofs die_ofs') + +class NameLUT(Mapping): + """ + A "Name LUT" holds any of the tables specified by .debug_pubtypes or + .debug_pubnames sections. This is basically a dictionary where the key is + the symbol name (either a public variable, function or a type), and the + value is the tuple (cu_offset, die_offset) corresponding to the variable. + The die_offset is an absolute offset (meaning, it can be used to search the + CU by iterating until a match is obtained). + + An ordered dictionary is used to preserve the CU order (i.e, items are + stored on a per-CU basis (as it was originally in the .debug_* section). + + Usage: + + The NameLUT walks and talks like a dictionary and hence it can be used as + such. Some examples below: + + # get the pubnames (a NameLUT from DWARF info). + pubnames = dwarf_info.get_pubnames() + + # lookup a variable. + entry1 = pubnames["var_name1"] + entry2 = pubnames.get("var_name2", default=) + print(entry2.cu_ofs) + ... + + # iterate over items. + for (name, entry) in pubnames.items(): + # do stuff with name, entry.cu_ofs, entry.die_ofs + + # iterate over items on a per-CU basis. + import itertools + for cu_ofs, item_list in itertools.groupby(pubnames.items(), + key = lambda x: x[1].cu_ofs): + # items are now grouped by cu_ofs. + # item_list is an iterator yeilding NameLUTEntry'ies belonging + # to cu_ofs. + # We can parse the CU at cu_offset and use the parsed CU results + # to parse the pubname DIEs in the CU listed by item_list. + for item in item_list: + # work with item which is part of the CU with cu_ofs. + + """ + + def __init__(self, stream, size, structs): + + self._stream = stream + self._size = size + self._structs = structs + # entries are lazily loaded on demand. + self._entries = None + # CU headers (for readelf). + self._cu_headers = None + + def get_entries(self): + """ + Returns the parsed NameLUT entries. 
The returned object is a dictionary + with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as + the value. + + This is useful when dealing with very large ELF files with millions of + entries. The returned entries can be pickled to a file and restored by + calling set_entries on subsequent loads. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries + + def set_entries(self, entries, cu_headers): + """ + Set the NameLUT entries from an external source. The input is a + dictionary with the symbol name as the key and NameLUTEntry(cu_ofs, + die_ofs) as the value. + + This option is useful when dealing with very large ELF files with + millions of entries. The entries can be parsed once and pickled to a + file and can be restored via this function on subsequent loads. + """ + self._entries = entries + self._cu_headers = cu_headers + + def __len__(self): + """ + Returns the number of entries in the NameLUT. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return len(self._entries) + + def __getitem__(self, name): + """ + Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds + to the given symbol name. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries.get(name) + + def __iter__(self): + """ + Returns an iterator to the NameLUT dictionary. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return iter(self._entries) + + def items(self): + """ + Returns the NameLUT dictionary items. + """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries.items() + + def get(self, name, default=None): + """ + Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name or + None if the symbol does not exist in the corresponding section. 
+ """ + if self._entries is None: + self._entries, self._cu_headers = self._get_entries() + return self._entries.get(name, default) + + def get_cu_headers(self): + """ + Returns all CU headers. Mainly required for readelf. + """ + if self._cu_headers is None: + self._entries, self._cu_headers = self._get_entries() + + return self._cu_headers + + def _get_entries(self): + """ + Parse the (name, cu_ofs, die_ofs) information from this section and + store as a dictionary. + """ + + self._stream.seek(0) + entries = OrderedDict() + cu_headers = [] + offset = 0 + # According to 6.1.1. of DWARFv4, each set of names is terminated by + # an offset field containing zero (and no following string). Because + # of sequential parsing, every next entry may be that terminator. + # So, field "name" is conditional. + entry_struct = Struct("Dwarf_offset_name_pair", + self._structs.Dwarf_offset('die_ofs'), + If(lambda ctx: ctx['die_ofs'], CString('name'))) + + # each run of this loop will fetch one CU worth of entries. + while offset < self._size: + + # read the header for this CU. + namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header, + self._stream, offset) + cu_headers.append(namelut_hdr) + # compute the next offset. + offset = (offset + namelut_hdr.unit_length + + self._structs.initial_length_field_size()) + + # before inner loop, latch data that will be used in the inner + # loop to avoid attribute access and other computation. + hdr_cu_ofs = namelut_hdr.debug_info_offset + + # while die_ofs of the entry is non-zero (which indicates the end) ... + while True: + entry = struct_parse(entry_struct, self._stream) + + # if it is zero, this is the terminating record. + if entry.die_ofs == 0: + break + # add this entry to the look-up dictionary. + entries[entry.name.decode('utf-8')] = NameLUTEntry( + cu_ofs = hdr_cu_ofs, + die_ofs = hdr_cu_ofs + entry.die_ofs) + + # return the entries parsed so far. 
+ return (entries, cu_headers) diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py index 9a216ee9..5f99473e 100644 --- a/elftools/dwarf/ranges.py +++ b/elftools/dwarf/ranges.py @@ -57,12 +57,9 @@ def _parse_range_list_from_stream(self): elif begin_offset == self._max_addr: # Base address selection entry lst.append(BaseAddressEntry(base_address=end_offset)) - else: + else: # Range entry lst.append(RangeEntry( begin_offset=begin_offset, end_offset=end_offset)) return lst - - - diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index f3b6ef3b..79e0d8f2 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -11,7 +11,7 @@ UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64, SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64, Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray, - CString, Embed, StaticField + CString, Embed, StaticField, IfThenElse ) from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128 from .enums import * @@ -34,6 +34,9 @@ class DWARFStructs(object): Dwarf_offset: 32-bit or 64-bit word, depending on dwarf_format + Dwarf_length: + 32-bit or 64-bit word, depending on dwarf_format + Dwarf_target_addr: 32-bit or 64-bit word, depending on address size @@ -86,7 +89,7 @@ def __init__(self, section 7.5.1) """ assert dwarf_format == 32 or dwarf_format == 64 - assert address_size == 8 or address_size == 4 + assert address_size == 8 or address_size == 4, str(address_size) self.little_endian = little_endian self.dwarf_format = dwarf_format self.address_size = address_size @@ -105,6 +108,7 @@ def _create_structs(self): self.Dwarf_uint32 = ULInt32 self.Dwarf_uint64 = ULInt64 self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64 + self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64 self.Dwarf_target_addr = ( ULInt32 if self.address_size == 4 else ULInt64) self.Dwarf_int8 = SLInt8 @@ -117,6 +121,7 @@ def 
_create_structs(self): self.Dwarf_uint32 = UBInt32 self.Dwarf_uint64 = UBInt64 self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64 + self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64 self.Dwarf_target_addr = ( UBInt32 if self.address_size == 4 else UBInt64) self.Dwarf_int8 = SBInt8 @@ -132,6 +137,9 @@ def _create_structs(self): self._create_lineprog_header() self._create_callframe_entry_headers() self._create_aranges_header() + self._create_nameLUT_header() + self._create_string_offsets_table_header() + self._create_address_table_header() def _create_initial_length(self): def _InitialLength(name): @@ -154,8 +162,16 @@ def _create_cu_header(self): self.Dwarf_CU_header = Struct('Dwarf_CU_header', self.Dwarf_initial_length('unit_length'), self.Dwarf_uint16('version'), - self.Dwarf_offset('debug_abbrev_offset'), - self.Dwarf_uint8('address_size')) + # DWARFv5 reverses the order of address_size and debug_abbrev_offset. + IfThenElse('', lambda ctx: ctx['version'] >= 5, + Embed(Struct('', + self.Dwarf_uint8('unit_type'), + self.Dwarf_uint8('address_size'), + self.Dwarf_offset('debug_abbrev_offset'))), + Embed(Struct('', + self.Dwarf_offset('debug_abbrev_offset'), + self.Dwarf_uint8('address_size'))), + )) def _create_abbrev_declaration(self): self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry', @@ -166,11 +182,18 @@ def _create_abbrev_declaration(self): obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null', Struct('attr_spec', Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT), - Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)))) + Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM), + If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const', + self.Dwarf_sleb128('value'))))) def _create_dw_form(self): self.Dwarf_dw_form = dict( DW_FORM_addr=self.Dwarf_target_addr(''), + DW_FORM_addrx=self.Dwarf_uleb128(''), + DW_FORM_addrx1=self.Dwarf_uint8(''), + DW_FORM_addrx2=self.Dwarf_uint16(''), + # DW_FORM_addrx3=self.Dwarf_uint24(''), # TODO + 
DW_FORM_addrx4=self.Dwarf_uint32(''), DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8), DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16), @@ -187,14 +210,20 @@ def _create_dw_form(self): DW_FORM_string=CString(''), DW_FORM_strp=self.Dwarf_offset(''), + DW_FORM_line_strp=self.Dwarf_offset(''), + DW_FORM_strx1=self.Dwarf_uint8(''), + DW_FORM_strx2=self.Dwarf_uint16(''), + # DW_FORM_strx3=self.Dwarf_uint24(''), # TODO + DW_FORM_strx4=self.Dwarf_uint64(''), DW_FORM_flag=self.Dwarf_uint8(''), + DW_FORM_ref=self.Dwarf_uint32(''), DW_FORM_ref1=self.Dwarf_uint8(''), DW_FORM_ref2=self.Dwarf_uint16(''), DW_FORM_ref4=self.Dwarf_uint32(''), DW_FORM_ref8=self.Dwarf_uint64(''), DW_FORM_ref_udata=self.Dwarf_uleb128(''), - DW_FORM_ref_addr=self.Dwarf_offset(''), + DW_FORM_ref_addr=self.Dwarf_target_addr('') if self.dwarf_version == 2 else self.Dwarf_offset(''), DW_FORM_indirect=self.Dwarf_uleb128(''), @@ -218,6 +247,30 @@ def _create_aranges_header(self): self.Dwarf_uint8('segment_size') ) + def _create_nameLUT_header(self): + self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header", + self.Dwarf_initial_length('unit_length'), + self.Dwarf_uint16('version'), + self.Dwarf_offset('debug_info_offset'), + self.Dwarf_length('debug_info_length') + ) + + def _create_string_offsets_table_header(self): + self.Dwarf_string_offsets_table_header = Struct( + "Dwarf_string_offets_table_header", + self.Dwarf_initial_length('unit_length'), + self.Dwarf_uint16('version'), + self.Dwarf_uint16('padding'), + ) + + def _create_address_table_header(self): + self.Dwarf_address_table_header = Struct("Dwarf_address_table_header", + self.Dwarf_initial_length('unit_length'), + self.Dwarf_uint16('version'), + self.Dwarf_uint8('address_size'), + self.Dwarf_uint8('segment_selector_size'), + ) + def _create_lineprog_header(self): # A file entry is terminated by a NULL byte, so we don't want to parse # past it. Therefore an If is used. 
diff --git a/elftools/ehabi/__init__.py b/elftools/ehabi/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/elftools/ehabi/constants.py b/elftools/ehabi/constants.py new file mode 100644 index 00000000..2921b97e --- /dev/null +++ b/elftools/ehabi/constants.py @@ -0,0 +1 @@ +EHABI_INDEX_ENTRY_SIZE = 8 diff --git a/elftools/ehabi/decoder.py b/elftools/ehabi/decoder.py new file mode 100644 index 00000000..ce20f656 --- /dev/null +++ b/elftools/ehabi/decoder.py @@ -0,0 +1,284 @@ +# ------------------------------------------------------------------------------- +# elftools: ehabi/decoder.py +# +# Decode ARM exception handler bytecode. +# +# LeadroyaL (leadroyal@qq.com) +# This code is in the public domain +# ------------------------------------------------------------------------------- +from collections import namedtuple + + +class EHABIBytecodeDecoder(object): + """ Decoder of a sequence of ARM exception handler abi bytecode. + + Reference: + https://github.com/llvm/llvm-project/blob/master/llvm/tools/llvm-readobj/ARMEHABIPrinter.h + https://developer.arm.com/documentation/ihi0038/b/ + + Accessible attributes: + + mnemonic_array: + MnemonicItem array. + + Parameters: + + bytecode_array: + Integer array, raw data of bytecode. + + """ + + def __init__(self, bytecode_array): + self._bytecode_array = bytecode_array + self._index = None + self.mnemonic_array = None + self._decode() + + def _decode(self): + """ Decode bytecode array, put result into mnemonic_array. 
+ """ + self._index = 0 + self.mnemonic_array = [] + while self._index < len(self._bytecode_array): + for mask, value, handler in self.ring: + if (self._bytecode_array[self._index] & mask) == value: + start_idx = self._index + mnemonic = handler(self) + end_idx = self._index + self.mnemonic_array.append( + MnemonicItem(self._bytecode_array[start_idx: end_idx], mnemonic)) + break + + def _decode_00xxxxxx(self): + # SW.startLine() << format("0x%02X ; vsp = vsp + %u\n", Opcode, + # ((Opcode & 0x3f) << 2) + 4); + opcode = self._bytecode_array[self._index] + self._index += 1 + return 'vsp = vsp + %u' % (((opcode & 0x3f) << 2) + 4) + + def _decode_01xxxxxx(self): + # SW.startLine() << format("0x%02X ; vsp = vsp - %u\n", Opcode, + # ((Opcode & 0x3f) << 2) + 4); + opcode = self._bytecode_array[self._index] + self._index += 1 + return 'vsp = vsp - %u' % (((opcode & 0x3f) << 2) + 4) + + gpr_register_names = ("r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "fp", "ip", "sp", "lr", "pc") + + def _calculate_range(self, start, count): + return ((1 << (count + 1)) - 1) << start + + def _printGPR(self, gpr_mask): + hits = [self.gpr_register_names[i] for i in range(32) if gpr_mask & (1 << i) != 0] + return '{%s}' % ', '.join(hits) + + def _print_registers(self, vfp_mask, prefix): + hits = [prefix + str(i) for i in range(32) if vfp_mask & (1 << i) != 0] + return '{%s}' % ', '.join(hits) + + def _decode_1000iiii_iiiiiiii(self): + op0 = self._bytecode_array[self._index] + self._index += 1 + op1 = self._bytecode_array[self._index] + self._index += 1 + # uint16_t GPRMask = (Opcode1 << 4) | ((Opcode0 & 0x0f) << 12); + # SW.startLine() + # << format("0x%02X 0x%02X ; %s", + # Opcode0, Opcode1, GPRMask ? 
"pop " : "refuse to unwind"); + # if (GPRMask) + # PrintGPR(GPRMask); + gpr_mask = (op1 << 4) | ((op0 & 0x0f) << 12) + if gpr_mask == 0: + return 'refuse to unwind' + else: + return 'pop %s' % self._printGPR(gpr_mask) + + def _decode_10011101(self): + self._index += 1 + return 'reserved (ARM MOVrr)' + + def _decode_10011111(self): + self._index += 1 + return 'reserved (WiMMX MOVrr)' + + def _decode_1001nnnn(self): + # SW.startLine() << format("0x%02X ; vsp = r%u\n", Opcode, (Opcode & 0x0f)); + opcode = self._bytecode_array[self._index] + self._index += 1 + return 'vsp = r%u' % (opcode & 0x0f) + + def _decode_10100nnn(self): + # SW.startLine() << format("0x%02X ; pop ", Opcode); + # PrintGPR((((1 << ((Opcode & 0x7) + 1)) - 1) << 4)); + opcode = self._bytecode_array[self._index] + self._index += 1 + return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07)) + + def _decode_10101nnn(self): + # SW.startLine() << format("0x%02X ; pop ", Opcode); + # PrintGPR((((1 << ((Opcode & 0x7) + 1)) - 1) << 4) | (1 << 14)); + opcode = self._bytecode_array[self._index] + self._index += 1 + return 'pop %s' % self._printGPR(self._calculate_range(4, opcode & 0x07) | (1 << 14)) + + def _decode_10110000(self): + # SW.startLine() << format("0x%02X ; finish\n", Opcode); + self._index += 1 + return 'finish' + + def _decode_10110001_0000iiii(self): + # SW.startLine() + # << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1, + # ((Opcode1 & 0xf0) || Opcode1 == 0x00) ? 
"spare" : "pop "); + # if (((Opcode1 & 0xf0) == 0x00) && Opcode1) + # PrintGPR((Opcode1 & 0x0f)); + self._index += 1 # skip constant byte + op1 = self._bytecode_array[self._index] + self._index += 1 + if (op1 & 0xf0) != 0 or op1 == 0x00: + return 'spare' + else: + return 'pop %s' % self._printGPR((op1 & 0x0f)) + + def _decode_10110010_uleb128(self): + # SmallVector ULEB; + # do { ULEB.push_back(Opcodes[OI ^ 3]); } while (Opcodes[OI++ ^ 3] & 0x80); + # uint64_t Value = 0; + # for (unsigned BI = 0, BE = ULEB.size(); BI != BE; ++BI) + # Value = Value | ((ULEB[BI] & 0x7f) << (7 * BI)); + # OS << format("; vsp = vsp + %" PRIu64 "\n", 0x204 + (Value << 2)); + self._index += 1 # skip constant byte + uleb_buffer = [self._bytecode_array[self._index]] + self._index += 1 + while self._bytecode_array[self._index] & 0x80 == 0: + uleb_buffer.append(self._bytecode_array[self._index]) + self._index += 1 + value = 0 + for b in reversed(uleb_buffer): + value = (value << 7) + (b & 0x7F) + return 'vsp = vsp + %u' % (0x204 + (value << 2)) + + def _decode_10110011_sssscccc(self): + # these two decoders are equal + return self._decode_11001001_sssscccc() + + def _decode_101101nn(self): + return self._spare() + + def _decode_10111nnn(self): + # SW.startLine() << format("0x%02X ; pop ", Opcode); + # PrintRegisters((((1 << ((Opcode & 0x07) + 1)) - 1) << 8), "d"); + opcode = self._bytecode_array[self._index] + self._index += 1 + return 'pop %s' % self._print_registers(self._calculate_range(8, opcode & 0x07), "d") + + def _decode_11000110_sssscccc(self): + # SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1); + # uint8_t Start = ((Opcode1 & 0xf0) >> 4); + # uint8_t Count = ((Opcode1 & 0x0f) >> 0); + # PrintRegisters((((1 << (Count + 1)) - 1) << Start), "wR"); + self._index += 1 # skip constant byte + op1 = self._bytecode_array[self._index] + self._index += 1 + start = ((op1 & 0xf0) >> 4) + count = ((op1 & 0x0f) >> 0) + return 'pop %s' % 
self._print_registers(self._calculate_range(start, count), "wR") + + def _decode_11000111_0000iiii(self): + # SW.startLine() + # << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1, + # ((Opcode1 & 0xf0) || Opcode1 == 0x00) ? "spare" : "pop "); + # if ((Opcode1 & 0xf0) == 0x00 && Opcode1) + # PrintRegisters(Opcode1 & 0x0f, "wCGR"); + self._index += 1 # skip constant byte + op1 = self._bytecode_array[self._index] + self._index += 1 + if (op1 & 0xf0) != 0 or op1 == 0x00: + return 'spare' + else: + return 'pop %s' % self._print_registers(op1 & 0x0f, "wCGR") + + def _decode_11001000_sssscccc(self): + # SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1); + # uint8_t Start = 16 + ((Opcode1 & 0xf0) >> 4); + # uint8_t Count = ((Opcode1 & 0x0f) >> 0); + # PrintRegisters((((1 << (Count + 1)) - 1) << Start), "d"); + self._index += 1 # skip constant byte + op1 = self._bytecode_array[self._index] + self._index += 1 + start = 16 + ((op1 & 0xf0) >> 4) + count = ((op1 & 0x0f) >> 0) + return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d") + + def _decode_11001001_sssscccc(self): + # SW.startLine() << format("0x%02X 0x%02X ; pop ", Opcode0, Opcode1); + # uint8_t Start = ((Opcode1 & 0xf0) >> 4); + # uint8_t Count = ((Opcode1 & 0x0f) >> 0); + # PrintRegisters((((1 << (Count + 1)) - 1) << Start), "d"); + self._index += 1 # skip constant byte + op1 = self._bytecode_array[self._index] + self._index += 1 + start = ((op1 & 0xf0) >> 4) + count = ((op1 & 0x0f) >> 0) + return 'pop %s' % self._print_registers(self._calculate_range(start, count), "d") + + def _decode_11001yyy(self): + return self._spare() + + def _decode_11000nnn(self): + # SW.startLine() << format("0x%02X ; pop ", Opcode); + # PrintRegisters((((1 << ((Opcode & 0x07) + 1)) - 1) << 10), "wR"); + opcode = self._bytecode_array[self._index] + self._index += 1 + return 'pop %s' % self._print_registers(self._calculate_range(10, opcode & 0x07), "wR") + + def _decode_11010nnn(self): + # these two 
decoders are equal + return self._decode_10111nnn() + + def _decode_11xxxyyy(self): + return self._spare() + + def _spare(self): + self._index += 1 + return 'spare' + + _DECODE_RECIPE_TYPE = namedtuple('_DECODE_RECIPE_TYPE', 'mask value handler') + + ring = ( + _DECODE_RECIPE_TYPE(mask=0xc0, value=0x00, handler=_decode_00xxxxxx), + _DECODE_RECIPE_TYPE(mask=0xc0, value=0x40, handler=_decode_01xxxxxx), + _DECODE_RECIPE_TYPE(mask=0xf0, value=0x80, handler=_decode_1000iiii_iiiiiiii), + _DECODE_RECIPE_TYPE(mask=0xff, value=0x9d, handler=_decode_10011101), + _DECODE_RECIPE_TYPE(mask=0xff, value=0x9f, handler=_decode_10011111), + _DECODE_RECIPE_TYPE(mask=0xf0, value=0x90, handler=_decode_1001nnnn), + _DECODE_RECIPE_TYPE(mask=0xf8, value=0xa0, handler=_decode_10100nnn), + _DECODE_RECIPE_TYPE(mask=0xf8, value=0xa8, handler=_decode_10101nnn), + _DECODE_RECIPE_TYPE(mask=0xff, value=0xb0, handler=_decode_10110000), + _DECODE_RECIPE_TYPE(mask=0xff, value=0xb1, handler=_decode_10110001_0000iiii), + _DECODE_RECIPE_TYPE(mask=0xff, value=0xb2, handler=_decode_10110010_uleb128), + _DECODE_RECIPE_TYPE(mask=0xff, value=0xb3, handler=_decode_10110011_sssscccc), + _DECODE_RECIPE_TYPE(mask=0xfc, value=0xb4, handler=_decode_101101nn), + _DECODE_RECIPE_TYPE(mask=0xf8, value=0xb8, handler=_decode_10111nnn), + _DECODE_RECIPE_TYPE(mask=0xff, value=0xc6, handler=_decode_11000110_sssscccc), + _DECODE_RECIPE_TYPE(mask=0xff, value=0xc7, handler=_decode_11000111_0000iiii), + _DECODE_RECIPE_TYPE(mask=0xff, value=0xc8, handler=_decode_11001000_sssscccc), + _DECODE_RECIPE_TYPE(mask=0xff, value=0xc9, handler=_decode_11001001_sssscccc), + _DECODE_RECIPE_TYPE(mask=0xc8, value=0xc8, handler=_decode_11001yyy), + _DECODE_RECIPE_TYPE(mask=0xf8, value=0xc0, handler=_decode_11000nnn), + _DECODE_RECIPE_TYPE(mask=0xf8, value=0xd0, handler=_decode_11010nnn), + _DECODE_RECIPE_TYPE(mask=0xc0, value=0xc0, handler=_decode_11xxxyyy), + ) + + +class MnemonicItem(object): + """ Single mnemonic item. 
+ """ + + def __init__(self, bytecode, mnemonic): + self.bytecode = bytecode + self.mnemonic = mnemonic + + def __repr__(self): + return '%s ; %s' % (' '.join(['0x%02x' % x for x in self.bytecode]), self.mnemonic) diff --git a/elftools/ehabi/ehabiinfo.py b/elftools/ehabi/ehabiinfo.py new file mode 100644 index 00000000..415566cf --- /dev/null +++ b/elftools/ehabi/ehabiinfo.py @@ -0,0 +1,209 @@ +# ------------------------------------------------------------------------------- +# elftools: ehabi/ehabiinfo.py +# +# Decoder for ARM exception handler bytecode. +# +# LeadroyaL (leadroyal@qq.com) +# This code is in the public domain +# ------------------------------------------------------------------------------- + +from ..common.utils import struct_parse + +from .decoder import EHABIBytecodeDecoder +from .constants import EHABI_INDEX_ENTRY_SIZE +from .structs import EHABIStructs + + +class EHABIInfo(object): + """ ARM exception handler abi information class. + + Parameters: + + arm_idx_section: + elf.sections.Section object, section which type is SHT_ARM_EXIDX. + + little_endian: + bool, endianness of elf file. + """ + + def __init__(self, arm_idx_section, little_endian): + self._arm_idx_section = arm_idx_section + self._struct = EHABIStructs(little_endian) + self._num_entry = None + + def section_name(self): + return self._arm_idx_section.name + + def section_offset(self): + return self._arm_idx_section['sh_offset'] + + def num_entry(self): + """ Number of exception handler entry in the section. + """ + if self._num_entry is None: + self._num_entry = self._arm_idx_section['sh_size'] // EHABI_INDEX_ENTRY_SIZE + return self._num_entry + + def get_entry(self, n): + """ Get the exception handler entry at index #n. 
(EHABIEntry object or a subclass) + """ + if n >= self.num_entry(): + raise IndexError('Invalid entry %d/%d' % (n, self._num_entry)) + eh_index_entry_offset = self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE + eh_index_data = struct_parse(self._struct.EH_index_struct, self._arm_idx_section.stream, eh_index_entry_offset) + word0, word1 = eh_index_data['word0'], eh_index_data['word1'] + + if word0 & 0x80000000 != 0: + return CorruptEHABIEntry('Corrupt ARM exception handler table entry: %x' % n) + + function_offset = arm_expand_prel31(word0, self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE) + + if word1 == 1: + # 0x1 means cannot unwind + return CannotUnwindEHABIEntry(function_offset) + elif word1 & 0x80000000 == 0: + # highest bit is zero, point to .ARM.extab data + eh_table_offset = arm_expand_prel31(word1, self.section_offset() + n * EHABI_INDEX_ENTRY_SIZE + 4) + eh_index_data = struct_parse(self._struct.EH_table_struct, self._arm_idx_section.stream, eh_table_offset) + word0 = eh_index_data['word0'] + if word0 & 0x80000000 == 0: + # highest bit is one, generic model + return GenericEHABIEntry(function_offset, arm_expand_prel31(word0, eh_table_offset)) + else: + # highest bit is one, arm compact model + # highest half must be 0b1000 for compact model + if word0 & 0x70000000 != 0: + return CorruptEHABIEntry('Corrupt ARM compact model table entry: %x' % n) + per_index = (word0 >> 24) & 0x7f + if per_index == 0: + # arm compact model 0 + opcode = [(word0 & 0xFF0000) >> 16, (word0 & 0xFF00) >> 8, word0 & 0xFF] + return EHABIEntry(function_offset, per_index, opcode) + elif per_index == 1 or per_index == 2: + # arm compact model 1/2 + more_word = (word0 >> 16) & 0xff + opcode = [(word0 >> 8) & 0xff, (word0 >> 0) & 0xff] + self._arm_idx_section.stream.seek(eh_table_offset + 4) + for i in range(more_word): + r = struct_parse(self._struct.EH_table_struct, self._arm_idx_section.stream)['word0'] + opcode.append((r >> 24) & 0xFF) + opcode.append((r >> 16) & 0xFF) + 
opcode.append((r >> 8) & 0xFF) + opcode.append((r >> 0) & 0xFF) + return EHABIEntry(function_offset, per_index, opcode, eh_table_offset=eh_table_offset) + else: + return CorruptEHABIEntry('Unknown ARM compact model %d at table entry: %x' % (per_index, n)) + else: + # highest bit is one, compact model must be 0 + if word1 & 0x7f000000 != 0: + return CorruptEHABIEntry('Corrupt ARM compact model table entry: %x' % n) + opcode = [(word1 & 0xFF0000) >> 16, (word1 & 0xFF00) >> 8, word1 & 0xFF] + return EHABIEntry(function_offset, 0, opcode) + + +class EHABIEntry(object): + """ Exception handler abi entry. + + Accessible attributes: + + function_offset: + Integer. + None if corrupt. (Reference: CorruptEHABIEntry) + + personality: + Integer. + None if corrupt or unwindable. (Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry) + 0/1/2 for ARM personality compact format. + Others for generic personality. + + bytecode_array: + Integer array. + None if corrupt or unwindable or generic personality. + (Reference: CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry) + + eh_table_offset: + Integer. + Only entries who point to .ARM.extab contains this field, otherwise return None. + + unwindable: + bool. Whether this function is unwindable. + + corrupt: + bool. Whether this entry is corrupt. 
+ + """ + + def __init__(self, + function_offset, + personality, + bytecode_array, + eh_table_offset=None, + unwindable=True, + corrupt=False): + self.function_offset = function_offset + self.personality = personality + self.bytecode_array = bytecode_array + self.eh_table_offset = eh_table_offset + self.unwindable = unwindable + self.corrupt = corrupt + + def mnmemonic_array(self): + if self.bytecode_array: + return EHABIBytecodeDecoder(self.bytecode_array).mnemonic_array + else: + return None + + def __repr__(self): + return "" % ( + self.function_offset, + self.personality, + "eh_table_offset=0x%x, " % self.eh_table_offset if self.eh_table_offset else "", + self.bytecode_array) + + +class CorruptEHABIEntry(EHABIEntry): + """ This entry is corrupt. Attribute #corrupt will be True. + """ + + def __init__(self, reason): + super(CorruptEHABIEntry, self).__init__(function_offset=None, personality=None, bytecode_array=None, + corrupt=True) + self.reason = reason + + def __repr__(self): + return "" % self.reason + + +class CannotUnwindEHABIEntry(EHABIEntry): + """ This function cannot be unwind. Attribute #unwindable will be False. + """ + + def __init__(self, function_offset): + super(CannotUnwindEHABIEntry, self).__init__(function_offset, personality=None, bytecode_array=None, + unwindable=False) + + def __repr__(self): + return "" % self.function_offset + + +class GenericEHABIEntry(EHABIEntry): + """ This entry is generic model rather than ARM compact model.Attribute #bytecode_array will be None. 
+ """ + + def __init__(self, function_offset, personality): + super(GenericEHABIEntry, self).__init__(function_offset, personality, bytecode_array=None) + + def __repr__(self): + return "" % (self.function_offset, self.personality) + + +def arm_expand_prel31(address, place): + """ + address: uint32 + place: uint32 + return: uint64 + """ + location = address & 0x7fffffff + if location & 0x04000000: + location |= 0xffffffff80000000 + return location + place & 0xffffffffffffffff diff --git a/elftools/ehabi/structs.py b/elftools/ehabi/structs.py new file mode 100644 index 00000000..35ceaf34 --- /dev/null +++ b/elftools/ehabi/structs.py @@ -0,0 +1,47 @@ +# ------------------------------------------------------------------------------- +# elftools: ehabi/structs.py +# +# Encapsulation of Construct structs for parsing an EHABI, adjusted for +# correct endianness and word-size. +# +# LeadroyaL (leadroyal@qq.com) +# This code is in the public domain +# ------------------------------------------------------------------------------- + +from ..construct import UBInt32, ULInt32, Struct + + +class EHABIStructs(object): + """ Accessible attributes: + + EH_index_struct: + Struct of item in section .ARM.exidx. + + EH_table_struct: + Struct of item in section .ARM.extab. 
+ """ + + def __init__(self, little_endian): + self._little_endian = little_endian + self._create_structs() + + def _create_structs(self): + if self._little_endian: + self.EHABI_uint32 = ULInt32 + else: + self.EHABI_uint32 = UBInt32 + self._create_exception_handler_index() + self._create_exception_handler_table() + + def _create_exception_handler_index(self): + self.EH_index_struct = Struct( + 'EH_index', + self.EHABI_uint32('word0'), + self.EHABI_uint32('word1') + ) + + def _create_exception_handler_table(self): + self.EH_table_struct = Struct( + 'EH_table', + self.EHABI_uint32('word0'), + ) diff --git a/elftools/elf/constants.py b/elftools/elf/constants.py index f9023c9b..fc55aaca 100644 --- a/elftools/elf/constants.py +++ b/elftools/elf/constants.py @@ -27,6 +27,10 @@ class E_FLAGS(object): EF_ARM_ABI_FLOAT_SOFT=0x00000200 EF_ARM_ABI_FLOAT_HARD=0x00000400 + EF_PPC64_ABI_V0=0 + EF_PPC64_ABI_V1=1 + EF_PPC64_ABI_V2=2 + EF_MIPS_NOREORDER=1 EF_MIPS_PIC=2 EF_MIPS_CPIC=4 @@ -71,6 +75,7 @@ class SHN_INDICES(object): SHN_ABS=0xfff1 SHN_COMMON=0xfff2 SHN_HIRESERVE=0xffff + SHN_XINDEX=0xffff class SH_FLAGS(object): @@ -92,6 +97,27 @@ class SH_FLAGS(object): SHF_MASKPROC=0xf0000000 +class RH_FLAGS(object): + """ Flag values for the DT_MIPS_FLAGS dynamic table entries + """ + RHF_NONE=0x00000000 + RHF_QUICKSTART=0x00000001 + RHF_NOTPOT=0x00000002 + RHF_NO_LIBRARY_REPLACEMENT=0x00000004 + RHF_NO_MOVE=0x00000008 + RHF_SGI_ONLY=0x00000010 + RHF_GUARANTEE_INIT=0x00000020 + RHF_DELTA_C_PLUS_PLUS=0x00000040 + RHF_GUARANTEE_START_INIT=0x00000080 + RHF_PIXIE=0x00000100 + RHF_DEFAULT_DELAY_LOAD=0x00000200 + RHF_REQUICKSTART=0x00000400 + RHF_REQUICKSTARTED=0x00000800 + RHF_CORD=0x00001000 + RHF_NO_UNRES_UNDEF=0x00002000 + RHF_RLD_ORDER_SAFE=0x00004000 + + class P_FLAGS(object): """ Flag values for the p_flags field of program headers """ @@ -122,4 +148,4 @@ class SUNW_SYMINFO_FLAGS(object): class VER_FLAGS(object): VER_FLG_BASE=0x1 VER_FLG_WEAK=0x2 - VER_FLG_INFO=0x4 + 
VER_FLG_INFO=0x4 diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py index b76238a1..0ccc9a19 100644 --- a/elftools/elf/descriptions.py +++ b/elftools/elf/descriptions.py @@ -9,10 +9,11 @@ from .enums import ( ENUM_D_TAG, ENUM_E_VERSION, ENUM_P_TYPE_BASE, ENUM_SH_TYPE_BASE, ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, - ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_MIPS, - ENUM_ATTR_TAG_ARM) -from .constants import P_FLAGS, SH_FLAGS, SUNW_SYMINFO_FLAGS, VER_FLAGS -from ..common.py3compat import iteritems + ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64, + ENUM_RELOC_TYPE_MIPS, ENUM_ATTR_TAG_ARM, ENUM_DT_FLAGS, ENUM_DT_FLAGS_1) +from .constants import ( + P_FLAGS, RH_FLAGS, SH_FLAGS, SUNW_SYMINFO_FLAGS, VER_FLAGS) +from ..common.py3compat import bytes2hex, iteritems def describe_ei_class(x): @@ -34,7 +35,13 @@ def describe_ei_osabi(x): return _DESCR_EI_OSABI.get(x, _unknown) -def describe_e_type(x): +def describe_e_type(x, elffile=None): + if elffile is not None and x == 'ET_DYN': + # Detect whether this is a normal SO or a PIE executable + dynamic = elffile.get_section_by_name('.dynamic') + for t in dynamic.iter_tags('DT_FLAGS_1'): + if t.entry.d_val & ENUM_DT_FLAGS_1['DF_1_PIE']: + return 'DYN (Position-Independent Executable file)' return _DESCR_E_TYPE.get(x, _unknown) @@ -62,12 +69,28 @@ def describe_p_flags(x): return s +def describe_rh_flags(x): + return ' '.join( + _DESCR_RH_FLAGS[flag] + for flag in (RH_FLAGS.RHF_NONE, RH_FLAGS.RHF_QUICKSTART, + RH_FLAGS.RHF_NOTPOT, RH_FLAGS.RHF_NO_LIBRARY_REPLACEMENT, + RH_FLAGS.RHF_NO_MOVE, RH_FLAGS.RHF_SGI_ONLY, + RH_FLAGS.RHF_GUARANTEE_INIT, + RH_FLAGS.RHF_DELTA_C_PLUS_PLUS, + RH_FLAGS.RHF_GUARANTEE_START_INIT, RH_FLAGS.RHF_PIXIE, + RH_FLAGS.RHF_DEFAULT_DELAY_LOAD, + RH_FLAGS.RHF_REQUICKSTART, RH_FLAGS.RHF_REQUICKSTARTED, + RH_FLAGS.RHF_CORD, RH_FLAGS.RHF_NO_UNRES_UNDEF, + RH_FLAGS.RHF_RLD_ORDER_SAFE) + if x & flag) + + def describe_sh_type(x): if x in 
_DESCR_SH_TYPE: return _DESCR_SH_TYPE.get(x) elif (x >= ENUM_SH_TYPE_BASE['SHT_LOOS'] and x < ENUM_SH_TYPE_BASE['SHT_GNU_versym']): - return 'loos+%lx' % (x - ENUM_SH_TYPE_BASE['SHT_LOOS']) + return 'loos+0x%lx' % (x - ENUM_SH_TYPE_BASE['SHT_LOOS']) else: return _unknown @@ -78,8 +101,12 @@ def describe_sh_flags(x): SH_FLAGS.SHF_WRITE, SH_FLAGS.SHF_ALLOC, SH_FLAGS.SHF_EXECINSTR, SH_FLAGS.SHF_MERGE, SH_FLAGS.SHF_STRINGS, SH_FLAGS.SHF_INFO_LINK, SH_FLAGS.SHF_LINK_ORDER, SH_FLAGS.SHF_OS_NONCONFORMING, - SH_FLAGS.SHF_GROUP, SH_FLAGS.SHF_TLS, SH_FLAGS.SHF_EXCLUDE): + SH_FLAGS.SHF_GROUP, SH_FLAGS.SHF_TLS, SH_FLAGS.SHF_MASKOS, + SH_FLAGS.SHF_EXCLUDE): s += _DESCR_SH_FLAGS[flag] if (x & flag) else '' + if not x & SH_FLAGS.SHF_EXCLUDE: + if x & SH_FLAGS.SHF_MASKPROC: + s += 'p' return s @@ -95,6 +122,17 @@ def describe_symbol_visibility(x): return _DESCR_ST_VISIBILITY.get(x, _unknown) +def describe_symbol_local(x): + return '[: ' + str(1 << x) + ']' + + +def describe_symbol_other(x): + vis = describe_symbol_visibility(x['visibility']) + if x['local'] > 1 and x['local'] < 7: + return vis + ' ' + describe_symbol_local(x['local']) + return vis + + def describe_symbol_shndx(x): return _DESCR_ST_SHNDX.get(x, '%3s' % x) @@ -109,6 +147,8 @@ def describe_reloc_type(x, elffile): return _DESCR_RELOC_TYPE_ARM.get(x, _unknown) elif arch == 'AArch64': return _DESCR_RELOC_TYPE_AARCH64.get(x, _unknown) + elif arch == '64-bit PowerPC': + return _DESCR_RELOC_TYPE_PPC64.get(x, _unknown) elif arch == 'MIPS': return _DESCR_RELOC_TYPE_MIPS.get(x, _unknown) else: @@ -119,6 +159,16 @@ def describe_dyn_tag(x): return _DESCR_D_TAG.get(x, _unknown) +def describe_dt_flags(x): + return ' '.join(key[3:] for key, val in + sorted(ENUM_DT_FLAGS.items(), key=lambda t: t[1]) if x & val) + + +def describe_dt_flags_1(x): + return ' '.join(key[5:] for key, val in + sorted(ENUM_DT_FLAGS_1.items(), key=lambda t: t[1]) if x & val) + + def describe_syminfo_flags(x): return ''.join(_DESCR_SYMINFO_FLAGS[flag] for 
flag in ( SUNW_SYMINFO_FLAGS.SYMINFO_FLG_CAP, @@ -148,21 +198,30 @@ def describe_note(x): n_desc = x['n_desc'] desc = '' if x['n_type'] == 'NT_GNU_ABI_TAG': - desc = '\n OS: %s, ABI: %d.%d.%d' % ( - _DESCR_NOTE_ABI_TAG_OS.get(n_desc['abi_os'], _unknown), - n_desc['abi_major'], n_desc['abi_minor'], n_desc['abi_tiny']) + if x['n_name'] == 'Android': + desc = '\n description data: %s ' % bytes2hex(x['n_descdata']) + else: + desc = '\n OS: %s, ABI: %d.%d.%d' % ( + _DESCR_NOTE_ABI_TAG_OS.get(n_desc['abi_os'], _unknown), + n_desc['abi_major'], n_desc['abi_minor'], n_desc['abi_tiny']) elif x['n_type'] == 'NT_GNU_BUILD_ID': desc = '\n Build ID: %s' % (n_desc) + elif x['n_type'] == 'NT_GNU_GOLD_VERSION': + desc = '\n Version: %s' % (n_desc) + elif x['n_type'] == 'NT_GNU_PROPERTY_TYPE_0': + desc = '\n Properties: ' + describe_note_gnu_properties(x['n_desc']) else: - desc = '\n description data: {}'.format(' '.join( - '{:02x}'.format(ord(byte)) for byte in n_desc - )) - - note_type = (x['n_type'] if isinstance(x['n_type'], str) - else 'Unknown note type:') - note_type_desc = ('0x%.8x' % x['n_type'] - if isinstance(x['n_type'], int) else - _DESCR_NOTE_N_TYPE.get(x['n_type'], _unknown)) + desc = '\n description data: {}'.format(bytes2hex(n_desc)) + + if x['n_type'] == 'NT_GNU_ABI_TAG' and x['n_name'] == 'Android': + note_type = 'NT_VERSION' + note_type_desc = 'version' + else: + note_type = (x['n_type'] if isinstance(x['n_type'], str) + else 'Unknown note type:') + note_type_desc = ('0x%.8x' % x['n_type'] + if isinstance(x['n_type'], int) else + _DESCR_NOTE_N_TYPE.get(x['n_type'], _unknown)) return '%s (%s)%s' % (note_type, note_type_desc, desc) @@ -184,7 +243,7 @@ def describe_attr_tag_arm(tag, val, extra): elif tag == 'TAG_NODEFAULTS': return _DESCR_ATTR_TAG_ARM[tag] + 'True' - + s = _DESCR_ATTR_TAG_ARM[tag] s += '"%s"' % val if val else '' return s @@ -193,6 +252,41 @@ def describe_attr_tag_arm(tag, val, extra): return _DESCR_ATTR_TAG_ARM[tag] + d_entry[val] +def 
describe_note_gnu_property_x86_feature_1(value): + descs = [] + for mask, desc in _DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_1_FLAGS: + if value & mask: + descs.append(desc) + return 'x86 feature: ' + ', '.join(descs) + +def describe_note_gnu_properties(properties): + descriptions = [] + for prop in properties: + t, d, sz = prop.pr_type, prop.pr_data, prop.pr_datasz + if t == 'GNU_PROPERTY_STACK_SIZE': + if type(d) is int: + prop_desc = 'stack size: 0x%x' % d + else: + prop_desc = 'stack size: ' % sz + elif t == 'GNU_PROPERTY_NO_COPY_ON_PROTECTED': + if sz != 0: + prop_desc = ' ' % sz + else: + prop_desc = 'no copy on protected' + elif t == 'GNU_PROPERTY_X86_FEATURE_1_AND': + if sz != 4: + prop_desc = ' ' % sz + else: + prop_desc = describe_note_gnu_property_x86_feature_1(d) + elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC: + prop_desc = '' % (t, bytes2hex(d, sep=' ')) + elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER: + prop_desc = '' % (t, bytes2hex(d, sep=' ')) + else: + prop_desc = '' % (t, bytes2hex(d, sep=' ')) + descriptions.append(prop_desc) + return '\n '.join(descriptions) + #------------------------------------------------------------------------------- _unknown = '' @@ -232,6 +326,7 @@ def describe_attr_tag_arm(tag, val, extra): ELFOSABI_SORTIX='Sortix', ELFOSABI_ARM_AEABI='ARM - EABI', ELFOSABI_ARM='ARM - ABI', + ELFOSABI_CELL_LV2='CellOS Lv-2', ELFOSABI_STANDALONE='Standalone App', ) @@ -264,6 +359,7 @@ def describe_attr_tag_arm(tag, val, extra): EM_AARCH64='AArch64', EM_BLACKFIN='Analog Devices Blackfin', EM_PPC='PowerPC', + EM_PPC64='PowerPC64', RESERVED='RESERVED', ) @@ -279,6 +375,7 @@ def describe_attr_tag_arm(tag, val, extra): PT_GNU_EH_FRAME='GNU_EH_FRAME', PT_GNU_STACK='GNU_STACK', PT_GNU_RELRO='GNU_RELRO', + PT_GNU_PROPERTY='GNU_PROPERTY', PT_ARM_ARCHEXT='ARM_ARCHEXT', PT_ARM_EXIDX='EXIDX', # binutils calls this EXIDX, not ARM_EXIDX PT_AARCH64_ARCHEXT='AARCH64_ARCHEXT', 
@@ -315,11 +412,12 @@ def describe_attr_tag_arm(tag, val, extra): SHT_GNU_HASH='GNU_HASH', SHT_GROUP='GROUP', SHT_SYMTAB_SHNDX='SYMTAB SECTION INDICIES', + SHT_RELR='RELR', SHT_GNU_verdef='VERDEF', SHT_GNU_verneed='VERNEED', SHT_GNU_versym='VERSYM', SHT_GNU_LIBLIST='GNU_LIBLIST', - SHT_ARM_EXIDX='EXIDX', # binutils calls this EXIDX, not ARM_EXIDX + SHT_ARM_EXIDX='ARM_EXIDX', SHT_ARM_PREEMPTMAP='ARM_PREEMPTMAP', SHT_ARM_ATTRIBUTES='ARM_ATTRIBUTES', SHT_ARM_DEBUGOVERLAY='ARM_DEBUGOVERLAY', @@ -358,6 +456,7 @@ def describe_attr_tag_arm(tag, val, extra): SHT_MIPS_EH_REGION='MIPS_EH_REGION', SHT_MIPS_XLATE_OLD='MIPS_XLATE_OLD', SHT_MIPS_PDR_EXCEPTION='MIPS_PDR_EXCEPTION', + SHT_MIPS_ABIFLAGS='MIPS_ABIFLAGS', ) @@ -372,10 +471,31 @@ def describe_attr_tag_arm(tag, val, extra): SH_FLAGS.SHF_OS_NONCONFORMING: 'O', SH_FLAGS.SHF_GROUP: 'G', SH_FLAGS.SHF_TLS: 'T', + SH_FLAGS.SHF_MASKOS: 'o', SH_FLAGS.SHF_EXCLUDE: 'E', } +_DESCR_RH_FLAGS = { + RH_FLAGS.RHF_NONE: 'NONE', + RH_FLAGS.RHF_QUICKSTART: 'QUICKSTART', + RH_FLAGS.RHF_NOTPOT: 'NOTPOT', + RH_FLAGS.RHF_NO_LIBRARY_REPLACEMENT: 'NO_LIBRARY_REPLACEMENT', + RH_FLAGS.RHF_NO_MOVE: 'NO_MOVE', + RH_FLAGS.RHF_SGI_ONLY: 'SGI_ONLY', + RH_FLAGS.RHF_GUARANTEE_INIT: 'GUARANTEE_INIT', + RH_FLAGS.RHF_DELTA_C_PLUS_PLUS: 'DELTA_C_PLUS_PLUS', + RH_FLAGS.RHF_GUARANTEE_START_INIT: 'GUARANTEE_START_INIT', + RH_FLAGS.RHF_PIXIE: 'PIXIE', + RH_FLAGS.RHF_DEFAULT_DELAY_LOAD: 'DEFAULT_DELAY_LOAD', + RH_FLAGS.RHF_REQUICKSTART: 'REQUICKSTART', + RH_FLAGS.RHF_REQUICKSTARTED: 'REQUICKSTARTED', + RH_FLAGS.RHF_CORD: 'CORD', + RH_FLAGS.RHF_NO_UNRES_UNDEF: 'NO_UNRES_UNDEF', + RH_FLAGS.RHF_RLD_ORDER_SAFE: 'RLD_ORDER_SAFE', +} + + _DESCR_ST_INFO_TYPE = dict( STT_NOTYPE='NOTYPE', STT_OBJECT='OBJECT', @@ -451,6 +571,7 @@ def describe_attr_tag_arm(tag, val, extra): NT_GNU_HWCAP='DSO-supplied software HWCAP info', NT_GNU_BUILD_ID='unique build ID bitstring', NT_GNU_GOLD_VERSION='gold version', + NT_GNU_PROPERTY_TYPE_0='program properties' ) @@ -464,6 +585,25 @@ 
def describe_attr_tag_arm(tag, val, extra): ELF_NOTE_OS_SYLLABLE='Syllable', ) + +# Values in GNU .note.gnu.property notes (n_type=='NT_GNU_PROPERTY_TYPE_0') have +# different formats which need to be parsed/described differently +_DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC=0xc0000000 +_DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC=0xdfffffff +_DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER=0xe0000000 +_DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER=0xffffffff + + +# Bit masks for GNU_PROPERTY_X86_FEATURE_1_xxx flags in the form +# (mask, flag_description) in the desired output order +_DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_1_FLAGS = ( + (1, 'IBT'), + (2, 'SHSTK'), + (4, 'LAM_U48'), + (8, 'LAM_U57'), +) + + def _reverse_dict(d, low_priority=()): """ This is a tiny helper function to "reverse" the keys/values of a dictionary @@ -484,6 +624,7 @@ def _reverse_dict(d, low_priority=()): _DESCR_RELOC_TYPE_x64 = _reverse_dict(ENUM_RELOC_TYPE_x64) _DESCR_RELOC_TYPE_ARM = _reverse_dict(ENUM_RELOC_TYPE_ARM) _DESCR_RELOC_TYPE_AARCH64 = _reverse_dict(ENUM_RELOC_TYPE_AARCH64) +_DESCR_RELOC_TYPE_PPC64 = _reverse_dict(ENUM_RELOC_TYPE_PPC64) _DESCR_RELOC_TYPE_MIPS = _reverse_dict(ENUM_RELOC_TYPE_MIPS) _low_priority_D_TAG = ( diff --git a/elftools/elf/dynamic.py b/elftools/elf/dynamic.py index 92822844..2f85333e 100644 --- a/elftools/elf/dynamic.py +++ b/elftools/elf/dynamic.py @@ -8,10 +8,14 @@ #------------------------------------------------------------------------------- import itertools +from collections import defaultdict +from .hash import ELFHashTable, GNUHashTable from .sections import Section, Symbol +from .enums import ENUM_D_TAG from .segments import Segment +from .relocation import RelocationTable from ..common.exceptions import ELFError -from ..common.utils import struct_parse, parse_cstring_from_stream +from ..common.utils import elf_assert, struct_parse, parse_cstring_from_stream class _DynamicStringTable(object): @@ -25,8 +29,8 @@ def __init__(self, stream, table_offset): def get_string(self, offset): """ 
Get the string stored at the given offset in this string table. """ - return parse_cstring_from_stream(self._stream, - self._table_offset + offset) + s = parse_cstring_from_stream(self._stream, self._table_offset + offset) + return s.decode('utf-8') if s else '' class DynamicTag(object): @@ -69,13 +73,32 @@ def __str__(self): class Dynamic(object): """ Shared functionality between dynamic sections and segments. """ - def __init__(self, stream, elffile, stringtable, position): + def __init__(self, stream, elffile, stringtable, position, empty): + """ + stream: + The file-like object from which to load data + + elffile: + The parent elffile object + + stringtable: + A stringtable reference to use for parsing string references in + entries + + position: + The file offset of the dynamic segment/section + + empty: + Whether this is a degenerate case with zero entries. Normally, every + dynamic table will have at least one entry, the DT_NULL terminator. + """ self.elffile = elffile self.elfstructs = elffile.structs self._stream = stream - self._num_tags = -1 + self._num_tags = -1 if not empty else 0 self._offset = position self._tagsize = self.elfstructs.Elf_Dyn.sizeof() + self._empty = empty # Do not access this directly yourself; use _get_stringtable() instead. 
self._stringtable = stringtable @@ -121,6 +144,8 @@ def _get_stringtable(self): def _iter_tags(self, type=None): """ Yield all raw tags (limit to |type| if specified) """ + if self._empty: + return for n in itertools.count(): tag = self._get_tag(n) if type is None or tag['d_tag'] == type: @@ -137,6 +162,8 @@ def iter_tags(self, type=None): def _get_tag(self, n): """ Get the raw tag at index #n from the file """ + if self._num_tags != -1 and n >= self._num_tags: + raise IndexError(n) offset = self._offset + n * self._tagsize return struct_parse( self.elfstructs.Elf_Dyn, @@ -149,7 +176,7 @@ def get_tag(self, n): return DynamicTag(self._get_tag(n), self._get_stringtable()) def num_tags(self): - """ Number of dynamic tags in the file + """ Number of dynamic tags in the file, including the DT_NULL tag """ if self._num_tags != -1: return self._num_tags @@ -160,6 +187,41 @@ def num_tags(self): self._num_tags = n + 1 return self._num_tags + def get_relocation_tables(self): + """ Load all available relocation tables from DYNAMIC tags. + + Returns a dictionary mapping found table types (REL, RELA, + JMPREL) to RelocationTable objects. 
+ """ + + result = {} + + if list(self.iter_tags('DT_REL')): + result['REL'] = RelocationTable(self.elffile, + self.get_table_offset('DT_REL')[1], + next(self.iter_tags('DT_RELSZ'))['d_val'], False) + + relentsz = next(self.iter_tags('DT_RELENT'))['d_val'] + elf_assert(result['REL'].entry_size == relentsz, + 'Expected DT_RELENT to be %s' % relentsz) + + if list(self.iter_tags('DT_RELA')): + result['RELA'] = RelocationTable(self.elffile, + self.get_table_offset('DT_RELA')[1], + next(self.iter_tags('DT_RELASZ'))['d_val'], True) + + relentsz = next(self.iter_tags('DT_RELAENT'))['d_val'] + elf_assert(result['RELA'].entry_size == relentsz, + 'Expected DT_RELAENT to be %s' % relentsz) + + if list(self.iter_tags('DT_JMPREL')): + result['JMPREL'] = RelocationTable(self.elffile, + self.get_table_offset('DT_JMPREL')[1], + next(self.iter_tags('DT_PLTRELSZ'))['d_val'], + next(self.iter_tags('DT_PLTREL'))['d_val'] == ENUM_D_TAG['DT_RELA']) + + return result + class DynamicSection(Section, Dynamic): """ ELF dynamic table section. Knows how to process the list of tags. 
@@ -168,7 +230,7 @@ def __init__(self, header, name, elffile): Section.__init__(self, header, name, elffile) stringtable = elffile.get_section(header['sh_link']) Dynamic.__init__(self, self.stream, self.elffile, stringtable, - self['sh_offset']) + self['sh_offset'], self['sh_type'] == 'SHT_NOBITS') class DynamicSegment(Segment, Dynamic): @@ -188,48 +250,103 @@ def __init__(self, header, stream, elffile): stringtable = elffile.get_section(section['sh_link']) break Segment.__init__(self, header, stream) - Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset']) + Dynamic.__init__(self, stream, elffile, stringtable, self['p_offset'], + self['p_filesz'] == 0) + self._symbol_size = self.elfstructs.Elf_Sym.sizeof() + self._num_symbols = None + self._symbol_name_map = None + + def num_symbols(self): + """ Number of symbols in the table recovered from DT_SYMTAB + """ + if self._num_symbols is not None: + return self._num_symbols + + # Check if a DT_GNU_HASH tag exists and recover the number of symbols + # from the corresponding hash table + _, gnu_hash_offset = self.get_table_offset('DT_GNU_HASH') + if gnu_hash_offset is not None: + hash_section = GNUHashTable(self.elffile, gnu_hash_offset, self) + self._num_symbols = hash_section.get_number_of_symbols() + + # If DT_GNU_HASH did not exist, maybe we can use DT_HASH + if self._num_symbols is None: + _, hash_offset = self.get_table_offset('DT_HASH') + if hash_offset is not None: + # Get the hash table from the DT_HASH offset + hash_section = ELFHashTable(self.elffile, hash_offset, self) + self._num_symbols = hash_section.get_number_of_symbols() + + if self._num_symbols is None: + # Find closest higher pointer than tab_ptr. We'll use that to mark + # the end of the symbol table. 
+ tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB') + if tab_ptr is None or tab_offset is None: + raise ELFError('Segment does not contain DT_SYMTAB.') + nearest_ptr = None + for tag in self.iter_tags(): + tag_ptr = tag['d_ptr'] + if tag['d_tag'] == 'DT_SYMENT': + if self._symbol_size != tag['d_val']: + # DT_SYMENT is the size of one symbol entry. It must be + # the same as returned by Elf_Sym.sizeof. + raise ELFError('DT_SYMENT (%d) != Elf_Sym (%d).' % + (tag['d_val'], self._symbol_size)) + if (tag_ptr > tab_ptr and + (nearest_ptr is None or nearest_ptr > tag_ptr)): + nearest_ptr = tag_ptr + + if nearest_ptr is None: + # Use the end of segment that contains DT_SYMTAB. + for segment in self.elffile.iter_segments(): + if (segment['p_vaddr'] <= tab_ptr and + tab_ptr <= (segment['p_vaddr'] + segment['p_filesz'])): + nearest_ptr = segment['p_vaddr'] + segment['p_filesz'] + + end_ptr = nearest_ptr + self._num_symbols = (end_ptr - tab_ptr) // self._symbol_size + + if self._num_symbols is None: + raise ELFError('Cannot determine the end of DT_SYMTAB.') - def iter_symbols(self): - """ Yield all symbols in this dynamic segment. The symbols are usually - the same as returned by SymbolTableSection.iter_symbols. However, - in stripped binaries, SymbolTableSection might have been removed. - This method reads from the mandatory dynamic tag DT_SYMTAB. + return self._num_symbols + + def get_symbol(self, index): + """ Get the symbol at index #index from the table (Symbol object) """ tab_ptr, tab_offset = self.get_table_offset('DT_SYMTAB') if tab_ptr is None or tab_offset is None: raise ELFError('Segment does not contain DT_SYMTAB.') - symbol_size = self.elfstructs.Elf_Sym.sizeof() - - # Find closest higher pointer than tab_ptr. We'll use that to mark the - # end of the symbol table. - nearest_ptr = None - for tag in self.iter_tags(): - tag_ptr = tag['d_ptr'] - if tag['d_tag'] == 'DT_SYMENT': - if symbol_size != tag['d_val']: - # DT_SYMENT is the size of one symbol entry. 
It must be the - # same as returned by Elf_Sym.sizeof. - raise ELFError('DT_SYMENT (%d) != Elf_Sym (%d).' % - (tag['d_val'], symbol_size)) - if (tag_ptr > tab_ptr and - (nearest_ptr is None or nearest_ptr > tag_ptr)): - nearest_ptr = tag_ptr - - if nearest_ptr is None: - # Use the end of segment that contains DT_SYMTAB. - for segment in self.elffile.iter_segments(): - if (segment['p_vaddr'] <= tab_ptr and - tab_ptr <= (segment['p_vaddr'] + segment['p_filesz'])): - nearest_ptr = segment['p_vaddr'] + segment['p_filesz'] - - if nearest_ptr is None: - raise ELFError('Cannot determine the end of DT_SYMTAB.') + symbol = struct_parse( + self.elfstructs.Elf_Sym, + self._stream, + stream_pos=tab_offset + index * self._symbol_size) string_table = self._get_stringtable() - for i in range((nearest_ptr - tab_ptr) // symbol_size): - symbol = struct_parse(self.elfstructs.Elf_Sym, self._stream, - i * symbol_size + tab_offset) - symbol_name = string_table.get_string(symbol['st_name']) - yield Symbol(symbol, symbol_name) + symbol_name = string_table.get_string(symbol["st_name"]) + + return Symbol(symbol, symbol_name) + + def get_symbol_by_name(self, name): + """ Get a symbol(s) by name. Return None if no symbol by the given name + exists. + """ + # The first time this method is called, construct a name to number + # mapping + # + if self._symbol_name_map is None: + self._symbol_name_map = defaultdict(list) + for i, sym in enumerate(self.iter_symbols()): + self._symbol_name_map[sym.name].append(i) + symnums = self._symbol_name_map.get(name) + return [self.get_symbol(i) for i in symnums] if symnums else None + + def iter_symbols(self): + """ Yield all symbols in this dynamic segment. The symbols are usually + the same as returned by SymbolTableSection.iter_symbols. However, + in stripped binaries, SymbolTableSection might have been removed. + This method reads from the mandatory dynamic tag DT_SYMTAB. 
+ """ + for i in range(self.num_symbols()): + yield(self.get_symbol(i)) diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index 289a5f97..10367ad7 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -14,9 +14,13 @@ import resource PAGESIZE = resource.getpagesize() except ImportError: - # Windows system - import mmap - PAGESIZE = mmap.PAGESIZE + try: + # Windows system + import mmap + PAGESIZE = mmap.PAGESIZE + except ImportError: + # Jython + PAGESIZE = 4096 from ..common.py3compat import BytesIO from ..common.exceptions import ELFError @@ -24,16 +28,19 @@ from .structs import ELFStructs from .sections import ( Section, StringTableSection, SymbolTableSection, - SUNWSyminfoTableSection, NullSection, NoteSection, - StabSection, ARMAttributesSection) + SymbolTableIndexSection, SUNWSyminfoTableSection, NullSection, + NoteSection, StabSection, ARMAttributesSection) from .dynamic import DynamicSection, DynamicSegment -from .relocation import RelocationSection, RelocationHandler +from .relocation import (RelocationSection, RelocationHandler, + RelrRelocationSection) from .gnuversions import ( GNUVerNeedSection, GNUVerDefSection, GNUVerSymSection) from .segments import Segment, InterpSegment, NoteSegment from ..dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig - +from ..ehabi.ehabiinfo import EHABIInfo +from .hash import ELFHashSection, GNUHashSection +from .constants import SHN_INDICES class ELFFile(object): """ Creation: the constructor accepts a stream (file-like object) with the @@ -78,12 +85,25 @@ def __init__(self, stream): self.stream.seek(0) self.e_ident_raw = self.stream.read(16) - self._file_stringtable_section = self._get_file_stringtable() + self._section_header_stringtable = \ + self._get_section_header_stringtable() self._section_name_map = None def num_sections(self): """ Number of sections in the file """ + if self['e_shoff'] == 0: + return 0 + # From the ELF ABI documentation at + # 
https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.sheader.html: + # "e_shnum normally tells how many entries the section header table + # contains. [...] If the number of sections is greater than or equal to + # SHN_LORESERVE (0xff00), e_shnum has the value SHN_UNDEF (0) and the + # actual number of section header table entries is contained in the + # sh_size field of the section header at index 0 (otherwise, the sh_size + # member of the initial entry contains 0)." + if self['e_shnum'] == 0: + return self._get_section_header(0)['sh_size'] return self['e_shnum'] def get_section(self, n): @@ -101,22 +121,47 @@ def get_section_by_name(self, name): # mapping # if self._section_name_map is None: - self._section_name_map = {} - for i, sec in enumerate(self.iter_sections()): - self._section_name_map[sec.name] = i + self._make_section_name_map() secnum = self._section_name_map.get(name, None) return None if secnum is None else self.get_section(secnum) - def iter_sections(self): - """ Yield all the sections in the file + def get_section_index(self, section_name): + """ Gets the index of the section by name. Return None if no such + section name exists. + """ + # The first time this method is called, construct a name to number + # mapping + # + if self._section_name_map is None: + self._make_section_name_map() + return self._section_name_map.get(section_name, None) + + def iter_sections(self, type=None): + """ Yield all the sections in the file. If the optional |type| + parameter is passed, this method will only yield sections of the + given type. The parameter value must be a string containing the + name of the type as defined in the ELF specification, e.g. + 'SHT_SYMTAB'. 
""" for i in range(self.num_sections()): - yield self.get_section(i) + section = self.get_section(i) + if type is None or section['sh_type'] == type: + yield section def num_segments(self): """ Number of segments in the file """ - return self['e_phnum'] + # From: https://github.com/hjl-tools/x86-psABI/wiki/X86-psABI + # Section: 4.1.2 Number of Program Headers + # If the number of program headers is greater than or equal to + # PN_XNUM (0xffff), this member has the value PN_XNUM + # (0xffff). The actual number of program header table entries + # is contained in the sh_info field of the section header at + # index 0. + if self['e_phnum'] < 0xffff: + return self['e_phnum'] + else: + return self.get_section(0)['sh_info'] def get_segment(self, n): """ Get the segment at index #n from the file (Segment object) @@ -124,11 +169,17 @@ def get_segment(self, n): segment_header = self._get_segment_header(n) return self._make_segment(segment_header) - def iter_segments(self): - """ Yield all the segments in the file + def iter_segments(self, type=None): + """ Yield all the segments in the file. If the optional |type| + parameter is passed, this method will only yield segments of the + given type. The parameter value must be a string containing the + name of the type as defined in the ELF specification, e.g. + 'PT_LOAD'. """ for i in range(self.num_segments()): - yield self.get_segment(i) + segment = self.get_segment(i) + if type is None or segment['p_type'] == type: + yield segment def address_offsets(self, start, size=1): """ Yield a file offset for each ELF segment containing a memory region. @@ -137,10 +188,8 @@ def address_offsets(self, start, size=1): offset of the region is yielded. 
""" end = start + size - for seg in self.iter_segments(): - # consider LOAD only to prevent same address being yielded twice - if seg['p_type'] != 'PT_LOAD': - continue + # consider LOAD only to prevent same address being yielded twice + for seg in self.iter_segments(type='PT_LOAD'): if (start >= seg['p_vaddr'] and end <= seg['p_vaddr'] + seg['p_filesz']): yield start - seg['p_vaddr'] + seg['p_offset'] @@ -150,7 +199,7 @@ def has_dwarf_info(self): We assume that if it has the .debug_info or .zdebug_info section, it has all the other required sections as well. """ - return (self.get_section_by_name('.debug_info') or + return bool(self.get_section_by_name('.debug_info') or self.get_section_by_name('.zdebug_info') or self.get_section_by_name('.eh_frame')) @@ -167,7 +216,10 @@ def get_dwarf_info(self, relocate_dwarf_sections=True): section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev', '.debug_str', '.debug_line', '.debug_frame', - '.debug_loc', '.debug_ranges') + '.debug_loc', '.debug_ranges', '.debug_pubtypes', + '.debug_pubnames', '.debug_addr', + '.debug_str_offsets', '.debug_line_str') + compressed = bool(self.get_section_by_name('.zdebug_info')) if compressed: @@ -178,8 +230,9 @@ def get_dwarf_info(self, relocate_dwarf_sections=True): (debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name, debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name, - debug_loc_sec_name, debug_ranges_sec_name, - eh_frame_sec_name) = section_names + debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name, + debug_pubnames_name, debug_addr_name, debug_str_offsets_name, + debug_line_str_name, eh_frame_sec_name) = section_names debug_sections = {} for secname in section_names: @@ -207,24 +260,237 @@ def get_dwarf_info(self, relocate_dwarf_sections=True): debug_str_sec=debug_sections[debug_str_sec_name], debug_loc_sec=debug_sections[debug_loc_sec_name], debug_ranges_sec=debug_sections[debug_ranges_sec_name], - 
debug_line_sec=debug_sections[debug_line_sec_name]) + debug_line_sec=debug_sections[debug_line_sec_name], + debug_pubtypes_sec=debug_sections[debug_pubtypes_name], + debug_pubnames_sec=debug_sections[debug_pubnames_name], + debug_addr_sec=debug_sections[debug_addr_name], + debug_str_offsets_sec=debug_sections[debug_str_offsets_name], + debug_line_str_sec=debug_sections[debug_line_str_name] + ) + + def has_ehabi_info(self): + """ Check whether this file appears to have arm exception handler index table. + """ + return any(self.iter_sections(type='SHT_ARM_EXIDX')) + + def get_ehabi_infos(self): + """ Generally, shared library and executable contain 1 .ARM.exidx section. + Object file contains many .ARM.exidx sections. + So we must traverse every section and filter sections whose type is SHT_ARM_EXIDX. + """ + _ret = [] + if self['e_type'] == 'ET_REL': + # TODO: support relocatable file + assert False, "Current version of pyelftools doesn't support relocatable file." + for section in self.iter_sections(type='SHT_ARM_EXIDX'): + _ret.append(EHABIInfo(section, self.little_endian)) + return _ret if len(_ret) > 0 else None def get_machine_arch(self): """ Return the machine architecture, as detected from the ELF header. - Not all architectures are supported at the moment. 
- """ - if self['e_machine'] == 'EM_X86_64': - return 'x64' - elif self['e_machine'] in ('EM_386', 'EM_486'): - return 'x86' - elif self['e_machine'] == 'EM_ARM': - return 'ARM' - elif self['e_machine'] == 'EM_AARCH64': - return 'AArch64' - elif self['e_machine'] == 'EM_MIPS': - return 'MIPS' + """ + architectures = { + 'EM_M32' : 'AT&T WE 32100', + 'EM_SPARC' : 'SPARC', + 'EM_386' : 'x86', + 'EM_68K' : 'Motorola 68000', + 'EM_88K' : 'Motorola 88000', + 'EM_IAMCU' : 'Intel MCU', + 'EM_860' : 'Intel 80860', + 'EM_MIPS' : 'MIPS', + 'EM_S370' : 'IBM System/370', + 'EM_MIPS_RS3_LE' : 'MIPS RS3000 Little-endian', + 'EM_PARISC' : 'Hewlett-Packard PA-RISC', + 'EM_VPP500' : 'Fujitsu VPP500', + 'EM_SPARC32PLUS' : 'Enhanced SPARC', + 'EM_960' : 'Intel 80960', + 'EM_PPC' : 'PowerPC', + 'EM_PPC64' : '64-bit PowerPC', + 'EM_S390' : 'IBM System/390', + 'EM_SPU' : 'IBM SPU/SPC', + 'EM_V800' : 'NEC V800', + 'EM_FR20' : 'Fujitsu FR20', + 'EM_RH32' : 'TRW RH-32', + 'EM_RCE' : 'Motorola RCE', + 'EM_ARM' : 'ARM', + 'EM_ALPHA' : 'Digital Alpha', + 'EM_SH' : 'Hitachi SH', + 'EM_SPARCV9' : 'SPARC Version 9', + 'EM_TRICORE' : 'Siemens TriCore embedded processor', + 'EM_ARC' : 'Argonaut RISC Core, Argonaut Technologies Inc.', + 'EM_H8_300' : 'Hitachi H8/300', + 'EM_H8_300H' : 'Hitachi H8/300H', + 'EM_H8S' : 'Hitachi H8S', + 'EM_H8_500' : 'Hitachi H8/500', + 'EM_IA_64' : 'Intel IA-64', + 'EM_MIPS_X' : 'MIPS-X', + 'EM_COLDFIRE' : 'Motorola ColdFire', + 'EM_68HC12' : 'Motorola M68HC12', + 'EM_MMA' : 'Fujitsu MMA', + 'EM_PCP' : 'Siemens PCP', + 'EM_NCPU' : 'Sony nCPU', + 'EM_NDR1' : 'Denso NDR1', + 'EM_STARCORE' : 'Motorola Star*Core', + 'EM_ME16' : 'Toyota ME16', + 'EM_ST100' : 'STMicroelectronics ST100', + 'EM_TINYJ' : 'Advanced Logic TinyJ', + 'EM_X86_64' : 'x64', + 'EM_PDSP' : 'Sony DSP', + 'EM_PDP10' : 'Digital Equipment PDP-10', + 'EM_PDP11' : 'Digital Equipment PDP-11', + 'EM_FX66' : 'Siemens FX66', + 'EM_ST9PLUS' : 'STMicroelectronics ST9+ 8/16 bit', + 'EM_ST7' : 'STMicroelectronics 
ST7 8-bit', + 'EM_68HC16' : 'Motorola MC68HC16', + 'EM_68HC11' : 'Motorola MC68HC11', + 'EM_68HC08' : 'Motorola MC68HC08', + 'EM_68HC05' : 'Motorola MC68HC05', + 'EM_SVX' : 'Silicon Graphics SVx', + 'EM_ST19' : 'STMicroelectronics ST19 8-bit', + 'EM_VAX' : 'Digital VAX', + 'EM_CRIS' : 'Axis Communications 32-bit', + 'EM_JAVELIN' : 'Infineon Technologies 32-bit', + 'EM_FIREPATH' : 'Element 14 64-bit DSP', + 'EM_ZSP' : 'LSI Logic 16-bit DSP', + 'EM_MMIX' : 'Donald Knuth\'s educational 64-bit', + 'EM_HUANY' : 'Harvard University machine-independent object files', + 'EM_PRISM' : 'SiTera Prism', + 'EM_AVR' : 'Atmel AVR 8-bit', + 'EM_FR30' : 'Fujitsu FR30', + 'EM_D10V' : 'Mitsubishi D10V', + 'EM_D30V' : 'Mitsubishi D30V', + 'EM_V850' : 'NEC v850', + 'EM_M32R' : 'Mitsubishi M32R', + 'EM_MN10300' : 'Matsushita MN10300', + 'EM_MN10200' : 'Matsushita MN10200', + 'EM_PJ' : 'picoJava', + 'EM_OPENRISC' : 'OpenRISC 32-bit', + 'EM_ARC_COMPACT' : 'ARC International ARCompact', + 'EM_XTENSA' : 'Tensilica Xtensa', + 'EM_VIDEOCORE' : 'Alphamosaic VideoCore', + 'EM_TMM_GPP' : 'Thompson Multimedia', + 'EM_NS32K' : 'National Semiconductor 32000 series', + 'EM_TPC' : 'Tenor Network TPC', + 'EM_SNP1K' : 'Trebia SNP 1000', + 'EM_ST200' : 'STMicroelectronics ST200', + 'EM_IP2K' : 'Ubicom IP2xxx', + 'EM_MAX' : 'MAX', + 'EM_CR' : 'National Semiconductor CompactRISC', + 'EM_F2MC16' : 'Fujitsu F2MC16', + 'EM_MSP430' : 'Texas Instruments msp430', + 'EM_BLACKFIN' : 'Analog Devices Blackfin', + 'EM_SE_C33' : 'Seiko Epson S1C33', + 'EM_SEP' : 'Sharp', + 'EM_ARCA' : 'Arca RISC', + 'EM_UNICORE' : 'PKU-Unity MPRC', + 'EM_EXCESS' : 'eXcess', + 'EM_DXP' : 'Icera Semiconductor Deep Execution Processor', + 'EM_ALTERA_NIOS2' : 'Altera Nios II', + 'EM_CRX' : 'National Semiconductor CompactRISC CRX', + 'EM_XGATE' : 'Motorola XGATE', + 'EM_C166' : 'Infineon C16x/XC16x', + 'EM_M16C' : 'Renesas M16C', + 'EM_DSPIC30F' : 'Microchip Technology dsPIC30F', + 'EM_CE' : 'Freescale Communication Engine RISC core', + 
'EM_M32C' : 'Renesas M32C', + 'EM_TSK3000' : 'Altium TSK3000', + 'EM_RS08' : 'Freescale RS08', + 'EM_SHARC' : 'Analog Devices SHARC', + 'EM_ECOG2' : 'Cyan Technology eCOG2', + 'EM_SCORE7' : 'Sunplus S+core7 RISC', + 'EM_DSP24' : 'New Japan Radio (NJR) 24-bit DSP', + 'EM_VIDEOCORE3' : 'Broadcom VideoCore III', + 'EM_LATTICEMICO32' : 'Lattice FPGA RISC', + 'EM_SE_C17' : 'Seiko Epson C17', + 'EM_TI_C6000' : 'TI TMS320C6000', + 'EM_TI_C2000' : 'TI TMS320C2000', + 'EM_TI_C5500' : 'TI TMS320C55x', + 'EM_TI_ARP32' : 'TI Application Specific RISC, 32bit', + 'EM_TI_PRU' : 'TI Programmable Realtime Unit', + 'EM_MMDSP_PLUS' : 'STMicroelectronics 64bit VLIW', + 'EM_CYPRESS_M8C' : 'Cypress M8C', + 'EM_R32C' : 'Renesas R32C', + 'EM_TRIMEDIA' : 'NXP Semiconductors TriMedia', + 'EM_QDSP6' : 'QUALCOMM DSP6', + 'EM_8051' : 'Intel 8051', + 'EM_STXP7X' : 'STMicroelectronics STxP7x', + 'EM_NDS32' : 'Andes Technology RISC', + 'EM_ECOG1' : 'Cyan Technology eCOG1X', + 'EM_ECOG1X' : 'Cyan Technology eCOG1X', + 'EM_MAXQ30' : 'Dallas Semiconductor MAXQ30', + 'EM_XIMO16' : 'New Japan Radio (NJR) 16-bit', + 'EM_MANIK' : 'M2000 Reconfigurable RISC', + 'EM_CRAYNV2' : 'Cray Inc. 
NV2', + 'EM_RX' : 'Renesas RX', + 'EM_METAG' : 'Imagination Technologies META', + 'EM_MCST_ELBRUS' : 'MCST Elbrus', + 'EM_ECOG16' : 'Cyan Technology eCOG16', + 'EM_CR16' : 'National Semiconductor CompactRISC CR16 16-bit', + 'EM_ETPU' : 'Freescale', + 'EM_SLE9X' : 'Infineon Technologies SLE9X', + 'EM_L10M' : 'Intel L10M', + 'EM_K10M' : 'Intel K10M', + 'EM_AARCH64' : 'AArch64', + 'EM_AVR32' : 'Atmel 32-bit', + 'EM_STM8' : 'STMicroeletronics STM8 8-bit', + 'EM_TILE64' : 'Tilera TILE64', + 'EM_TILEPRO' : 'Tilera TILEPro', + 'EM_MICROBLAZE' : 'Xilinx MicroBlaze 32-bit RISC', + 'EM_CUDA' : 'NVIDIA CUDA', + 'EM_TILEGX' : 'Tilera TILE-Gx', + 'EM_CLOUDSHIELD' : 'CloudShield', + 'EM_COREA_1ST' : 'KIPO-KAIST Core-A 1st generation', + 'EM_COREA_2ND' : 'KIPO-KAIST Core-A 2nd generation', + 'EM_ARC_COMPACT2' : 'Synopsys ARCompact V2', + 'EM_OPEN8' : 'Open8 8-bit RISC', + 'EM_RL78' : 'Renesas RL78', + 'EM_VIDEOCORE5' : 'Broadcom VideoCore V', + 'EM_78KOR' : 'Renesas 78KOR', + 'EM_56800EX' : 'Freescale 56800EX', + 'EM_BA1' : 'Beyond BA1', + 'EM_BA2' : 'Beyond BA2', + 'EM_XCORE' : 'XMOS xCORE', + 'EM_MCHP_PIC' : 'Microchip 8-bit PIC', + 'EM_INTEL205' : 'Reserved by Intel', + 'EM_INTEL206' : 'Reserved by Intel', + 'EM_INTEL207' : 'Reserved by Intel', + 'EM_INTEL208' : 'Reserved by Intel', + 'EM_INTEL209' : 'Reserved by Intel', + 'EM_KM32' : 'KM211 KM32 32-bit', + 'EM_KMX32' : 'KM211 KMX32 32-bit', + 'EM_KMX16' : 'KM211 KMX16 16-bit', + 'EM_KMX8' : 'KM211 KMX8 8-bit', + 'EM_KVARC' : 'KM211 KVARC', + 'EM_CDP' : 'Paneve CDP', + 'EM_COGE' : 'Cognitive', + 'EM_COOL' : 'Bluechip Systems CoolEngine', + 'EM_NORC' : 'Nanoradio Optimized RISC', + 'EM_CSR_KALIMBA' : 'CSR Kalimba', + 'EM_Z80' : 'Zilog Z80', + 'EM_VISIUM' : 'VISIUMcore', + 'EM_FT32' : 'FTDI Chip FT32 32-bit RISC', + 'EM_MOXIE' : 'Moxie', + 'EM_AMDGPU' : 'AMD GPU', + 'EM_RISCV' : 'RISC-V', + 'EM_BPF' : 'Linux BPF - in-kernel virtual machine', + 'EM_CSKY' : 'C-SKY', + 'EM_FRV' : 'Fujitsu FR-V' + } + + return 
architectures.get(self['e_machine'], '') + + def get_shstrndx(self): + """ Find the string table section index for the section header table + """ + # From https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html: + # If the section name string table section index is greater than or + # equal to SHN_LORESERVE (0xff00), this member has the value SHN_XINDEX + # (0xffff) and the actual index of the section name string table section + # is contained in the sh_link field of the section header at index 0. + if self['e_shstrndx'] != SHN_INDICES.SHN_XINDEX: + return self['e_shstrndx'] else: - return '' + return self._get_section_header(0)['sh_link'] #-------------------------------- PRIVATE --------------------------------# @@ -295,7 +561,7 @@ def _get_section_name(self, section_header): string table """ name_offset = section_header['sh_name'] - return self._file_stringtable_section.get_string(name_offset) + return self._section_header_stringtable.get_string(name_offset) def _make_section(self, section_header): """ Create a section object of the appropriate type @@ -309,6 +575,8 @@ def _make_section(self, section_header): return NullSection(section_header, name, self) elif sectype in ('SHT_SYMTAB', 'SHT_DYNSYM', 'SHT_SUNW_LDYNSYM'): return self._make_symbol_table_section(section_header, name) + elif sectype == 'SHT_SYMTAB_SHNDX': + return self._make_symbol_table_index_section(section_header, name) elif sectype == 'SHT_SUNW_syminfo': return self._make_sunwsyminfo_table_section(section_header, name) elif sectype == 'SHT_GNU_verneed': @@ -327,9 +595,20 @@ def _make_section(self, section_header): return StabSection(section_header, name, self) elif sectype == 'SHT_ARM_ATTRIBUTES': return ARMAttributesSection(section_header, name, self) + elif sectype == 'SHT_HASH': + return self._make_elf_hash_section(section_header, name) + elif sectype == 'SHT_GNU_HASH': + return self._make_gnu_hash_section(section_header, name) + elif sectype == 'SHT_RELR': + return 
RelrRelocationSection(section_header, name, self) else: return Section(section_header, name, self) + def _make_section_name_map(self): + self._section_name_map = {} + for i, sec in enumerate(self.iter_sections()): + self._section_name_map[sec.name] = i + def _make_symbol_table_section(self, section_header, name): """ Create a SymbolTableSection """ @@ -340,6 +619,14 @@ def _make_symbol_table_section(self, section_header, name): elffile=self, stringtable=strtab_section) + def _make_symbol_table_index_section(self, section_header, name): + """ Create a SymbolTableIndexSection object + """ + linked_symtab_index = section_header['sh_link'] + return SymbolTableIndexSection( + section_header, name, elffile=self, + symboltable=linked_symtab_index) + def _make_sunwsyminfo_table_section(self, section_header, name): """ Create a SUNWSyminfoTableSection """ @@ -380,6 +667,20 @@ def _make_gnu_versym_section(self, section_header, name): elffile=self, symboltable=strtab_section) + def _make_elf_hash_section(self, section_header, name): + linked_symtab_index = section_header['sh_link'] + symtab_section = self.get_section(linked_symtab_index) + return ELFHashSection( + section_header, name, self, symtab_section + ) + + def _make_gnu_hash_section(self, section_header, name): + linked_symtab_index = section_header['sh_link'] + symtab_section = self.get_section(linked_symtab_index) + return GNUHashSection( + section_header, name, self, symtab_section + ) + def _get_segment_header(self, n): """ Find the header of segment #n, parse it and return the struct """ @@ -388,10 +689,11 @@ def _get_segment_header(self, n): self.stream, stream_pos=self._segment_offset(n)) - def _get_file_stringtable(self): - """ Find the file's string table section + def _get_section_header_stringtable(self): + """ Get the string table section corresponding to the section header + table. 
""" - stringtable_section_num = self['e_shstrndx'] + stringtable_section_num = self.get_shstrndx() return StringTableSection( header=self._get_section_header(stringtable_section_num), name='', @@ -422,7 +724,7 @@ def _read_dwarf_section(self, section, relocate_dwarf_sections): stream=section_stream, name=section.name, global_offset=section['sh_offset'], - size=section['sh_size'], + size=section.data_size, address=section['sh_addr']) @staticmethod diff --git a/elftools/elf/enums.py b/elftools/elf/enums.py index 34e2de70..666fd70f 100644 --- a/elftools/elf/enums.py +++ b/elftools/elf/enums.py @@ -53,6 +53,7 @@ ELFOSABI_SORTIX=53, ELFOSABI_ARM_AEABI=64, ELFOSABI_ARM=97, + ELFOSABI_CELL_LV2=102, ELFOSABI_STANDALONE=255, _default_=Pass, ) @@ -254,6 +255,9 @@ EM_MOXIE = 223, # Moxie processor family EM_AMDGPU = 224, # AMD GPU architecture EM_RISCV = 243, # RISC-V + EM_BPF = 247, # Linux BPF - in-kernel virtual machine + EM_CSKY = 252, # C-SKY + EM_FRV = 0x5441, # Fujitsu FR-V # Reservations # reserved 11-14 Reserved for future use # reserved 16 Reserved for future use @@ -291,7 +295,8 @@ SHT_PREINIT_ARRAY=16, SHT_GROUP=17, SHT_SYMTAB_SHNDX=18, - SHT_NUM=19, + SHT_RELR=19, + SHT_NUM=20, SHT_LOOS=0x60000000, SHT_GNU_ATTRIBUTES=0x6ffffff5, SHT_GNU_HASH=0x6ffffff6, @@ -361,7 +366,8 @@ SHT_MIPS_WHIRL=0x70000026, SHT_MIPS_EH_REGION=0x70000027, SHT_MIPS_XLATE_OLD=0x70000028, - SHT_MIPS_PDR_EXCEPTION=0x70000029)) + SHT_MIPS_PDR_EXCEPTION=0x70000029, + SHT_MIPS_ABIFLAGS=0x7000002a)) ENUM_ELFCOMPRESS_TYPE = dict( ELFCOMPRESS_ZLIB=1, @@ -396,6 +402,7 @@ PT_GNU_EH_FRAME=0x6474e550, PT_GNU_STACK=0x6474e551, PT_GNU_RELRO=0x6474e552, + PT_GNU_PROPERTY=0x6474e553, _default_=Pass, ) @@ -461,6 +468,10 @@ _default_=Pass, ) +ENUM_ST_LOCAL = dict( + _default_=Pass, +) + # st_shndx ENUM_ST_SHNDX = dict( SHN_UNDEF=0, @@ -505,8 +516,20 @@ DT_ENCODING=32, DT_PREINIT_ARRAY=32, DT_PREINIT_ARRAYSZ=33, - DT_NUM=34, + DT_SYMTAB_SHNDX=34, + DT_RELRSZ=35, + DT_RELR=36, + DT_RELRENT=37, + DT_NUM=38, 
DT_LOOS=0x6000000d, + DT_ANDROID_REL=0x6000000f, + DT_ANDROID_RELSZ=0x60000010, + DT_ANDROID_RELA=0x60000011, + DT_ANDROID_RELASZ=0x60000012, + DT_ANDROID_RELR=0x6fffe000, + DT_ANDROID_RELRSZ=0x6fffe001, + DT_ANDROID_RELRENT=0x6fffe003, + DT_ANDROID_RELRCOUNT=0x6fffe005, DT_HIOS=0x6ffff000, DT_LOPROC=0x70000000, DT_HIPROC=0x7fffffff, @@ -607,6 +630,45 @@ for k in ENUMMAP_EXTRA_D_TAG_MACHINE: ENUM_D_TAG.update(ENUMMAP_EXTRA_D_TAG_MACHINE[k]) +ENUM_DT_FLAGS = dict( + DF_ORIGIN=0x1, + DF_SYMBOLIC=0x2, + DF_TEXTREL=0x4, + DF_BIND_NOW=0x8, + DF_STATIC_TLS=0x10, +) + +ENUM_DT_FLAGS_1 = dict( + DF_1_NOW=0x1, + DF_1_GLOBAL=0x2, + DF_1_GROUP=0x4, + DF_1_NODELETE=0x8, + DF_1_LOADFLTR=0x10, + DF_1_INITFIRST=0x20, + DF_1_NOOPEN=0x40, + DF_1_ORIGIN=0x80, + DF_1_DIRECT=0x100, + DF_1_TRANS=0x200, + DF_1_INTERPOSE=0x400, + DF_1_NODEFLIB=0x800, + DF_1_NODUMP=0x1000, + DF_1_CONFALT=0x2000, + DF_1_ENDFILTEE=0x4000, + DF_1_DISPRELDNE=0x8000, + DF_1_DISPRELPND=0x10000, + DF_1_NODIRECT=0x20000, + DF_1_IGNMULDEF=0x40000, + DF_1_NOKSYMS=0x80000, + DF_1_NOHDR=0x100000, + DF_1_EDITED=0x200000, + DF_1_NORELOC=0x400000, + DF_1_SYMINTPOSE=0x800000, + DF_1_GLOBAUDIT=0x1000000, + DF_1_SINGLETON=0x2000000, + DF_1_STUB=0x4000000, + DF_1_PIE=0x8000000, +) + ENUM_RELOC_TYPE_MIPS = dict( R_MIPS_NONE=0, R_MIPS_16=1, @@ -746,6 +808,7 @@ R_X86_64_TLSDESC_CALL=35, R_X86_64_TLSDESC=36, R_X86_64_IRELATIVE=37, + R_X86_64_REX_GOTPCRELX=42, R_X86_64_GNU_VTINHERIT=250, R_X86_64_GNU_VTENTRY=251, _default_=Pass, @@ -784,6 +847,7 @@ NT_GNU_HWCAP=2, NT_GNU_BUILD_ID=3, NT_GNU_GOLD_VERSION=4, + NT_GNU_PROPERTY_TYPE_0=5, _default_=Pass, ) @@ -810,6 +874,22 @@ _default_=Pass, ) +# Values in GNU .note.gnu.property notes (n_type=='NT_GNU_PROPERTY_TYPE_0') +ENUM_NOTE_GNU_PROPERTY_TYPE = dict( + GNU_PROPERTY_STACK_SIZE=1, + GNU_PROPERTY_NO_COPY_ON_PROTECTED=2, + GNU_PROPERTY_X86_FEATURE_1_AND=0xc0000002, + _default_=Pass, +) + +ENUM_GNU_PROPERTY_X86_FEATURE_1_FLAGS = dict( + GNU_PROPERTY_X86_FEATURE_1_IBT=1, + 
GNU_PROPERTY_X86_FEATURE_1_SHSTK=2, + GNU_PROPERTY_X86_FEATURE_1_LAM_U48=4, + GNU_PROPERTY_X86_FEATURE_1_LAM_U57=8, + _default_=Pass +) + ENUM_RELOC_TYPE_ARM = dict( R_ARM_NONE=0, R_ARM_PC24=1, @@ -1098,3 +1178,130 @@ TAG_VIRTUALIZATION_USE=68, TAG_MPEXTENSION_USE_OLD=70, ) + +# https://openpowerfoundation.org/wp-content/uploads/2016/03/ABI64BitOpenPOWERv1.1_16July2015_pub4.pdf +# See 3.5.3 Relocation Types Table. +ENUM_RELOC_TYPE_PPC64 = dict( + R_PPC64_NONE=0, + R_PPC64_ADDR32=1, + R_PPC64_ADDR24=2, + R_PPC64_ADDR16=3, + R_PPC64_ADDR16_LO=4, + R_PPC64_ADDR16_HI=5, + R_PPC64_ADDR16_HA=6, + R_PPC64_ADDR14=7, + R_PPC64_ADDR14_BRTAKEN=8, + R_PPC64_ADDR14_BRNTAKEN=9, + R_PPC64_REL24=10, + R_PPC64_REL14=11, + R_PPC64_REL14_BRTAKEN=12, + R_PPC64_REL14_BRNTAKEN=13, + R_PPC64_GOT16=14, + R_PPC64_GOT16_LO=15, + R_PPC64_GOT16_HI=16, + R_PPC64_GOT16_HA=17, + R_PPC64_COPY=19, + R_PPC64_GLOB_DAT=20, + R_PPC64_JMP_SLOT=21, + R_PPC64_RELATIVE=22, + R_PPC64_UADDR32=24, + R_PPC64_UADDR16=25, + R_PPC64_REL32=26, + R_PPC64_PLT32=27, + R_PPC64_PLTREL32=28, + R_PPC64_PLT16_LO=29, + R_PPC64_PLT16_HI=30, + R_PPC64_PLT16_HA=31, + R_PPC64_SECTOFF=33, + R_PPC64_SECTOFF_LO=34, + R_PPC64_SECTOFF_HI=35, + R_PPC64_SECTOFF_HA=36, + R_PPC64_ADDR30=37, + R_PPC64_ADDR64=38, + R_PPC64_ADDR16_HIGHER=39, + R_PPC64_ADDR16_HIGHERA=40, + R_PPC64_ADDR16_HIGHEST=41, + R_PPC64_ADDR16_HIGHESTA=42, + R_PPC64_UADDR64=43, + R_PPC64_REL64=44, + R_PPC64_PLT64=45, + R_PPC64_PLTREL64=46, + R_PPC64_TOC16=47, + R_PPC64_TOC16_LO=48, + R_PPC64_TOC16_HI=49, + R_PPC64_TOC16_HA=50, + R_PPC64_TOC=51, + R_PPC64_PLTGOT16=52, + R_PPC64_PLTGOT16_LO=53, + R_PPC64_PLTGOT16_HI=54, + R_PPC64_PLTGOT16_HA=55, + R_PPC64_ADDR16_DS=56, + R_PPC64_ADDR16_LO_DS=57, + R_PPC64_GOT16_DS=58, + R_PPC64_GOT16_LO_DS=59, + R_PPC64_PLT16_LO_DS=60, + R_PPC64_SECTOFF_DS=61, + R_PPC64_SECTOFF_LO_DS=62, + R_PPC64_TOC16_DS=63, + R_PPC64_TOC16_LO_DS=64, + R_PPC64_PLTGOT16_DS=65, + R_PPC64_PLTGOT16_LO_DS=66, + R_PPC64_TLS=67, + R_PPC64_DTPMOD64=68, + 
R_PPC64_TPREL16=69, + R_PPC64_TPREL16_LO=70, + R_PPC64_TPREL16_HI=71, + R_PPC64_TPREL16_HA=72, + R_PPC64_TPREL64=73, + R_PPC64_DTPREL16=74, + R_PPC64_DTPREL16_LO=75, + R_PPC64_DTPREL16_HI=76, + R_PPC64_DTPREL16_HA=77, + R_PPC64_DTPREL64=78, + R_PPC64_GOT_TLSGD16=79, + R_PPC64_GOT_TLSGD16_LO=80, + R_PPC64_GOT_TLSGD16_HI=81, + R_PPC64_GOT_TLSGD16_HA=82, + R_PPC64_GOT_TLSLD16=83, + R_PPC64_GOT_TLSLD16_LO=84, + R_PPC64_GOT_TLSLD16_HI=85, + R_PPC64_GOT_TLSLD16_HA=86, + R_PPC64_GOT_TPREL16_DS=87, + R_PPC64_GOT_TPREL16_LO_DS=88, + R_PPC64_GOT_TPREL16_HI=89, + R_PPC64_GOT_TPREL16_HA=90, + R_PPC64_GOT_DTPREL16_DS=91, + R_PPC64_GOT_DTPREL16_LO_DS=92, + R_PPC64_GOT_DTPREL16_HI=93, + R_PPC64_GOT_DTPREL16_HA=94, + R_PPC64_TPREL16_DS=95, + R_PPC64_TPREL16_LO_DS=96, + R_PPC64_TPREL16_HIGHER=97, + R_PPC64_TPREL16_HIGHERA=98, + R_PPC64_TPREL16_HIGHEST=99, + R_PPC64_TPREL16_HIGHESTA=100, + R_PPC64_DTPREL16_DS=101, + R_PPC64_DTPREL16_LO_DS=102, + R_PPC64_DTPREL16_HIGHER=103, + R_PPC64_DTPREL16_HIGHERA=104, + R_PPC64_DTPREL16_HIGHEST=105, + R_PPC64_DTPREL16_HIGHESTA=106, + R_PPC64_TLSGD=107, + R_PPC64_TLSLD=108, + R_PPC64_TOCSAVE=109, + R_PPC64_ADDR16_HIGH=110, + R_PPC64_ADDR16_HIGHA=111, + R_PPC64_TPREL16_HIGH=112, + R_PPC64_TPREL16_HIGHA=113, + R_PPC64_DTPREL16_HIGH=114, + R_PPC64_DTPREL16_HIGHA=115, + R_PPC64_REL24_NOTOC=116, + R_PPC64_ADDR64_LOCAL=117, + R_PPC64_IRELATIVE=248, + R_PPC64_REL16=249, + R_PPC64_REL16_LO=250, + R_PPC64_REL16_HI=251, + R_PPC64_REL16_HA=252, + R_PPC64_GNU_VTINHERIT=253, + R_PPC64_GNU_VTENTRY=254, +) diff --git a/elftools/elf/hash.py b/elftools/elf/hash.py new file mode 100644 index 00000000..c8d1e17e --- /dev/null +++ b/elftools/elf/hash.py @@ -0,0 +1,186 @@ +#------------------------------------------------------------------------------- +# elftools: elf/hash.py +# +# ELF hash table sections +# +# Andreas Ziegler (andreas.ziegler@fau.de) +# This code is in the public domain 
+#------------------------------------------------------------------------------- + +import struct + +from ..common.utils import struct_parse +from .sections import Section + + +class ELFHashTable(object): + """ Representation of an ELF hash table to find symbols in the + symbol table - useful for super-stripped binaries without section + headers where only the start of the symbol table is known from the + dynamic segment. The layout and contents are nicely described at + https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/. + + The symboltable argument needs to implement a get_symbol() method - + in a regular ELF file, this will be the linked symbol table section + as indicated by the sh_link attribute. For super-stripped binaries, + one should use the DynamicSegment object as the symboltable as it + supports symbol lookup without access to a symbol table section. + """ + + def __init__(self, elffile, start_offset, symboltable): + self.elffile = elffile + self._symboltable = symboltable + self.params = struct_parse(self.elffile.structs.Elf_Hash, + self.elffile.stream, + start_offset) + + def get_number_of_symbols(self): + """ Get the number of symbols from the hash table parameters. + """ + return self.params['nchains'] + + def get_symbol(self, name): + """ Look up a symbol from this hash table with the given name. + """ + if self.params['nbuckets'] == 0: + return None + hval = self.elf_hash(name) % self.params['nbuckets'] + symndx = self.params['buckets'][hval] + while symndx != 0: + sym = self._symboltable.get_symbol(symndx) + if sym.name == name: + return sym + symndx = self.params['chains'][symndx] + return None + + @staticmethod + def elf_hash(name): + """ Compute the hash value for a given symbol name. 
+ """ + if not isinstance(name, bytes): + name = name.encode('utf-8') + h = 0 + x = 0 + for c in bytearray(name): + h = (h << 4) + c + x = h & 0xF0000000 + if x != 0: + h ^= (x >> 24) + h &= ~x + return h + + +class ELFHashSection(Section, ELFHashTable): + """ Section representation of an ELF hash table. In regular ELF files, this + allows us to use the common functions defined on Section objects when + dealing with the hash table. + """ + def __init__(self, header, name, elffile, symboltable): + Section.__init__(self, header, name, elffile) + ELFHashTable.__init__(self, elffile, self['sh_offset'], symboltable) + + +class GNUHashTable(object): + """ Representation of a GNU hash table to find symbols in the + symbol table - useful for super-stripped binaries without section + headers where only the start of the symbol table is known from the + dynamic segment. The layout and contents are nicely described at + https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/. + + The symboltable argument needs to implement a get_symbol() method - + in a regular ELF file, this will be the linked symbol table section + as indicated by the sh_link attribute. For super-stripped binaries, + one should use the DynamicSegment object as the symboltable as it + supports symbol lookup without access to a symbol table section. 
+ """ + def __init__(self, elffile, start_offset, symboltable): + self.elffile = elffile + self._symboltable = symboltable + self.params = struct_parse(self.elffile.structs.Gnu_Hash, + self.elffile.stream, + start_offset) + # Element sizes in the hash table + self._wordsize = self.elffile.structs.Elf_word('').sizeof() + self._xwordsize = self.elffile.structs.Elf_xword('').sizeof() + self._chain_pos = start_offset + 4 * self._wordsize + \ + self.params['bloom_size'] * self._xwordsize + \ + self.params['nbuckets'] * self._wordsize + + def get_number_of_symbols(self): + """ Get the number of symbols in the hash table by finding the bucket + with the highest symbol index and walking to the end of its chain. + """ + # Find highest index in buckets array + max_idx = max(self.params['buckets']) + if max_idx < self.params['symoffset']: + return self.params['symoffset'] + + # Position the stream at the start of the corresponding chain + max_chain_pos = self._chain_pos + \ + (max_idx - self.params['symoffset']) * self._wordsize + self.elffile.stream.seek(max_chain_pos) + hash_format = 'I' + + # Walk the chain to its end (lowest bit is set) + while True: + cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0] + if cur_hash & 1: + return max_idx + 1 + + max_idx += 1 + + def _matches_bloom(self, H1): + """ Helper function to check if the given hash could be in the hash + table by testing it against the bloom filter. + """ + arch_bits = self.elffile.elfclass + H2 = H1 >> self.params['bloom_shift'] + word_idx = int(H1 / arch_bits) % self.params['bloom_size'] + BITMASK = (1 << (H1 % arch_bits)) | (1 << (H2 % arch_bits)) + return (self.params['bloom'][word_idx] & BITMASK) == BITMASK + + def get_symbol(self, name): + """ Look up a symbol from this hash table with the given name. 
+ """ + namehash = self.gnu_hash(name) + if not self._matches_bloom(namehash): + return None + + symidx = self.params['buckets'][namehash % self.params['nbuckets']] + if symidx < self.params['symoffset']: + return None + + self.elffile.stream.seek(self._chain_pos + (symidx - self.params['symoffset']) * self._wordsize) + hash_format = 'I' + while True: + cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0] + if cur_hash | 1 == namehash | 1: + symbol = self._symboltable.get_symbol(symidx) + if name == symbol.name: + return symbol + + if cur_hash & 1: + break + symidx += 1 + return None + + @staticmethod + def gnu_hash(key): + """ Compute the GNU-style hash value for a given symbol name. + """ + if not isinstance(key, bytes): + key = key.encode('utf-8') + h = 5381 + for c in bytearray(key): + h = h * 33 + c + return h & 0xFFFFFFFF + + +class GNUHashSection(Section, GNUHashTable): + """ Section representation of a GNU hash table. In regular ELF files, this + allows us to use the common functions defined on Section objects when + dealing with the hash table. 
+ """ + def __init__(self, header, name, elffile, symboltable): + Section.__init__(self, header, name, elffile) + GNUHashTable.__init__(self, elffile, self['sh_offset'], symboltable) diff --git a/elftools/elf/notes.py b/elftools/elf/notes.py index d34198b8..13895364 100644 --- a/elftools/elf/notes.py +++ b/elftools/elf/notes.py @@ -6,7 +6,7 @@ # Eli Bendersky (eliben@gmail.com) # This code is in the public domain #------------------------------------------------------------------------------- -from ..common.py3compat import bytes2str +from ..common.py3compat import bytes2hex, bytes2str from ..common.utils import struct_parse, roundup from ..construct import CString @@ -29,17 +29,32 @@ def iter_notes(elffile, offset, size): CString('').parse(elffile.stream.read(disk_namesz))) offset += disk_namesz - desc_data = bytes2str(elffile.stream.read(note['n_descsz'])) + desc_data = elffile.stream.read(note['n_descsz']) + note['n_descdata'] = desc_data if note['n_type'] == 'NT_GNU_ABI_TAG': note['n_desc'] = struct_parse(elffile.structs.Elf_abi, elffile.stream, offset) elif note['n_type'] == 'NT_GNU_BUILD_ID': - note['n_desc'] = ''.join('%.2x' % ord(b) for b in desc_data) + note['n_desc'] = bytes2hex(desc_data) + elif note['n_type'] == 'NT_GNU_GOLD_VERSION': + note['n_desc'] = bytes2str(desc_data) elif note['n_type'] == 'NT_PRPSINFO': note['n_desc'] = struct_parse(elffile.structs.Elf_Prpsinfo, elffile.stream, offset) + elif note['n_type'] == 'NT_FILE': + note['n_desc'] = struct_parse(elffile.structs.Elf_Nt_File, + elffile.stream, + offset) + elif note['n_type'] == 'NT_GNU_PROPERTY_TYPE_0': + off = offset + props = [] + while off < end: + p = struct_parse(elffile.structs.Elf_Prop, elffile.stream, off) + off += roundup(p.pr_datasz + 8, 2 if elffile.elfclass == 32 else 3) + props.append(p) + note['n_desc'] = props else: note['n_desc'] = desc_data offset += roundup(note['n_descsz'], 2) diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py index 633bbf58..4008e282 
100644 --- a/elftools/elf/relocation.py +++ b/elftools/elf/relocation.py @@ -12,7 +12,10 @@ from ..common.utils import elf_assert, struct_parse from .sections import Section from .enums import ( - ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS) + ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS, + ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64, + ENUM_D_TAG) +from ..construct import Container class Relocation(object): @@ -44,43 +47,44 @@ def __str__(self): return self.__repr__() -class RelocationSection(Section): - """ ELF relocation section. Serves as a collection of Relocation entries. +class RelocationTable(object): + """ Shared functionality between relocation sections and relocation tables """ - def __init__(self, header, name, elffile): - super(RelocationSection, self).__init__(header, name, elffile) - if self.header['sh_type'] == 'SHT_REL': - expected_size = self.structs.Elf_Rel.sizeof() - self.entry_struct = self.structs.Elf_Rel - elif self.header['sh_type'] == 'SHT_RELA': - expected_size = self.structs.Elf_Rela.sizeof() - self.entry_struct = self.structs.Elf_Rela + + def __init__(self, elffile, offset, size, is_rela): + self._stream = elffile.stream + self._elffile = elffile + self._elfstructs = elffile.structs + self._size = size + self._offset = offset + self._is_rela = is_rela + + if is_rela: + self.entry_struct = self._elfstructs.Elf_Rela else: - elf_assert(False, 'Unknown relocation type section') + self.entry_struct = self._elfstructs.Elf_Rel - elf_assert( - self.header['sh_entsize'] == expected_size, - 'Expected sh_entsize of SHT_REL section to be %s' % expected_size) + self.entry_size = self.entry_struct.sizeof() def is_RELA(self): """ Is this a RELA relocation section? If not, it's REL. 
""" - return self.header['sh_type'] == 'SHT_RELA' + return self._is_rela def num_relocations(self): """ Number of relocations in the section """ - return self['sh_size'] // self['sh_entsize'] + return self._size // self.entry_size def get_relocation(self, n): """ Get the relocation at index #n from the section (Relocation object) """ - entry_offset = self['sh_offset'] + n * self['sh_entsize'] + entry_offset = self._offset + n * self.entry_size entry = struct_parse( self.entry_struct, - self.stream, + self._stream, stream_pos=entry_offset) - return Relocation(entry, self.elffile) + return Relocation(entry, self._elffile) def iter_relocations(self): """ Yield all the relocations in the section @@ -89,6 +93,95 @@ def iter_relocations(self): yield self.get_relocation(i) +class RelocationSection(Section, RelocationTable): + """ ELF relocation section. Serves as a collection of Relocation entries. + """ + def __init__(self, header, name, elffile): + Section.__init__(self, header, name, elffile) + RelocationTable.__init__(self, self.elffile, + self['sh_offset'], self['sh_size'], header['sh_type'] == 'SHT_RELA') + + elf_assert(header['sh_type'] in ('SHT_REL', 'SHT_RELA'), + 'Unknown relocation type section') + elf_assert(header['sh_entsize'] == self.entry_size, + 'Expected sh_entsize of %s section to be %s' % ( + header['sh_type'], self.entry_size)) + +class RelrRelocationSection(Section): + """ RELR compressed relocation section. This stores relative relocations + in a compressed format. An entry with an even value serves as an + 'anchor' that defines a base address. Following this entry are one or + more bitmaps for consecutive addresses after the anchor which determine + if the corresponding relocation exists (if the bit is 1) or if it is + skipped. Addends are stored at the respective addresses (as in REL + relocations). 
+ """ + def __init__(self, header, name, elffile): + Section.__init__(self, header, name, elffile) + self._offset = self['sh_offset'] + self._size = self['sh_size'] + self._relr_struct = self.elffile.structs.Elf_Relr + self._entrysize = self._relr_struct.sizeof() + self._cached_relocations = None + + def iter_relocations(self): + """ Yield all the relocations in the section + """ + limit = self._offset + self._size + relr = self._offset + # The addresses of relocations in a bitmap are calculated from a base + # value provided in an initial 'anchor' relocation. + base = None + while relr < limit: + entry = struct_parse(self._relr_struct, + self.elffile.stream, + stream_pos=relr) + entry_offset = entry['r_offset'] + if (entry_offset & 1) == 0: + # We found an anchor, take the current value as the base address + # for the following bitmaps and move the 'where' pointer to the + # beginning of the first bitmap. + base = entry_offset + base += self._entrysize + yield Relocation(entry, self.elffile) + else: + # We're processing a bitmap. + elf_assert(base is not None, 'RELR bitmap without base address') + i = 0 + while True: + # Iterate over all bits except the least significant one. + entry_offset = (entry_offset >> 1) + if entry_offset == 0: + break + # if the current LSB is set, we have a relocation at the + # corresponding address so generate a Relocation with the + # matching offset + if (entry_offset & 1) != 0: + calc_offset = base + i * self._entrysize + yield Relocation(Container(r_offset = calc_offset), + self.elffile) + i += 1 + # Advance 'base' past the current bitmap (8 == CHAR_BIT). There + # are 63 (or 31 for 32-bit ELFs) entries in each bitmap, and + # every bit corresponds to an ELF_addr-sized relocation. 
+ base += (8 * self._entrysize - 1) * self.elffile.structs.Elf_addr('').sizeof() + # Advance to the next entry + relr += self._entrysize + + def num_relocations(self): + """ Number of relocations in the section + """ + if self._cached_relocations is None: + self._cached_relocations = list(self.iter_relocations()) + return len(self._cached_relocations) + + def get_relocation(self, n): + """ Get the relocation at index #n from the section (Relocation object) + """ + if self._cached_relocations is None: + self._cached_relocations = list(self.iter_relocations()) + return self._cached_relocations[n] + class RelocationHandler(object): """ Handles the logic of relocations in ELF files. """ @@ -151,6 +244,15 @@ def _do_apply_relocation(self, stream, reloc, symtab): raise ELFRelocationError( 'Unexpected RELA relocation for MIPS: %s' % reloc) recipe = self._RELOCATION_RECIPES_MIPS.get(reloc_type, None) + elif self.elffile.get_machine_arch() == 'ARM': + if reloc.is_RELA(): + raise ELFRelocationError( + 'Unexpected RELA relocation for ARM: %s' % reloc) + recipe = self._RELOCATION_RECIPES_ARM.get(reloc_type, None) + elif self.elffile.get_machine_arch() == 'AArch64': + recipe = self._RELOCATION_RECIPES_AARCH64.get(reloc_type, None) + elif self.elffile.get_machine_arch() == '64-bit PowerPC': + recipe = self._RELOCATION_RECIPES_PPC64.get(reloc_type, None) if recipe is None: raise ELFRelocationError( @@ -214,6 +316,28 @@ def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0): def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0): return sym_value + addend - offset + def _arm_reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0): + return sym_value // 4 + value - offset // 4 + + _RELOCATION_RECIPES_ARM = { + ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=False, + calc_func=_reloc_calc_sym_plus_value), + ENUM_RELOC_TYPE_ARM['R_ARM_CALL']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=False, + 
calc_func=_arm_reloc_calc_sym_plus_value_pcrel), + } + + _RELOCATION_RECIPES_AARCH64 = { + ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS64']: _RELOCATION_RECIPE_TYPE( + bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS32']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + ENUM_RELOC_TYPE_AARCH64['R_AARCH64_PREL32']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=True, + calc_func=_reloc_calc_sym_plus_addend_pcrel), + } + # https://dmz-portal.mips.com/wiki/MIPS_relocation_types _RELOCATION_RECIPES_MIPS = { ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE( @@ -223,6 +347,15 @@ def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0): calc_func=_reloc_calc_sym_plus_value), } + _RELOCATION_RECIPES_PPC64 = { + ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR32']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + ENUM_RELOC_TYPE_PPC64['R_PPC64_REL32']: _RELOCATION_RECIPE_TYPE( + bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend_pcrel), + ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR64']: _RELOCATION_RECIPE_TYPE( + bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), + } + _RELOCATION_RECIPES_X86 = { ENUM_RELOC_TYPE_i386['R_386_NONE']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=False, calc_func=_reloc_calc_identity), @@ -247,3 +380,5 @@ def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0): ENUM_RELOC_TYPE_x64['R_X86_64_32S']: _RELOCATION_RECIPE_TYPE( bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend), } + + diff --git a/elftools/elf/sections.py b/elftools/elf/sections.py index 20e90562..9a97a09f 100644 --- a/elftools/elf/sections.py +++ b/elftools/elf/sections.py @@ -74,6 +74,10 @@ def data(self): Note that data is decompressed if the stored section data is compressed. """ + # If this section is NOBITS, there is no data. 
provide a dummy answer + if self.header['sh_type'] == 'SHT_NOBITS': + return b'\0'*self.data_size + # If this section is compressed, deflate it if self.compressed: c_type = self._compression_type @@ -137,7 +141,27 @@ def get_string(self, offset): """ table_offset = self['sh_offset'] s = parse_cstring_from_stream(self.stream, table_offset + offset) - return s.decode('utf-8') if s else '' + return s.decode('utf-8', errors='replace') if s else '' + + +class SymbolTableIndexSection(Section): + """ A section containing the section header table indices corresponding + to symbols in the linked symbol table. This section has to exist if the + symbol table contains an entry with a section header index set to + SHN_XINDEX (0xffff). The format of the section is described at + https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.sheader.html + """ + def __init__(self, header, name, elffile, symboltable): + super(SymbolTableIndexSection, self).__init__(header, name, elffile) + self.symboltable = symboltable + + def get_section_index(self, n): + """ Get the section header table index for the symbol with index #n. + The section contains an array of Elf32_word values with one entry + for every symbol in the associated symbol table. 
+ """ + return struct_parse(self.elffile.structs.Elf_word(''), self.stream, + self['sh_offset'] + n * self['sh_entsize']) class SymbolTableSection(Section): @@ -267,7 +291,7 @@ def iter_stabs(self): while offset < end: stabs = struct_parse( self.structs.Elf_Stabs, - self.elffile.stream, + self.stream, stream_pos=offset) stabs['n_offset'] = offset offset += self.structs.Elf_Stabs.sizeof() @@ -287,14 +311,12 @@ def __init__(self, structs, stream): if self.tag != 'TAG_FILE': self.extra = [] - s_number = struct_parse(self.structs.Elf_uleb128('s_number'), - self.stream - ) + s_number = struct_parse(structs.Elf_uleb128('s_number'), stream) while s_number != 0: self.extra.append(s_number) - s_number = struct_parse(self.structs.Elf_uleb128('s_number'), - self.stream + s_number = struct_parse(structs.Elf_uleb128('s_number'), + stream ) elif self.tag in ('TAG_CPU_RAW_NAME', 'TAG_CPU_NAME', 'TAG_CONFORMANCE'): @@ -313,7 +335,7 @@ def __init__(self, structs, stream): if type(self.value.value) is not str: nul = struct_parse(structs.Elf_byte('nul'), stream) - elf_assert(null_byte == 0, + elf_assert(nul == 0, "Invalid terminating byte %r, expecting NUL." 
% nul) else: @@ -324,7 +346,7 @@ def tag(self): return self._tag['tag'] def __repr__(self): - s = '' % (self.tag, self.value) + s = '' % (self.tag, self.value) s += ' %s' % self.extra if self.extra is not None else '' return s @@ -372,7 +394,7 @@ def _make_attributes(self): yield ARMAttribute(self.structs, self.stream) def __repr__(self): - s = "" + s = "" return s % (self.header.tag[4:], self.header.value) diff --git a/elftools/elf/segments.py b/elftools/elf/segments.py index 16560bcd..0c318e17 100644 --- a/elftools/elf/segments.py +++ b/elftools/elf/segments.py @@ -40,15 +40,23 @@ def section_in_segment(self, section): sectype = section['sh_type'] secflags = section['sh_flags'] - # Only PT_LOAD, PT_GNU_RELR0 and PT_TLS segments can contain SHF_TLS + # Only PT_LOAD, PT_GNU_RELRO and PT_TLS segments can contain SHF_TLS # sections if ( secflags & SH_FLAGS.SHF_TLS and - segtype in ('PT_TLS', 'PT_GNU_RELR0', 'PT_LOAD')): - return False + segtype in ('PT_TLS', 'PT_GNU_RELRO', 'PT_LOAD')): + pass # PT_TLS segment contains only SHF_TLS sections, PT_PHDR no sections # at all - elif ( (secflags & SH_FLAGS.SHF_TLS) != 0 and + elif ( (secflags & SH_FLAGS.SHF_TLS) == 0 and segtype not in ('PT_TLS', 'PT_PHDR')): + pass + else: + return False + + # PT_LOAD and similar segments only have SHF_ALLOC sections. 
+ if ( (secflags & SH_FLAGS.SHF_ALLOC) == 0 and + segtype in ('PT_LOAD', 'PT_DYNAMIC', 'PT_GNU_EH_FRAME', + 'PT_GNU_RELRO', 'PT_GNU_STACK')): return False # In ELF_SECTION_IN_SEGMENT_STRICT the flag check_vma is on, so if diff --git a/elftools/elf/structs.py b/elftools/elf/structs.py index 660f6872..b437eec9 100644 --- a/elftools/elf/structs.py +++ b/elftools/elf/structs.py @@ -11,9 +11,11 @@ UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64, SBInt32, SLInt32, SBInt64, SLInt64, - Struct, Array, Enum, Padding, BitStruct, BitField, Value, String, CString + Struct, Array, Enum, Padding, BitStruct, BitField, Value, String, CString, + Switch, Field ) from ..common.construct_utils import ULEB128 +from ..common.utils import roundup from .enums import * @@ -43,6 +45,17 @@ def __init__(self, little_endian=True, elfclass=32): assert elfclass == 32 or elfclass == 64 self.little_endian = little_endian self.elfclass = elfclass + self.e_type = None + self.e_machine = None + self.e_ident_osabi = None + + def __getstate__(self): + return self.little_endian, self.elfclass, self.e_type, self.e_machine, self.e_ident_osabi + + def __setstate__(self, state): + self.little_endian, self.elfclass, e_type, e_machine, e_osabi = state + self.create_basic_structs() + self.create_advanced_structs(e_type, e_machine, e_osabi) def create_basic_structs(self): """ Create word-size related structs and ehdr struct needed for @@ -76,20 +89,27 @@ def create_advanced_structs(self, e_type=None, e_machine=None, e_ident_osabi=Non """ Create all ELF structs except the ehdr. They may possibly depend on provided e_type and/or e_machine parsed from ehdr. 
""" - self._create_phdr(e_machine) - self._create_shdr(e_machine) + self.e_type = e_type + self.e_machine = e_machine + self.e_ident_osabi = e_ident_osabi + + self._create_phdr() + self._create_shdr() self._create_chdr() self._create_sym() self._create_rel() - self._create_dyn(e_machine, e_ident_osabi) + self._create_dyn() self._create_sunw_syminfo() self._create_gnu_verneed() self._create_gnu_verdef() self._create_gnu_versym() self._create_gnu_abi() + self._create_gnu_property() self._create_note(e_type) self._create_stabs() self._create_arm_attributes() + self._create_elf_hash() + self._create_gnu_hash() #-------------------------------- PRIVATE --------------------------------# @@ -125,13 +145,13 @@ def _create_leb128(self): def _create_ntbs(self): self.Elf_ntbs = CString - def _create_phdr(self, e_machine=None): + def _create_phdr(self): p_type_dict = ENUM_P_TYPE_BASE - if e_machine == 'EM_ARM': + if self.e_machine == 'EM_ARM': p_type_dict = ENUM_P_TYPE_ARM - elif e_machine == 'EM_AARCH64': + elif self.e_machine == 'EM_AARCH64': p_type_dict = ENUM_P_TYPE_AARCH64 - elif e_machine == 'EM_MIPS': + elif self.e_machine == 'EM_MIPS': p_type_dict = ENUM_P_TYPE_MIPS if self.elfclass == 32: @@ -157,17 +177,17 @@ def _create_phdr(self, e_machine=None): self.Elf_xword('p_align'), ) - def _create_shdr(self, e_machine=None): + def _create_shdr(self): """Section header parsing. Depends on e_machine because of machine-specific values in sh_type. 
""" sh_type_dict = ENUM_SH_TYPE_BASE - if e_machine == 'EM_ARM': + if self.e_machine == 'EM_ARM': sh_type_dict = ENUM_SH_TYPE_ARM - elif e_machine == 'EM_X86_64': + elif self.e_machine == 'EM_X86_64': sh_type_dict = ENUM_SH_TYPE_AMD64 - elif e_machine == 'EM_MIPS': + elif self.e_machine == 'EM_MIPS': sh_type_dict = ENUM_SH_TYPE_MIPS self.Elf_Shdr = Struct('Elf_Shdr', @@ -198,38 +218,69 @@ def _create_chdr(self): self.Elf_Chdr = Struct('Elf_Chdr', *fields) def _create_rel(self): - # r_info is also taken apart into r_info_sym and r_info_type. - # This is done in Value to avoid endianity issues while parsing. + # r_info is also taken apart into r_info_sym and r_info_type. This is + # done in Value to avoid endianity issues while parsing. if self.elfclass == 32: - r_info_sym = Value('r_info_sym', - lambda ctx: (ctx['r_info'] >> 8) & 0xFFFFFF) - r_info_type = Value('r_info_type', - lambda ctx: ctx['r_info'] & 0xFF) - else: # 64 - r_info_sym = Value('r_info_sym', - lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF) - r_info_type = Value('r_info_type', - lambda ctx: ctx['r_info'] & 0xFFFFFFFF) + fields = [self.Elf_xword('r_info'), + Value('r_info_sym', + lambda ctx: (ctx['r_info'] >> 8) & 0xFFFFFF), + Value('r_info_type', + lambda ctx: ctx['r_info'] & 0xFF)] + elif self.e_machine == 'EM_MIPS': # ELF64 MIPS + fields = [ + # The MIPS ELF64 specification + # (https://www.linux-mips.org/pub/linux/mips/doc/ABI/elf64-2.4.pdf) + # provides a non-standard relocation structure definition. + self.Elf_word('r_sym'), + self.Elf_byte('r_ssym'), + self.Elf_byte('r_type3'), + self.Elf_byte('r_type2'), + self.Elf_byte('r_type'), + + # Synthetize usual fields for compatibility with other + # architectures. This allows relocation consumers (including + # our readelf tests) to work without worrying about MIPS64 + # oddities. 
+ Value('r_info_sym', lambda ctx: ctx['r_sym']), + Value('r_info_ssym', lambda ctx: ctx['r_ssym']), + Value('r_info_type', lambda ctx: ctx['r_type']), + Value('r_info_type2', lambda ctx: ctx['r_type2']), + Value('r_info_type3', lambda ctx: ctx['r_type3']), + Value('r_info', + lambda ctx: (ctx['r_sym'] << 32) + | (ctx['r_ssym'] << 24) + | (ctx['r_type3'] << 16) + | (ctx['r_type2'] << 8) + | ctx['r_type']), + ] + else: # Other 64 ELFs + fields = [self.Elf_xword('r_info'), + Value('r_info_sym', + lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF), + Value('r_info_type', + lambda ctx: ctx['r_info'] & 0xFFFFFFFF)] self.Elf_Rel = Struct('Elf_Rel', - self.Elf_addr('r_offset'), - self.Elf_xword('r_info'), - r_info_sym, - r_info_type, - ) + self.Elf_addr('r_offset'), + *fields) + + fields_and_addend = fields + [self.Elf_sxword('r_addend')] self.Elf_Rela = Struct('Elf_Rela', - self.Elf_addr('r_offset'), - self.Elf_xword('r_info'), - r_info_sym, - r_info_type, - self.Elf_sxword('r_addend'), + self.Elf_addr('r_offset'), + *fields_and_addend ) - def _create_dyn(self, e_machine=None, e_ident_osabi=None): + # Elf32_Relr is typedef'd as Elf32_Word, Elf64_Relr as Elf64_Xword + # (see the glibc patch, for example: + # https://sourceware.org/pipermail/libc-alpha/2021-October/132029.html) + # For us, this is the same as self.Elf_addr (or self.Elf_xword). + self.Elf_Relr = Struct('Elf_Relr', self.Elf_addr('r_offset')) + + def _create_dyn(self): d_tag_dict = dict(ENUM_D_TAG_COMMON) - if e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE: - d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[e_machine]) - elif e_ident_osabi == 'ELFOSABI_SOLARIS': + if self.e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE: + d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[self.e_machine]) + elif self.e_ident_osabi == 'ELFOSABI_SOLARIS': d_tag_dict.update(ENUM_D_TAG_SOLARIS) self.Elf_Dyn = Struct('Elf_Dyn', @@ -247,7 +298,10 @@ def _create_sym(self): # st_other is hierarchical. 
To access the visibility, # use container['st_other']['visibility'] st_other_struct = BitStruct('st_other', - Padding(5), + # https://openpowerfoundation.org/wp-content/uploads/2016/03/ABI64BitOpenPOWERv1.1_16July2015_pub4.pdf + # See 3.4.1 Symbol Values. + Enum(BitField('local', 3), **ENUM_ST_LOCAL), + Padding(2), Enum(BitField('visibility', 3), **ENUM_ST_VISIBILITY)) if self.elfclass == 32: self.Elf_Sym = Struct('Elf_Sym', @@ -326,8 +380,50 @@ def _create_gnu_abi(self): self.Elf_word('abi_tiny'), ) + def _create_gnu_property(self): + # Structure of GNU property notes is documented in + # https://github.com/hjl-tools/linux-abi/wiki/linux-abi-draft.pdf + def roundup_padding(ctx): + if self.elfclass == 32: + return roundup(ctx.pr_datasz, 2) - ctx.pr_datasz + return roundup(ctx.pr_datasz, 3) - ctx.pr_datasz + + def classify_pr_data(ctx): + if type(ctx.pr_type) is not str: + return None + if ctx.pr_type.startswith('GNU_PROPERTY_X86_'): + return ('GNU_PROPERTY_X86_*', 4, 0) + return (ctx.pr_type, ctx.pr_datasz, self.elfclass) + + self.Elf_Prop = Struct('Elf_Prop', + Enum(self.Elf_word('pr_type'), **ENUM_NOTE_GNU_PROPERTY_TYPE), + self.Elf_word('pr_datasz'), + Switch('pr_data', classify_pr_data, { + ('GNU_PROPERTY_STACK_SIZE', 4, 32): self.Elf_word('pr_data'), + ('GNU_PROPERTY_STACK_SIZE', 8, 64): self.Elf_word64('pr_data'), + ('GNU_PROPERTY_X86_*', 4, 0): self.Elf_word('pr_data'), + }, + default=Field('pr_data', lambda ctx: ctx.pr_datasz) + ), + Padding(roundup_padding) + ) + def _create_note(self, e_type=None): # Structure of "PT_NOTE" section + + self.Elf_ugid = self.Elf_half if self.elfclass == 32 and self.e_machine in { + 'EM_MN10300', + 'EM_ARM', + 'EM_CRIS', + 'EM_CYGNUS_FRV', + 'EM_386', + 'EM_M32R', + 'EM_68K', + 'EM_S390', + 'EM_SH', + 'EM_SPARC', + } else self.Elf_word + self.Elf_Nhdr = Struct('Elf_Nhdr', self.Elf_word('n_namesz'), self.Elf_word('n_descsz'), @@ -345,12 +441,12 @@ def _create_note(self, e_type=None): self.Elf_byte('pr_zomb'), 
self.Elf_byte('pr_nice'), self.Elf_xword('pr_flag'), - self.Elf_half('pr_uid'), - self.Elf_half('pr_gid'), - self.Elf_half('pr_pid'), - self.Elf_half('pr_ppid'), - self.Elf_half('pr_pgrp'), - self.Elf_half('pr_sid'), + self.Elf_ugid('pr_uid'), + self.Elf_ugid('pr_gid'), + self.Elf_word('pr_pid'), + self.Elf_word('pr_ppid'), + self.Elf_word('pr_pgrp'), + self.Elf_word('pr_sid'), String('pr_fname', 16), String('pr_psargs', 80), ) @@ -362,8 +458,8 @@ def _create_note(self, e_type=None): self.Elf_byte('pr_nice'), Padding(4), self.Elf_xword('pr_flag'), - self.Elf_word('pr_uid'), - self.Elf_word('pr_gid'), + self.Elf_ugid('pr_uid'), + self.Elf_ugid('pr_gid'), self.Elf_word('pr_pid'), self.Elf_word('pr_ppid'), self.Elf_word('pr_pgrp'), @@ -372,6 +468,21 @@ def _create_note(self, e_type=None): String('pr_psargs', 80), ) + # A PT_NOTE of type NT_FILE matching the definition in + # https://chromium.googlesource.com/ + # native_client/nacl-binutils/+/upstream/master/binutils/readelf.c + # Line 15121 + self.Elf_Nt_File = Struct('Elf_Nt_File', + self.Elf_xword("num_map_entries"), + self.Elf_xword("page_size"), + Array(lambda ctx: ctx.num_map_entries, + Struct('Elf_Nt_File_Entry', + self.Elf_addr('vm_start'), + self.Elf_addr('vm_end'), + self.Elf_offset('page_offset'))), + Array(lambda ctx: ctx.num_map_entries, + CString('filename'))) + def _create_stabs(self): # Structure of one stabs entry, see binutils/bfd/stabs.c # Names taken from https://sourceware.org/gdb/current/onlinedocs/stabs.html#Overview @@ -398,3 +509,29 @@ def _create_arm_attributes(self): Enum(self.Elf_uleb128('tag'), **ENUM_ATTR_TAG_ARM) ) + + def _create_elf_hash(self): + # Structure of the old SYSV-style hash table header. 
It is documented + # in the Oracle "Linker and Libraries Guide", Part IV ELF Application + # Binary Interface, Chapter 14 Object File Format, Section Hash Table + # Section: + # https://docs.oracle.com/cd/E53394_01/html/E54813/chapter6-48031.html + + self.Elf_Hash = Struct('Elf_Hash', + self.Elf_word('nbuckets'), + self.Elf_word('nchains'), + Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets')), + Array(lambda ctx: ctx['nchains'], self.Elf_word('chains'))) + + def _create_gnu_hash(self): + # Structure of the GNU-style hash table header. Documentation for this + # table is mostly in the GLIBC source code, a good explanation of the + # format can be found in this blog post: + # https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ + self.Gnu_Hash = Struct('Gnu_Hash', + self.Elf_word('nbuckets'), + self.Elf_word('symoffset'), + self.Elf_word('bloom_size'), + self.Elf_word('bloom_shift'), + Array(lambda ctx: ctx['bloom_size'], self.Elf_xword('bloom')), + Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets'))) diff --git a/examples/dwarf_decode_address.py b/examples/dwarf_decode_address.py index 7b7d3e01..e206ae9a 100644 --- a/examples/dwarf_decode_address.py +++ b/examples/dwarf_decode_address.py @@ -67,7 +67,7 @@ def decode_funcname(dwarfinfo, address): highpc_attr_class) continue - if lowpc <= address <= highpc: + if lowpc <= address < highpc: return DIE.attributes['DW_AT_name'].value except KeyError: continue @@ -83,7 +83,7 @@ def decode_file_line(dwarfinfo, address): prevstate = None for entry in lineprog.get_entries(): # We're interested in those entries where a new state is assigned - if entry.state is None or entry.state.end_sequence: + if entry.state is None: continue # Looking for a range of addresses in two consecutive states that # contain the required address. 
@@ -91,7 +91,16 @@ def decode_file_line(dwarfinfo, address): filename = lineprog['file_entry'][prevstate.file - 1].name line = prevstate.line return filename, line - prevstate = entry.state + if entry.state.end_sequence: + # For the state with `end_sequence`, `address` means the address + # of the first byte after the target machine instruction + # sequence and other information is meaningless. We clear + # prevstate so that it's not used in the next iteration. Address + # info is used in the above comparison to see if we need to use + # the line information for the prevstate. + prevstate = None + else: + prevstate = entry.state return None, None diff --git a/examples/dwarf_lineprogram_filenames.py b/examples/dwarf_lineprogram_filenames.py new file mode 100644 index 00000000..2dd0e704 --- /dev/null +++ b/examples/dwarf_lineprogram_filenames.py @@ -0,0 +1,95 @@ +#------------------------------------------------------------------------------- +# elftools example: dwarf_lineprogram_filenames.py +# +# In the .debug_line section, the Dwarf line program generates a matrix +# of address-source references. This example demonstrates accessing the state +# of each line program entry to retrieve the underlying filenames. +# +# William Woodruff (william@yossarian.net) +# This code is in the public domain +#------------------------------------------------------------------------------- +from __future__ import print_function +from collections import defaultdict +import os +import sys + +# If pyelftools is not installed, the example can also run from the root or +# examples/ dir of the source distribution. 
+sys.path[0:0] = ['.', '..'] + +from elftools.elf.elffile import ELFFile + + +def process_file(filename): + print('Processing file:', filename) + with open(filename, 'rb') as f: + elffile = ELFFile(f) + + if not elffile.has_dwarf_info(): + print(' file has no DWARF info') + return + + dwarfinfo = elffile.get_dwarf_info() + for CU in dwarfinfo.iter_CUs(): + print(' Found a compile unit at offset %s, length %s' % ( + CU.cu_offset, CU['unit_length'])) + + # Every compilation unit in the DWARF information may or may not + # have a corresponding line program in .debug_line. + line_program = dwarfinfo.line_program_for_CU(CU) + if line_program is None: + print(' DWARF info is missing a line program for this CU') + continue + + # Print a reverse mapping of filename -> #entries + line_entry_mapping(line_program) + + +def line_entry_mapping(line_program): + filename_map = defaultdict(int) + + # The line program, when decoded, returns a list of line program + # entries. Each entry contains a state, which we'll use to build + # a reverse mapping of filename -> #entries. + lp_entries = line_program.get_entries() + for lpe in lp_entries: + # We skip LPEs that don't have an associated file. + # This can happen if instructions in the compiled binary + # don't correspond directly to any original source file. + if not lpe.state or lpe.state.file == 0: + continue + filename = lpe_filename(line_program, lpe.state.file) + filename_map[filename] += 1 + + for filename, lpe_count in filename_map.items(): + print(" filename=%s -> %d entries" % (filename, lpe_count)) + + +def lpe_filename(line_program, file_index): + # Retrieving the filename associated with a line program entry + # involves two levels of indirection: we take the file index from + # the LPE to grab the file_entry from the line program header, + # then take the directory index from the file_entry to grab the + # directory name from the line program header. 
Finally, we + # join the (base) filename from the file_entry to the directory + # name to get the absolute filename. + lp_header = line_program.header + file_entries = lp_header["file_entry"] + + # File and directory indices are 1-indexed. + file_entry = file_entries[file_index - 1] + dir_index = file_entry["dir_index"] + + # A dir_index of 0 indicates that no absolute directory was recorded during + # compilation; return just the basename. + if dir_index == 0: + return file_entry.name.decode() + + directory = lp_header["include_directory"][dir_index - 1] + return os.path.join(directory, file_entry.name).decode() + + +if __name__ == '__main__': + if sys.argv[1] == '--test': + for filename in sys.argv[2:]: + process_file(filename) diff --git a/examples/dwarf_location_lists.py b/examples/dwarf_location_info.py similarity index 56% rename from examples/dwarf_location_lists.py rename to examples/dwarf_location_info.py index 06401d34..0ec9933f 100644 --- a/examples/dwarf_location_lists.py +++ b/examples/dwarf_location_info.py @@ -1,8 +1,20 @@ #------------------------------------------------------------------------------- -# elftools example: dwarf_location_lists.py +# elftools example: dwarf_location_info.py # -# Examine DIE entries which have location list values, and decode these -# location lists. +# Examine DIE entries which have either location list values or location +# expression values and decode that information. +# +# Location information can either be completely contained within a DIE +# (using 'DW_FORM_exprloc' in DWARFv4 or 'DW_FORM_block1' in earlier +# versions) or be a reference to a location list contained within +# the .debug_loc section (using 'DW_FORM_sec_offset' in DWARFv4 or +# 'DW_FORM_data4' / 'DW_FORM_data8' in earlier versions). +# +# The LocationParser object parses the DIE attributes and handles both +# formats. 
+# +# The directory 'test/testfiles_for_location_info' contains test files with +# location information represented in both DWARFv4 and DWARFv2 forms. # # Eli Bendersky (eliben@gmail.com) # This code is in the public domain @@ -14,13 +26,12 @@ # examples/ dir of the source distribution. sys.path[0:0] = ['.', '..'] - from elftools.common.py3compat import itervalues from elftools.elf.elffile import ELFFile from elftools.dwarf.descriptions import ( describe_DWARF_expr, set_global_machine_arch) -from elftools.dwarf.locationlists import LocationEntry - +from elftools.dwarf.locationlists import ( + LocationEntry, LocationExpr, LocationParser) def process_file(filename): print('Processing file:', filename) @@ -43,6 +54,10 @@ def process_file(filename): # register names contained in DWARF expressions. set_global_machine_arch(elffile.get_machine_arch()) + # Create a LocationParser object that parses the DIE attributes and + # creates objects representing the actual location information. + loc_parser = LocationParser(location_lists) + for CU in dwarfinfo.iter_CUs(): # DWARFInfo allows to iterate over the compile units contained in # the .debug_info section. CU is a CompileUnit object, with some @@ -58,20 +73,25 @@ def process_file(filename): # AttributeValue object (from elftools.dwarf.die), which we # can examine. for attr in itervalues(DIE.attributes): - if attribute_has_location_list(attr): - # This is a location list. Its value is an offset into - # the .debug_loc section, so we can use the location - # lists object to decode it. - loclist = location_lists.get_location_list_at_offset( - attr.value) - - print(' DIE %s. attr %s.\n%s' % ( - DIE.tag, - attr.name, - show_loclist(loclist, dwarfinfo, indent=' '))) - - -def show_loclist(loclist, dwarfinfo, indent): + # Check if this attribute contains location information + if loc_parser.attribute_has_location(attr, CU['version']): + print(' DIE %s. attr %s.' 
% (DIE.tag, attr.name)) + loc = loc_parser.parse_from_attribute(attr, + CU['version']) + # We either get a list (in case the attribute is a + # reference to the .debug_loc section) or a LocationExpr + # object (in case the attribute itself contains location + # information). + if isinstance(loc, LocationExpr): + print(' %s' % ( + describe_DWARF_expr(loc.loc_expr, + dwarfinfo.structs, CU.cu_offset))) + elif isinstance(loc, list): + print(show_loclist(loc, + dwarfinfo, + ' ', CU.cu_offset)) + +def show_loclist(loclist, dwarfinfo, indent, cu_offset): """ Display a location list nicely, decoding the DWARF expressions contained within. """ @@ -80,26 +100,11 @@ def show_loclist(loclist, dwarfinfo, indent): if isinstance(loc_entity, LocationEntry): d.append('%s <<%s>>' % ( loc_entity, - describe_DWARF_expr(loc_entity.loc_expr, dwarfinfo.structs))) + describe_DWARF_expr(loc_entity.loc_expr, dwarfinfo.structs, cu_offset))) else: d.append(str(loc_entity)) return '\n'.join(indent + s for s in d) - -def attribute_has_location_list(attr): - """ Only some attributes can have location list values, if they have the - required DW_FORM (loclistptr "class" in DWARF spec v3) - """ - if (attr.name in ( 'DW_AT_location', 'DW_AT_string_length', - 'DW_AT_const_value', 'DW_AT_return_addr', - 'DW_AT_data_member_location', 'DW_AT_frame_base', - 'DW_AT_segment', 'DW_AT_static_link', - 'DW_AT_use_location', 'DW_AT_vtable_elem_location')): - if attr.form in ('DW_FORM_data4', 'DW_FORM_data8'): - return True - return False - - if __name__ == '__main__': if sys.argv[1] == '--test': for filename in sys.argv[2:]: diff --git a/examples/dwarf_pubnames_types.py b/examples/dwarf_pubnames_types.py new file mode 100644 index 00000000..d9daaff4 --- /dev/null +++ b/examples/dwarf_pubnames_types.py @@ -0,0 +1,116 @@ +#------------------------------------------------------------------------------- +# elftools example: dwarf_pubnames_types.py +# +# Dump the contents of .debug_pubnames and .debug_pubtypes 
sections from the +# ELF file. +# +# Note: sample_exe64.elf doesn't have a .debug_pubtypes section. +# +# Vijay Ramasami (rvijayc@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +from __future__ import print_function +import sys + +# If pyelftools is not installed, the example can also run from the root or +# examples/ dir of the source distribution. +sys.path[0:0] = ['.', '..'] + +from elftools.elf.elffile import ELFFile +from elftools.common.py3compat import bytes2str + +def process_file(filename): + print('Processing file:', filename) + with open(filename, 'rb') as f: + elffile = ELFFile(f) + + if not elffile.has_dwarf_info(): + print(' file has no DWARF info') + return + + # get_dwarf_info returns a DWARFInfo context object, which is the + # starting point for all DWARF-based processing in pyelftools. + dwarfinfo = elffile.get_dwarf_info() + + # get .debug_pubnames section. + pubnames = dwarfinfo.get_pubnames() + if pubnames is None: + print('ERROR: No .debug_pubnames section found in ELF.') + else: + print('%d entries found in .debug_pubnames' % len(pubnames)) + + print('Trying pubnames example ...') + for name, entry in pubnames.items(): + print('%s: cu_ofs = %d, die_ofs = %d' % + (name, entry.cu_ofs, entry.die_ofs)) + + # get the actual CU/DIE that has this information. + print('Fetching the actual die for %s ...' % name) + for cu in dwarfinfo.iter_CUs(): + if cu.cu_offset == entry.cu_ofs: + for die in cu.iter_DIEs(): + if die.offset == entry.die_ofs: + print('Die Name: %s' % + bytes2str(die.attributes['DW_AT_name'].value)) + + # dump all entries in .debug_pubnames section. + print('Dumping .debug_pubnames table ...') + print('-' * 66) + print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS')) + print('-' * 66) + for (name, entry) in pubnames.items(): + print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs)) + print('-' * 66) + + # get .debug_pubtypes section.
+ pubtypes = dwarfinfo.get_pubtypes() + if pubtypes is None: + print('ERROR: No .debug_pubtypes section found in ELF') + else: + print('%d entries found in .debug_pubtypes' % len(pubtypes)) + + for name, entry in pubtypes.items(): + print('%s: cu_ofs = %d, die_ofs = %d' % + (name, entry.cu_ofs, entry.die_ofs)) + + # get the actual CU/DIE that has this information. + print('Fetching the actual die for %s ...' % name) + for cu in dwarfinfo.iter_CUs(): + if cu.cu_offset == entry.cu_ofs: + for die in cu.iter_DIEs(): + if die.offset == entry.die_ofs: + print('Die Name: %s' % + bytes2str(die.attributes['DW_AT_name'].value)) + die_info_rec(die) + + # dump all entries in .debug_pubtypes section. + print('Dumping .debug_pubtypes table ...') + print('-' * 66) + print('%50s%8s%8s' % ('Symbol', 'CU_OFS', 'DIE_OFS')) + print('-' * 66) + for (name, entry) in pubtypes.items(): + print('%50s%8d%8d' % (name, entry.cu_ofs, entry.die_ofs)) + print('-' * 66) + + +def die_info_rec(die, indent_level=' '): + """ A recursive function for showing information about a DIE and its + children. 
+ """ + print(indent_level + 'DIE tag=%s, attrs=' % die.tag) + for name, val in die.attributes.items(): + print(indent_level + ' %s = %s' % (name, val)) + child_indent = indent_level + ' ' + for child in die.iter_children(): + die_info_rec(child, child_indent) + + +if __name__ == '__main__': + if sys.argv[1] == '--test': + process_file(sys.argv[2]) + sys.exit(0) + + if len(sys.argv) < 2: + print('Expected usage: {0} '.format(sys.argv[0])) + sys.exit(1) + process_file(sys.argv[1]) diff --git a/examples/elf_notes.py b/examples/elf_notes.py index 9e349182..1be56e06 100644 --- a/examples/elf_notes.py +++ b/examples/elf_notes.py @@ -16,6 +16,7 @@ from elftools.elf.elffile import ELFFile from elftools.elf.sections import NoteSection +from elftools.common.py3compat import bytes2hex def process_file(filename): @@ -36,10 +37,10 @@ def process_file(filename): desc['abi_major'], desc['abi_minor'], desc['abi_tiny'])) - elif note['n_type'] == 'NT_GNU_BUILD_ID': + elif note['n_type'] in {'NT_GNU_BUILD_ID', 'NT_GNU_GOLD_VERSION'}: print(' Desc:', desc) else: - print(' Desc:', ''.join('%.2x' % ord(b) for b in desc)) + print(' Desc:', bytes2hex(desc)) if __name__ == '__main__': diff --git a/examples/reference_output/dwarf_lineprogram_filenames.out b/examples/reference_output/dwarf_lineprogram_filenames.out new file mode 100644 index 00000000..b20bbdda --- /dev/null +++ b/examples/reference_output/dwarf_lineprogram_filenames.out @@ -0,0 +1,8 @@ +Processing file: ./examples/sample_exe64.elf + Found a compile unit at offset 0, length 115 + filename=../sysdeps/x86_64/elf/start.S -> 13 entries + Found a compile unit at offset 119, length 135 + Found a compile unit at offset 258, length 156 + filename=z.c -> 5 entries + Found a compile unit at offset 418, length 300 + filename=elf-init.c -> 15 entries diff --git a/examples/reference_output/dwarf_location_info.out b/examples/reference_output/dwarf_location_info.out new file mode 100644 index 00000000..01c8933b --- /dev/null +++ 
b/examples/reference_output/dwarf_location_info.out @@ -0,0 +1,33 @@ +Processing file: ./examples/sample_exe64.elf + Found a compile unit at offset 0, length 115 + Found a compile unit at offset 119, length 135 + DIE DW_TAG_variable. attr DW_AT_location. + (DW_OP_addr: 400608) + Found a compile unit at offset 258, length 156 + DIE DW_TAG_subprogram. attr DW_AT_frame_base. + LocationEntry(entry_offset=0, begin_offset=0, end_offset=1, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> + LocationEntry(entry_offset=20, begin_offset=1, end_offset=4, loc_expr=[119, 16]) <<(DW_OP_breg7 (rsp): 16)>> + LocationEntry(entry_offset=40, begin_offset=4, end_offset=43, loc_expr=[118, 16]) <<(DW_OP_breg6 (rbp): 16)>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. + (DW_OP_fbreg: -20) + DIE DW_TAG_formal_parameter. attr DW_AT_location. + (DW_OP_fbreg: -32) + DIE DW_TAG_variable. attr DW_AT_location. + (DW_OP_addr: 601018) + Found a compile unit at offset 418, length 300 + DIE DW_TAG_subprogram. attr DW_AT_frame_base. + (DW_OP_breg7 (rsp): 8) + DIE DW_TAG_subprogram. attr DW_AT_frame_base. + LocationEntry(entry_offset=76, begin_offset=16, end_offset=64, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> + LocationEntry(entry_offset=96, begin_offset=64, end_offset=153, loc_expr=[119, 192, 0]) <<(DW_OP_breg7 (rsp): 64)>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. + LocationEntry(entry_offset=133, begin_offset=16, end_offset=85, loc_expr=[85]) <<(DW_OP_reg5 (rdi))>> + LocationEntry(entry_offset=152, begin_offset=85, end_offset=143, loc_expr=[94]) <<(DW_OP_reg14 (r14))>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. + LocationEntry(entry_offset=187, begin_offset=16, end_offset=85, loc_expr=[84]) <<(DW_OP_reg4 (rsi))>> + LocationEntry(entry_offset=206, begin_offset=85, end_offset=138, loc_expr=[93]) <<(DW_OP_reg13 (r13))>> + DIE DW_TAG_formal_parameter. attr DW_AT_location. 
+ LocationEntry(entry_offset=241, begin_offset=16, end_offset=85, loc_expr=[81]) <<(DW_OP_reg1 (rdx))>> + LocationEntry(entry_offset=260, begin_offset=85, end_offset=133, loc_expr=[92]) <<(DW_OP_reg12 (r12))>> + DIE DW_TAG_variable. attr DW_AT_location. + LocationEntry(entry_offset=295, begin_offset=92, end_offset=123, loc_expr=[83]) <<(DW_OP_reg3 (rbx))>> diff --git a/examples/reference_output/dwarf_location_lists.out b/examples/reference_output/dwarf_location_lists.out deleted file mode 100644 index 8788755c..00000000 --- a/examples/reference_output/dwarf_location_lists.out +++ /dev/null @@ -1,23 +0,0 @@ -Processing file: ./examples/sample_exe64.elf - Found a compile unit at offset 0, length 115 - Found a compile unit at offset 119, length 135 - Found a compile unit at offset 258, length 156 - DIE DW_TAG_subprogram. attr DW_AT_frame_base. - LocationEntry(begin_offset=0, end_offset=1, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> - LocationEntry(begin_offset=1, end_offset=4, loc_expr=[119, 16]) <<(DW_OP_breg7 (rsp): 16)>> - LocationEntry(begin_offset=4, end_offset=43, loc_expr=[118, 16]) <<(DW_OP_breg6 (rbp): 16)>> - Found a compile unit at offset 418, length 300 - DIE DW_TAG_subprogram. attr DW_AT_frame_base. - LocationEntry(begin_offset=16, end_offset=64, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>> - LocationEntry(begin_offset=64, end_offset=153, loc_expr=[119, 192, 0]) <<(DW_OP_breg7 (rsp): 64)>> - DIE DW_TAG_formal_parameter. attr DW_AT_location. - LocationEntry(begin_offset=16, end_offset=85, loc_expr=[85]) <<(DW_OP_reg5 (rdi))>> - LocationEntry(begin_offset=85, end_offset=143, loc_expr=[94]) <<(DW_OP_reg14 (r14))>> - DIE DW_TAG_formal_parameter. attr DW_AT_location. - LocationEntry(begin_offset=16, end_offset=85, loc_expr=[84]) <<(DW_OP_reg4 (rsi))>> - LocationEntry(begin_offset=85, end_offset=138, loc_expr=[93]) <<(DW_OP_reg13 (r13))>> - DIE DW_TAG_formal_parameter. attr DW_AT_location. 
- LocationEntry(begin_offset=16, end_offset=85, loc_expr=[81]) <<(DW_OP_reg1 (rdx))>> - LocationEntry(begin_offset=85, end_offset=133, loc_expr=[92]) <<(DW_OP_reg12 (r12))>> - DIE DW_TAG_variable. attr DW_AT_location. - LocationEntry(begin_offset=92, end_offset=123, loc_expr=[83]) <<(DW_OP_reg3 (rbx))>> diff --git a/examples/reference_output/dwarf_pubnames_types.out b/examples/reference_output/dwarf_pubnames_types.out new file mode 100644 index 00000000..b8f4040d --- /dev/null +++ b/examples/reference_output/dwarf_pubnames_types.out @@ -0,0 +1,29 @@ +Processing file: ./examples/sample_exe64.elf +5 entries found in .debug_pubnames +Trying pubnames example ... +_IO_stdin_used: cu_ofs = 119, die_ofs = 230 +Fetching the actual die for _IO_stdin_used ... +Die Name: _IO_stdin_used +main: cu_ofs = 258, die_ofs = 303 +Fetching the actual die for main ... +Die Name: main +glob: cu_ofs = 258, die_ofs = 395 +Fetching the actual die for glob ... +Die Name: glob +__libc_csu_fini: cu_ofs = 418, die_ofs = 495 +Fetching the actual die for __libc_csu_fini ... +Die Name: __libc_csu_fini +__libc_csu_init: cu_ofs = 418, die_ofs = 523 +Fetching the actual die for __libc_csu_init ... +Die Name: __libc_csu_init +Dumping .debug_pubnames table ... +------------------------------------------------------------------ + Symbol CU_OFS DIE_OFS +------------------------------------------------------------------ + _IO_stdin_used 119 230 + main 258 303 + glob 258 395 + __libc_csu_fini 418 495 + __libc_csu_init 418 523 +------------------------------------------------------------------ +ERROR: No .debug_pubtypes section found in ELF diff --git a/scripts/readelf.py b/scripts/readelf.py index d65c3858..1e3213b6 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -10,6 +10,14 @@ import argparse import os, sys import string +import traceback +import itertools +# Note: zip has different behaviour between Python 2.x and 3.x. +# - Using izip ensures compatibility. 
+try: + from itertools import izip +except: + izip = zip # For running from development directory. It should take precedence over the # installed pyelftools. @@ -24,7 +32,9 @@ from elftools.elf.dynamic import DynamicSection, DynamicSegment from elftools.elf.enums import ENUM_D_TAG from elftools.elf.segments import InterpSegment -from elftools.elf.sections import NoteSection, SymbolTableSection +from elftools.elf.sections import ( + NoteSection, SymbolTableSection, SymbolTableIndexSection +) from elftools.elf.gnuversions import ( GNUVerSymSection, GNUVerDefSection, GNUVerNeedSection, @@ -34,22 +44,27 @@ describe_ei_class, describe_ei_data, describe_ei_version, describe_ei_osabi, describe_e_type, describe_e_machine, describe_e_version_numeric, describe_p_type, describe_p_flags, - describe_sh_type, describe_sh_flags, + describe_rh_flags, describe_sh_type, describe_sh_flags, describe_symbol_type, describe_symbol_bind, describe_symbol_visibility, describe_symbol_shndx, describe_reloc_type, describe_dyn_tag, - describe_ver_flags, describe_note, describe_attr_tag_arm + describe_dt_flags, describe_dt_flags_1, describe_ver_flags, describe_note, + describe_attr_tag_arm, describe_symbol_other ) from elftools.elf.constants import E_FLAGS from elftools.elf.constants import E_FLAGS_MASKS +from elftools.elf.constants import SH_FLAGS +from elftools.elf.constants import SHN_INDICES from elftools.dwarf.dwarfinfo import DWARFInfo from elftools.dwarf.descriptions import ( describe_reg_name, describe_attr_value, set_global_machine_arch, describe_CFI_instructions, describe_CFI_register_rule, - describe_CFI_CFA_rule, + describe_CFI_CFA_rule, describe_DWARF_expr ) from elftools.dwarf.constants import ( DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file) +from elftools.dwarf.locationlists import LocationParser, LocationEntry from elftools.dwarf.callframe import CIE, FDE, ZERO +from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry class 
ReadElf(object): @@ -70,6 +85,8 @@ def __init__(self, file, output): self._versioninfo = None + self._shndx_sections = None + def display_file_header(self): """ Display the ELF file header """ @@ -91,7 +108,7 @@ def display_file_header(self): self._emitline(' ABI Version: %d' % e_ident['EI_ABIVERSION']) self._emitline(' Type: %s' % - describe_e_type(header['e_type'])) + describe_e_type(header['e_type'], self.elffile)) self._emitline(' Machine: %s' % describe_e_machine(header['e_machine'])) self._emitline(' Version: %s' % @@ -115,10 +132,18 @@ def display_file_header(self): header['e_phnum']) self._emitline(' Size of section headers: %s (bytes)' % header['e_shentsize']) - self._emitline(' Number of section headers: %s' % + self._emit(' Number of section headers: %s' % header['e_shnum']) - self._emitline(' Section header string table index: %s' % + if header['e_shnum'] == 0 and self.elffile.num_sections() != 0: + self._emitline(' (%d)' % self.elffile.num_sections()) + else: + self._emitline('') + self._emit(' Section header string table index: %s' % header['e_shstrndx']) + if header['e_shstrndx'] == SHN_INDICES.SHN_XINDEX: + self._emitline(' (%d)' % self.elffile.get_shstrndx()) + else: + self._emitline('') def decode_flags(self, flags): description = "" @@ -131,12 +156,27 @@ def decode_flags(self, flags): flags &= ~E_FLAGS.EF_ARM_RELEXEC if eabi == E_FLAGS.EF_ARM_EABI_VER5: + EF_ARM_KNOWN_FLAGS = E_FLAGS.EF_ARM_ABI_FLOAT_SOFT|E_FLAGS.EF_ARM_ABI_FLOAT_HARD|E_FLAGS.EF_ARM_LE8|E_FLAGS.EF_ARM_BE8 description += ', Version5 EABI' - if flags: + if flags & E_FLAGS.EF_ARM_ABI_FLOAT_SOFT: + description += ", soft-float ABI" + elif flags & E_FLAGS.EF_ARM_ABI_FLOAT_HARD: + description += ", hard-float ABI" + + if flags & E_FLAGS.EF_ARM_BE8: + description += ", BE8" + elif flags & E_FLAGS.EF_ARM_LE8: + description += ", LE8" + + if flags & ~EF_ARM_KNOWN_FLAGS: description += ', ' else: description += ', ' + elif self.elffile['e_machine'] == 'EM_PPC64': + if flags & 
E_FLAGS.EF_PPC64_ABI_V2: + description += ', abiv2' + elif self.elffile['e_machine'] == "EM_MIPS": if flags & E_FLAGS.EF_MIPS_NOREORDER: description += ", noreorder" @@ -190,13 +230,13 @@ def display_program_headers(self, show_heading=True): elfheader = self.elffile.header if show_heading: self._emitline('Elf file type is %s' % - describe_e_type(elfheader['e_type'])) + describe_e_type(elfheader['e_type'], self.elffile)) self._emitline('Entry point is %s' % self._format_hex(elfheader['e_entry'])) # readelf weirness - why isn't e_phoff printed as hex? (for section # headers, it is...) self._emitline('There are %s program headers, starting at offset %s' % ( - elfheader['e_phnum'], elfheader['e_phoff'])) + self.elffile.num_segments(), elfheader['e_phoff'])) self._emitline() self._emitline('Program Headers:') @@ -258,6 +298,9 @@ def display_program_headers(self, show_heading=True): for section in self.elffile.iter_sections(): if ( not section.is_null() and + not ((section['sh_flags'] & SH_FLAGS.SHF_TLS) != 0 and + section['sh_type'] == 'SHT_NOBITS' and + segment['p_type'] != 'PT_TLS') and segment.section_in_segment(section)): self._emit('%s ' % section.name) @@ -276,7 +319,7 @@ def display_section_headers(self, show_heading=True): return self._emitline('\nSection Header%s:' % ( - 's' if elfheader['e_shnum'] > 1 else '')) + 's' if self.elffile.num_sections() > 1 else '')) # Different formatting constraints of 32-bit and 64-bit addresses # @@ -331,7 +374,7 @@ def display_symbol_tables(self): """ self._init_versioninfo() - symbol_tables = [s for s in self.elffile.iter_sections() + symbol_tables = [(idx, s) for idx, s in enumerate(self.elffile.iter_sections()) if isinstance(s, SymbolTableSection)] if not symbol_tables and self.elffile.num_sections() == 0: @@ -339,7 +382,7 @@ def display_symbol_tables(self): self._emitline('Dynamic symbol information is not available for' ' displaying symbols.') - for section in symbol_tables: + for section_index, section in symbol_tables: 
if not isinstance(section, SymbolTableSection): continue @@ -348,8 +391,10 @@ def display_symbol_tables(self): section.name)) continue - self._emitline("\nSymbol table '%s' contains %s entries:" % ( - section.name, section.num_symbols())) + self._emitline("\nSymbol table '%s' contains %d %s:" % ( + section.name, + section.num_symbols(), + 'entry' if section.num_symbols() == 1 else 'entries')) if self.elffile.elfclass == 32: self._emitline(' Num: Value Size Type Bind Vis Ndx Name') @@ -375,17 +420,26 @@ def display_symbol_tables(self): else: version_info = '@@%(name)s' % version + symbol_name = symbol.name + # Print section names for STT_SECTION symbols as readelf does + if (symbol['st_info']['type'] == 'STT_SECTION' + and symbol['st_shndx'] < self.elffile.num_sections() + and symbol['st_name'] == 0): + symbol_name = self.elffile.get_section(symbol['st_shndx']).name + # symbol names are truncated to 25 chars, similarly to readelf - self._emitline('%6d: %s %5d %-7s %-6s %-7s %4s %.25s%s' % ( + self._emitline('%6d: %s %s %-7s %-6s %-7s %4s %.25s%s' % ( nsym, self._format_hex( symbol['st_value'], fullhex=True, lead0x=False), - symbol['st_size'], + "%5d" % symbol['st_size'] if symbol['st_size'] < 100000 else hex(symbol['st_size']), describe_symbol_type(symbol['st_info']['type']), describe_symbol_bind(symbol['st_info']['bind']), - describe_symbol_visibility(symbol['st_other']['visibility']), - describe_symbol_shndx(symbol['st_shndx']), - symbol.name, + describe_symbol_other(symbol['st_other']), + describe_symbol_shndx(self._get_symbol_shndx(symbol, + nsym, + section_index)), + symbol_name, version_info)) def display_dynamic_tags(self): @@ -397,9 +451,10 @@ def display_dynamic_tags(self): continue has_dynamic_sections = True - self._emitline("\nDynamic section at offset %s contains %s entries:" % ( + self._emitline("\nDynamic section at offset %s contains %d %s:" % ( self._format_hex(section['sh_offset']), - section.num_tags())) + section.num_tags(), + 'entry' if 
section.num_tags() == 1 else 'entries')) self._emitline(" Tag Type Name/Value") padding = 20 + (8 if self.elffile.elfclass == 32 else 0) @@ -414,6 +469,10 @@ def display_dynamic_tags(self): parsed = 'Library soname: [%s]' % tag.soname elif tag.entry.d_tag.endswith(('SZ', 'ENT')): parsed = '%i (bytes)' % tag['d_val'] + elif tag.entry.d_tag == 'DT_FLAGS': + parsed = describe_dt_flags(tag.entry.d_val) + elif tag.entry.d_tag == 'DT_FLAGS_1': + parsed = 'Flags: %s' % describe_dt_flags_1(tag.entry.d_val) elif tag.entry.d_tag.endswith(('NUM', 'COUNT')): parsed = '%i' % tag['d_val'] elif tag.entry.d_tag == 'DT_PLTREL': @@ -421,6 +480,11 @@ def display_dynamic_tags(self): if s.startswith('DT_'): s = s[3:] parsed = '%s' % s + elif tag.entry.d_tag == 'DT_MIPS_FLAGS': + parsed = describe_rh_flags(tag.entry.d_val) + elif tag.entry.d_tag in ('DT_MIPS_SYMTABNO', + 'DT_MIPS_LOCAL_GOTNO'): + parsed = str(tag.entry.d_val) else: parsed = '%#x' % tag['d_val'] @@ -441,7 +505,7 @@ def display_notes(self): for note in section.iter_notes(): self._emitline("\nDisplaying notes found in: {}".format( section.name)) - self._emitline(' Owner Data size Description') + self._emitline(' Owner Data size Description') self._emitline(' %s %s\t%s' % ( note['n_name'].ljust(20), self._format_hex(note['n_descsz'], fieldsize=8), @@ -456,10 +520,11 @@ def display_relocations(self): continue has_relocation_sections = True - self._emitline("\nRelocation section '%s' at offset %s contains %s entries:" % ( + self._emitline("\nRelocation section '%.128s' at offset %s contains %d %s:" % ( section.name, self._format_hex(section['sh_offset']), - section.num_relocations())) + section.num_relocations(), + 'entry' if section.num_relocations() == 1 else 'entries')) if section.is_RELA(): self._emitline(" Offset Info Type Sym. Value Sym. 
Name + Addend") else: @@ -479,40 +544,94 @@ def display_relocations(self): rel['r_info_type'], self.elffile))) if rel['r_info_sym'] == 0: + if section.is_RELA(): + fieldsize = 8 if self.elffile.elfclass == 32 else 16 + addend = self._format_hex(rel['r_addend'], lead0x=False) + self._emit(' %s %s' % (' ' * fieldsize, addend)) self._emitline() - continue - symbol = symtable.get_symbol(rel['r_info_sym']) - # Some symbols have zero 'st_name', so instead what's used is - # the name of the section they point at. Truncate symbol names - # (excluding version info) to 22 chars, similarly to readelf. - if symbol['st_name'] == 0: - symsec = self.elffile.get_section(symbol['st_shndx']) - symbol_name = symsec.name - version = '' else: - symbol_name = symbol.name - version = self._symbol_version(rel['r_info_sym']) - version = (version['name'] - if version and version['name'] else '') - symbol_name = '%.22s' % symbol_name - if version: - symbol_name += '@' + version - - self._emit(' %s %s' % ( - self._format_hex( - symbol['st_value'], - fullhex=True, lead0x=False), - symbol_name)) - if section.is_RELA(): - self._emit(' %s %x' % ( - '+' if rel['r_addend'] >= 0 else '-', - abs(rel['r_addend']))) - self._emitline() + symbol = symtable.get_symbol(rel['r_info_sym']) + # Some symbols have zero 'st_name', so instead what's used + # is the name of the section they point at. Truncate symbol + # names (excluding version info) to 22 chars, similarly to + # readelf. 
+ if symbol['st_name'] == 0: + symsecidx = self._get_symbol_shndx(symbol, + rel['r_info_sym'], + section['sh_link']) + symsec = self.elffile.get_section(symsecidx) + symbol_name = symsec.name + version = '' + else: + symbol_name = symbol.name + version = self._symbol_version(rel['r_info_sym']) + version = (version['name'] + if version and version['name'] else '') + symbol_name = '%.22s' % symbol_name + if version: + symbol_name += '@' + version + + self._emit(' %s %s' % ( + self._format_hex( + symbol['st_value'], + fullhex=True, lead0x=False), + symbol_name)) + if section.is_RELA(): + self._emit(' %s %x' % ( + '+' if rel['r_addend'] >= 0 else '-', + abs(rel['r_addend']))) + self._emitline() + + # Emit the two additional relocation types for ELF64 MIPS + # binaries. + if (self.elffile.elfclass == 64 and + self.elffile['e_machine'] == 'EM_MIPS'): + for i in (2, 3): + rtype = rel['r_info_type%s' % i] + self._emit(' Type%s: %s' % ( + i, + describe_reloc_type(rtype, self.elffile))) + self._emitline() if not has_relocation_sections: self._emitline('\nThere are no relocations in this file.') + def display_arm_unwind(self): + if not self.elffile.has_ehabi_info(): + self._emitline('There are no .ARM.idx sections in this file.') + return + for ehabi_info in self.elffile.get_ehabi_infos(): + # Unwind section '.ARM.exidx' at offset 0x203e8 contains 1009 entries: + self._emitline("\nUnwind section '%s' at offset 0x%x contains %d %s" % ( + ehabi_info.section_name(), + ehabi_info.section_offset(), + ehabi_info.num_entry(), + 'entry' if ehabi_info.num_entry() == 1 else 'entries')) + + for i in range(ehabi_info.num_entry()): + entry = ehabi_info.get_entry(i) + self._emitline() + self._emitline("Entry %d:" % i) + if isinstance(entry, CorruptEHABIEntry): + self._emitline(" [corrupt] %s" % entry.reason) + continue + self._emit(" Function offset 0x%x: " % entry.function_offset) + if isinstance(entry, CannotUnwindEHABIEntry): + self._emitline("[cantunwind]") + continue + elif 
entry.eh_table_offset: + self._emitline("@0x%x" % entry.eh_table_offset) + else: + self._emitline("Compact (inline)") + if isinstance(entry, GenericEHABIEntry): + self._emitline(" Personality: 0x%x" % entry.personality) + else: + self._emitline(" Compact model index: %d" % entry.personality) + for mnemonic_item in entry.mnmemonic_array(): + self._emit(' ') + self._emitline(mnemonic_item) + def display_version_info(self): """ Display the version info contained in the file """ @@ -524,9 +643,7 @@ def display_version_info(self): for section in self.elffile.iter_sections(): if isinstance(section, GNUVerSymSection): - self._print_version_section_header( - section, 'Version symbols', lead0x=False) - + self._print_version_section_header(section, 'Version symbols') num_symbols = section.num_symbols() # Symbol version info are printed four by four entries @@ -737,6 +854,10 @@ def display_debug_dump(self, dump_what): self._dump_debug_frames_interp() elif dump_what == 'aranges': self._dump_debug_aranges() + elif dump_what in { 'pubtypes', 'pubnames' }: + self._dump_debug_namelut(dump_what) + elif dump_what == 'loc': + self._dump_debug_locations() else: self._emitline('debug dump not yet supported for "%s"' % dump_what) @@ -791,8 +912,9 @@ def _print_version_section_header(self, version_section, name, lead0x=True, else: num_entries = version_section.num_symbols() - self._emitline("\n%s section '%s' contains %s entries:" % - (name, version_section.name, num_entries)) + self._emitline("\n%s section '%s' contains %d %s:" % ( + name, version_section.name, num_entries, + 'entry' if num_entries == 1 else 'entries')) self._emitline('%sAddr: %s Offset: %s Link: %i (%s)' % ( ' ' * indent, self._format_hex( @@ -850,7 +972,7 @@ def _symbol_version(self, nsym): if self._versioninfo['type'] == 'GNU': # In GNU versioning mode, the highest bit is used to - # store wether the symbol is hidden or not + # store whether the symbol is hidden or not if index & 0x8000: index &= ~0x8000 
symbol_version['hidden'] = True @@ -883,6 +1005,22 @@ def _section_from_spec(self, spec): # Not a number. Must be a name then return self.elffile.get_section_by_name(spec) + def _get_symbol_shndx(self, symbol, symbol_index, symtab_index): + """ Get the index into the section header table for the "symbol" + at "symbol_index" located in the symbol table with section index + "symtab_index". + """ + symbol_shndx = symbol['st_shndx'] + if symbol_shndx != SHN_INDICES.SHN_XINDEX: + return symbol_shndx + + # Check for or lazily construct index section mapping (symbol table + # index -> corresponding symbol table index section object) + if self._shndx_sections is None: + self._shndx_sections = {sec.symboltable: sec for sec in self.elffile.iter_sections() + if isinstance(sec, SymbolTableIndexSection)} + return self._shndx_sections[symtab_index].get_section_index(symbol_index) + def _note_relocs_for_section(self, section): """ If there are relocation sections pointing to the givne section, emit a note about it. 
@@ -935,7 +1073,10 @@ def _dump_debug_info(self): # correctly reflect the nesting depth # die_depth = 0 + current_function = None for die in cu.iter_DIEs(): + if die.tag == 'DW_TAG_subprogram': + current_function = die self._emitline(' <%s><%x>: Abbrev Number: %s%s' % ( die_depth, die.offset, @@ -950,11 +1091,19 @@ def _dump_debug_info(self): # Unknown attribute values are passed-through as integers if isinstance(name, int): name = 'Unknown AT value: %x' % name - self._emitline(' <%x> %-18s: %s' % ( + + attr_desc = describe_attr_value(attr, die, section_offset) + + if 'DW_OP_fbreg' in attr_desc and current_function and not 'DW_AT_frame_base' in current_function.attributes: + postfix = ' [without dw_at_frame_base]' + else: + postfix = '' + + self._emitline(' <%x> %-18s: %s%s' % ( attr.offset, name, - describe_attr_value( - attr, die, section_offset))) + attr_desc, + postfix)) if die.has_children: die_depth += 1 @@ -967,7 +1116,8 @@ def _dump_debug_line_programs(self): """ if not self._dwarfinfo.has_debug_info: return - self._emitline('Decoded dump of debug contents of section %s:\n' % self._dwarfinfo.debug_line_sec.name) + self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_line_sec.name) + self._emitline() for cu in self._dwarfinfo.iter_CUs(): lineprogram = self._dwarfinfo.line_program_for_CU(cu) @@ -982,7 +1132,7 @@ def _dump_debug_line_programs(self): cu_filename = '%s/%s' % (bytes2str(dir), cu_filename) self._emitline('CU: %s:' % cu_filename) - self._emitline('File name Line number Starting address') + self._emitline('File name Line number Starting address Stmt') # Print each state's file, line and address information. 
For some # instructions other output is needed to be compatible with @@ -1004,23 +1154,19 @@ def _dump_debug_line_programs(self): elif entry.command == DW_LNE_define_file: self._emitline('%s:' % ( bytes2str(lineprogram['include_directory'][entry.args[0].dir_index]))) - elif not state.end_sequence: - # readelf doesn't print the state after end_sequence - # instructions. I think it's a bug but to be compatible - # I don't print them too. - if lineprogram['version'] < 4: - self._emitline('%-35s %11d %18s' % ( - bytes2str(lineprogram['file_entry'][state.file - 1].name), - state.line, - '0' if state.address == 0 else - self._format_hex(state.address))) - else: - self._emitline('%-35s %11d %18s[%d]' % ( - bytes2str(lineprogram['file_entry'][state.file - 1].name), - state.line, - '0' if state.address == 0 else - self._format_hex(state.address), - state.op_index)) + elif lineprogram['version'] < 4 or self.elffile['e_machine'] == 'EM_PPC64': + self._emitline('%-35s %11s %18s %s' % ( + bytes2str(lineprogram['file_entry'][state.file - 1].name), + state.line if not state.end_sequence else '-', + '0' if state.address == 0 else self._format_hex(state.address), + 'x' if state.is_stmt and not state.end_sequence else '')) + else: + self._emitline('%-35s %11d %18s[%d] %s' % ( + bytes2str(lineprogram['file_entry'][state.file - 1].name), + state.line if not state.end_sequence else '-', + '0' if state.address == 0 else self._format_hex(state.address), + state.op_index, + 'x' if state.is_stmt and not state.end_sequence else '')) if entry.command == DW_LNS_copy: # Another readelf oddity... self._emitline() @@ -1090,6 +1236,40 @@ def _dump_debug_frames(self): self._dwarfinfo.debug_frame_sec, self._dwarfinfo.CFI_entries()) + def _dump_debug_namelut(self, what): + """ + Dump the debug pubnames section. 
+ """ + if what == 'pubnames': + namelut = self._dwarfinfo.get_pubnames() + section = self._dwarfinfo.debug_pubnames_sec + else: + namelut = self._dwarfinfo.get_pubtypes() + section = self._dwarfinfo.debug_pubtypes_sec + + # readelf prints nothing if the section is not present. + if namelut is None or len(namelut) == 0: + return + + self._emitline('Contents of the %s section:' % section.name) + self._emitline() + + cu_headers = namelut.get_cu_headers() + + # go over CU-by-CU first and item-by-item next. + for (cu_hdr, (cu_ofs, items)) in izip(cu_headers, itertools.groupby( + namelut.items(), key = lambda x: x[1].cu_ofs)): + + self._emitline(' Length: %d' % cu_hdr.unit_length) + self._emitline(' Version: %d' % cu_hdr.version) + self._emitline(' Offset into .debug_info section: 0x%x' % cu_hdr.debug_info_offset) + self._emitline(' Size of area in .debug_info section: %d' % cu_hdr.debug_info_length) + self._emitline() + self._emitline(' Offset Name') + for item in items: + self._emitline(' %x %s' % (item[1].die_ofs - cu_ofs, item[0])) + self._emitline() + def _dump_debug_aranges(self): """ Dump the aranges table """ @@ -1236,6 +1416,70 @@ def _dump_debug_frames_interp(self): self._dwarfinfo.debug_frame_sec, self._dwarfinfo.CFI_entries()) + def _dump_debug_locations(self): + """ Dump the location lists from .debug_location section + """ + def _get_cu_base(cu): + top_die = cu.get_top_DIE() + attr = top_die.attributes + if 'DW_AT_low_pc' in attr: + return attr['DW_AT_low_pc'].value + elif 'DW_AT_entry_pc' in attr: + return attr['DW_AT_entry_pc'].value + else: + raise ValueError("Can't find the base IP (low_pc) for a CU") + + di = self._dwarfinfo + loc_lists = di.location_lists() + if not loc_lists: # No locations section - readelf outputs nothing + return + + loc_lists = list(loc_lists.iter_location_lists()) + if len(loc_lists) == 0: + # Present but empty locations section - readelf outputs a message + self._emitline("\nSection '%s' has no debugging data." 
% di.debug_loc_sec.name) + return + + # To dump a location list, one needs to know the CU. + # Scroll through DIEs once, list the known location list offsets + cu_map = dict() # Loc list offset => CU + for cu in di.iter_CUs(): + for die in cu.iter_DIEs(): + for key in die.attributes: + attr = die.attributes[key] + if (LocationParser.attribute_has_location(attr, cu['version']) and + not LocationParser._attribute_has_loc_expr(attr, cu['version'])): + cu_map[attr.value] = cu + + addr_size = di.config.default_address_size # In bytes, 4 or 8 + addr_width = addr_size * 2 # In hex digits, 8 or 16 + line_template = " %%08x %%0%dx %%0%dx %%s%%s" % (addr_width, addr_width) + + self._emitline('Contents of the %s section:\n' % di.debug_loc_sec.name) + self._emitline(' Offset Begin End Expression') + for loc_list in loc_lists: + cu = cu_map.get(loc_list[0].entry_offset, False) + if not cu: + raise ValueError("Location list can't be tracked to a CU") + base_ip = _get_cu_base(cu) + for entry in loc_list: + # TODO: support BaseAddressEntry lines + expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset) + postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else '' + self._emitline(line_template % ( + entry.entry_offset, + base_ip + entry.begin_offset, + base_ip + entry.end_offset, + expr, + postfix)) + # Pyelftools doesn't store the terminating entry, + # but readelf emits its offset, so this should too. + last = loc_list[-1] + last_len = 2*addr_size + if isinstance(last, LocationEntry): + last_len += 2 + len(last.loc_expr) + self._emitline(" %08x " % (last.entry_offset + last_len)) + def _display_arch_specific_arm(self): """ Display the ARM architecture-specific info contained in the file. 
""" @@ -1307,6 +1551,9 @@ def main(stream=None): argparser.add_argument('-r', '--relocs', action='store_true', dest='show_relocs', help='Display the relocations (if present)') + argparser.add_argument('-au', '--arm-unwind', + action='store_true', dest='show_arm_unwind', + help='Display the armeabi unwind information (if present)') argparser.add_argument('-x', '--hex-dump', action='store', dest='show_hex_dump', metavar='', help='Dump the contents of section as bytes') @@ -1323,7 +1570,11 @@ def main(stream=None): action='store', dest='debug_dump_what', metavar='', help=( 'Display the contents of DWARF debug sections. can ' + - 'one of {info,decodedline,frames,frames-interp}')) + 'one of {info,decodedline,frames,frames-interp,aranges,pubtypes,pubnames,loc}')) + argparser.add_argument('--traceback', + action='store_true', dest='show_traceback', + help='Dump the Python traceback on ELFError' + ' exceptions from elftools') args = argparser.parse_args() @@ -1357,6 +1608,8 @@ def main(stream=None): readelf.display_notes() if args.show_relocs: readelf.display_relocations() + if args.show_arm_unwind: + readelf.display_arm_unwind() if args.show_version_info: readelf.display_version_info() if args.show_arch_specific: @@ -1368,7 +1621,10 @@ def main(stream=None): if args.debug_dump_what: readelf.display_debug_dump(args.debug_dump_what) except ELFError as ex: + sys.stdout.flush() sys.stderr.write('ELF error: %s\n' % ex) + if args.show_traceback: + traceback.print_exc() sys.exit(1) diff --git a/setup.py b/setup.py index 4fc0fa65..90cfc226 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ description='Library for analyzing ELF files and DWARF debugging information', long_description=description, license='Public domain', - version='0.24+s2e', + version='0.28+s2e', author='Eli Bendersky', maintainer='Eli Bendersky', author_email='eliben@gmail.com', @@ -41,6 +41,7 @@ 'elftools.elf', 'elftools.common', 'elftools.dwarf', + 'elftools.ehabi', 'elftools.construct', 
'elftools.construct.lib', ], diff --git a/test/all_tests.py b/test/all_tests.py index 4cb8e3ca..289a0fd6 100755 --- a/test/all_tests.py +++ b/test/all_tests.py @@ -11,8 +11,8 @@ import subprocess, sys from utils import is_in_rootdir -def run_test_script(path): - cmd = [sys.executable, path] +def run_test_script(path, *args): + cmd = [sys.executable, path] + list(args) print("Running '%s'" % ' '.join(cmd)) subprocess.check_call(cmd) @@ -22,7 +22,7 @@ def main(): return 1 run_test_script('test/run_all_unittests.py') run_test_script('test/run_examples_test.py') - run_test_script('test/run_readelf_tests.py') + run_test_script('test/run_readelf_tests.py', '--parallel') if __name__ == '__main__': sys.exit(main()) diff --git a/test/external_tools/README.txt b/test/external_tools/README.txt index 3380ccee..28f031fb 100644 --- a/test/external_tools/README.txt +++ b/test/external_tools/README.txt @@ -1,4 +1,8 @@ Some utilities that use libelf to create synthetic ELF files -Also, readelf picked up from a built binutils. Run it with --version to version -details. The binary is built on a 64-bit Ubuntu machine. +readelf is built as follows: + +* From binutils Git: https://sourceware.org/git/binutils-gdb.git +* Branch binutils-2_37-branch (commit cd7ce9337f2eda1ef57abbb28e207d2458e6fcda) +* configure, then make from the binutils/ directory +* Built on a 64-bit Ubuntu machine diff --git a/test/external_tools/readelf b/test/external_tools/readelf index c4679726..95494d52 100755 Binary files a/test/external_tools/readelf and b/test/external_tools/readelf differ diff --git a/test/run_all_unittests.py b/test/run_all_unittests.py index 0e00a4d5..5f226dd2 100755 --- a/test/run_all_unittests.py +++ b/test/run_all_unittests.py @@ -13,7 +13,7 @@ import unittest # Make it possible to run this file from the root dir of pyelftools without -# installing pyelftools; useful for Travis testing, etc. +# installing pyelftools; useful for CI testing, etc. 
sys.path[0:0] = ['.'] diff --git a/test/run_examples_test.py b/test/run_examples_test.py index 349f13b7..c5268f34 100755 --- a/test/run_examples_test.py +++ b/test/run_examples_test.py @@ -12,7 +12,7 @@ from utils import run_exe, is_in_rootdir, dump_output_to_temp_files # Make it possible to run this file from the root dir of pyelftools without -# installing pyelftools; useful for Travis testing, etc. +# installing pyelftools; useful for CI testing, etc. sys.path[0:0] = ['.'] # Create a global logger object @@ -63,7 +63,7 @@ def run_example_and_compare(example_path): return True else: testlog.info('.......FAIL comparison') - dump_output_to_temp_files(testlog, example_out) + dump_output_to_temp_files(testlog, example_out, ref_str) return False diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index 7f4631b6..59a039c5 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -20,7 +20,7 @@ from utils import run_exe, is_in_rootdir, dump_output_to_temp_files # Make it possible to run this file from the root dir of pyelftools without -# installing pyelftools; useful for Travis testing, etc. +# installing pyelftools; useful for CI testing, etc. sys.path[0:0] = ['.'] # Create a global logger object @@ -48,17 +48,27 @@ def discover_testfiles(rootdir): yield os.path.join(rootdir, filename) -def run_test_on_file(filename, verbose=False): +def run_test_on_file(filename, verbose=False, opt=None): """ Runs a test on the given input filename. Return True if all test runs succeeded. + If opt is specified, rather that going over the whole + set of supported readelf options, the test will only + run for one option. 
""" success = True testlog.info("Test file '%s'" % filename) - for option in [ + if opt is None: + options = [ '-e', '-d', '-s', '-n', '-r', '-x.text', '-p.shstrtab', '-V', '--debug-dump=info', '--debug-dump=decodedline', '--debug-dump=frames', '--debug-dump=frames-interp', - '--debug-dump=aranges']: + '--debug-dump=aranges', '--debug-dump=pubtypes', + '--debug-dump=pubnames', '--debug-dump=loc' + ] + else: + options = [opt] + + for option in options: if verbose: testlog.info("..option='%s'" % option) # TODO(zlobober): this is a dirty hack to make tests work for ELF core @@ -81,7 +91,7 @@ def run_test_on_file(filename, verbose=False): rc, stdout = run_exe(exe_path, args) if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,)) if rc != 0: - testlog.error("@@ aborting - '%s' returned '%s'" % (exe_path, rc)) + testlog.error("@@ aborting - '%s %s' returned '%s'" % (exe_path, option, rc)) return False stdouts.append(stdout) if verbose: testlog.info('....comparing output...') @@ -120,15 +130,19 @@ def prepare_lines(s): lines1 = prepare_lines(s1) lines2 = prepare_lines(s2) - flag_after_symtable = False + flag_in_debug_line_section = False if len(lines1) != len(lines2): return False, 'Number of lines different: %s vs %s' % ( len(lines1), len(lines2)) for i in range(len(lines1)): - if 'symbol table' in lines1[i]: - flag_after_symtable = True + if lines1[i].endswith('debug_line section:'): + # .debug_line or .zdebug_line + flag_in_debug_line_section = True + + # readelf spelling error for GNU property notes + lines1[i] = lines1[i].replace('procesor-specific type', 'processor-specific type') # Compare ignoring whitespace lines1_parts = lines1[i].split() @@ -149,12 +163,32 @@ def prepare_lines(s): sm = SequenceMatcher() sm.set_seqs(lines1[i], lines2[i]) changes = sm.get_opcodes() - if flag_after_symtable: - # Detect readelf's adding @ with lib and version after - # symbol name. 
- if ( len(changes) == 2 and changes[1][0] == 'delete' and - lines1[i][changes[1][1]] == '@'): + if flag_in_debug_line_section: + # readelf outputs an additional "View" column: ignore it + if len(lines1_parts) >= 2 and lines1_parts[-2] == 'view': ok = True + else: + # Fast check special-cased for the only ELF we have which + # has this information (dwarf_gnuops4.so.elf) + ok = ( lines1_parts[-2:] == ['1', 'x'] + and lines2_parts[-1] == 'x') + elif '[...]' in lines1[i]: + # Special case truncations with ellipsis like these: + # .note.gnu.bu[...] redelf + # .note.gnu.build-i pyelftools + # Or more complex for symbols with versions, like these: + # _unw[...]@gcc_3.0 readelf + # _unwind_resume@gcc_3.0 pyelftools + for p1, p2 in zip(lines1_parts, lines2_parts): + dots_start = p1.find('[...]') + if dots_start != -1: + break + ok = p1.endswith('[...]') and p1[:dots_start] == p2[:dots_start] + if not ok: + dots_end = dots_start + 5 + if len(p1) > dots_end and p1[dots_end] == '@': + ok = ( p1[:dots_start] == p2[:dots_start] + and p1[p1.rfind('@'):] == p2[p2.rfind('@'):]) elif 'at_const_value' in lines1[i]: # On 32-bit machines, readelf doesn't correctly represent # some boundary LEB128 numbers @@ -165,11 +199,11 @@ def prepare_lines(s): elif 'os/abi' in lines1[i]: if 'unix - gnu' in lines1[i] and 'unix - linux' in lines2[i]: ok = True - elif ( 'unknown at value' in lines1[i] and - 'dw_at_apple' in lines2[i]): - ok = True + elif len(lines1_parts) == 3 and lines1_parts[2] == 'nt_gnu_property_type_0': + # readelf does not seem to print a readable description for this + ok = lines1_parts == lines2_parts[:3] else: - for s in ('t (tls)', 'l (large)'): + for s in ('t (tls)', 'l (large)', 'd (mbind)'): if s in lines1[i] or s in lines2[i]: ok = True break @@ -199,6 +233,9 @@ def main(): '-k', '--keep-going', action='store_true', dest='keep_going', help="Run all tests, don't stop at the first failure") + argparser.add_argument('--opt', + action='store', dest='opt', metavar='', + 
help= 'Limit the test one one readelf option.') args = argparser.parse_args() if args.parallel: @@ -220,14 +257,12 @@ def main(): if len(filenames) > 1 and args.parallel: pool = Pool() - results = pool.map( - run_test_on_file, - filenames) + results = pool.map(run_test_on_file, filenames) failures = results.count(False) else: failures = 0 for filename in filenames: - if not run_test_on_file(filename, verbose=args.verbose): + if not run_test_on_file(filename, args.verbose, args.opt): failures += 1 if not args.keep_going: break diff --git a/test/test_arm_call_reloc.py b/test/test_arm_call_reloc.py new file mode 100644 index 00000000..e23f25cf --- /dev/null +++ b/test/test_arm_call_reloc.py @@ -0,0 +1,45 @@ +#------------------------------------------------------------------------------- +# elftools tests +# +# Test 'R_ARM_CALL' relocation type support. +# Compare the '.text' section data of ELF file that was relocated by elftools +# with an ELF file that was relocated by linker. +# +# Dmitry Koltunov (koltunov@ispras.ru) +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +import sys +import unittest + +from elftools.common.py3compat import BytesIO +from elftools.elf.elffile import ELFFile +from elftools.elf.relocation import RelocationHandler + + +def do_relocation(rel_elf): + data = rel_elf.get_section_by_name('.text').data() + rh = RelocationHandler(rel_elf) + + stream = BytesIO() + stream.write(data) + + rel = rel_elf.get_section_by_name('.rel.text') + rh.apply_section_relocations(stream, rel) + return stream.getvalue() + + +class TestARMRElocation(unittest.TestCase): + def test_reloc(self): + test_dir = os.path.join('test', 'testfiles_for_unittests') + with open(os.path.join(test_dir, 'arm_reloc_unrelocated.o'), 'rb') as rel_f, \ + open(os.path.join(test_dir, 'arm_reloc_relocated.elf'), 'rb') as f: + rel_elf = ELFFile(rel_f) + elf = ELFFile(f) + + # Comparison of '.text' section data + 
self.assertEqual(do_relocation(rel_elf), + elf.get_section_by_name('.text').data()) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_callframe.py b/test/test_callframe.py index 5be2717f..fc434f9b 100644 --- a/test/test_callframe.py +++ b/test/test_callframe.py @@ -9,10 +9,13 @@ from elftools.common.py3compat import BytesIO from elftools.dwarf.callframe import ( CallFrameInfo, CIE, FDE, instruction_name, CallFrameInstruction, - RegisterRule) + RegisterRule, DecodedCallFrameTable, CFARule) from elftools.dwarf.structs import DWARFStructs from elftools.dwarf.descriptions import (describe_CFI_instructions, set_global_machine_arch) +from elftools.dwarf.enums import DW_EH_encoding_flags +from elftools.elf.elffile import ELFFile +from os.path import join class TestCallFrame(unittest.TestCase): @@ -83,6 +86,7 @@ def test_spec_sample_d6(self): self.assertEqual(entries[1]['length'], 40) self.assertEqual(entries[1]['CIE_pointer'], 0) self.assertEqual(entries[1]['address_range'], 84) + self.assertIsNone(entries[1].lsda_pointer) self.assertIs(entries[1].cie, entries[0]) self.assertEqual(len(entries[1].instructions), 21) self.assertInstruction(entries[1].instructions[0], @@ -145,6 +149,75 @@ def test_describe_CFI_instructions(self): ( ' DW_CFA_def_cfa: r7 (edi) ofs 2\n' + ' DW_CFA_expression: r2 (edx) (DW_OP_addr: 201; DW_OP_deref; DW_OP_deref)\n')) + def test_CFIEntry_get_decoded(self): + oracle_decoded = DecodedCallFrameTable( + table = [ + {'pc': 0, 'cfa': CFARule(reg = 29, offset = 0, expr = None)} + ], + reg_order = [] + ) + + test_dir = join('test', 'testfiles_for_unittests') + with open(join(test_dir, 'simple_mipsel.elf'), 'rb') as f: + elf = ELFFile(f) + di = elf.get_dwarf_info() + entries = di.CFI_entries() + decoded = entries[0].get_decoded() + self.assertEqual(oracle_decoded.table[0]['cfa'].reg, + decoded.table[0]['cfa'].reg + ) + self.assertEqual(oracle_decoded.table[0]['cfa'].offset, + decoded.table[0]['cfa'].offset) + + def 
test_ehframe_fde_with_lsda_pointer(self): + # CIE and FDE dumped from exceptions_0, offset 0xcc0 + # binary is at https://github.com/angr/binaries/blob/master/tests/x86_64/exceptions_0 + data = (b'' + + # CIE + b'\x1c\x00\x00\x00' + # length + b'\x00\x00\x00\x00' + # ID + b'\x01' + # version + b'\x7a\x50\x4c\x52\x00' + # augmentation string + b'\x01' + # code alignment + b'\x78' + # data alignment + b'\x10' + # return address register + b'\x07' + # augmentation data length + b'\x9b' + # personality function pointer encoding + b'\x3d\x13\x20\x00' + # personality function pointer + b'\x1b' + # LSDA pointer encoding + b'\x1b' + # FDE encoding + b'\x0c\x07\x08\x90' + # initial instructions + b'\x01\x00\x00' + + # FDE + b'\x24\x00\x00\x00' + # length + b'\x24\x00\x00\x00' + # CIE reference pointer + b'\x62\xfd\xff\xff' + # pc begin + b'\x89\x00\x00\x00' + # pc range + b'\x04' + # augmentation data length + b'\xb7\x00\x00\x00' + # LSDA pointer + b'\x41\x0e\x10\x86' + # initial instructions + b'\x02\x43\x0d\x06' + + b'\x45\x83\x03\x02' + + b'\x7f\x0c\x07\x08' + + b'\x00\x00\x00' + ) + s = BytesIO(data) + + structs = DWARFStructs(little_endian=True, dwarf_format=32, address_size=8) + cfi = CallFrameInfo(s, len(data), 0, structs, for_eh_frame=True) + entries = cfi.get_entries() + + self.assertEqual(len(entries), 2) + self.assertIsInstance(entries[0], CIE) + self.assertIn('LSDA_encoding', entries[0].augmentation_dict) + # check LSDA encoding + lsda_encoding = entries[0].augmentation_dict['LSDA_encoding'] + basic_encoding = lsda_encoding & 0x0f + modifier = lsda_encoding & 0xf0 + self.assertEqual(basic_encoding, DW_EH_encoding_flags['DW_EH_PE_sdata4']) + self.assertEqual(modifier, DW_EH_encoding_flags['DW_EH_PE_pcrel']) + self.assertIsInstance(entries[1], FDE) + self.assertEqual(entries[1].lsda_pointer, 232) if __name__ == '__main__': unittest.main() diff --git a/test/test_core_notes.py b/test/test_core_notes.py deleted file mode 100644 index 41ee6f8d..00000000 --- 
a/test/test_core_notes.py +++ /dev/null @@ -1,47 +0,0 @@ -#------------------------------------------------------------------------------ -# elftools tests -# -# Maxim Akhmedov (max42@yandex-team.ru) -# This code is in the public domain -#------------------------------------------------------------------------------ -import unittest -import os - -from elftools.elf.elffile import ELFFile -from elftools.elf.segments import NoteSegment - -class TestCoreNotes(unittest.TestCase): - """ This test makes sure than core dump specific - sections are properly analyzed. - """ - - def test_core_prpsinfo(self): - with open(os.path.join('test', - 'testfiles_for_unittests', 'core_linux64.elf'), - 'rb') as f: - elf = ELFFile(f) - for segment in elf.iter_segments(): - if not isinstance(segment, NoteSegment): - continue - notes = list(segment.iter_notes()) - for note in segment.iter_notes(): - if note['n_type'] != 'NT_PRPSINFO': - continue - desc = note['n_desc'] - self.assertEquals(desc['pr_state'], 0) - self.assertEquals(desc['pr_sname'], b'R') - self.assertEquals(desc['pr_zomb'], 0) - self.assertEquals(desc['pr_nice'], 0) - self.assertEquals(desc['pr_flag'], 0x400600) - self.assertEquals(desc['pr_uid'], 1000) - self.assertEquals(desc['pr_gid'], 1000) - self.assertEquals(desc['pr_pid'], 23395) - self.assertEquals(desc['pr_ppid'], 23187) - self.assertEquals(desc['pr_pgrp'], 23395) - self.assertEquals(desc['pr_sid'], 23187) - self.assertEquals( - desc['pr_fname'], - b'coredump_self\x00\x00\x00') - self.assertEquals( - desc['pr_psargs'], - b'./coredump_self foo bar 42 ' + b'\x00' * (80 - 27)) diff --git a/test/test_core_notes32.py b/test/test_core_notes32.py new file mode 100644 index 00000000..d0726e96 --- /dev/null +++ b/test/test_core_notes32.py @@ -0,0 +1,174 @@ +#------------------------------------------------------------------------------ +# elftools tests +# +# Kyle Zeng (zengyhkyle@asu.edu) +# This code is in the public domain 
+#------------------------------------------------------------------------------ +import unittest +import os + +from elftools.elf.elffile import ELFFile +from elftools.elf.segments import NoteSegment + + +class TestCoreNotes(unittest.TestCase): + """ This test makes sure that core dump specific + sections are properly analyzed. + """ + @classmethod + def setUpClass(cls): + cls._core_file = open(os.path.join('test', + 'testfiles_for_unittests', 'core_linux32.elf'), + 'rb') + + def test_core_prpsinfo(self): + elf = ELFFile(self._core_file) + for segment in elf.iter_segments(): + if not isinstance(segment, NoteSegment): + continue + notes = list(segment.iter_notes()) + for note in segment.iter_notes(): + if note['n_type'] != 'NT_PRPSINFO': + continue + desc = note['n_desc'] + self.assertEqual(desc['pr_state'], 0) + self.assertEqual(desc['pr_sname'], b'R') + self.assertEqual(desc['pr_zomb'], 0) + self.assertEqual(desc['pr_nice'], 0) + self.assertEqual(desc['pr_flag'], 0x400600) + self.assertEqual(desc['pr_uid'], 1000) + self.assertEqual(desc['pr_gid'], 1000) + self.assertEqual(desc['pr_pid'], 11038) + self.assertEqual(desc['pr_ppid'], 10442) + self.assertEqual(desc['pr_pgrp'], 11038) + self.assertEqual(desc['pr_sid'], 10442) + self.assertEqual( + desc['pr_fname'], + b'coredump\x00\x00\x00\x00\x00\x00\x00\x00') + self.assertEqual( + desc['pr_psargs'], + b'./coredump foo bar 42 '.ljust(80, b'\x00')) + + def test_core_nt_file(self): + """ + Test that the parsing of the NT_FILE note within a core file is + correct. + The assertions are made against the output of eu-readelf. + + eu-readelf -n core_linux32.elf + ... 
+ CORE 0x0000018b NT_FILE (mapped files) + Page size: 4096 + Start End Page Offset + 0x56624000 0x56625000 0x00000000 + /tmp/coredump + 0x56625000 0x56626000 0x00000000 + /tmp/coredump + 0x56626000 0x56627000 0x00000001 + /tmp/coredump + 0xf7d13000 0xf7ee8000 0x00000000 + /lib/i386-linux-gnu/libc-2.27.so + 0xf7ee8000 0xf7ee9000 0x000001d5 + /lib/i386-linux-gnu/libc-2.27.so + 0xf7ee9000 0xf7eeb000 0x000001d5 + /lib/i386-linux-gnu/libc-2.27.so + 0xf7eeb000 0xf7eec000 0x000001d7 + /lib/i386-linux-gnu/libc-2.27.so + 0xf7f39000 0xf7f5f000 0x00000000 + /lib/i386-linux-gnu/ld-2.27.so + 0xf7f5f000 0xf7f60000 0x00000025 + /lib/i386-linux-gnu/ld-2.27.so + 0xf7f60000 0xf7f61000 0x00000026 + /lib/i386-linux-gnu/ld-2.27.so + ... + """ + elf = ELFFile(self._core_file) + nt_file_found = False + for segment in elf.iter_segments(): + if not isinstance(segment, NoteSegment): + continue + for note in segment.iter_notes(): + if note['n_type'] != 'NT_FILE': + continue + nt_file_found = True + desc = note['n_desc'] + self.assertEqual(desc['num_map_entries'], 10) + self.assertEqual(desc['page_size'], 4096) + self.assertEqual(len(desc['Elf_Nt_File_Entry']), 10) + self.assertEqual(len(desc['filename']), 10) + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][0], + desc['page_size'], + 0x56624000, 0x56625000, 0x00000000) + self.assertEqual(desc['filename'][0], + b"/tmp/coredump") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][1], + desc['page_size'], + 0x56625000, 0x56626000, 0x00000000) + self.assertEqual(desc['filename'][1], + b"/tmp/coredump") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][2], + desc['page_size'], + 0x56626000, 0x56627000, 0x00000001) + self.assertEqual(desc['filename'][2], + b"/tmp/coredump") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][3], + desc['page_size'], + 0xf7d13000, 0xf7ee8000, 0x00000000) + self.assertEqual(desc['filename'][3], + b"/lib/i386-linux-gnu/libc-2.27.so") + + 
self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][4], + desc['page_size'], + 0xf7ee8000, 0xf7ee9000, 0x000001d5) + self.assertEqual(desc['filename'][4], + b"/lib/i386-linux-gnu/libc-2.27.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][5], + desc['page_size'], + 0xf7ee9000, 0xf7eeb000, 0x000001d5) + self.assertEqual(desc['filename'][5], + b"/lib/i386-linux-gnu/libc-2.27.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][6], + desc['page_size'], + 0xf7eeb000, 0xf7eec000, 0x000001d7) + self.assertEqual(desc['filename'][6], + b"/lib/i386-linux-gnu/libc-2.27.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][7], + desc['page_size'], + 0xf7f39000, 0xf7f5f000, 0x00000000) + self.assertEqual(desc['filename'][7], + b"/lib/i386-linux-gnu/ld-2.27.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][8], + desc['page_size'], + 0xf7f5f000, 0xf7f60000, 0x00000025) + self.assertEqual(desc['filename'][8], + b"/lib/i386-linux-gnu/ld-2.27.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][9], + desc['page_size'], + 0xf7f60000, 0xf7f61000, 0x00000026) + self.assertEqual(desc['filename'][9], + b"/lib/i386-linux-gnu/ld-2.27.so") + + self.assertTrue(nt_file_found) + + def validate_nt_file_entry(self, + entry, + page_size, + expected_vm_start, + expected_vm_end, + expected_page_offset): + self.assertEqual(entry.vm_start, expected_vm_start) + self.assertEqual(entry.vm_end, expected_vm_end) + self.assertEqual(entry.page_offset, expected_page_offset) + + @classmethod + def tearDownClass(cls): + cls._core_file.close() diff --git a/test/test_core_notes32_mips.py b/test/test_core_notes32_mips.py new file mode 100644 index 00000000..8c910b1d --- /dev/null +++ b/test/test_core_notes32_mips.py @@ -0,0 +1,60 @@ +#------------------------------------------------------------------------------ +# elftools tests +# +# Lukas Dresel (lukas.dresel@cs.ucsb.edu) +# This code is in the public domain 
+#------------------------------------------------------------------------------ +import unittest +import os + +from elftools.elf.elffile import ELFFile +from elftools.elf.segments import NoteSegment + + +class TestCoreNotes(unittest.TestCase): + """ This test ensures that core dump specific notes + are properly analyzed. Specifically, tests for a + regression where MIPS PRPSINFO structures would be + parsed incorrectly due to being treated as 16-bit + fields instead of 32-bit fields. + """ + @classmethod + def setUpClass(cls): + cls._core_file = open(os.path.join('test', + 'testfiles_for_unittests', 'core_linux32_qemu_mips.elf'), + 'rb') + + def test_core_prpsinfo(self): + elf = ELFFile(self._core_file) + for segment in elf.iter_segments(): + if not isinstance(segment, NoteSegment): + continue + for note in segment.iter_notes(): + if note['n_type'] != 'NT_PRPSINFO': + continue + desc = note['n_desc'] + self.assertEqual(desc['pr_state'], 0) + self.assertEqual(desc['pr_sname'], b'\0') + self.assertEqual(desc['pr_zomb'], 0) + self.assertEqual(desc['pr_nice'], 0) + self.assertEqual(desc['pr_flag'], 0x0) + self.assertEqual(desc['pr_uid'], 1000) + self.assertEqual(desc['pr_gid'], 1000) + self.assertEqual(desc['pr_pid'], 449015) + self.assertEqual(desc['pr_ppid'], 4238) + self.assertEqual(desc['pr_pgrp'], 449015) + self.assertEqual(desc['pr_sid'], 4238) + self.assertEqual( + desc['pr_fname'], + b'crash'.ljust(16, b'\0')) + self.assertEqual( + desc['pr_psargs'], + b'./crash '.ljust(80, b'\x00')) + + @classmethod + def tearDownClass(cls): + cls._core_file.close() + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_core_notes64.py b/test/test_core_notes64.py new file mode 100644 index 00000000..4caef862 --- /dev/null +++ b/test/test_core_notes64.py @@ -0,0 +1,192 @@ +#------------------------------------------------------------------------------ +# elftools tests +# +# Maxim Akhmedov (max42@yandex-team.ru) +# This code is in the public domain 
+#------------------------------------------------------------------------------ +import unittest +import os + +from elftools.elf.elffile import ELFFile +from elftools.elf.segments import NoteSegment + + +class TestCoreNotes(unittest.TestCase): + """ This test makes sure that core dump specific + sections are properly analyzed. + """ + @classmethod + def setUpClass(cls): + cls._core_file = open(os.path.join('test', + 'testfiles_for_unittests', 'core_linux64.elf'), + 'rb') + + def test_core_prpsinfo(self): + elf = ELFFile(self._core_file) + for segment in elf.iter_segments(): + if not isinstance(segment, NoteSegment): + continue + notes = list(segment.iter_notes()) + for note in segment.iter_notes(): + if note['n_type'] != 'NT_PRPSINFO': + continue + desc = note['n_desc'] + self.assertEqual(desc['pr_state'], 0) + self.assertEqual(desc['pr_sname'], b'R') + self.assertEqual(desc['pr_zomb'], 0) + self.assertEqual(desc['pr_nice'], 0) + self.assertEqual(desc['pr_flag'], 0x400600) + self.assertEqual(desc['pr_uid'], 1000) + self.assertEqual(desc['pr_gid'], 1000) + self.assertEqual(desc['pr_pid'], 23395) + self.assertEqual(desc['pr_ppid'], 23187) + self.assertEqual(desc['pr_pgrp'], 23395) + self.assertEqual(desc['pr_sid'], 23187) + self.assertEqual( + desc['pr_fname'], + b'coredump_self\x00\x00\x00') + self.assertEqual( + desc['pr_psargs'], + b'./coredump_self foo bar 42 ' + b'\x00' * (80 - 27)) + + def test_core_nt_file(self): + """ + Test that the parsing of the NT_FILE note within a core file is + correct. + The assertions are made against the output of eu-readelf. + + eu-readelf -n core_linux64.elf + ... 
+ CORE 621 FILE + 10 files: + 00400000-00401000 00000000 4096 + /home/max42/pyelftools/test/coredump_self + 00600000-00601000 00000000 4096 + /home/max42/pyelftools/test/coredump_self + 00601000-00602000 00001000 4096 + /home/max42/pyelftools/test/coredump_self + 7fa4593ae000-7fa45956d000 00000000 1830912 + /lib/x86_64-linux-gnu/libc-2.23.so + 7fa45956d000-7fa45976d000 001bf000 2097152 + /lib/x86_64-linux-gnu/libc-2.23.so + 7fa45976d000-7fa459771000 001bf000 16384 + /lib/x86_64-linux-gnu/libc-2.23.so + 7fa459771000-7fa459773000 001c3000 8192 + /lib/x86_64-linux-gnu/libc-2.23.so + 7fa459777000-7fa45979d000 00000000 155648 + /lib/x86_64-linux-gnu/ld-2.23.so + 7fa45999c000-7fa45999d000 00025000 4096 + /lib/x86_64-linux-gnu/ld-2.23.so + 7fa45999d000-7fa45999e000 00026000 4096 + /lib/x86_64-linux-gnu/ld-2.23.so + ... + """ + elf = ELFFile(self._core_file) + nt_file_found = False + for segment in elf.iter_segments(): + if not isinstance(segment, NoteSegment): + continue + for note in segment.iter_notes(): + if note['n_type'] != 'NT_FILE': + continue + nt_file_found = True + desc = note['n_desc'] + self.assertEqual(desc['num_map_entries'], 10) + self.assertEqual(desc['page_size'], 4096) + self.assertEqual(len(desc['Elf_Nt_File_Entry']), 10) + self.assertEqual(len(desc['filename']), 10) + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][0], + desc['page_size'], + 0x00400000, + 0x00401000, + 0x00000000) + self.assertEqual(desc['filename'][0], + b"/home/max42/pyelftools/test/coredump_self") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][1], + desc['page_size'], + 0x00600000, + 0x00601000, + 0x00000000) + self.assertEqual(desc['filename'][1], + b"/home/max42/pyelftools/test/coredump_self") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][2], + desc['page_size'], + 0x00601000, + 0x00602000, + 0x00001000) + self.assertEqual(desc['filename'][2], + b"/home/max42/pyelftools/test/coredump_self") + + 
self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][3], + desc['page_size'], + 0x7fa4593ae000, + 0x7fa45956d000, + 0x00000000) + self.assertEqual(desc['filename'][3], + b"/lib/x86_64-linux-gnu/libc-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][4], + desc['page_size'], + 0x7fa45956d000, + 0x7fa45976d000, + 0x001bf000) + self.assertEqual(desc['filename'][4], + b"/lib/x86_64-linux-gnu/libc-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][5], + desc['page_size'], + 0x7fa45976d000, + 0x7fa459771000, + 0x001bf000) + self.assertEqual(desc['filename'][5], + b"/lib/x86_64-linux-gnu/libc-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][6], + desc['page_size'], + 0x7fa459771000, + 0x7fa459773000, + 0x001c3000) + self.assertEqual(desc['filename'][6], + b"/lib/x86_64-linux-gnu/libc-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][7], + desc['page_size'], + 0x7fa459777000, + 0x7fa45979d000, + 0x00000000) + self.assertEqual(desc['filename'][7], + b"/lib/x86_64-linux-gnu/ld-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][8], + desc['page_size'], + 0x7fa45999c000, + 0x7fa45999d000, + 0x00025000) + self.assertEqual(desc['filename'][8], + b"/lib/x86_64-linux-gnu/ld-2.23.so") + + self.validate_nt_file_entry(desc['Elf_Nt_File_Entry'][9], + desc['page_size'], + 0x7fa45999d000, + 0x7fa45999e000, + 0x00026000) + self.assertEqual(desc['filename'][9], + b"/lib/x86_64-linux-gnu/ld-2.23.so") + self.assertTrue(nt_file_found) + + def validate_nt_file_entry(self, + entry, + page_size, + expected_vm_start, + expected_vm_end, + expected_page_offset): + self.assertEqual(entry.vm_start, expected_vm_start) + self.assertEqual(entry.vm_end, expected_vm_end) + self.assertEqual(entry.page_offset * page_size, expected_page_offset) + + @classmethod + def tearDownClass(cls): + cls._core_file.close() diff --git a/test/test_dbgfile.py b/test/test_dbgfile.py new file mode 100644 index 00000000..901a19fd --- 
/dev/null +++ b/test/test_dbgfile.py @@ -0,0 +1,49 @@ +""" +Test that elftools does not fail to load debug symbol ELF files +""" +import unittest +import os + +from elftools.elf.elffile import ELFFile, DynamicSection +from elftools.dwarf.callframe import ZERO + +class TestDBGFile(unittest.TestCase): + def test_dynamic_segment(self): + """ Test that the degenerate case for the dynamic segment does not crash + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'debug_info.elf'), 'rb') as f: + elf = ELFFile(f) + + seen_dynamic_segment = False + for segment in elf.iter_segments(): + if segment.header.p_type == 'PT_DYNAMIC': + self.assertEqual(segment.num_tags(), 0, "The dynamic segment in this file should be empty") + seen_dynamic_segment = True + break + + self.assertTrue(seen_dynamic_segment, "There should be a dynamic segment in this file") + + def test_dynamic_section(self): + """ Test that the degenerate case for the dynamic section does not crash + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'debug_info.elf'), 'rb') as f: + elf = ELFFile(f) + section = DynamicSection(elf.get_section_by_name('.dynamic').header, '.dynamic', elf) + + self.assertEqual(section.num_tags(), 0, "The dynamic section in this file should be empty") + + def test_eh_frame(self): + """ Test that parsing .eh_frame with SHT_NOBITS does not crash + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'debug_info.elf'), 'rb') as f: + elf = ELFFile(f) + dwarf = elf.get_dwarf_info() + eh_frame = list(dwarf.EH_CFI_entries()) + self.assertEqual(len(eh_frame), 1, "There should only be the ZERO entry in eh_frame") + self.assertIs(type(eh_frame[0]), ZERO, "The only eh_frame entry should be the terminator") + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_die_size.py b/test/test_die_size.py new file mode 100644 index 00000000..20fcab8e --- /dev/null +++ b/test/test_die_size.py @@ -0,0 +1,32 @@ 
+#------------------------------------------------------------------------------ +# elftools tests +# +# Anders Dellien (anders@andersdellien.se) +# This code is in the public domain +#------------------------------------------------------------------------------ +import unittest +import os + +from elftools.elf.elffile import ELFFile + +class TestDieSize(unittest.TestCase): + """ This test verifies that null DIEs are treated correctly - i.e. + removed when we 'unflatten' the linear list and build a tree. + The test file contains a CU with two non-null DIEs (both three bytes big), + where the second one is followed by three null DIEs. + We verify that the null DIEs are discarded and that the length of the second DIE + does not include the null entries that follow it. + """ + def test_die_size(self): + with open(os.path.join('test', + 'testfiles_for_unittests', 'trailing_null_dies.elf'), + 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + dwarfinfo = elffile.get_dwarf_info() + for CU in dwarfinfo.iter_CUs(): + for child in CU.get_top_DIE().iter_children(): + self.assertEqual(child.size, 3) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_dwarf_aranges.py b/test/test_dwarf_aranges.py new file mode 100644 index 00000000..7b7d0cc3 --- /dev/null +++ b/test/test_dwarf_aranges.py @@ -0,0 +1,36 @@ +import os +import unittest + +from elftools.elf.elffile import ELFFile + +address_a = 0x112f; +address_b = 0x1154; + +class TestRangeLists(unittest.TestCase): + def test_arange_absent(self): + with open(os.path.join('test', 'testfiles_for_unittests', 'aranges_absent.elf'), 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + aranges = elffile.get_dwarf_info().get_aranges() + self.assertIsNone(aranges) + + def test_arange_partial(self): + with open(os.path.join('test', 'testfiles_for_unittests', 'aranges_partial.elf'), 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + 
aranges = elffile.get_dwarf_info().get_aranges() + self.assertIsNotNone(aranges) + self.assertIsNone(aranges.cu_offset_at_addr(address_a)) + self.assertIsNotNone(aranges.cu_offset_at_addr(address_b)) + + def test_arange_complete(self): + with open(os.path.join('test', 'testfiles_for_unittests', 'aranges_complete.elf'), 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + aranges = elffile.get_dwarf_info().get_aranges() + self.assertIsNotNone(aranges) + self.assertIsNotNone(aranges.cu_offset_at_addr(address_a)) + self.assertIsNotNone(aranges.cu_offset_at_addr(address_b)) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_dwarf_attr_form_flag_present.py b/test/test_dwarf_attr_form_flag_present.py new file mode 100644 index 00000000..9ec9ce5d --- /dev/null +++ b/test/test_dwarf_attr_form_flag_present.py @@ -0,0 +1,25 @@ +#------------------------------------------------------------------------------- +# elftools tests +# +# Eli Bendersky (eliben@gmail.com), Santhosh Kumar Mani (santhoshmani@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +import unittest + +from elftools.elf.elffile import ELFFile + + +class TestAttrFormFlagPresent(unittest.TestCase): + def test_form_flag_present_value_is_true(self): + with open(os.path.join('test', 'testfiles_for_unittests', + 'lambda.elf'), 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + + dwarf = elffile.get_dwarf_info() + for cu in dwarf.iter_CUs(): + for die in cu.iter_DIEs(): + for _, attr in die.attributes.items(): + if attr.form == "DW_FORM_flag_present": + self.assertTrue(attr.value) diff --git a/test/test_dwarf_constisntloc.py b/test/test_dwarf_constisntloc.py new file mode 100644 index 00000000..e98a0ca7 --- /dev/null +++ b/test/test_dwarf_constisntloc.py @@ -0,0 +1,37 @@ +#------------------------------------------------------------------------------ 
+# elftools tests +# +# Seva Alekseyev (sevaa@sprynet.com) +# This code is in the public domain +#------------------------------------------------------------------------------ + +import unittest +import os, sys, io + +sys.path.insert(1, os.getcwd()) + +from elftools.elf.elffile import ELFFile +from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig +from elftools.dwarf.locationlists import LocationParser + +class TestConstWithData4IsntLocation(unittest.TestCase): + def _test_file(self, filename): + filepath = os.path.join('test', 'testfiles_for_unittests', filename) + with open(filepath, 'rb') as f: + elffile = ELFFile(f) + dwarfinfo = elffile.get_dwarf_info() + locparser = LocationParser(dwarfinfo.location_lists()) + for CU in dwarfinfo.iter_CUs(): + ver = CU['version'] + for DIE in CU.iter_DIEs(): + for key in DIE.attributes: + attr = DIE.attributes[key] + if LocationParser.attribute_has_location(attr, ver): + # This will crash on unpatched library on DIE at 0x9f + locparser.parse_from_attribute(attr, ver) + + def test_main(self): + self._test_file('pascalenum.o') + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_dwarf_cu_and_die_cache.py b/test/test_dwarf_cu_and_die_cache.py new file mode 100644 index 00000000..bf7f4d78 --- /dev/null +++ b/test/test_dwarf_cu_and_die_cache.py @@ -0,0 +1,58 @@ +#------------------------------------------------------------------------------- +# elftools tests +# +# Eli Bendersky (eliben@gmail.com), Milton Miller +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +import unittest + +from elftools.elf.elffile import ELFFile +from elftools.common.py3compat import bytes2str + +class TestCacheLUTandDIEref(unittest.TestCase): + def dprint(self, list): + if False: + self.oprint(list) + + def oprint(self, list): + if False: + print(list) + + def test_die_from_LUTentry(self): + lines = [''] + with 
open(os.path.join('test', 'testfiles_for_unittests', + 'lambda.elf'), 'rb') as f: + elffile = ELFFile(f) + self.assertTrue(elffile.has_dwarf_info()) + + dwarf = elffile.get_dwarf_info() + pt = dwarf.get_pubnames() + for (k, v) in pt.items(): + ndie = dwarf.get_DIE_from_lut_entry(v) + self.dprint(ndie) + if not 'DW_AT_type' in ndie.attributes: + continue + if not 'DW_AT_name' in ndie.attributes: + continue + name = bytes2str(ndie.attributes['DW_AT_name'].value) + tlist = [] + tdie = ndie + while True: + tdie = tdie.get_DIE_from_attribute('DW_AT_type') + self.dprint(ndie) + ttag = tdie.tag + if isinstance(ttag, int): + ttag = 'TAG(0x%x)' % ttag + tlist.append(ttag) + if 'DW_AT_name' in tdie.attributes: + break + tlist.append(bytes2str(tdie.attributes['DW_AT_name'].value)) + tname = ' '.join(tlist) + line = "%s DIE at %s is of type %s" % ( + ndie.tag, ndie.offset, tname) + lines.append(line) + self.dprint(line) + + self.oprint('\n'.join(lines)) + self.assertGreater(len(lines), 1) diff --git a/test/test_dwarf_expr.py b/test/test_dwarf_expr.py index 747ee388..308d8728 100644 --- a/test/test_dwarf_expr.py +++ b/test/test_dwarf_expr.py @@ -7,6 +7,7 @@ import unittest from elftools.dwarf.descriptions import ExprDumper, set_global_machine_arch +from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp from elftools.dwarf.structs import DWARFStructs @@ -21,52 +22,69 @@ def setUp(self): set_global_machine_arch('x64') def test_basic_single(self): - self.visitor.process_expr([0x1b]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x1b]), 'DW_OP_div') - self.setUp() - self.visitor.process_expr([0x74, 0x82, 0x01]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x74, 0x82, 0x01]), 'DW_OP_breg4 (rsi): 130') - self.setUp() - self.visitor.process_expr([0x91, 0x82, 0x01]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x91, 0x82, 0x01]), 'DW_OP_fbreg: 130') - 
self.setUp() - self.visitor.process_expr([0x51]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x51]), 'DW_OP_reg1 (rdx)') - self.setUp() - self.visitor.process_expr([0x90, 16]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x90, 16]), 'DW_OP_regx: 16 (rip)') - self.setUp() - self.visitor.process_expr([0x9d, 0x8f, 0x0A, 0x90, 0x01]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x9d, 0x8f, 0x0A, 0x90, 0x01]), 'DW_OP_bit_piece: 1295 144') + self.assertEqual(self.visitor.dump_expr([0x0e, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00]), + 'DW_OP_const8u: 71777214294589695') + def test_basic_sequence(self): - self.visitor.process_expr([0x03, 0x01, 0x02, 0, 0, 0x06, 0x06]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x03, 0x01, 0x02, 0, 0, 0x06, 0x06]), 'DW_OP_addr: 201; DW_OP_deref; DW_OP_deref') - self.setUp() - self.visitor.process_expr([0x15, 0xFF, 0x0b, 0xf1, 0xff]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x15, 0xFF, 0x0b, 0xf1, 0xff]), 'DW_OP_pick: 255; DW_OP_const2s: -15') - self.setUp() - self.visitor.process_expr([0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1e]) - self.assertEqual(self.visitor.get_str(), + self.assertEqual(self.visitor.dump_expr([0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1e]), 'DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul; DW_OP_mod; DW_OP_mul') + # 0xe0 maps to both DW_OP_GNU_push_tls_address and DW_OP_lo_user, so + # check for both to prevent non-determinism. 
+ self.assertIn(self.visitor.dump_expr([0x08, 0x0f, 0xe0]), + ('DW_OP_const1u: 15; DW_OP_GNU_push_tls_address', + 'DW_OP_const1u: 15; DW_OP_lo_user')) -if __name__ == '__main__': - unittest.main() +class TestParseExpr(unittest.TestCase): + structs32 = DWARFStructs( + little_endian=True, + dwarf_format=32, + address_size=4) + + def setUp(self): + set_global_machine_arch('x64') + + def test_single(self): + p = DWARFExprParser(self.structs32) + lst = p.parse_expr([0x1b]) + self.assertEqual(lst, [DWARFExprOp(op=0x1B, op_name='DW_OP_div', args=[])]) + + lst = p.parse_expr([0x90, 16]) + self.assertEqual(lst, [DWARFExprOp(op=0x90, op_name='DW_OP_regx', args=[16])]) + lst = p.parse_expr([0xe0]) + self.assertEqual(len(lst), 1) + # 0xe0 maps to both DW_OP_GNU_push_tls_address and DW_OP_lo_user, so + # check for both to prevent non-determinism. + self.assertIn(lst[0], [ + DWARFExprOp(op=0xe0, op_name='DW_OP_GNU_push_tls_address', args=[]), + DWARFExprOp(op=0xe0, op_name='DW_OP_lo_user', args=[])]) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_dwarf_lineprogram.py b/test/test_dwarf_lineprogram.py index 5a5c65aa..2a0a19e0 100644 --- a/test/test_dwarf_lineprogram.py +++ b/test/test_dwarf_lineprogram.py @@ -18,7 +18,7 @@ def _make_program_in_stream(self, stream): """ ds = DWARFStructs(little_endian=True, dwarf_format=32, address_size=4) header = ds.Dwarf_lineprog_header.parse( - b'\x04\x10\x00\x00' + # initial lenght + b'\x04\x10\x00\x00' + # initial length b'\x03\x00' + # version b'\x20\x00\x00\x00' + # header length b'\x01\x01\x01\x0F' + # flags @@ -100,6 +100,27 @@ def test_spec_sample_60(self): self.assertLineState(linetable[7].state, address=0x24b, line=7, end_sequence=False) self.assertLineState(linetable[9].state, address=0x24d, line=7, end_sequence=True) + def test_lne_set_discriminator(self): + """ + Tests the handling of DWARFv4's new DW_LNE_set_discriminator opcode. 
+ """ + s = BytesIO() + s.write( + b'\x00\x02\x04\x05' + # DW_LNE_set_discriminator (discriminator=0x05) + b'\x01' + # DW_LNS_copy + b'\x00\x01\x01' # DW_LNE_end_sequence + ) + + lp = self._make_program_in_stream(s) + linetable = lp.get_entries() + + # We expect two entries, since DW_LNE_set_discriminator does not add + # an entry of its own. + self.assertEqual(len(linetable), 2) + self.assertEqual(linetable[0].command, DW_LNS_copy) + self.assertLineState(linetable[0].state, discriminator=0x05) + self.assertLineState(linetable[1].state, discriminator=0x00, end_sequence=True) + if __name__ == '__main__': unittest.main() diff --git a/test/test_dwarf_locexpr_on_gnucall.py b/test/test_dwarf_locexpr_on_gnucall.py new file mode 100644 index 00000000..29d57a79 --- /dev/null +++ b/test/test_dwarf_locexpr_on_gnucall.py @@ -0,0 +1,36 @@ +#------------------------------------------------------------------------------ +# elftools tests +# +# Seva Alekseyev (sevaa@sprynet.com) +# This code is in the public domain +#------------------------------------------------------------------------------ + +import unittest +import os, sys, io + +# sys.path.insert(1, os.getcwd()) + +from elftools.elf.elffile import ELFFile +from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig +from elftools.dwarf.locationlists import LocationParser + +class TestGNUCallAttributesHaveLocation(unittest.TestCase): + def _test_file(self, filename): + filepath = os.path.join('test', 'testfiles_for_unittests', filename) + with open(filepath, 'rb') as f: + elffile = ELFFile(f) + dwarfinfo = elffile.get_dwarf_info() + for CU in dwarfinfo.iter_CUs(): + ver = CU['version'] + for DIE in CU.iter_DIEs(): + for key in DIE.attributes: + attr = DIE.attributes[key] + if attr.form == 'DW_FORM_exprloc': + self.assertTrue(LocationParser.attribute_has_location(attr, CU['version']), "Attribute %s not recognized as a location" % key) + + + def test_main(self): + self._test_file('dwarf_gnuops1.o') + 
+if __name__ == '__main__': + unittest.main() diff --git a/test/test_dwarf_v5.py b/test/test_dwarf_v5.py new file mode 100644 index 00000000..0468d076 --- /dev/null +++ b/test/test_dwarf_v5.py @@ -0,0 +1,21 @@ +import os +import unittest + +from elftools.elf.elffile import ELFFile + + +class TestDWARFv5(unittest.TestCase): + def test_dwarfv5_parses(self): + dwarfv5_basic = os.path.join('test', 'testfiles_for_unittests', 'dwarfv5_basic.elf') + with open(dwarfv5_basic, 'rb') as f: + elf = ELFFile(f) + # DWARFv5 debugging information is detected. + self.assertTrue(elf.has_dwarf_info()) + + # Fetching DWARFInfo for DWARFv5 doesn't completely explode. + dwarf = elf.get_dwarf_info() + self.assertIsNotNone(dwarf) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_dwarf_v5_forms.py b/test/test_dwarf_v5_forms.py new file mode 100644 index 00000000..efae53c5 --- /dev/null +++ b/test/test_dwarf_v5_forms.py @@ -0,0 +1,52 @@ +# The dwarf_v5_forms.debug file was generated as follows, using gcc 11.2.0 on +# an x86_64 machine. +# $ cat dwarf_v5_forms.c +# int main() +# { +# char ** val; +# return 0; +# } +# $ gcc -O0 -gdwarf-5 dwarf_v5_forms.c -o dwarf_v5_forms.debug +# $ strip --only-keep-debug dwarf_v5_forms.debug +import unittest +import os + + +from elftools.elf.elffile import ELFFile + +class TestDWARFV5_forms(unittest.TestCase): + + def test_DW_FORM_implicit_const(self): + path = os.path.join('test', 'testfiles_for_unittests', + 'dwarf_v5_forms.debug') + with open(path, 'rb') as f: + elffile = ELFFile(f) + dwarfinfo = elffile.get_dwarf_info() + # File is very small, so load all DIEs. + dies = [] + for cu in dwarfinfo.iter_CUs(): + dies.extend(cu.iter_DIEs()) + # Locate the "val" DIE. 
+ for die in dies: + # There should be only one + if (die.tag == "DW_TAG_variable" and + die.attributes["DW_AT_name"].value == b'val'): + # In the dwarfinfo, it's type is sized using a + # DW_FORM_implicit_const: check it is parsed correctly + break + dietype = cu.get_DIE_from_refaddr(die.attributes["DW_AT_type"].value) + byte_size_attr = dietype.attributes["DW_AT_byte_size"] + self.assertEqual(byte_size_attr.form, "DW_FORM_implicit_const") + self.assertEqual(byte_size_attr.value, 8) + + def test_DW_FORM_linestrp(self): + path = os.path.join('test', 'testfiles_for_unittests', + 'dwarf_v5_forms.debug') + with open(path, 'rb') as f: + elffile = ELFFile(f) + dwarfinfo = elffile.get_dwarf_info() + cu = next(dwarfinfo.iter_CUs()) + top_die = cu.get_top_DIE() + name_attr = top_die.attributes["DW_AT_name"] + self.assertEqual(name_attr.form, "DW_FORM_line_strp") + self.assertEqual(name_attr.value, b"dwarf_v5_forms.c") diff --git a/test/test_dynamic.py b/test/test_dynamic.py index 1ef00809..a310d8ae 100644 --- a/test/test_dynamic.py +++ b/test/test_dynamic.py @@ -49,13 +49,14 @@ def test_missing_sections(self): for t in segment.iter_tags(): if t.entry.d_tag == 'DT_NEEDED': - libs.append(t.needed.decode('utf-8')) + libs.append(t.needed) exp = ['libc.so.6'] self.assertEqual(libs, exp) - def test_reading_symbols(self): - """Verify we can read symbol table without SymbolTableSection""" + def test_reading_symbols_elf_hash(self): + """ Verify we can read symbol table without SymbolTableSection but with + a SYSV-style symbol hash table""" with open(os.path.join('test', 'testfiles_for_unittests', 'aarch64_super_stripped.elf'), 'rb') as f: elf = ELFFile(f) @@ -63,10 +64,39 @@ def test_reading_symbols(self): if segment.header.p_type != 'PT_DYNAMIC': continue + num_symbols = segment.num_symbols() symbol_names = [x.name for x in segment.iter_symbols()] + symbol_at_index_3 = segment.get_symbol(3) + symbols_abort = segment.get_symbol_by_name('abort') - exp = [b'', b'__libc_start_main', 
b'__gmon_start__', b'abort'] + self.assertEqual(num_symbols, 4) + exp = ['', '__libc_start_main', '__gmon_start__', 'abort'] self.assertEqual(symbol_names, exp) + self.assertEqual(symbol_at_index_3.name, 'abort') + self.assertIsNotNone(symbols_abort) + + def test_reading_symbols_gnu_hash(self): + """ Verify we can read symbol table without SymbolTableSection but with + a GNU symbol hash table""" + with open(os.path.join('test', 'testfiles_for_unittests', + 'android_dyntags.elf'), 'rb') as f: + elf = ELFFile(f) + for segment in elf.iter_segments(): + if segment.header.p_type != 'PT_DYNAMIC': + continue + + num_symbols = segment.num_symbols() + symbol_names = [x.name for x in segment.iter_symbols()] + symbol_at_index_3 = segment.get_symbol(3) + symbols_atfork = segment.get_symbol_by_name('__register_atfork') + + self.assertEqual(num_symbols, 212) + exp = ['', '__cxa_finalize' , '__cxa_atexit', '__register_atfork', + '__stack_chk_fail', '_ZNK7android7RefBase9decStrongEPKv', + '_ZN7android7RefBaseD2Ev', '_ZdlPv', 'pthread_mutex_lock'] + self.assertEqual(symbol_names[:9], exp) + self.assertEqual(symbol_at_index_3.name, '__register_atfork') + self.assertIsNotNone(symbols_atfork) def test_sunw_tags(self): def extract_sunw(filename): diff --git a/test/test_ehabi_decoder.py b/test/test_ehabi_decoder.py new file mode 100644 index 00000000..61ad8b40 --- /dev/null +++ b/test/test_ehabi_decoder.py @@ -0,0 +1,95 @@ +# ------------------------------------------------------------------------------- +# elftools: tests +# +# LeadroyaL (leadroyal@qq.com) +# This code is in the public domain +# ------------------------------------------------------------------------------- + +import unittest + +from elftools.ehabi.decoder import EHABIBytecodeDecoder + + +class TestEHABIDecoder(unittest.TestCase): + """ Tests for the EHABI decoder. 
+ """ + + def testLLVM(self): + # Reference: https://github.com/llvm/llvm-project/blob/master/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s + mnemonic_array = EHABIBytecodeDecoder([0xb1, 0x0f, 0xa7, 0x3f, 0xb0, 0xb0]).mnemonic_array + self.assertEqual(mnemonic_array[0].mnemonic, "pop {r0, r1, r2, r3}") + self.assertEqual(mnemonic_array[1].mnemonic, "pop {r4, r5, r6, r7, r8, r9, r10, fp}") + self.assertEqual(mnemonic_array[2].mnemonic, "vsp = vsp + 256") + self.assertEqual(mnemonic_array[3].mnemonic, "finish") + self.assertEqual(mnemonic_array[4].mnemonic, "finish") + + mnemonic_array = EHABIBytecodeDecoder([0xc9, 0x84, 0xb0]).mnemonic_array + self.assertEqual(mnemonic_array[0].mnemonic, "pop {d8, d9, d10, d11, d12}") + self.assertEqual(mnemonic_array[1].mnemonic, "finish") + + mnemonic_array = EHABIBytecodeDecoder( + [0xD7, 0xC9, 0x02, 0xC8, 0x02, 0xC7, 0x03, 0xC6, + 0x02, 0xC2, 0xBA, 0xB3, 0x12, 0xB2, 0x80, 0x04, + 0xB1, 0x01, 0xB0, 0xA9, 0xA1, 0x91, 0x84, 0xC0, + 0x80, 0xC0, 0x80, 0x01, 0x81, 0x00, 0x80, 0x00, + 0x42, 0x02, ]).mnemonic_array + self.assertEqual(mnemonic_array[0].mnemonic, "pop {d8, d9, d10, d11, d12, d13, d14, d15}") + self.assertEqual(mnemonic_array[1].mnemonic, "pop {d0, d1, d2}") + self.assertEqual(mnemonic_array[2].mnemonic, "pop {d16, d17, d18}") + self.assertEqual(mnemonic_array[3].mnemonic, "pop {wCGR0, wCGR1}") + self.assertEqual(mnemonic_array[4].mnemonic, "pop {wR0, wR1, wR2}") + self.assertEqual(mnemonic_array[5].mnemonic, "pop {wR10, wR11, wR12}") + self.assertEqual(mnemonic_array[6].mnemonic, "pop {d8, d9, d10}") + self.assertEqual(mnemonic_array[7].mnemonic, "pop {d1, d2, d3}") + self.assertEqual(mnemonic_array[8].mnemonic, "vsp = vsp + 2564") + self.assertEqual(mnemonic_array[9].mnemonic, "pop {r0}") + self.assertEqual(mnemonic_array[10].mnemonic, "finish") + self.assertEqual(mnemonic_array[11].mnemonic, "pop {r4, r5, lr}") + self.assertEqual(mnemonic_array[12].mnemonic, "pop {r4, r5}") + 
self.assertEqual(mnemonic_array[13].mnemonic, "vsp = r1") + self.assertEqual(mnemonic_array[14].mnemonic, "pop {r10, fp, lr}") + self.assertEqual(mnemonic_array[15].mnemonic, "pop {r10, fp}") + self.assertEqual(mnemonic_array[16].mnemonic, "pop {r4}") + self.assertEqual(mnemonic_array[17].mnemonic, "pop {ip}") + self.assertEqual(mnemonic_array[18].mnemonic, "refuse to unwind") + self.assertEqual(mnemonic_array[19].mnemonic, "vsp = vsp - 12") + self.assertEqual(mnemonic_array[20].mnemonic, "vsp = vsp + 12") + + mnemonic_array = EHABIBytecodeDecoder( + [0xD8, 0xD0, 0xCA, 0xC9, 0x00, 0xC8, 0x00, 0xC7, + 0x10, 0xC7, 0x01, 0xC7, 0x00, 0xC6, 0x00, 0xC0, + 0xB8, 0xB4, 0xB3, 0x00, 0xB2, 0x00, 0xB1, 0x10, + 0xB1, 0x01, 0xB1, 0x00, 0xB0, 0xA8, 0xA0, 0x9F, + 0x9D, 0x91, 0x88, 0x00, 0x80, 0x00, 0x40, 0x00, + ]).mnemonic_array + self.assertEqual(mnemonic_array[0].mnemonic, "spare") + self.assertEqual(mnemonic_array[1].mnemonic, "pop {d8}") + self.assertEqual(mnemonic_array[2].mnemonic, "spare") + self.assertEqual(mnemonic_array[3].mnemonic, "pop {d0}") + self.assertEqual(mnemonic_array[4].mnemonic, "pop {d16}") + self.assertEqual(mnemonic_array[5].mnemonic, "spare") + self.assertEqual(mnemonic_array[6].mnemonic, "pop {wCGR0}") + self.assertEqual(mnemonic_array[7].mnemonic, "spare") + self.assertEqual(mnemonic_array[8].mnemonic, "pop {wR0}") + self.assertEqual(mnemonic_array[9].mnemonic, "pop {wR10}") + self.assertEqual(mnemonic_array[10].mnemonic, "pop {d8}") + self.assertEqual(mnemonic_array[11].mnemonic, "spare") + self.assertEqual(mnemonic_array[12].mnemonic, "pop {d0}") + self.assertEqual(mnemonic_array[13].mnemonic, "vsp = vsp + 516") + self.assertEqual(mnemonic_array[14].mnemonic, "spare") + self.assertEqual(mnemonic_array[15].mnemonic, "pop {r0}") + self.assertEqual(mnemonic_array[16].mnemonic, "spare") + self.assertEqual(mnemonic_array[17].mnemonic, "finish") + self.assertEqual(mnemonic_array[18].mnemonic, "pop {r4, lr}") + self.assertEqual(mnemonic_array[19].mnemonic, 
"pop {r4}") + self.assertEqual(mnemonic_array[20].mnemonic, "reserved (WiMMX MOVrr)") + self.assertEqual(mnemonic_array[21].mnemonic, "reserved (ARM MOVrr)") + self.assertEqual(mnemonic_array[22].mnemonic, "vsp = r1") + self.assertEqual(mnemonic_array[23].mnemonic, "pop {pc}") + self.assertEqual(mnemonic_array[24].mnemonic, "refuse to unwind") + self.assertEqual(mnemonic_array[25].mnemonic, "vsp = vsp - 4") + self.assertEqual(mnemonic_array[26].mnemonic, "vsp = vsp + 4") + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_ehabi_elf.py b/test/test_ehabi_elf.py new file mode 100644 index 00000000..9a0c12b7 --- /dev/null +++ b/test/test_ehabi_elf.py @@ -0,0 +1,89 @@ +# ------------------------------------------------------------------------------- +# elftools: tests +# +# LeadroyaL (leadroyal@qq.com) +# This code is in the public domain +# ------------------------------------------------------------------------------- + +import unittest +import os + +from elftools.ehabi.ehabiinfo import EHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry, CorruptEHABIEntry +from elftools.elf.elffile import ELFFile + + +class TestEHABIELF(unittest.TestCase): + """ Parse ELF and visit ARM exception handler index table entry. + """ + + def test_parse_object_file(self): + # FIXME: `.ARM.exidx.text.XXX` need relocation, it's too complex for current unittest. 
+ fname = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.o') + with open(fname, 'rb') as f: + elf = ELFFile(f) + try: + elf.get_ehabi_infos() + self.assertTrue(False, "Unreachable code") + except AssertionError as e: + self.assertEqual(str(e), "Current version of pyelftools doesn't support relocatable file.") + + def test_parse_shared_library(self): + fname = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.so') + with open(fname, 'rb') as f: + elf = ELFFile(f) + self.assertTrue(elf.has_ehabi_info()) + infos = elf.get_ehabi_infos() + self.assertEqual(1, len(infos)) + info = infos[0] + + self.assertIsInstance(info.get_entry(0), EHABIEntry) + self.assertEqual(info.get_entry(0).function_offset, 0x34610) + self.assertEqual(info.get_entry(0).eh_table_offset, 0x69544) + self.assertEqual(info.get_entry(0).bytecode_array, [0x97, 0x41, 0x84, 0x0d, 0xb0, 0xb0]) + + self.assertIsInstance(info.get_entry(7), CannotUnwindEHABIEntry) + self.assertEqual(info.get_entry(7).function_offset, 0x346f8) + + self.assertIsInstance(info.get_entry(8), EHABIEntry) + self.assertEqual(info.get_entry(8).personality, 0) + self.assertEqual(info.get_entry(8).function_offset, 0x3473c) + self.assertEqual(info.get_entry(8).bytecode_array, [0x97, 0x84, 0x08]) + + self.assertIsInstance(info.get_entry(9), GenericEHABIEntry) + self.assertEqual(info.get_entry(9).function_offset, 0x3477c) + self.assertEqual(info.get_entry(9).personality, 0x31a30) + + for i in range(info.num_entry()): + self.assertNotIsInstance(info.get_entry(i), CorruptEHABIEntry) + + def test_parse_executable(self): + fname = os.path.join('test', 'testfiles_for_unittests', 'arm_exidx_test.elf') + with open(fname, 'rb') as f: + elf = ELFFile(f) + self.assertTrue(elf.has_ehabi_info()) + infos = elf.get_ehabi_infos() + self.assertEqual(1, len(infos)) + info = infos[0] + + self.assertIsInstance(info.get_entry(0), EHABIEntry) + self.assertEqual(info.get_entry(0).function_offset, 0x4f50) + 
self.assertEqual(info.get_entry(0).eh_table_offset, 0x22864) + self.assertEqual(info.get_entry(0).bytecode_array, [0x97, 0x41, 0x84, 0x0d, 0xb0, 0xb0]) + + self.assertIsInstance(info.get_entry(7), CannotUnwindEHABIEntry) + self.assertEqual(info.get_entry(7).function_offset, 0x5040) + + self.assertIsInstance(info.get_entry(8), GenericEHABIEntry) + self.assertEqual(info.get_entry(8).personality, 0x15d21) + + self.assertIsInstance(info.get_entry(9), EHABIEntry) + self.assertEqual(info.get_entry(9).function_offset, 0x5144) + self.assertEqual(info.get_entry(9).personality, 0) + self.assertEqual(info.get_entry(9).bytecode_array, [0x97, 0x84, 0x08]) + + for i in range(info.num_entry()): + self.assertNotIsInstance(info.get_entry(i), CorruptEHABIEntry) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_elffile.py b/test/test_elffile.py index 654b2021..cc929b9f 100644 --- a/test/test_elffile.py +++ b/test/test_elffile.py @@ -5,6 +5,7 @@ # This code is in the public domain #------------------------------------------------------------------------------- import unittest +import os from elftools.elf.elffile import ELFFile @@ -13,12 +14,18 @@ class TestMap(unittest.TestCase): def test_address_offsets(self): class MockELF(ELFFile): __init__ = object.__init__ - def iter_segments(self): - return iter(( - dict(p_type='PT_PHDR', p_vaddr=0x10100, p_filesz=0x100, p_offset=0x400), - dict(p_type='PT_LOAD', p_vaddr=0x10200, p_filesz=0x200, p_offset=0x100), - dict(p_type='PT_LOAD', p_vaddr=0x10100, p_filesz=0x100, p_offset=0x400), - )) + def iter_segments(self, type=None): + if type == 'PT_LOAD': + return iter(( + dict(p_type='PT_LOAD', p_vaddr=0x10200, p_filesz=0x200, p_offset=0x100), + dict(p_type='PT_LOAD', p_vaddr=0x10100, p_filesz=0x100, p_offset=0x400), + )) + else: + return iter(( + dict(p_type='PT_PHDR', p_vaddr=0x10100, p_filesz=0x100, p_offset=0x400), + dict(p_type='PT_LOAD', p_vaddr=0x10200, p_filesz=0x200, p_offset=0x100), + dict(p_type='PT_LOAD', 
p_vaddr=0x10100, p_filesz=0x100, p_offset=0x400), + )) elf = MockELF() @@ -43,6 +50,16 @@ def iter_segments(self): self.assertEqual(tuple(elf.address_offsets(0x103FE, 4)), ()) self.assertEqual(tuple(elf.address_offsets(0x10400, 4)), ()) +class TestSectionFilter(unittest.TestCase): + + def test_section_filter(self): + with open(os.path.join('test', 'testfiles_for_unittests', + 'arm_exidx_test.so'), 'rb') as f: + elf = ELFFile(f) + self.assertEqual(len(list(elf.iter_sections())), 26) + self.assertEqual(len(list(elf.iter_sections('SHT_REL'))), 2) + self.assertEqual(len(list(elf.iter_sections('SHT_ARM_EXIDX'))), 1) + self.assertTrue(elf.has_ehabi_info()) if __name__ == '__main__': unittest.main() diff --git a/test/test_get_section_index.py b/test/test_get_section_index.py new file mode 100644 index 00000000..0935b7dc --- /dev/null +++ b/test/test_get_section_index.py @@ -0,0 +1,60 @@ +#------------------------------------------------------------------------------- +# Tests the functionality of get_section_index +# +# Jonathan Bruchim (YonBruchim@gmail.com) +# This code is in the public domain +#------------------------------------------------------------------------------- +import unittest +import os + +from elftools.elf.elffile import ELFFile + + +class TestGetSectionIndex(unittest.TestCase): + def test_existing_section(self): + with open(os.path.join('test', 'testfiles_for_unittests', + 'simple_gcc.elf.arm'), 'rb') as f: + elf = ELFFile(f) + + # Find the symbol table. + data_section_index = elf.get_section_index('.data') + self.assertIsNotNone(data_section_index) + + # Test we can find a symbol by its name. + data_section = elf.get_section(data_section_index) + self.assertIsNotNone(data_section) + + # Test it is actually the symbol we expect. 
+ self.assertEqual(data_section.name, '.data') + + def test_missing_section(self): + with open(os.path.join('test', 'testfiles_for_unittests', + 'simple_gcc.elf.arm'), 'rb') as f: + elf = ELFFile(f) + + # try getting a missing section index + missing_section_index = elf.get_section_index('non-existent section') + self.assertIsNone(missing_section_index) + + def test_uninitialized_section_name_map(self): + with open(os.path.join('test', 'testfiles_for_unittests', + 'simple_gcc.elf.arm'), 'rb') as f: + elf = ELFFile(f) + + elf._section_name_map = None + + # Find the symbol table. + data_section_index = elf.get_section_index('.data') + self.assertIsNotNone(data_section_index) + + # Test we can find a symbol by its name. + data_section = elf.get_section(data_section_index) + self.assertIsNotNone(data_section) + + # Test it is actually the symbol we expect. + self.assertEqual(data_section.name, '.data') + + +if __name__ == '__main__': + unittest.main() + diff --git a/test/test_hash.py b/test/test_hash.py new file mode 100644 index 00000000..2c2ffec4 --- /dev/null +++ b/test/test_hash.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------- +# elftools tests +# +# Andreas Ziegler (andreas.ziegler@fau.de) +# This code is in the public domain +#------------------------------------------------------------------------------- +import unittest +import os + +from elftools.elf.elffile import ELFFile +from elftools.elf.hash import ELFHashTable, GNUHashTable + +class TestELFHash(unittest.TestCase): + """ Tests for the ELF hash table. + """ + + def test_elf_hash(self): + """ Verify correctness of ELF hashing function. The expected values + were computed with the C implementation from the glibc source code. 
+ """ + self.assertEqual(ELFHashTable.elf_hash(''), 0x00000000) + self.assertEqual(ELFHashTable.elf_hash('main'), 0x000737fe) + self.assertEqual(ELFHashTable.elf_hash('printf'), 0x077905a6) + self.assertEqual(ELFHashTable.elf_hash('exit'), 0x0006cf04) + self.assertEqual(ELFHashTable.elf_hash(u'ïó®123'), 0x0efddae3) + self.assertEqual(ELFHashTable.elf_hash(b'\xe4\xbd\xa0\xe5\xa5\xbd'), + 0x0f07f00d) + + def test_get_number_of_syms(self): + """ Verify we can get get the number of symbols from an ELF hash + section. + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'aarch64_super_stripped.elf'), 'rb') as f: + elf = ELFFile(f) + dynamic_segment = None + for segment in elf.iter_segments(): + if segment.header.p_type == 'PT_DYNAMIC': + dynamic_segment = segment + break + + _, hash_offset = dynamic_segment.get_table_offset('DT_HASH') + + hash_section = ELFHashTable(elf, hash_offset, dynamic_segment) + self.assertIsNotNone(hash_section) + self.assertEqual(hash_section.get_number_of_symbols(), 4) + + def test_get_symbol(self): + """ Verify we can get a specific symbol from an ELF hash section. + """ + path = os.path.join('test', 'testfiles_for_unittests', + 'simple_mipsel.elf') + with open(path, 'rb') as f: + elf = ELFFile(f) + hash_section = elf.get_section_by_name('.hash') + self.assertIsNotNone(hash_section) + symbol_main = hash_section.get_symbol('main') + self.assertIsNotNone(symbol_main) + self.assertEqual(symbol_main['st_value'], int(0x400790)) + + +class TestGNUHash(unittest.TestCase): + """ Tests for the GNU hash table. + """ + + def test_gnu_hash(self): + """ Verify correctness of GNU hashing function. The expected values + were computed with the C implementation from the glibc source code. 
+ """ + self.assertEqual(GNUHashTable.gnu_hash(''), 0x00001505) + self.assertEqual(GNUHashTable.gnu_hash('main'), 0x7c9a7f6a) + self.assertEqual(GNUHashTable.gnu_hash('printf'), 0x156b2bb8) + self.assertEqual(GNUHashTable.gnu_hash('exit'), 0x7c967e3f) + self.assertEqual(GNUHashTable.gnu_hash(u'ïó®123'), 0x8025a693) + self.assertEqual(GNUHashTable.gnu_hash(b'\xe4\xbd\xa0\xe5\xa5\xbd'), + 0x296eec2d) + + def test_get_number_of_syms(self): + """ Verify we can get get the number of symbols from a GNU hash + section. + """ + + with open(os.path.join('test', 'testfiles_for_unittests', + 'lib_versioned64.so.1.elf'), 'rb') as f: + elf = ELFFile(f) + hash_section = elf.get_section_by_name('.gnu.hash') + self.assertIsNotNone(hash_section) + self.assertEqual(hash_section.get_number_of_symbols(), 24) + + def test_get_symbol(self): + """ Verify we can get a specific symbol from a GNU hash section. + """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'lib_versioned64.so.1.elf'), 'rb') as f: + elf = ELFFile(f) + hash_section = elf.get_section_by_name('.gnu.hash') + self.assertIsNotNone(hash_section) + symbol_f1 = hash_section.get_symbol('function1_ver1_1') + self.assertIsNotNone(symbol_f1) + self.assertEqual(symbol_f1['st_value'], int(0x9a2)) + + def test_get_symbol_big_endian(self): + """ Verify we can get a specific symbol from a GNU hash section in a + big-endian file. 
+ """ + with open(os.path.join('test', 'testfiles_for_unittests', + 'aarch64_be_gnu_hash.so.elf'), 'rb') as f: + elf = ELFFile(f) + self.assertFalse(elf.little_endian) + hash_section = elf.get_section_by_name('.gnu.hash') + self.assertIsNotNone(hash_section) + symbol_f1 = hash_section.get_symbol('caller') + self.assertIsNotNone(symbol_f1) + self.assertEqual(symbol_f1['st_value'], int(0x5a4)) diff --git a/test/test_pubtypes.py b/test/test_pubtypes.py new file mode 100644 index 00000000..ed7c6a16 --- /dev/null +++ b/test/test_pubtypes.py @@ -0,0 +1,25 @@ +#------------------------------------------------------------------------------- +# elftools tests +# +# Efimov Vasiliy (real@ispras.ru) +# This code is in the public domain +#------------------------------------------------------------------------------- +import os +import unittest + +from elftools.elf.elffile import ELFFile + + +class TestEmptyPubtypes(unittest.TestCase): + def test_empty_pubtypes(self): + test_dir = os.path.join('test', 'testfiles_for_unittests') + with open(os.path.join(test_dir, 'empty_pubtypes', 'main.elf'), 'rb') as f: + elf = ELFFile(f) + + # This test targets `ELFParseError` caused by buggy handling + # of ".debug_pubtypes" section which only has zero terminator + # entry. + self.assertEqual(len(elf.get_dwarf_info().get_pubtypes()), 0) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_refaddr_bitness.py b/test/test_refaddr_bitness.py new file mode 100644 index 00000000..ea01db64 --- /dev/null +++ b/test/test_refaddr_bitness.py @@ -0,0 +1,65 @@ +#------------------------------------------------------------------------------ +# elftools tests +# +# Seva Alekseyev (sevaa@sprynet.com) +# This code is in the public domain +# +# The error that motivated this fix was in an iOS binary in Mach-O format. It +# had v2 DWARF data, but it was targeting a 64 bit architecture. 
Before the fix, +# pyelftools would assume that DW_FORM_ref_addr attribute took 4 bytes and +# misparse the DWARF data in the binary. +# +# Since pyelftools doesn't work with Mach-O files, I've taken a sample binary +# apart, and saved the three relevant sections - info, abbrev, and str as flat +# files. The metadata (the fact that it's targeting ARM64) is hard-coded, since +# the Mach-O header isn't preserved. +#------------------------------------------------------------------------------ + +import unittest +import os, sys, io + +from elftools.dwarf.dwarfinfo import DWARFInfo, DebugSectionDescriptor, DwarfConfig + +class TestRefAddrOnDWARFv2With64BitTarget(unittest.TestCase): + def test_main(self): + # Read the three saved sections as bytestreams + with open(os.path.join('test', 'testfiles_for_unittests', 'arm64_on_dwarfv2.info.dat'), 'rb') as f: + info = f.read() + with open(os.path.join('test', 'testfiles_for_unittests', 'arm64_on_dwarfv2.abbrev.dat'), 'rb') as f: + abbrev = f.read() + with open(os.path.join('test', 'testfiles_for_unittests', 'arm64_on_dwarfv2.str.dat'), 'rb') as f: + str = f.read() + + # Parse the DWARF info + di = DWARFInfo( + config = DwarfConfig(little_endian = True, default_address_size = 8, machine_arch = "ARM64"), + debug_info_sec = DebugSectionDescriptor(io.BytesIO(info), '__debug_info', None, len(info), 0), + debug_aranges_sec = None, + debug_abbrev_sec = DebugSectionDescriptor(io.BytesIO(abbrev), '__debug_abbrev', None, len(abbrev), 0), + debug_frame_sec = None, + eh_frame_sec = None, + debug_str_sec = DebugSectionDescriptor(io.BytesIO(str), '__debug_str', None, len(str), 0), + debug_loc_sec = None, + debug_ranges_sec = None, + debug_line_sec = None, + debug_pubtypes_sec = None, + debug_pubnames_sec = None, + debug_addr_sec=None, + debug_str_offsets_sec=None, + debug_line_str_sec=None, + ) + + CUs = [cu for cu in di.iter_CUs()] + # Locate a CU that I know has a reference in DW_FORM_ref_addr form + CU = CUs[21] + 
self.assertEqual(CU['version'], 2) + # Make sure pyelftools appreciates the difference between the target address size and DWARF inter-DIE offset size + self.assertEqual(CU.structs.dwarf_format, 32) + self.assertEqual(CU['address_size'], 8) + DIEs = [die for die in CU.iter_DIEs()] + # Before the patch, DIE #2 is misparsed, the current offset is off, the rest are misparsed too + self.assertEqual(len(DIEs), 15) + # It was 9 before the patch, which was wrong. + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_relocations.py b/test/test_relocations.py new file mode 100644 index 00000000..f1c8f107 --- /dev/null +++ b/test/test_relocations.py @@ -0,0 +1,48 @@ +import os +import sys +import unittest + +from elftools.common.py3compat import BytesIO +from elftools.elf.elffile import ELFFile +from elftools.elf.dynamic import DynamicSegment, DynamicSection + + +class TestRelocation(unittest.TestCase): + def test_dynamic_segment(self): + """Verify that we can process relocations on the PT_DYNAMIC segment without section headers""" + + test_dir = os.path.join('test', 'testfiles_for_unittests') + with open(os.path.join(test_dir, 'x64_bad_sections.elf'), 'rb') as f: + elff = ELFFile(f) + + for seg in elff.iter_segments(): + if isinstance(seg, DynamicSegment): + relos = seg.get_relocation_tables() + self.assertEqual(set(relos), {'JMPREL', 'RELA'}) + + def test_dynamic_section(self): + """Verify that we can parse relocations from the .dynamic section""" + + test_dir = os.path.join('test', 'testfiles_for_unittests') + with open(os.path.join(test_dir, 'sample_exe64.elf'), 'rb') as f: + elff = ELFFile(f) + + for sect in elff.iter_sections(): + if isinstance(sect, DynamicSection): + relos = sect.get_relocation_tables() + self.assertEqual(set(relos), {'JMPREL', 'RELA'}) + + def test_dynamic_section_solaris(self): + """Verify that we can parse relocations from the .dynamic section""" + + test_dir = os.path.join('test', 'testfiles_for_unittests') + with 
open(os.path.join(test_dir, 'exe_solaris32_cc.elf'), 'rb') as f: + elff = ELFFile(f) + + for sect in elff.iter_sections(): + if isinstance(sect, DynamicSection): + relos = sect.get_relocation_tables() + self.assertEqual(set(relos), {'JMPREL', 'REL'}) + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_relr.py b/test/test_relr.py new file mode 100644 index 00000000..69e39d6d --- /dev/null +++ b/test/test_relr.py @@ -0,0 +1,49 @@ +#------------------------------------------------------------------------------- +# elftools tests +# +# Andreas Ziegler (andreas.ziegler@fau.de) +# This code is in the public domain +#------------------------------------------------------------------------------- +# The lib_relro.so.elf file was generated as follows (on Debian 11): +# $ cat lib_relro.c +# int retfunc(){ return 1; } +# int (*ptr1)() = retfunc; +# int (*ptr2)() = retfunc; +# <...> +# int (*ptr100)() = retfunc; +# $ clang-12 -c -o lib_relro.o -fPIC lib_relro.c +# $ ld.lld-12 -o lib_relro.so.elf --pack-dyn-relocs=relr --shared -Bsymbolic-functions lib_relro.o + +import unittest +import os + +from elftools.elf.elffile import ELFFile + +class TestRelr(unittest.TestCase): + + def test_num_relocations(self): + """ Verify we can get the number of relocations in a RELR relocation + section. + """ + path = os.path.join('test', 'testfiles_for_unittests', + 'lib_relro.so.elf') + with open(path, 'rb') as f: + elf = ELFFile(f) + relr_section = elf.get_section_by_name('.relr.dyn') + self.assertIsNotNone(relr_section) + self.assertEqual(relr_section.num_relocations(), 100) + + def test_get_relocation(self): + """ Verify we can get a specific relocation in a RELR relocation + section. 
+ """ + path = os.path.join('test', 'testfiles_for_unittests', + 'lib_relro.so.elf') + with open(path, 'rb') as f: + elf = ELFFile(f) + relr_section = elf.get_section_by_name('.relr.dyn') + self.assertIsNotNone(relr_section) + reloc = relr_section.get_relocation(n=0) + self.assertEqual(reloc['r_offset'], 0x4540) + reloc = relr_section.get_relocation(n=65) + self.assertEqual(reloc['r_offset'], 0x4748) diff --git a/test/testfiles_for_location_info/test-dwarf2.o b/test/testfiles_for_location_info/test-dwarf2.o new file mode 100755 index 00000000..9bc2a280 Binary files /dev/null and b/test/testfiles_for_location_info/test-dwarf2.o differ diff --git a/test/testfiles_for_location_info/test-dwarf4.o b/test/testfiles_for_location_info/test-dwarf4.o new file mode 100755 index 00000000..187ce70f Binary files /dev/null and b/test/testfiles_for_location_info/test-dwarf4.o differ diff --git a/test/testfiles_for_readelf/aarch64-relocs-le.o.elf b/test/testfiles_for_readelf/aarch64-relocs-le.o.elf new file mode 100644 index 00000000..3f74d229 Binary files /dev/null and b/test/testfiles_for_readelf/aarch64-relocs-le.o.elf differ diff --git a/test/testfiles_for_readelf/aarch64-relocs.c b/test/testfiles_for_readelf/aarch64-relocs.c new file mode 100644 index 00000000..c78a5629 --- /dev/null +++ b/test/testfiles_for_readelf/aarch64-relocs.c @@ -0,0 +1,16 @@ +/* This source was compiled for aarch64 (little endian). 
+ aarch64-linux-gnu-gcc -c -o aarch64-relocs-le.o.elf aarch64-relocs.c -g +*/ + +extern struct { + int i, j; +} data; + +extern int bar (void); + +int +foo (int a) +{ + data.i += a; + data.j -= bar(); +} diff --git a/test/testfiles_for_readelf/angr-eh_frame.elf b/test/testfiles_for_readelf/angr-eh_frame.elf new file mode 100644 index 00000000..f6514ad0 Binary files /dev/null and b/test/testfiles_for_readelf/angr-eh_frame.elf differ diff --git a/test/testfiles_for_readelf/dt_flags.elf b/test/testfiles_for_readelf/dt_flags.elf new file mode 100644 index 00000000..17574dd0 Binary files /dev/null and b/test/testfiles_for_readelf/dt_flags.elf differ diff --git a/test/testfiles_for_readelf/dwarf_gnuops4.so.elf b/test/testfiles_for_readelf/dwarf_gnuops4.so.elf new file mode 100644 index 00000000..d9ffe9b3 Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_gnuops4.so.elf differ diff --git a/test/testfiles_for_readelf/many_sections.o.elf b/test/testfiles_for_readelf/many_sections.o.elf new file mode 100644 index 00000000..f51fd3eb Binary files /dev/null and b/test/testfiles_for_readelf/many_sections.o.elf differ diff --git a/test/testfiles_for_readelf/mips64-relocs-be.o.elf b/test/testfiles_for_readelf/mips64-relocs-be.o.elf new file mode 100644 index 00000000..fb6d587f Binary files /dev/null and b/test/testfiles_for_readelf/mips64-relocs-be.o.elf differ diff --git a/test/testfiles_for_readelf/mips64-relocs-le.o.elf b/test/testfiles_for_readelf/mips64-relocs-le.o.elf new file mode 100644 index 00000000..028d4137 Binary files /dev/null and b/test/testfiles_for_readelf/mips64-relocs-le.o.elf differ diff --git a/test/testfiles_for_readelf/mips64-relocs.c b/test/testfiles_for_readelf/mips64-relocs.c new file mode 100644 index 00000000..707b625d --- /dev/null +++ b/test/testfiles_for_readelf/mips64-relocs.c @@ -0,0 +1,19 @@ +/* This source was compiled for MIPS64 (big endian) and MIPS64EL (little + endian): + + mips64-unknown-linux-gnu-gcc -c mips64-relocs.c -o 
mips64-relocs-be.o.elf -mabi=64 + mips64el-unknown-linux-gnu-gcc -c mips64-relocs.c -o mips64-relocs-le.o.elf -mabi=64 +*/ + +extern struct { + int i, j; +} data; + +extern int bar (void); + +int +foo (int a) +{ + data.i += a; + data.j -= bar(); +} diff --git a/test/testfiles_for_readelf/note_gnu_property.S b/test/testfiles_for_readelf/note_gnu_property.S new file mode 100644 index 00000000..20a845b3 --- /dev/null +++ b/test/testfiles_for_readelf/note_gnu_property.S @@ -0,0 +1,103 @@ +/** + * Test ELF for .note.gnu.property, built on x86-64. + * + * Object file: + * gcc -c note_gnu_property.S -o note_gnu_property.o.elf + * + * ELF executable (to also have a PT_GNU_PROPERTY program header): + * gcc -DEXE -c note_gnu_property.S -o /tmp/x.o + * ld /tmp/x.o -o note_gnu_property.elf + */ + +// https://github.com/hjl-tools/linux-abi/wiki/linux-abi-draft.pdf +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_STACK_SIZE 1 +#define GNU_PROPERTY_NO_COPY_ON_PROTECTED 2 +#define GNU_PROPERTY_LOPROC 0xc0000000 +#define GNU_PROPERTY_HIPROC 0xdfffffff +#define GNU_PROPERTY_LOUSER 0xe0000000 +#define GNU_PROPERTY_HIUSER 0xffffffff + +// Unknown property types for testing purposes +#define GNU_PROPERTY_TEST_UNKNOWN 0x12345678 +#define GNU_PROPERTY_TEST_UNKNOWN_PROC (GNU_PROPERTY_LOPROC + 0x1234567) +#define GNU_PROPERTY_TEST_UNKNOWN_USER (GNU_PROPERTY_LOUSER + 0x1234567) + +// https://gitlab.com/x86-psABIs/x86-64-ABI/ +// https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI +#define GNU_PROPERTY_X86_UINT32_AND_LO 0xc0000002 +#define GNU_PROPERTY_X86_UINT32_AND_HI 0xc0007fff +#define GNU_PROPERTY_X86_UINT32_OR_LO 0xc0008000 +#define GNU_PROPERTY_X86_UINT32_OR_HI 0xc000ffff +#define GNU_PROPERTY_X86_UINT32_OR_AND_LO 0xc0010000 +#define GNU_PROPERTY_X86_UINT32_OR_AND_HI 0xc0017fff + +#define GNU_PROPERTY_X86_FEATURE_1_AND (GNU_PROPERTY_X86_UINT32_AND_LO + 0) +#define GNU_PROPERTY_X86_FEATURE_1_IBT (1U << 0) +#define GNU_PROPERTY_X86_FEATURE_1_SHSTK (1U << 1) +#define 
GNU_PROPERTY_X86_FEATURE_1_LAM_U48 (1U << 2) +#define GNU_PROPERTY_X86_FEATURE_1_LAM_U57 (1U << 3) + +#ifdef __x86_64__ +#define ALIGN .p2align 3 +#else +#define ALIGN .p2align 2 +#endif + +.section ".text" +.global _start +_start: + ud2 + +.section ".note.gnu.property", "a" + ALIGN + .long 1f - 0f // n_namesz + .long end - 2f // n_descsz + .long NT_GNU_PROPERTY_TYPE_0 // n_type +0: .asciz "GNU" // n_name +1: + ALIGN +2: .long GNU_PROPERTY_STACK_SIZE // pr_type + .long 4f - 3f // pr_datasz +3: + .dc.a 0x123000 +4: + ALIGN + .long GNU_PROPERTY_NO_COPY_ON_PROTECTED // pr_type + .long 0 // pr_datasz + ALIGN + +// Avoid these if linking to executable, linkers may not recognize them +#ifndef EXE + .long GNU_PROPERTY_TEST_UNKNOWN // pr_type + .long 6f-5f // pr_datasz +5: + .ascii "hello world" +6: + ALIGN + .long GNU_PROPERTY_TEST_UNKNOWN_PROC // pr_type + .long 8f-7f // pr_datasz +7: + .ascii "foobar" +8: + ALIGN + .long GNU_PROPERTY_TEST_UNKNOWN_USER // pr_type + .long 10f-9f // pr_datasz +9: + .ascii "bazquuz" +10: + ALIGN +#endif + +11: .long GNU_PROPERTY_X86_FEATURE_1_AND // pr_type. + .long 13f - 12f // pr_datasz +12: + // Not sure if LAM_U48 and LAM_U57 make sense together, readelf does not + // seem to complain and outputs both. 
+ .long GNU_PROPERTY_X86_FEATURE_1_IBT \ + | GNU_PROPERTY_X86_FEATURE_1_SHSTK \ + | GNU_PROPERTY_X86_FEATURE_1_LAM_U48 \ + | GNU_PROPERTY_X86_FEATURE_1_LAM_U57 +13: + ALIGN +end: diff --git a/test/testfiles_for_readelf/note_gnu_property.elf b/test/testfiles_for_readelf/note_gnu_property.elf new file mode 100755 index 00000000..408efee7 Binary files /dev/null and b/test/testfiles_for_readelf/note_gnu_property.elf differ diff --git a/test/testfiles_for_readelf/note_gnu_property.o.elf b/test/testfiles_for_readelf/note_gnu_property.o.elf new file mode 100644 index 00000000..f7f3a63b Binary files /dev/null and b/test/testfiles_for_readelf/note_gnu_property.o.elf differ diff --git a/test/testfiles_for_readelf/powerpc64-relocs-le.o.elf b/test/testfiles_for_readelf/powerpc64-relocs-le.o.elf new file mode 100644 index 00000000..9d67c1ff Binary files /dev/null and b/test/testfiles_for_readelf/powerpc64-relocs-le.o.elf differ diff --git a/test/testfiles_for_readelf/powerpc64-relocs.c b/test/testfiles_for_readelf/powerpc64-relocs.c new file mode 100644 index 00000000..fe5c3e85 --- /dev/null +++ b/test/testfiles_for_readelf/powerpc64-relocs.c @@ -0,0 +1,16 @@ +/* This source was compiled for ppc64le. 
+ clang --target=powerpc64le -c -o powerpc64-relocs-le.o.elf powerpc64-relocs.c -g +*/ + +extern struct { + int i, j; +} data; + +extern int bar (void); + +int +foo (int a) +{ + data.i += a; + data.j -= bar(); +} diff --git a/test/testfiles_for_readelf/reloc_arm_gcc.o.elf b/test/testfiles_for_readelf/reloc_arm_gcc.o.elf new file mode 100644 index 00000000..4d8e4e9f Binary files /dev/null and b/test/testfiles_for_readelf/reloc_arm_gcc.o.elf differ diff --git a/test/testfiles_for_readelf/reloc_armhf_gcc.o.elf b/test/testfiles_for_readelf/reloc_armhf_gcc.o.elf new file mode 100644 index 00000000..320aea3c Binary files /dev/null and b/test/testfiles_for_readelf/reloc_armhf_gcc.o.elf differ diff --git a/test/testfiles_for_readelf/reloc_armsf_gcc.o.elf b/test/testfiles_for_readelf/reloc_armsf_gcc.o.elf new file mode 100755 index 00000000..04c3849f Binary files /dev/null and b/test/testfiles_for_readelf/reloc_armsf_gcc.o.elf differ diff --git a/test/testfiles_for_readelf/simple_armeb_gcc.o.elf b/test/testfiles_for_readelf/simple_armeb_gcc.o.elf new file mode 100755 index 00000000..bd0c38b3 Binary files /dev/null and b/test/testfiles_for_readelf/simple_armeb_gcc.o.elf differ diff --git a/test/testfiles_for_readelf/simple_armhf_gcc.o.elf b/test/testfiles_for_readelf/simple_armhf_gcc.o.elf new file mode 100644 index 00000000..a5a02335 Binary files /dev/null and b/test/testfiles_for_readelf/simple_armhf_gcc.o.elf differ diff --git a/test/testfiles_for_readelf/tls.c b/test/testfiles_for_readelf/tls.c new file mode 100644 index 00000000..781bf99c --- /dev/null +++ b/test/testfiles_for_readelf/tls.c @@ -0,0 +1,8 @@ +// Compile into tls.elf using: +// $ gcc -m32 -o tls.elf tls.c +// For tls64.elf, use: +// $ gcc -m64 -o tls64.elf tls.c + +__thread int i; + +int main(){} diff --git a/test/testfiles_for_readelf/tls.elf b/test/testfiles_for_readelf/tls.elf new file mode 100755 index 00000000..8c9ce8f4 Binary files /dev/null and b/test/testfiles_for_readelf/tls.elf differ diff --git 
a/test/testfiles_for_readelf/tls64.elf b/test/testfiles_for_readelf/tls64.elf new file mode 100755 index 00000000..ef775387 Binary files /dev/null and b/test/testfiles_for_readelf/tls64.elf differ diff --git a/test/testfiles_for_unittests/aarch64_be_gnu_hash.so.elf b/test/testfiles_for_unittests/aarch64_be_gnu_hash.so.elf new file mode 100755 index 00000000..ed368449 Binary files /dev/null and b/test/testfiles_for_unittests/aarch64_be_gnu_hash.so.elf differ diff --git a/test/testfiles_for_unittests/aranges_absent.elf b/test/testfiles_for_unittests/aranges_absent.elf new file mode 100755 index 00000000..5672d2d1 Binary files /dev/null and b/test/testfiles_for_unittests/aranges_absent.elf differ diff --git a/test/testfiles_for_unittests/aranges_complete.elf b/test/testfiles_for_unittests/aranges_complete.elf new file mode 100755 index 00000000..6767fa72 Binary files /dev/null and b/test/testfiles_for_unittests/aranges_complete.elf differ diff --git a/test/testfiles_for_unittests/aranges_partial.elf b/test/testfiles_for_unittests/aranges_partial.elf new file mode 100755 index 00000000..6ed3ea6b Binary files /dev/null and b/test/testfiles_for_unittests/aranges_partial.elf differ diff --git a/test/testfiles_for_unittests/aranges_partial_a.c b/test/testfiles_for_unittests/aranges_partial_a.c new file mode 100644 index 00000000..47ae2bcc --- /dev/null +++ b/test/testfiles_for_unittests/aranges_partial_a.c @@ -0,0 +1,20 @@ +/* +clang -g -c aranges_partial_a.c +clang -g -gdwarf-aranges -c aranges_partial_b.c +clang -g aranges_partial_{a,b}.o -o aranges_partial.elf + +clang -g -gdwarf-aranges -c aranges_partial_a.c +clang -g -gdwarf-aranges -c aranges_partial_b.c +clang -g aranges_partial_{a,b}.o -o aranges_complete.elf + +clang -g -c aranges_partial_a.c +clang -g -c aranges_partial_b.c +clang -g aranges_partial_{a,b}.o -o aranges_absent.elf +*/ + +extern int test(); + +int main() { + int a = test(); + return a; +} diff --git 
a/test/testfiles_for_unittests/aranges_partial_b.c b/test/testfiles_for_unittests/aranges_partial_b.c new file mode 100644 index 00000000..496a99d1 --- /dev/null +++ b/test/testfiles_for_unittests/aranges_partial_b.c @@ -0,0 +1,4 @@ +int test() { + int a = 0; + return a; +} diff --git a/test/testfiles_for_unittests/arm64_on_dwarfv2.abbrev.dat b/test/testfiles_for_unittests/arm64_on_dwarfv2.abbrev.dat new file mode 100644 index 00000000..16b2768f Binary files /dev/null and b/test/testfiles_for_unittests/arm64_on_dwarfv2.abbrev.dat differ diff --git a/test/testfiles_for_unittests/arm64_on_dwarfv2.info.dat b/test/testfiles_for_unittests/arm64_on_dwarfv2.info.dat new file mode 100644 index 00000000..b87d3b96 Binary files /dev/null and b/test/testfiles_for_unittests/arm64_on_dwarfv2.info.dat differ diff --git a/test/testfiles_for_unittests/arm64_on_dwarfv2.str.dat b/test/testfiles_for_unittests/arm64_on_dwarfv2.str.dat new file mode 100644 index 00000000..3bea27a7 Binary files /dev/null and b/test/testfiles_for_unittests/arm64_on_dwarfv2.str.dat differ diff --git a/test/testfiles_for_unittests/arm_exidx_test.cpp b/test/testfiles_for_unittests/arm_exidx_test.cpp new file mode 100644 index 00000000..4790cc0f --- /dev/null +++ b/test/testfiles_for_unittests/arm_exidx_test.cpp @@ -0,0 +1,23 @@ +#include <iostream> +#include <string> + +void func1(int i); + +void func2(int i); + +void func1(int i) { + if (i == 0) + return; + func2(i - 1); +} + +void func2(int i) { + if (i == 0) + return; + func1(i - 1); +} + +int main(int argc, char **argv) { + std::string hello = "Hello from C++"; + std::cout << hello << std::endl; +} diff --git a/test/testfiles_for_unittests/arm_exidx_test.elf b/test/testfiles_for_unittests/arm_exidx_test.elf new file mode 100644 index 00000000..94bb535c Binary files /dev/null and b/test/testfiles_for_unittests/arm_exidx_test.elf differ diff --git a/test/testfiles_for_unittests/arm_exidx_test.o b/test/testfiles_for_unittests/arm_exidx_test.o new file mode 100644 index 
00000000..c13b0037 Binary files /dev/null and b/test/testfiles_for_unittests/arm_exidx_test.o differ diff --git a/test/testfiles_for_unittests/arm_exidx_test.so b/test/testfiles_for_unittests/arm_exidx_test.so new file mode 100755 index 00000000..ef45313a Binary files /dev/null and b/test/testfiles_for_unittests/arm_exidx_test.so differ diff --git a/test/testfiles_for_unittests/arm_reloc_relocated.elf b/test/testfiles_for_unittests/arm_reloc_relocated.elf new file mode 100755 index 00000000..f1cfbb8d Binary files /dev/null and b/test/testfiles_for_unittests/arm_reloc_relocated.elf differ diff --git a/test/testfiles_for_unittests/arm_reloc_source.c b/test/testfiles_for_unittests/arm_reloc_source.c new file mode 100644 index 00000000..4ea1c67c --- /dev/null +++ b/test/testfiles_for_unittests/arm_reloc_source.c @@ -0,0 +1,41 @@ +/* Generated by compiling with any LLVM version and +** with any GNU Arm Embedded Toolchain version. +** LLVM 3.8.0/5.0.0 and GNU Arm Embedded Toolchain 2.26 are fine. +** +** clang -O0 --target=arm-none-eabi -emit-llvm -c simple.c -o simple.bc +** llc -O0 -march=arm -filetype=obj simple.bc -o reloc_simple_arm_llvm.o +** arm-none-eabi-ld -e main reloc_simple_arm_llvm.o -o simple_arm_llvm.elf +** +** reloc_simple_arm_llvm.o is an ELF file that needs call relocation. +** +** simple_arm_llvm.elf is a relocated ELF file. 
+*/ + +int add(int a, int b) { + return a + b; +} + +int sub(int a, int b) { + return a - b; +} + +int mul(int a, int b) { + return a * b; +} + +void triple(int a, int b) { + add(a, b); + sub(a, b); + mul(a, b); +} + +int main(void) { + int a = 0xABCD, b = 0x1234; + + add(a, b); + sub(a, b); + mul(a, b); + triple(a, b); + + return 0; +} diff --git a/test/testfiles_for_unittests/arm_reloc_unrelocated.o b/test/testfiles_for_unittests/arm_reloc_unrelocated.o new file mode 100644 index 00000000..a1bfbaa8 Binary files /dev/null and b/test/testfiles_for_unittests/arm_reloc_unrelocated.o differ diff --git a/test/testfiles_for_unittests/core_linux32.elf b/test/testfiles_for_unittests/core_linux32.elf new file mode 100644 index 00000000..bbc5a045 Binary files /dev/null and b/test/testfiles_for_unittests/core_linux32.elf differ diff --git a/test/testfiles_for_unittests/core_linux32_qemu_mips.elf b/test/testfiles_for_unittests/core_linux32_qemu_mips.elf new file mode 100644 index 00000000..6dad0fc0 Binary files /dev/null and b/test/testfiles_for_unittests/core_linux32_qemu_mips.elf differ diff --git a/test/testfiles_for_unittests/debug_info.elf b/test/testfiles_for_unittests/debug_info.elf new file mode 100644 index 00000000..502b9202 Binary files /dev/null and b/test/testfiles_for_unittests/debug_info.elf differ diff --git a/test/testfiles_for_unittests/dwarf_gnuops1.o b/test/testfiles_for_unittests/dwarf_gnuops1.o new file mode 100644 index 00000000..d489f64f Binary files /dev/null and b/test/testfiles_for_unittests/dwarf_gnuops1.o differ diff --git a/test/testfiles_for_unittests/dwarf_v5_forms.debug b/test/testfiles_for_unittests/dwarf_v5_forms.debug new file mode 100755 index 00000000..4f6a402e Binary files /dev/null and b/test/testfiles_for_unittests/dwarf_v5_forms.debug differ diff --git a/test/testfiles_for_unittests/dwarfv5_basic.elf b/test/testfiles_for_unittests/dwarfv5_basic.elf new file mode 100755 index 00000000..4a9363ca Binary files /dev/null and 
b/test/testfiles_for_unittests/dwarfv5_basic.elf differ diff --git a/test/testfiles_for_unittests/empty_pubtypes/Makefile b/test/testfiles_for_unittests/empty_pubtypes/Makefile new file mode 100644 index 00000000..c53964f7 --- /dev/null +++ b/test/testfiles_for_unittests/empty_pubtypes/Makefile @@ -0,0 +1,2 @@ +main.elf: main.c + gcc -gpubnames -g -O0 main.c -o main.elf diff --git a/test/testfiles_for_unittests/empty_pubtypes/main.c b/test/testfiles_for_unittests/empty_pubtypes/main.c new file mode 100644 index 00000000..ab73b3a2 --- /dev/null +++ b/test/testfiles_for_unittests/empty_pubtypes/main.c @@ -0,0 +1 @@ +void main() {} diff --git a/test/testfiles_for_unittests/empty_pubtypes/main.elf b/test/testfiles_for_unittests/empty_pubtypes/main.elf new file mode 100755 index 00000000..9a3c2c13 Binary files /dev/null and b/test/testfiles_for_unittests/empty_pubtypes/main.elf differ diff --git a/test/testfiles_for_unittests/lambda.elf b/test/testfiles_for_unittests/lambda.elf new file mode 100755 index 00000000..d232a051 Binary files /dev/null and b/test/testfiles_for_unittests/lambda.elf differ diff --git a/test/testfiles_for_unittests/lib_relro.so.elf b/test/testfiles_for_unittests/lib_relro.so.elf new file mode 100755 index 00000000..16b8587c Binary files /dev/null and b/test/testfiles_for_unittests/lib_relro.so.elf differ diff --git a/test/testfiles_for_unittests/pascalenum.o b/test/testfiles_for_unittests/pascalenum.o new file mode 100644 index 00000000..a8520e87 Binary files /dev/null and b/test/testfiles_for_unittests/pascalenum.o differ diff --git a/test/testfiles_for_unittests/simple_mipsel.c b/test/testfiles_for_unittests/simple_mipsel.c new file mode 100644 index 00000000..601434b0 --- /dev/null +++ b/test/testfiles_for_unittests/simple_mipsel.c @@ -0,0 +1,14 @@ +/* Generated by compiling with any GCC version for MIPS Little Endian. +** GCC 5.4.0 is fine. 
+ +/usr/bin/mipsel-linux-gnu-gcc -g -O0 ./simple_mipsel.c -o ./simple_mipsel.elf +*/ + +void main(void) +{ + int a = 1, b = 2, res; + + res = a + b; + + return; +} diff --git a/test/testfiles_for_unittests/simple_mipsel.elf b/test/testfiles_for_unittests/simple_mipsel.elf new file mode 100755 index 00000000..86c25611 Binary files /dev/null and b/test/testfiles_for_unittests/simple_mipsel.elf differ diff --git a/test/testfiles_for_unittests/trailing_null_dies.elf b/test/testfiles_for_unittests/trailing_null_dies.elf new file mode 100644 index 00000000..1bc7f5ee Binary files /dev/null and b/test/testfiles_for_unittests/trailing_null_dies.elf differ diff --git a/test/testfiles_for_unittests/x64_bad_sections.elf b/test/testfiles_for_unittests/x64_bad_sections.elf new file mode 100644 index 00000000..6a30111f Binary files /dev/null and b/test/testfiles_for_unittests/x64_bad_sections.elf differ diff --git a/tox.ini b/tox.ini index 7e9b4846..1c0192f4 100644 --- a/tox.ini +++ b/tox.ini @@ -1,7 +1,9 @@ [tox] -envlist = py27,py34,py35 +envlist = py27,py38 [testenv] +setenv = + LC_ALL = en_US.utf-8 commands = python test/run_all_unittests.py python test/run_examples_test.py