diff --git a/playa/converter.py b/playa/converter.py
index bd35ddf2..b0e22948 100644
--- a/playa/converter.py
+++ b/playa/converter.py
@@ -9,6 +9,7 @@
 
 from playa.layout import (
     LAParams,
+    LTComponent,
     LTChar,
     LTCurve,
     LTFigure,
@@ -21,7 +22,8 @@
 from playa.pdfcolor import PDFColorSpace
 from playa.pdfdevice import PDFTextDevice
 from playa.pdffont import PDFFont, PDFUnicodeNotDefined
-from playa.pdfinterp import PDFGraphicState, PDFResourceManager
+from playa.pdfinterp import PDFGraphicState, PDFResourceManager, PDFStackT
+from playa.psparser import PSLiteral
 from playa.pdfpage import PDFPage
 from playa.pdftypes import PDFStream
 from playa.utils import (
@@ -30,6 +32,7 @@
     Point,
     Rect,
     apply_matrix_pt,
+    decode_text,
     mult_matrix,
 )
 
@@ -39,6 +42,8 @@
 class PDFLayoutAnalyzer(PDFTextDevice):
     cur_item: LTLayoutContainer
     ctm: Matrix
+    cur_mcid: Optional[int] = None
+    cur_tag: Optional[str] = None
 
     def __init__(
         self,
@@ -76,6 +81,24 @@ def end_figure(self, _: str) -> None:
         self.cur_item = self._stack.pop()
         self.cur_item.add(fig)
 
+    def begin_tag(self, tag: PSLiteral, props: Optional[PDFStackT] = None) -> None:
+        """Handle beginning of tag, setting current MCID if any."""
+        self.cur_tag = decode_text(tag.name)
+        if isinstance(props, dict) and "MCID" in props:
+            self.cur_mcid = props["MCID"]
+        else:
+            self.cur_mcid = None
+
+    def end_tag(self) -> None:
+        """Handle end of tag, clearing current tag and MCID."""
+        self.cur_tag = None
+        self.cur_mcid = None
+
+    def add_item(self, item: LTComponent) -> None:
+        item.mcid = self.cur_mcid
+        item.tag = self.cur_tag
+        self.cur_item.add(item)
+
     def render_image(self, name: str, stream: PDFStream) -> None:
         assert isinstance(self.cur_item, LTFigure), str(type(self.cur_item))
         item = LTImage(
@@ -83,7 +106,7 @@ def render_image(self, name: str, stream: PDFStream) -> None:
             stream,
             (self.cur_item.x0, self.cur_item.y0, self.cur_item.x1, self.cur_item.y1),
         )
-        self.cur_item.add(item)
+        self.add_item(item)
 
     def paint_path(
         self,
@@ -92,6 +115,8 @@ def paint_path(
         fill: bool,
         evenodd: bool,
         path: Sequence[PathSegment],
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> None:
         """Paint paths described in section 4.4 of the PDF reference manual"""
         shape = "".join(x[0] for x in path)
@@ -109,7 +134,7 @@ def paint_path(
             # recurse if there are multiple m's in this shape
             for m in re.finditer(r"m[^m]+", shape):
                 subpath = path[m.start(0) : m.end(0)]
-                self.paint_path(gstate, stroke, fill, evenodd, subpath)
+                self.paint_path(gstate, stroke, fill, evenodd, subpath, ncs, scs)
 
         else:
             # Although the 'h' command does not not literally provide a
@@ -153,8 +178,9 @@ def paint_path(
                     gstate.ncolor,
                     original_path=transformed_path,
                     dashing_style=gstate.dash,
+                    ncs=ncs, scs=scs
                 )
-                self.cur_item.add(line)
+                self.add_item(line)
 
             elif shape in {"mlllh", "mllll"}:
                 (x0, y0), (x1, y1), (x2, y2), (x3, y3), _ = pts
@@ -174,8 +200,9 @@ def paint_path(
                         gstate.ncolor,
                         transformed_path,
                         gstate.dash,
+                        ncs, scs
                     )
-                    self.cur_item.add(rect)
+                    self.add_item(rect)
                 else:
                     curve = LTCurve(
                         gstate.linewidth,
@@ -187,8 +214,9 @@ def paint_path(
                         gstate.ncolor,
                         transformed_path,
                         gstate.dash,
+                        ncs, scs
                     )
-                    self.cur_item.add(curve)
+                    self.add_item(curve)
             else:
                 curve = LTCurve(
                     gstate.linewidth,
@@ -200,8 +228,9 @@ def paint_path(
                     gstate.ncolor,
                     transformed_path,
                     gstate.dash,
+                    ncs, scs
                 )
-                self.cur_item.add(curve)
+                self.add_item(curve)
 
     def render_char(
         self,
@@ -211,8 +240,9 @@ def render_char(
         scaling: float,
         rise: float,
         cid: int,
-        ncs: PDFColorSpace,
         graphicstate: PDFGraphicState,
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> float:
         try:
             text = font.to_unichr(cid)
@@ -230,10 +260,11 @@ def render_char(
             text,
             textwidth,
             textdisp,
-            ncs,
             graphicstate,
+            ncs,
+            scs,
         )
-        self.cur_item.add(item)
+        self.add_item(item)
         return item.adv
 
     def handle_undefined_char(self, font: PDFFont, cid: int) -> str:
diff --git a/playa/data_structures.py b/playa/data_structures.py
index fab26c84..a43705b6 100644
--- a/playa/data_structures.py
+++ b/playa/data_structures.py
@@ -1,4 +1,4 @@
-from typing import Any, Iterable, List, Optional, Tuple
+from typing import Any, Dict, Iterator, List, Tuple
 
 from playa import settings
 from playa.pdfparser import PDFSyntaxError
@@ -6,41 +6,45 @@
 from playa.utils import choplist
 
 
+def walk_number_tree(tree: Dict[str, Any]) -> Iterator[Tuple[int, Any]]:
+    stack = [tree]
+    while stack:
+        item = dict_value(stack.pop())
+        if "Nums" in item:
+            for k, v in choplist(2, list_value(item["Nums"])):
+                yield int_value(k), v
+        if "Kids" in item:
+            stack.extend(reversed(list_value(item["Kids"])))
+
+
 class NumberTree:
     """A PDF number tree.
 
-    See Section 3.8.6 of the PDF Reference.
+    See Section 7.9.7 of the PDF 1.7 Reference.
     """
 
     def __init__(self, obj: Any):
         self._obj = dict_value(obj)
-        self.nums: Optional[Iterable[Any]] = None
-        self.kids: Optional[Iterable[Any]] = None
-        self.limits: Optional[Iterable[Any]] = None
 
-        if "Nums" in self._obj:
-            self.nums = list_value(self._obj["Nums"])
-        if "Kids" in self._obj:
-            self.kids = list_value(self._obj["Kids"])
-        if "Limits" in self._obj:
-            self.limits = list_value(self._obj["Limits"])
+    def __iter__(self) -> Iterator[Tuple[int, Any]]:
+        return walk_number_tree(self._obj)
 
-    def _parse(self) -> List[Tuple[int, Any]]:
-        items = []
-        if self.nums:  # Leaf node
-            for k, v in choplist(2, self.nums):
-                items.append((int_value(k), v))
+    def __contains__(self, num) -> bool:
+        """Report whether `num` is one of the tree's keys."""
+        # Linear scan over every (key, value) pair; any() stops at the
+        # first key that compares equal.
+        return any(key == num for key, _ in self)
 
-        if self.kids:  # Root or intermediate node
-            for child_ref in self.kids:
-                items += NumberTree(child_ref)._parse()
-
-        return items
+    def __getitem__(self, num) -> Any:
+        for idx, val in self:
+            if idx == num:
+                return val
+        raise IndexError(f"Number {num} not in tree")
 
     @property
     def values(self) -> List[Tuple[int, Any]]:
-        values = self._parse()
-
+        values = list(self)
+        # NOTE: They are supposed to be sorted! (but, I suppose, often aren't)
         if settings.STRICT:
             if not all(a[0] <= b[0] for a, b in zip(values, values[1:])):
                 raise PDFSyntaxError("Number tree elements are out of order")
diff --git a/playa/layout.py b/playa/layout.py
index 72de7e64..a3d93cf2 100644
--- a/playa/layout.py
+++ b/playa/layout.py
@@ -126,6 +126,11 @@ def __repr__(self) -> str:
 class LTItem:
     """Interface for things that can be analyzed"""
 
+    # Any item could be in a marked content section
+    mcid: Optional[int] = None
+    # Which could have a tag
+    tag: Optional[str] = None
+
     def analyze(self, laparams: LAParams) -> None:
         """Perform the layout analysis."""
 
@@ -234,9 +239,13 @@ def __init__(
         non_stroking_color: Optional[Color] = None,
         original_path: Optional[List[PathSegment]] = None,
         dashing_style: Optional[Tuple[object, object]] = None,
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> None:
         LTComponent.__init__(self, get_bound(pts))
         self.pts = pts
+        self.ncs = ncs
+        self.scs = scs
         self.linewidth = linewidth
         self.stroke = stroke
         self.fill = fill
@@ -268,6 +277,8 @@ def __init__(
         non_stroking_color: Optional[Color] = None,
         original_path: Optional[List[PathSegment]] = None,
         dashing_style: Optional[Tuple[object, object]] = None,
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> None:
         LTCurve.__init__(
             self,
@@ -280,6 +291,7 @@ def __init__(
             non_stroking_color,
             original_path,
             dashing_style,
+            ncs, scs,
         )
 
 
@@ -300,6 +312,8 @@ def __init__(
         non_stroking_color: Optional[Color] = None,
         original_path: Optional[List[PathSegment]] = None,
         dashing_style: Optional[Tuple[object, object]] = None,
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> None:
         (x0, y0, x1, y1) = bbox
         LTCurve.__init__(
@@ -313,6 +327,7 @@ def __init__(
             non_stroking_color,
             original_path,
             dashing_style,
+            ncs, scs,
         )
 
 
@@ -365,14 +380,16 @@ def __init__(
         text: str,
         textwidth: float,
         textdisp: Union[float, Tuple[Optional[float], float]],
-        ncs: PDFColorSpace,
         graphicstate: PDFGraphicState,
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> None:
         LTText.__init__(self)
         self._text = text
         self.matrix = matrix
         self.fontname = font.fontname
         self.ncs = ncs
+        self.scs = scs
         self.graphicstate = graphicstate
         self.adv = textwidth * fontsize * scaling
         # compute the boundary rectangle.
diff --git a/playa/pdfdevice.py b/playa/pdfdevice.py
index becb6658..def0c828 100644
--- a/playa/pdfdevice.py
+++ b/playa/pdfdevice.py
@@ -79,6 +79,8 @@ def paint_path(
         fill: bool,
         evenodd: bool,
         path: Sequence[PathSegment],
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> None:
         pass
 
@@ -89,8 +91,9 @@ def render_string(
         self,
         textstate: "PDFTextState",
         seq: PDFTextSeq,
-        ncs: PDFColorSpace,
         graphicstate: "PDFGraphicState",
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> None:
         pass
 
@@ -100,8 +103,9 @@ def render_string(
         self,
         textstate: "PDFTextState",
         seq: PDFTextSeq,
-        ncs: PDFColorSpace,
         graphicstate: "PDFGraphicState",
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> None:
         assert self.ctm is not None
         matrix = utils.mult_matrix(textstate.matrix, self.ctm)
@@ -127,8 +131,9 @@ def render_string(
                 wordspace,
                 rise,
                 dxscale,
-                ncs,
                 graphicstate,
+                ncs,
+                scs,
             )
         else:
             textstate.linematrix = self.render_string_horizontal(
@@ -142,8 +147,9 @@ def render_string(
                 wordspace,
                 rise,
                 dxscale,
-                ncs,
                 graphicstate,
+                ncs,
+                scs,
             )
 
     def render_string_horizontal(
@@ -158,8 +164,9 @@ def render_string_horizontal(
         wordspace: float,
         rise: float,
         dxscale: float,
-        ncs: PDFColorSpace,
         graphicstate: "PDFGraphicState",
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> Point:
         (x, y) = pos
         needcharspace = False
@@ -182,8 +189,9 @@ def render_string_horizontal(
                         scaling,
                         rise,
                         cid,
-                        ncs,
                         graphicstate,
+                        ncs,
+                        scs,
                     )
                     if cid == 32 and wordspace:
                         x += wordspace
@@ -202,8 +210,9 @@ def render_string_vertical(
         wordspace: float,
         rise: float,
         dxscale: float,
-        ncs: PDFColorSpace,
         graphicstate: "PDFGraphicState",
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> Point:
         (x, y) = pos
         needcharspace = False
@@ -226,8 +235,9 @@ def render_string_vertical(
                         scaling,
                         rise,
                         cid,
-                        ncs,
                         graphicstate,
+                        ncs,
+                        scs,
                     )
                     if cid == 32 and wordspace:
                         y += wordspace
@@ -242,8 +252,9 @@ def render_char(
         scaling: float,
         rise: float,
         cid: int,
-        ncs: PDFColorSpace,
         graphicstate: "PDFGraphicState",
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> float:
         return 0
 
@@ -265,8 +276,9 @@ def render_string(
         self,
         textstate: "PDFTextState",
         seq: PDFTextSeq,
-        ncs: PDFColorSpace,
         graphicstate: "PDFGraphicState",
+        ncs: Optional[PDFColorSpace] = None,
+        scs: Optional[PDFColorSpace] = None,
     ) -> None:
         font = textstate.font
         assert font is not None
diff --git a/playa/pdfdocument.py b/playa/pdfdocument.py
index 17e83566..5f592286 100644
--- a/playa/pdfdocument.py
+++ b/playa/pdfdocument.py
@@ -1116,35 +1116,43 @@ def read_xref_from(
 class PageLabels(NumberTree):
     """PageLabels from the document catalog.
 
-    See Section 8.3.1 in the PDF Reference.
+    See Section 12.4.2 in the PDF 1.7 Reference.
     """
 
     @property
     def labels(self) -> Iterator[str]:
-        ranges = self.values
-
-        # The tree must begin with page index 0
-        if len(ranges) == 0 or ranges[0][0] != 0:
+        itor = iter(self)
+        try:
+            start, label_dict_unchecked = next(itor)
+            # The tree must begin with page index 0
+            if start != 0:
+                if settings.STRICT:
+                    raise PDFSyntaxError("PageLabels is missing page index 0")
+                # Cope: re-queue this range and give pages before it empty labels
+                itor = itertools.chain([(start, label_dict_unchecked)], itor)
+                start, label_dict_unchecked = 0, {}
+        except StopIteration:
             if settings.STRICT:
-                raise PDFSyntaxError("PageLabels is missing page index 0")
-            else:
-                # Try to cope, by assuming empty labels for the initial pages
-                ranges.insert(0, (0, {}))
+                raise PDFSyntaxError("PageLabels is empty")
+            start = 0
+            label_dict_unchecked = {}
 
-        for next, (start, label_dict_unchecked) in enumerate(ranges, 1):
+        while True:  # forever!
             label_dict = dict_value(label_dict_unchecked)
             style = label_dict.get("S")
             prefix = decode_text(str_value(label_dict.get("P", b"")))
             first_value = int_value(label_dict.get("St", 1))
 
-            if next == len(ranges):
+            try:
+                next_start, label_dict_unchecked = next(itor)
+            except StopIteration:
                 # This is the last specified range. It continues until the end
                 # of the document.
                 values: Iterable[int] = itertools.count(first_value)
             else:
-                end, _ = ranges[next]
-                range_length = end - start
+                range_length = next_start - start
                 values = range(first_value, first_value + range_length)
+                start = next_start
 
             for value in values:
                 label = self._format_page_label(value, style)
diff --git a/playa/pdfinterp.py b/playa/pdfinterp.py
index 52b19932..78371869 100644
--- a/playa/pdfinterp.py
+++ b/playa/pdfinterp.py
@@ -553,7 +553,7 @@ def do_re(self, x: PDFStackT, y: PDFStackT, w: PDFStackT, h: PDFStackT) -> None:
 
     def do_S(self) -> None:
         """Stroke path"""
-        self.device.paint_path(self.graphicstate, True, False, False, self.curpath)
+        self.device.paint_path(self.graphicstate, True, False, False, self.curpath, self.ncs, self.scs)
         self.curpath = []
 
     def do_s(self) -> None:
@@ -563,7 +563,7 @@ def do_s(self) -> None:
 
     def do_f(self) -> None:
         """Fill path using nonzero winding number rule"""
-        self.device.paint_path(self.graphicstate, False, True, False, self.curpath)
+        self.device.paint_path(self.graphicstate, False, True, False, self.curpath, self.ncs, self.scs)
         self.curpath = []
 
     def do_F(self) -> None:
@@ -571,17 +571,17 @@ def do_F(self) -> None:
 
     def do_f_a(self) -> None:
         """Fill path using even-odd rule"""
-        self.device.paint_path(self.graphicstate, False, True, True, self.curpath)
+        self.device.paint_path(self.graphicstate, False, True, True, self.curpath, self.ncs, self.scs)
         self.curpath = []
 
     def do_B(self) -> None:
         """Fill and stroke path using nonzero winding number rule"""
-        self.device.paint_path(self.graphicstate, True, True, False, self.curpath)
+        self.device.paint_path(self.graphicstate, True, True, False, self.curpath, self.ncs, self.scs)
         self.curpath = []
 
     def do_B_a(self) -> None:
         """Fill and stroke path using even-odd rule"""
-        self.device.paint_path(self.graphicstate, True, True, True, self.curpath)
+        self.device.paint_path(self.graphicstate, True, True, True, self.curpath, self.ncs, self.scs)
         self.curpath = []
 
     def do_b(self) -> None:
@@ -865,12 +865,15 @@ def do_TJ(self, seq: PDFStackT) -> None:
             if settings.STRICT:
                 raise PDFInterpreterError("No font specified!")
             return
+        # FIXME: Are we sure?
         assert self.ncs is not None
+        assert self.scs is not None
         self.device.render_string(
             self.textstate,
             cast(PDFTextSeq, seq),
-            self.ncs,
             self.graphicstate.copy(),
+            self.ncs,
+            self.scs,
         )
 
     def do_Tj(self, s: PDFStackT) -> None:
diff --git a/playa/pdfstructtree.py b/playa/pdfstructtree.py
index 88352e24..7d3c32bd 100644
--- a/playa/pdfstructtree.py
+++ b/playa/pdfstructtree.py
@@ -144,60 +144,68 @@ class PDFStructTree(Findable):
     """Parse the structure tree of a PDF.
 
     This class creates a representation of the portion of the
-    structure tree that reaches marked content sections, either for a
-    single page, or for the whole document.  Note that this is slightly
-    different from the behaviour of other PDF libraries which will
-    also include structure elements with no content.
+    structure tree that reaches marked content sections for a document
+    or a subset of its pages.  Note that this is slightly different
+    from the behaviour of other PDF libraries which will also include
+    structure elements with no content.
 
     If the PDF has no structure, the constructor will raise
     `PDFNoStructTree`.
 
+    Args:
+      doc: Document from which to extract structure tree
+      pages: List of (number, page) pairs - numbers will be used to
+             identify pages in the tree through the `page_number`
+             attribute of `PDFStructElement`.
     """
 
     page: Union[PDFPage, None]
 
-    def __init__(self, doc: "PDFDocument", page: Union[PDFPage, None] = None):
+    def __init__(
+        self,
+        doc: "PDFDocument",
+        pages: Union[Iterable[Tuple[Union[int, None], PDFPage]], None] = None,
+    ):
         if "StructTreeRoot" not in doc.catalog:
             raise PDFNoStructTree("Catalog has no 'StructTreeRoot' entry")
         self.root = resolve1(doc.catalog["StructTreeRoot"])
         self.role_map = resolve1(self.root.get("RoleMap", {}))
         self.class_map = resolve1(self.root.get("ClassMap", {}))
         self.children: List[PDFStructElement] = []
+        self.page_dict: Dict[Any, Union[int, None]]
 
-        # If we have a specific page then we will work backwards from
-        # its ParentTree - this is because structure elements could
-        # span multiple pages, and the "Pg" attribute is *optional*,
-        # so this is the approved way to get a page's structure...
-        if page is not None:
-            self.page = page
-            self.page_dict = None
-            # ...EXCEPT that the ParentTree is sometimes missing, in which
-            # case we fall back to the non-approved way.
+        if pages is None:
+            self.page_dict = {
+                page.pageid: idx + 1 for idx, page in enumerate(doc.get_pages())
+            }
+            self._parse_struct_tree()
+        else:
+            pagelist = list(pages)
+            self.page_dict = {
+                page.pageid: page_number for page_number, page in pagelist
+            }
             parent_tree_obj = self.root.get("ParentTree")
-            if parent_tree_obj is None:
-                self._parse_struct_tree()
-            else:
+            # If we have a single page then we will work backwards from
+            # its ParentTree - this is because structure elements could
+            # span multiple pages, and the "Pg" attribute is *optional*,
+            # so this is the approved way to get a page's structure...
+            if len(pagelist) == 1 and parent_tree_obj is not None:
+                _, page = pagelist[0]
                 parent_tree = NumberTree(parent_tree_obj)
                 # If there is no marked content in the structure tree for
                 # this page (which can happen even when there is a
                 # structure tree) then there is no `StructParents`.
                 # Note however that if there are XObjects in a page,
                 # *they* may have `StructParent` (not `StructParents`)
-                if "StructParents" not in self.page.attrs:
+                if "StructParents" not in page.attrs:
                     return
-                parent_id = self.page.attrs["StructParents"]
-                # NumberTree should have a `get` method like it does in pdf.js...
-                parent_array = resolve1(
-                    next(array for num, array in parent_tree.values if num == parent_id)
-                )
+                parent_id = page.attrs["StructParents"]
+                parent_array = resolve1(parent_tree[parent_id])
                 self._parse_parent_tree(parent_array)
-        else:
-            self.page = None
-            # Overhead of creating pages shouldn't be too bad we hope!
-            self.page_dict = {
-                page.pageid: idx + 1 for idx, page in enumerate(doc.get_pages())
-            }
-            self._parse_struct_tree()
+            else:
+                # ...EXCEPT when the ParentTree is missing or more than one
+                # page was requested: fall back to the non-approved way.
+                self._parse_struct_tree()
 
     def _make_attributes(
         self, obj: Dict[str, Any], revision: Union[int, None]
@@ -327,13 +335,7 @@ def on_parsed_page(self, obj: Dict[str, Any]) -> bool:
         if "Pg" not in obj:
             return True
         page_objid = obj["Pg"].objid
-        if self.page_dict is not None:
-            return page_objid in self.page_dict
-        if self.page is not None:
-            # We have to do this to satisfy mypy
-            if page_objid != self.page.pageid:
-                return False
-        return True
+        return page_objid in self.page_dict
 
     def _parse_struct_tree(self) -> None:
         """Populate the structure tree starting from the root, skipping
diff --git a/playa/utils.py b/playa/utils.py
index a35b58cf..ab536066 100644
--- a/playa/utils.py
+++ b/playa/utils.py
@@ -630,13 +630,15 @@ def nunpack(s: bytes, default: int = 0) -> int:
 )
 
 
-def decode_text(s: bytes) -> str:
+def decode_text(s: Union[str, bytes]) -> str:
     """Decodes a PDFDocEncoding string to Unicode."""
     if isinstance(s, bytes) and s.startswith(b"\xfe\xff"):
         return str(s[2:], "utf-16be", "ignore")
     try:
-        ords = (ord(c) if isinstance(c, str) else c for c in s)
-        return "".join(PDFDocEncoding[o] for o in ords)
+        if isinstance(s, str):
+            return "".join(PDFDocEncoding[ord(c)] for c in s)
+        else:
+            return "".join(PDFDocEncoding[c] for c in s)
     except IndexError:
         return str(s)
 
diff --git a/tests/test_pdfstructtree.py b/tests/test_pdfstructtree.py
index 968427fd..1f3ff650 100644
--- a/tests/test_pdfstructtree.py
+++ b/tests/test_pdfstructtree.py
@@ -13,7 +13,7 @@ class TestClass(unittest.TestCase):
 
     def test_structure_tree_class(self):
         with playa.open(TESTDIR / "image_structure.pdf") as pdf:
-            stree = PDFStructTree(pdf, next(pdf.get_pages()))
+            stree = PDFStructTree(pdf, [(1, next(pdf.get_pages()))])
             doc_elem = next(iter(stree))
             assert [k.type for k in doc_elem] == ["P", "P", "Figure"]
 
@@ -22,7 +22,7 @@ def test_find_all_tree(self):
         Test find_all() and find() on trees
         """
         with playa.open(TESTDIR / "image_structure.pdf") as pdf:
-            stree = PDFStructTree(pdf, next(pdf.get_pages()))
+            stree = PDFStructTree(pdf, [(1, next(pdf.get_pages()))])
             figs = list(stree.find_all("Figure"))
             assert len(figs) == 1
             fig = stree.find("Figure")
@@ -68,13 +68,11 @@ def test_all_mcids(self):
             assert 1 in pages
             assert 2 in pages
 
-            page = list(pdf.get_pages())[1]
-            stree = PDFStructTree(pdf, page)
+            pages = list(pdf.get_pages())
+            stree = PDFStructTree(pdf, [(2, pages[1])])
             sect = next(stree.find_all("Sect"))
             mcids = list(sect.all_mcids())
             pages = set(page for page, mcid in mcids)
-            assert None in pages
-            assert 1 not in pages
-            assert 2 not in pages
+            assert pages == {2}
             for p in sect.find_all("P"):
                 assert set(mcid for page, mcid in p.all_mcids()) == set(p.mcids)
diff --git a/tests/test_pdftypes.py b/tests/test_pdftypes.py
new file mode 100644
index 00000000..9a4393bb
--- /dev/null
+++ b/tests/test_pdftypes.py
@@ -0,0 +1,41 @@
+"""
+Test PDF types and data structures.
+"""
+
+from playa.data_structures import NumberTree
+
+
+NUMTREE1 = {
+    "Kids": [
+        {"Nums": [1, "a", 3, "b", 7, "c"], "Limits": [1, 7]},
+        {
+            "Kids": [
+                {"Nums": [8, 123, 9, {"x": "y"}, 10, "forty-two"], "Limits": [8, 10]},
+                {"Nums": [11, "zzz", 12, "xxx", 15, "yyy"], "Limits": [11, 15]},
+            ],
+            "Limits": [8, 15],
+        },
+        {"Nums": [20, 456], "Limits": [20, 20]},
+    ]
+}
+
+
+def test_number_tree():
+    """Test NumberTrees."""
+    nt = NumberTree(NUMTREE1)
+    assert 15 in nt
+    assert 20 in nt
+    assert nt[20] == 456
+    assert nt[9] == {"x": "y"}
+    assert list(nt) == [
+        (1, "a"),
+        (3, "b"),
+        (7, "c"),
+        (8, 123),
+        (9, {"x": "y"}),
+        (10, "forty-two"),
+        (11, "zzz"),
+        (12, "xxx"),
+        (15, "yyy"),
+        (20, 456),
+    ]