diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 55ab63e..8aed826 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11"]
 
     steps:
     - uses: actions/checkout@v3
diff --git a/README.md b/README.md
index 7de83c6..628ae82 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 [![Python package](https://github.com/UUDigitalHumanitieslab/ianalyzer-readers/actions/workflows/python-package.yml/badge.svg)](https://github.com/UUDigitalHumanitieslab/ianalyzer-readers/actions/workflows/python-package.yml)
 [![Documentation Status](https://readthedocs.org/projects/ianalyzer-readers/badge/?version=latest)](https://ianalyzer-readers.readthedocs.io/en/latest/?badge=latest)
 
-`ianalyzer-readers` is a python module to extract data from XML, HTML, CSV, XLSX or TTL files.
+`ianalyzer-readers` is a Python module to extract data from XML, HTML, CSV, JSON, XLSX, or RDF (Linked Data) files.
 
 This module was originally created for [I-analyzer](https://github.com/UUDigitalHumanitieslab/I-analyzer), a web application that extracts data from a variety of datasets, indexes them and presents a search interface.
 
@@ -11,7 +11,7 @@ The basic usage is that you will use the utilities in this package to create a "
 
 ## Prerequisites
 
-Requires Python 3.8 or later.
+Requires Python 3.9 or later.
 
 ## Contents
 
@@ -25,7 +25,7 @@ Our primary use for this package is to pre-process data for I-analyzer, but you
 
 Using this package makes sense if you want to extract data in the shape that it is designed for (i.e., a list of flat dictionaries).
 
-What we find especially useful is that all subclasses of `Reader` have the same interface - regardless of whether they are processing CSV, XML, HTML, or XLSX data. That common interface is crucial in an application that needs to process corpora from different source types, like I-analyzer.
+What we find especially useful is that all subclasses of `Reader` have the same interface, regardless of whether they are processing CSV, JSON, XML, HTML, RDF, or XLSX data. That common interface is crucial in an application that needs to process corpora from different source types, like I-analyzer.
 
 ## Usage
 
diff --git a/docs/api.md b/docs/api.md
index 6833b96..5859d25 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -36,6 +36,12 @@ __Module:__ `ianalyzer_readers.readers.rdf`
 
 ::: ianalyzer_readers.readers.rdf
 
+## JSON reader
+
+__Module:__ `ianalyzer_readers.readers.json`
+
+::: ianalyzer_readers.readers.json
+
 ## Extractors
 
 __Module:__ `ianalyzer_readers.extract`
diff --git a/ianalyzer_readers/extract.py b/ianalyzer_readers/extract.py
index c6d9599..82d29a6 100644
--- a/ianalyzer_readers/extract.py
+++ b/ianalyzer_readers/extract.py
@@ -467,6 +467,7 @@ def format(self, value):
         if value and value not in self.convert_to_none:
             return value
 
+
 class ExternalFile(Extractor):
     '''
     Free for all external file extractor that provides a stream to `stream_handler`
@@ -491,6 +492,29 @@ def _apply(self, metadata, *nargs, **kwargs):
         return self.stream_handler(open(metadata['associated_file'], 'r'))
 
 
+class JSON(Extractor):
+    '''
+    An extractor to extract data from JSON.
+    This extractor assumes that each source is a dictionary without nested lists.
+    When working with nested lists, use the JSONReader to unnest them first.
+
+    Parameters:
+        keys (Iterable[str]): the keys with which to retrieve a field value from the source
+    '''
+
+    def __init__(self, *keys, **kwargs):
+        self.keys = list(keys)
+        super().__init__(**kwargs)
+
+    def _apply(self, data: Union[str, dict], key_index: int = 0, **kwargs) -> str:
+        key = self.keys[key_index]
+        data = data.get(key)
+        if len(self.keys) > key_index + 1:
+            key_index += 1
+            return self._apply(data, key_index)
+        return data
+
+
 class RDF(Extractor):
     """An extractor to extract data from RDF triples
 
diff --git a/ianalyzer_readers/readers/core.py b/ianalyzer_readers/readers/core.py
index ba175b9..1a3f7eb 100644
--- a/ianalyzer_readers/readers/core.py
+++ b/ianalyzer_readers/readers/core.py
@@ -12,18 +12,26 @@
 import logging
 import csv
 
+from requests import Response
+
 logging.basicConfig(level=logging.WARNING)
-logging.getLogger('ianalyzer-readers').setLevel(logging.DEBUG)
+logger = logging.getLogger('ianalyzer-readers')
+logger.setLevel(logging.DEBUG)
+
+SourceData = Union[str, Response, bytes]
+'''Type definition of the data types a Reader method can handle.'''
 
-Source = Union[str, Tuple[Union[str, bytes], Dict], bytes]
+Source = Union[SourceData, Tuple[SourceData, Dict]]
 '''
 Type definition for the source input to some Reader methods.
 
 Sources are either:
 
 - a string with the path to a filename
-- a tuple containing a path to a filename, and a dictionary with metadata
-- binary data with the file contents. This is not supported on all Reader subclasses.
+- binary data with the file contents; this is not supported on all Reader subclasses
+- a `requests.Response` object
+- a tuple of one of the above, and a dictionary with metadata
+
 '''
 
 Document = Dict[str, Any]
diff --git a/ianalyzer_readers/readers/json.py b/ianalyzer_readers/readers/json.py
new file mode 100644
index 0000000..a2cba3a
--- /dev/null
+++ b/ianalyzer_readers/readers/json.py
@@ -0,0 +1,153 @@
+'''
+This module defines the JSONReader.
+
+It can parse multiple documents nested in a single file, for which it uses the pandas library,
+or multiple files with one document each, which are parsed with the standard library json parser.
+'''
+
+import json
+from os.path import isfile
+from typing import Iterable, List, Optional, Union
+
+from pandas import json_normalize
+from requests import Response
+
+from .core import Reader, Document, Source
+import ianalyzer_readers.extract as extract
+
+
+class JSONReader(Reader):
+    '''
+    A base class for Readers of JSON encoded data.
+
+    The reader can either be used on a collection of JSON files (`single_document=True`), where each file represents a document,
+    or on a single JSON file containing a list of documents.
+
+    If the attributes `record_path` and `meta` are set, they are used as arguments to [pandas.json_normalize](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.json_normalize.html) to unnest the JSON data.
+
+    Attributes:
+        single_document: indicates whether the data is organized such that a file represents a single document
+        record_path: a path or list of paths by which a list of documents can be extracted from a large JSON file; irrelevant if `single_document = True`
+        meta: a list of paths, or list of lists of paths, by which metadata common to all documents can be located; irrelevant if `single_document = True`
+
+    Examples:
+        ### Multiple documents in one file:
+        ```python
+        example_data = {
+            'path': {
+                'sketch': 'Hungarian Phrasebook',
+                'episode': 25,
+                'to': {
+                    'records':
+                        [
+                            {'speech': 'I will not buy this record. It is scratched.', 'character': 'tourist'},
+                            {'speech': "No sir. This is a tobacconist's.", 'character': 'tobacconist'}
+                        ]
+                }
+            }
+        }
+
+        class MyJSONReader(JSONReader):
+            record_path = ['path', 'to', 'records']
+            meta = [['path', 'sketch'], ['path', 'episode']]
+
+            speech = Field('speech', JSON('speech'))
+            character = Field('character', JSON('character'))
+            sketch = Field('sketch', JSON('path.sketch'))
+            episode = Field('episode', JSON('path.episode'))
+        ```
+        To define the paths used to extract field values, consider the data format that `pandas.json_normalize` creates:
+        a table with each row representing a document, and columns corresponding to paths, either relative to documents within `record_path`,
+        or relative to the top level (`meta`), with nested paths joined by dots.
+        ```csv
+        row,speech,character,path.sketch,path.episode
+        0,"I will not buy this record. It is scratched.","tourist","Hungarian Phrasebook",25
+        1,"No sir. This is a tobacconist's.","tobacconist","Hungarian Phrasebook",25
+        ```
+
+        ### Single document per file:
+        ```python
+        example_data = {
+            'sketch': 'Hungarian Phrasebook',
+            'episode': 25,
+            'scene': {
+                'character': 'tourist',
+                'speech': 'I will not buy this record. It is scratched.'
+            }
+        }
+
+        class MyJSONReader(JSONReader):
+            single_document = True
+
+            speech = Field('speech', JSON('scene', 'speech'))
+            character = Field('character', JSON('scene', 'character'))
+            sketch = Field('sketch', JSON('sketch'))
+            episode = Field('episode', JSON('episode'))
+        ```
+
+    '''
+
+    single_document: bool = False
+    '''
+    Set to `True` if the data is structured such that one document is encoded in one .json file.
+    In that case, the reader assumes that there are no lists in such a file.
+    '''
+
+    record_path: Optional[List[str]] = None
+    '''
+    A keyword or list of keywords by which a list of documents can be extracted from a large JSON file.
+    Only relevant if `single_document=False`.
+    '''
+
+    meta: Optional[List[Union[str, List[str]]]] = None
+    '''
+    A list of keywords, or list of lists of keywords, by which metadata for each document can be located,
+    if it is in a different path than `record_path`. Only relevant if `single_document=False`.
+    '''
+
+    def source2dicts(self, source: Source, *nargs, **kwargs) -> Iterable[Document]:
+        """
+        Given a source, returns an iterable of extracted documents.
+
+        Parameters:
+            source: the input data
+
+        Returns:
+            an iterable of documents
+        """
+        if isinstance(source, tuple):
+            metadata = source[1]
+            json_data = self._get_json_data(source[0])
+        else:
+            metadata = None
+            json_data = self._get_json_data(source)
+
+        if not self.single_document:
+            documents = json_normalize(
+                json_data, record_path=self.record_path, meta=self.meta
+            ).to_dict('records')
+        else:
+            documents = [json_data]
+
+        self._reject_extractors(extract.XML, extract.CSV, extract.RDF)
+
+        for doc in documents:
+            field_dict = {
+                field.name: field.extractor.apply(
+                    doc, metadata=metadata, *nargs, **kwargs
+                )
+                for field in self.fields
+            }
+
+            yield field_dict
+
+    def _get_json_data(self, source: Source) -> dict:
+        if isinstance(source, str) and isfile(source):
+            with open(source, "r") as f:
+                return json.load(f)
+        elif isinstance(source, Response):
+            return source.json()
+        elif isinstance(source, bytes):
+            return json.loads(source)
+        else:
+            raise Exception("Unexpected source type for JSON Reader")
diff --git a/ianalyzer_readers/readers/rdf.py b/ianalyzer_readers/readers/rdf.py
index 755532d..544618c 100644
--- a/ianalyzer_readers/readers/rdf.py
+++ b/ianalyzer_readers/readers/rdf.py
@@ -35,8 +35,8 @@ def source2dicts(self, source: Source) -> Iterable[Document]:
         are based on the extractor of each field.
         '''
         self._reject_extractors(extract.CSV, extract.XML)
-
-        if type(source) == bytes:
+
+        if isinstance(source, bytes):
             raise Exception('The current reader cannot handle sources of bytes type, provide a file path as string instead')
         try:
             (filename, metadata) = source
@@ -45,12 +45,15 @@ def source2dicts(self, source: Source) -> Iterable[Document]:
             metadata = None
 
         logger.info(f"parsing {filename}")
-        g = self.parse_graph_from_filename(filename)
-
+        # TODO: we could also allow Response as source data here, but the response
+        # would also need to include information about the data format; see
+        # https://github.com/RDFLib/rdflib/blob/4.1.2/rdflib/graph.py#L209
+        g = self.parse_graph_from_filename(filename)
+
         document_subjects = self.document_subjects(g)
         for subject in document_subjects:
             yield self._document_from_subject(g, subject, metadata)
-
+
     def parse_graph_from_filename(self, filename: str) -> Graph:
         ''' Read a RDF file as indicated by source, return a graph
         Override this function to parse multiple source files into one graph
@@ -64,7 +67,7 @@
         g = Graph()
         g.parse(filename)
         return g
-
+
     def document_subjects(self, graph: Graph) -> Iterable[Union[BNode, Literal, URIRef]]:
         '''
         Override this function to return all subjects (i.e., first part of RDF triple) with which to search for data in the RDF graph.
diff --git a/ianalyzer_readers/readers/xml.py b/ianalyzer_readers/readers/xml.py
index 048a2b4..e9122af 100644
--- a/ianalyzer_readers/readers/xml.py
+++ b/ianalyzer_readers/readers/xml.py
@@ -7,6 +7,7 @@
 import bs4
 import logging
 from os.path import isfile
+from requests import Response
 from typing import Dict, Iterable, Tuple, List
 
 from .. import extract
@@ -178,13 +179,20 @@ def _filename_soup_and_metadata_from_source(self, source: Source) -> Tuple[str,
             soup = self._soup_from_data(source)
             filename = None
             metadata = {}
+        elif isinstance(source, Response):
+            soup = self._soup_from_data(source.text)
+            filename = None
+            metadata = {}
         else:
-            if isfile(source[0]):
+            if isinstance(source[0], str):
                 filename = source[0]
                 soup = self._soup_from_xml(filename)
             else:
                 filename = None
-                soup = self._soup_from_data(source[0])
+                if isinstance(source[0], bytes):
+                    soup = self._soup_from_data(source[0])
+                elif isinstance(source[0], Response):
+                    soup = self._soup_from_data(source[0].text)
             metadata = source[1] or None
 
         return filename, soup, metadata
diff --git a/pyproject.toml b/pyproject.toml
index f52f069..26cf5a6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,8 @@ dependencies = [
     "beautifulsoup4",
     "lxml",
     "openpyxl",
+    "pandas",
+    "requests",
     "rdflib",
 ]
 
diff --git a/requirements.txt b/requirements.txt
index 4ac765f..575d362 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,10 @@
 #
 beautifulsoup4==4.12.3
     # via ianalyzer_readers (setup.py)
+certifi==2024.8.30
+    # via requests
+charset-normalizer==3.4.0
+    # via requests
 click==8.1.7
     # via
     #   mkdocs
@@ -20,6 +24,8 @@ ghp-import==2.1.0
     # via mkdocs
 griffe==0.42.0
     # via mkdocstrings-python
+idna==3.10
+    # via requests
 iniconfig==2.0.0
     # via pytest
 isodate==0.6.1
@@ -56,12 +62,16 @@ mkdocstrings==0.24.1
     # via mkdocstrings-python
 mkdocstrings-python==1.9.0
     # via ianalyzer_readers (setup.py)
+numpy==2.1.3
+    # via pandas
 openpyxl==3.1.2
     # via ianalyzer_readers (setup.py)
 packaging==24.0
     # via
     #   mkdocs
     #   pytest
+pandas==2.2.3
+    # via ianalyzer_readers (setup.py)
 pathspec==0.12.1
     # via mkdocs
 platformdirs==4.2.0
@@ -77,7 +87,11 @@ pyparsing==3.1.2
 pytest==8.1.1
     # via ianalyzer_readers (setup.py)
 python-dateutil==2.9.0.post0
-    # via ghp-import
+    # via
+    #   ghp-import
+    #   pandas
+pytz==2024.2
+    # via pandas
 pyyaml==6.0.1
     # via
     #   mkdocs
@@ -87,6 +101,8 @@ pyyaml-env-tag==0.1
     # via mkdocs
 rdflib==7.0.0
     # via ianalyzer_readers (setup.py)
+requests==2.32.3
+    # via ianalyzer_readers (setup.py)
 six==1.16.0
     # via
     #   isodate
@@ -95,5 +111,9 @@ soupsieve==2.5
     # via beautifulsoup4
 tomli==2.0.1
     # via pytest
+tzdata==2024.2
+    # via pandas
+urllib3==2.2.3
+    # via requests
 watchdog==4.0.0
     # via mkdocs
diff --git a/tests/json/data/Macbeth.json b/tests/json/data/Macbeth.json
new file mode 100644
index 0000000..b290e09
--- /dev/null
+++ b/tests/json/data/Macbeth.json
@@ -0,0 +1,61 @@
+{"TITLE":"ACT I",
+"SCENE":[
+  {
+    "TITLE":"SCENE I. A desert place.",
+    "STAGEDIR":[
+      "Thunder and lightning. Enter three Witches",
+      "Exeunt"
+    ],
+    "SPEECH":[
+      {
+        "SPEAKER":"First Witch",
+        "LINE":[
+          "When shall we three meet again",
+          "In thunder, lightning, or in rain?"
+        ]
+      },
+      {
+        "SPEAKER":"Second Witch",
+        "LINE":[
+          "When the hurlyburly's done,",
+          "When the battle's lost and won."
+        ]
+      },
+      {
+        "SPEAKER":"Third Witch",
+        "LINE":"That will be ere the set of sun."
+      },
+      {
+        "SPEAKER":"First Witch",
+        "LINE":"Where the place?"
+      },
+      {
+        "SPEAKER":"Second Witch",
+        "LINE":"Upon the heath."
+      },
+      {
+        "SPEAKER":"Third Witch",
+        "LINE":"There to meet with Macbeth."
+      },
+      {
+        "SPEAKER":"First Witch",
+        "LINE":"I come, Graymalkin!"
+      },
+      {
+        "SPEAKER":"Second Witch",
+        "LINE":"Paddock calls."
+      },
+      {
+        "SPEAKER":"Third Witch",
+        "LINE":"Anon."
+      },
+      {
+        "SPEAKER":"ALL",
+        "LINE":[
+          "Fair is foul, and foul is fair:",
+          "Hover through the fog and filthy air."
+        ]
+      }
+    ]
+  }]
+}
\ No newline at end of file
diff --git a/tests/json/json_reader.py b/tests/json/json_reader.py
new file mode 100644
index 0000000..3ad67c5
--- /dev/null
+++ b/tests/json/json_reader.py
@@ -0,0 +1,69 @@
+from glob import glob
+import json
+import os
+from typing import Union
+
+from ianalyzer_readers.extract import JSON
+from ianalyzer_readers.readers.core import Field
+from ianalyzer_readers.readers.json import JSONReader
+
+
+def merge_lines(lines: Union[list, str]) -> str:
+    if isinstance(lines, list):
+        return "\n".join(lines)
+    return lines
+
+
+class JSONDocumentReader(JSONReader):
+    """
+    Example reader for a corpus with one JSON document per source, passed as raw bytes.
+    """
+
+    data_directory = os.path.join(os.path.dirname(__file__), "data")
+    single_document = True
+
+    def sources(self, **kwargs):
+        data = json.dumps(
+            {
+                "TITLE": "ACT I",
+                "SCENE": {
+                    "TITLE": "SCENE I. A desert place.",
+                    "STAGEDIR": [
+                        "Thunder and lightning. Enter three Witches",
+                        "Exeunt",
+                    ],
+                    "SPEECH": {
+                        "SPEAKER": "First Witch",
+                    },
+                },
+            }
+        )
+        yield data.encode('utf-8')
+
+    act = Field("act", JSON("TITLE"))
+    character = Field("character", JSON("SCENE", "SPEECH", "SPEAKER"))
+    scene = Field("scene", JSON("SCENE", "TITLE"))
+
+    fields = [act, character, scene]
+
+
+class JSONMultipleDocumentReader(JSONReader):
+    """
+    Example reader for testing the parsing of arrays in JSON, using data from https://github.com/tux255/analyzing-shakespeare
+    """
+
+    data_directory = os.path.join(os.path.dirname(__file__), "data")
+    record_path = ["SCENE", "SPEECH"]
+    meta = ["TITLE", ["SCENE", "TITLE"], ["SCENE", "STAGEDIR"]]
+
+    def sources(self, **kwargs):
+        for filename in glob(f"{self.data_directory}/*.json"):
+            yield filename
+
+    act = Field("act", JSON("TITLE"))
+    scene = Field("scene", JSON("SCENE.TITLE"))
+    character = Field("character", JSON("SPEAKER"))
+    lines = Field("lines", JSON("LINE", transform=merge_lines))
+    stage_dir = Field("stage_direction", JSON("SCENE.STAGEDIR", transform=merge_lines))
+
+    fields = [act, scene, character, lines, stage_dir]
diff --git a/tests/test_json_reader.py b/tests/test_json_reader.py
new file mode 100644
index 0000000..d8111cb
--- /dev/null
+++ b/tests/test_json_reader.py
@@ -0,0 +1,43 @@
+from tests.json.json_reader import JSONDocumentReader, JSONMultipleDocumentReader
+
+expected = [
+    {
+        'act': 'ACT I',
+        'scene': 'SCENE I. A desert place.',
+        'stage_direction': 'Thunder and lightning. Enter three Witches\nExeunt',
+        'character': 'First Witch',
+        'lines': 'When shall we three meet again\nIn thunder, lightning, or in rain?',
+    },
+    *[{}] * 8,  # eight intermediate speeches, not compared in detail
+    {
+        'act': 'ACT I',
+        'scene': 'SCENE I. A desert place.',
+        'stage_direction': 'Thunder and lightning. Enter three Witches\nExeunt',
+        'character': 'ALL',
+        'lines': "Fair is foul, and foul is fair:\nHover through the fog and filthy air.",
+    },
+]
+
+
+def test_json_parse_single_document():
+    reader = JSONDocumentReader()
+    docs = list(reader.documents())
+    assert len(docs) == 1
+    assert docs[0].get('act') == 'ACT I'
+    assert docs[0].get('character') == 'First Witch'
+    assert docs[0].get('scene') == 'SCENE I. A desert place.'
+
+
+def test_json_parse_multiple_documents():
+    '''Test that the JSON reader can parse multiple documents from an array in a single file.'''
+    reader = JSONMultipleDocumentReader()
+    docs = list(reader.documents())
+    assert len(docs) == len(expected)
+    _assert_matches(expected[0], docs[0])
+    _assert_matches(expected[-1], docs[-1])
+
+
+def _assert_matches(target: dict, doc: dict):
+    assert len(target.keys()) == len(doc.keys())
+    for key in target.keys():
+        assert doc.get(key) == target.get(key)
diff --git a/tests/xml/test_xml_reader.py b/tests/xml/test_xml_reader.py
index 262505c..a0288a4 100644
--- a/tests/xml/test_xml_reader.py
+++ b/tests/xml/test_xml_reader.py
@@ -1,5 +1,7 @@
 import os
 
+import requests
+
 from ianalyzer_readers.readers.xml import XMLReader
 from ianalyzer_readers.readers.core import Field
 from ianalyzer_readers.extract import XML
@@ -44,6 +46,16 @@ def sources(self, **kwargs):
 
     fields = [title, character, lines]
 
+url_list = ['mock_path']
+
+
+class HamletXMLResponseReader(HamletXMLReader):
+    def sources(self, **kwargs):
+        for document_url in url_list:
+            response = requests.get(document_url)
+            yield response
+
 
 target_documents = [
     {
         'title': 'Hamlet',
@@ -94,3 +106,22 @@ def test_xml_reader():
 
     for doc, target in zip(docs, target_documents):
         assert doc == target
+
+
+class MockResponse(requests.Response):
+
+    @property
+    def text(self):
+        test_directory = os.path.dirname(__file__)
+        filename = os.path.join(test_directory, 'data', 'hamlet.xml')
+        with open(filename, "r") as f:
+            return f.read()
+
+
+def test_xml_response_reader(monkeypatch):
+    monkeypatch.setattr(requests, "get", lambda url: MockResponse())
+    reader = HamletXMLResponseReader()
+    docs = reader.documents()
+
+    for doc, target in zip(docs, target_documents):
+        assert doc == target
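
With the pieces above in place, a JSON reader can be defined end to end. The sketch below is modeled on `tests/json/json_reader.py`; the class name, field names, and sample data are illustrative, not part of the diff.

```python
import json

from ianalyzer_readers.extract import JSON
from ianalyzer_readers.readers.core import Field
from ianalyzer_readers.readers.json import JSONReader


class PlayReader(JSONReader):
    single_document = True  # one JSON document per source

    # data_directory is unused here, since sources() is overridden.
    data_directory = '.'

    def sources(self, **kwargs):
        # A source may be a file path, raw bytes, or a requests.Response.
        yield json.dumps(
            {'TITLE': 'ACT I', 'SPEECH': {'SPEAKER': 'First Witch'}}
        ).encode('utf-8')

    act = Field('act', JSON('TITLE'))
    speaker = Field('speaker', JSON('SPEECH', 'SPEAKER'))

    fields = [act, speaker]


docs = list(PlayReader().documents())
assert docs == [{'act': 'ACT I', 'speaker': 'First Witch'}]
```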
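The dotted keys in `JSONMultipleDocumentReader` (`SCENE.TITLE`, `SCENE.STAGEDIR`) follow directly from how `pandas.json_normalize` names its columns: record-level keys stay flat, while `meta` paths are joined with dots. A small sketch in plain pandas, with data trimmed down from the Macbeth fixture:

```python
from pandas import json_normalize

data = {
    'TITLE': 'ACT I',
    'SCENE': [{
        'TITLE': 'SCENE I. A desert place.',
        'SPEECH': [
            {'SPEAKER': 'First Witch', 'LINE': 'Where the place?'},
            {'SPEAKER': 'Second Witch', 'LINE': 'Upon the heath.'},
        ],
    }],
}

records = json_normalize(
    data,
    record_path=['SCENE', 'SPEECH'],     # one row per speech
    meta=['TITLE', ['SCENE', 'TITLE']],  # carried along to every row
).to_dict('records')

# Record-level keys stay flat; meta paths become dot-joined column names.
assert records[0] == {
    'SPEAKER': 'First Witch',
    'LINE': 'Where the place?',
    'TITLE': 'ACT I',
    'SCENE.TITLE': 'SCENE I. A desert place.',
}
```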
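Since `Source` now admits `requests.Response` objects, a reader can in principle pull JSON straight from an HTTP endpoint; `_get_json_data` calls `.json()` on the response. A hypothetical sketch, in which the URL is a placeholder and error handling is omitted:

```python
import requests

from ianalyzer_readers.extract import JSON
from ianalyzer_readers.readers.core import Field
from ianalyzer_readers.readers.json import JSONReader


class RemotePlayReader(JSONReader):
    single_document = True
    data_directory = '.'  # unused; sources() is overridden

    def sources(self, **kwargs):
        yield requests.get('https://example.com/act1.json')  # placeholder URL

    act = Field('act', JSON('TITLE'))

    fields = [act]
```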