diff --git a/.flake8 b/.flake8
new file mode 100644
index 00000000..2bcd70e3
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 88
diff --git a/src/doms_databasen/text_extraction.py b/src/doms_databasen/text_extraction.py
index e98dd3c2..49f1fca1 100644
--- a/src/doms_databasen/text_extraction.py
+++ b/src/doms_databasen/text_extraction.py
@@ -55,11 +55,14 @@ def __init__(self, config: DictConfig):
     def extract_text(self, pdf_path: Path | str) -> str:
         """Extracts text from a PDF using easyocr or pypdf.
 
-        Some text is anonymized with boxes, and some text is anonymized with underlines.
-        This function tries to find these anonymization, read the anonymized text,
+        Some text is anonymized with boxes, and some text
+        is anonymized with underlines.
+        This function tries to find these anonymizations,
+        read the anonymized text,
         and then remove the anonymized text from the image before
         reading the rest of the text with easyocr.
-        If a page has no anonymization or tables, the text is read with pypdf.
+        If a page has no anonymization or tables,
+        the text is read with pypdf.
 
         Args:
             pdf_path (Path | str):
@@ -290,7 +293,7 @@ def _extract_underline_anonymization_boxes(self, image: np.ndarray) -> Tuple:
         ]
         return anonymized_boxes_underlines_, underlines
 
-    def _get_images(self, pdf_path: Path | str) -> Mapping[np.ndarray]:
+    def _get_images(self, pdf_path: Path | str) -> List[np.ndarray]:
         """Get images from PDF.
 
         Returns all images from PDF, except if debugging a single page.
@@ -316,10 +319,10 @@ def _get_images(self, pdf_path: Path | str) -> Mapping[np.ndarray]:
                 ),
             )
         else:
-            images = map(np.array, convert_from_path(pdf_path=pdf_path, dpi=DPI))
+            images = list(map(np.array, convert_from_path(pdf_path=pdf_path, dpi=DPI)))
 
         # Grayscale
-        images = map(lambda image: cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), images)
+        images = list(map(lambda image: cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), images))
         return images
 
     def _find_tables(self, image: np.ndarray, read_tables: bool = False) -> List[dict]:
@@ -909,7 +912,7 @@ def _union(self, box_1: dict, box_2: dict) -> float:
         return area_1 + area_2 - self._intersection(box_1=box_1, box_2=box_2)
 
     @staticmethod
-    def _area(box: dict) -> float:
+    def _area(box: dict) -> int:
         """Calculates the area of a box.
 
         Args:
@@ -917,7 +920,7 @@ def _area(box: dict) -> float:
                 Anonymized box with coordinates.
 
         Returns:
-            float:
+            int:
                 Area of the box.
         """
         row_min, col_min, row_max, col_max = box["coordinates"]
@@ -981,7 +984,8 @@ def _on_same_line(self, y: int, y_prev: int) -> bool:
             y_prev (int):
                 y coordinate of top left corner of previous bounding box.
             max_y_difference (int):
-                Maximum difference between y coordinates of two bounding boxes on the same line.
+                Maximum difference between y coordinates of two
+                bounding boxes on the same line.
 
         Returns:
             bool:
@@ -1730,20 +1734,6 @@ def _remove_inner_boxes(self, boxes: List[dict]) -> List[dict]:
                 boxes_.append(box)
         return boxes_
 
-    def _area(self, box: dict) -> int:
-        """Calculates the area of a box.
-
-        Args:
-            box (dict):
-                Anonymized box with coordinates.
-
-        Returns:
-            int:
-                Area of the box.
-        """
-        row_min, col_min, row_max, col_max = box["coordinates"]
-        return (row_max - row_min) * (col_max - col_min)
-
     def _inner_box(self, boxes: List[dict], box: dict) -> bool:
         """Determine if box is inside another box.
 
@@ -1844,7 +1834,8 @@ def _process_crop_before_read(
         scale = self._get_scale(box_length=box_length)
         crop_scaled = self._scale_image(image=crop_refined, scale=scale)
 
-        # Ensure that highest pixel value is 255, else sharpening might not work as expected.
+        # Ensure that highest pixel value is 255, else
+        # sharpening might not work as expected.
         crop_scaled = np.array(crop_scaled / crop_scaled.max() * 255, dtype=np.uint8)
 
         crop_boundary = self._add_boundary(
@@ -2060,7 +2051,8 @@ def _remove_black_border(self, blob_image: np.ndarray) -> np.ndarray:
         return blob_image
 
     def _split_blob_to_multiple_boxes(self, blob: RegionProperties) -> List[dict]:
-        """This function is called if a blob is not splitted correctly with initial methods.
+        """This function is called if a blob is not split
+        correctly with initial methods.
 
         Args:
             blob (RegionProperties):
@@ -2462,8 +2454,10 @@ def _remove_boundary_noise(
     ) -> np.ndarray:
         """Removes noise on the boundary of an anonymized box.
 
-        All white pixels in a perfect bounding box should be a pixel of a relevant character.
-        Some images have white pixel defect at the boundary of the bounding box, and
+        All white pixels in a perfect bounding box
+        should be a pixel of a relevant character.
+        Some images have white pixel defects at the
+        boundary of the bounding box, and
         this function removes those white pixels.
 
         Args:
@@ -2517,7 +2511,8 @@ def _too_few_pixels(self, blob: RegionProperties, touches_boundary: bool) -> boo
 
         Returns:
             bool:
-                True if blob has too few pixels to be a relevant character. False otherwise.
+                True if blob has too few pixels to
+                be a relevant character. False otherwise.
         """
         coords = blob.coords
         return (
@@ -2540,7 +2535,8 @@ def _low_longest_distance_from_boundary(
 
         Returns:
             bool:
-                True if blob has a low longest distance from the boundary of the image. False otherwise.
+                True if blob has a low longest distance from the
+                boundary of the image. False otherwise.
         """
         n = min(crop.shape)
         return self._maximum_distance_from_boundary(crop=crop, blob=blob) < n * 0.3
@@ -2550,9 +2546,12 @@ def _maximum_distance_from_boundary(
     ) -> float:
         """Get maximum distance from blob to boundary of image.
 
-        E.g. if the minimum distance from the blob to the top boundary of the image is 5,
-        and the minimum distance from the blob to the bottom boundary of the image is 10,
-        to the left boundary is 3, and to the right boundary is 7, then the maximum distance
+        E.g. if the minimum distance from the blob to
+        the top boundary of the image is 5,
+        and the minimum distance from the blob to
+        the bottom boundary of the image is 10,
+        to the left boundary is 3, and to the right
+        boundary is 7, then the maximum distance
         from the blob to the boundary of the image is 10.
 
         Used in _remove_boundary_noise to determine if a blob is noise or not.
@@ -2838,6 +2837,20 @@ def _read_text_with_tika(pdf_path: str) -> str:
         except:
             pass
         return text.strip()
+
+    @staticmethod
+    def _get_text_from_pages(pages: dict) -> str:
+        """Get text from pages.
+
+        Args:
+            pages (dict):
+                Pages, each a dict with a "text" entry.
+
+        Returns:
+            str:
+                Page texts in dict order, joined by blank lines.
+        """
+        return "\n\n".join(page["text"] for page in pages.values())
 
 
 # This class is not used, but is kept for future reference.