diff --git a/torch_em/data/datasets/cameylon.py b/torch_em/data/datasets/cameylon.py
new file mode 100644
index 00000000..1c596b17
--- /dev/null
+++ b/torch_em/data/datasets/cameylon.py
@@ -0,0 +1,81 @@
+import os
+import subprocess
+import sys
+import warnings
+import numpy as np
+from glob import glob
+from importlib.util import find_spec
+
+import openslide
+
+
+def _download_cameylon(path):
+    """Download the contents of the `s3://camelyon-dataset/` bucket to `path`.
+
+    The download is skipped if both the "CAMELYON16" and "CAMELYON17" folders
+    already exist in `path`. If the `awscli` package is missing it is installed via pip.
+
+    Args:
+        path: Folder to download the data to.
+
+    Raises:
+        FileNotFoundError: If the `aws` executable cannot be found.
+        subprocess.CalledProcessError: If installing awscli or the download fails.
+    """
+    is_cam16 = os.path.exists(os.path.join(path, "CAMELYON16"))
+    is_cam17 = os.path.exists(os.path.join(path, "CAMELYON17"))
+    if is_cam16 and is_cam17:
+        return
+
+    # Install into the running interpreter's environment, not whatever `pip` is first on PATH.
+    if find_spec("awscli") is None:
+        subprocess.run([sys.executable, "-m", "pip", "install", "awscli"], check=True)
+
+    warnings.warn("The CAMELYON dataset could take a couple of hours to download the dataset.")
+
+    # Pass the arguments as a list (no shell), so that a `path` containing spaces or
+    # shell metacharacters reaches `aws` verbatim. `check=True` surfaces a failed download.
+    subprocess.run(
+        ["aws", "s3", "cp", "--no-sign-request", "s3://camelyon-dataset/", path, "--recursive"],
+        check=True,
+    )
+
+
+def get_cameylon_dataset(path):
+    """Read the last (in sorted order) CAMELYON16 image at a fixed pyramid level.
+
+    Work in progress: the pixel data is read but not returned yet.
+
+    Take a look at two things for histopathology WSI reading:
+        - tiatoolbox - https://tia-toolbox.readthedocs.io/
+        - openslide - (example: https://github.com/computationalpathologygroup/Camelyon16/blob/master/Python/Evaluation_FROC.py)
+
+    Args:
+        path: Folder that contains the "CAMELYON16/images" directory.
+
+    Raises:
+        FileNotFoundError: If "CAMELYON16/images" contains no entries.
+    """
+    image_folder = os.path.join(path, "CAMELYON16", "images")
+    all_paths = sorted(glob(os.path.join(image_folder, "*")))
+    if not all_paths:
+        raise FileNotFoundError(f"Could not find any CAMELYON16 images in {image_folder}.")
+    print(all_paths[-1])
+
+    level = 5  # Image level at which the evaluation is done
+
+    # The context manager closes the slide once the region is read.
+    with openslide.open_slide(all_paths[-1]) as slide:
+        dims = slide.level_dimensions[level]
+        # TODO: return or otherwise use the pixel data; for now it is only read.
+        pixelarray = np.array(slide.read_region((0, 0), level, dims))
+
+
+def get_cameylon_loader(path):
+    """Download the CAMELYON data to `path` if needed and read one CAMELYON16 image.
+
+    Does not return a dataloader yet.
+    """
+    # TODO: get a dataset for creating the dataloader
+    _download_cameylon(path)
+    get_cameylon_dataset(path)