diff --git a/geospaas_harvesting/crawlers.py b/geospaas_harvesting/crawlers.py index 262fb062..e0cad613 100644 --- a/geospaas_harvesting/crawlers.py +++ b/geospaas_harvesting/crawlers.py @@ -99,17 +99,17 @@ class WebDirectoryCrawler(Crawler): f'^.*/{YEAR_PATTERN}/{MONTH_PATTERN}/{DAY_OF_MONTH_PATTERN}/.*$') DAY_OF_YEAR_MATCHER = re.compile(f'^.*/{YEAR_PATTERN}/{DAY_OF_YEAR_PATTERN}(/.*)?$') - def __init__(self, root_url, time_range=(None, None), excludes=None): + def __init__(self, root_url, time_range=(None, None), include=None): """ `root_url` is the URL of the data repository to explore. `time_range` is a 2-tuple of datetime.datetime objects defining the time range of the datasets returned by the crawler. - `excludes` is the list of string that are the associated url is ignored during - the harvesting process if these strings are found in the crawled url. + `include` is a regular expression string used to filter the crawler's output. + Only URLs matching it are returned. """ self.root_url = urlparse(root_url) self.time_range = time_range - self.excludes = (self.EXCLUDE or []) + (excludes or []) + self.include = re.compile(include) if include else None self.set_initial_state() @property @@ -209,10 +209,6 @@ def _is_folder(self, path): """Returns True if path points to a folder""" raise NotImplementedError("_is_folder is abstract in WebDirectoryCrawler") - def _is_file(self, path): - """Returns True if path points to a file""" - raise NotImplementedError("_is_file is abstract in WebDirectoryCrawler") - def _add_url_to_return(self, path): """ Add a URL to the list of URLs returned by the crawler after @@ -233,15 +229,20 @@ def _add_folder_to_process(self, path): self._to_process.append(path) def _process_folder(self, folder_path): - """Get the contents of a folder and feed the _urls and _to_process attributes""" + """ + Get the contents of a folder and feed the _urls (based on includes) and _to_process + attributes + """ self.LOGGER.info("Looking for resources in '%s'...", folder_path) for path in self._list_folder_contents(folder_path): - # Select paths which do not contain any of the self.excludes strings - if all(excluded_string not in path for excluded_string in self.excludes): - if self._is_folder(path): - self._add_folder_to_process(path) - elif self._is_file(path): - self._add_url_to_return(path) + # deselect paths which contains any of the excludes strings + if self.EXCLUDE and self.EXCLUDE.search(path): + continue + if self._is_folder(path): + self._add_folder_to_process(path) + # select paths which are matched based on input config file + if self.include and self.include.search(path): + self._add_url_to_return(path) def get_download_url(self, resource_url): """ @@ -264,9 +265,6 @@ def _list_folder_contents(self, folder_path): def _is_folder(self, path): return os.path.isdir(path) - def _is_file(self, path): - return os.path.isfile(path) - class HTMLDirectoryCrawler(WebDirectoryCrawler): """Implementation of WebDirectoryCrawler for repositories exposed as HTML pages.""" @@ -285,9 +283,6 @@ def _strip_folder_page(folder_path): def _is_folder(self, path): return path.endswith(self.FOLDERS_SUFFIXES) - def _is_file(self, path): - return path.endswith(self.FILES_SUFFIXES) - @classmethod def _get_links(cls, html): """Returns the list of links contained in an HTML page, passed as a string""" @@ -325,7 +320,7 @@ class OpenDAPCrawler(HTMLDirectoryCrawler): LOGGER = logging.getLogger(__name__ + '.OpenDAPCrawler') FOLDERS_SUFFIXES = ('/contents.html',) FILES_SUFFIXES = ('.nc', '.nc.gz') - 
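As a reference for the switch from substring-based `excludes` to regex filtering, here is a minimal, self-contained sketch of the new selection flow. `TinyCrawler` and its `process()` method are illustrative stand-ins; only the `EXCLUDE`/`include` checks mirror the updated `_process_folder()`:

```python
import re

# Illustrative stand-in for a WebDirectoryCrawler subclass: only the new
# EXCLUDE / include checks from _process_folder() are reproduced here.
class TinyCrawler:
    EXCLUDE = re.compile(r'\?')  # class-level pattern, as in OpenDAPCrawler

    def __init__(self, include=None):
        # 'include' arrives as a plain string (e.g. from the YAML config) and
        # is compiled once, like in the new WebDirectoryCrawler.__init__()
        self.include = re.compile(include) if include else None
        self._urls = []

    def process(self, paths):
        for path in paths:
            # skip anything matching the class-level EXCLUDE pattern
            if self.EXCLUDE and self.EXCLUDE.search(path):
                continue
            # keep only paths matching the configured 'include' regex;
            # with include=None nothing is ever added
            if self.include and self.include.search(path):
                self._urls.append(path)
        return self._urls


print(TinyCrawler(include=r'\.nc$').process(
    ['/bar/baz.nc', '/bar/contents.html?foo=1', '/bar/qux.h5']))
# -> ['/bar/baz.nc']
```

Note that with `include=None` the crawler yields no URLs at all, which is why every harvester entry in the updated sample `harvest.yml` sets `include`.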
EXCLUDE = ['?'] + EXCLUDE = re.compile(r'\?') class ThreddsCrawler(HTMLDirectoryCrawler): @@ -335,7 +330,7 @@ class ThreddsCrawler(HTMLDirectoryCrawler): LOGGER = logging.getLogger(__name__ + '.ThreddsCrawler') FOLDERS_SUFFIXES = ('/catalog.html',) FILES_SUFFIXES = ('.nc',) - EXCLUDE = ['/thredds/catalog.html'] + EXCLUDE = re.compile(r'/thredds/catalog.html$') def get_download_url(self, resource_url): result = None @@ -354,18 +349,17 @@ class FTPCrawler(WebDirectoryCrawler): """ LOGGER = logging.getLogger(__name__ + '.FTPCrawler') - def __init__(self, root_url, time_range=(None, None), excludes=None, - username='anonymous', password='anonymous', files_suffixes=''): + def __init__(self, root_url, time_range=(None, None), include=None, + username='anonymous', password='anonymous'): if not root_url.startswith('ftp://'): raise ValueError("The root url must start with 'ftp://'") self.username = username self.password = password - self.files_suffixes = files_suffixes self.ftp = None - super().__init__(root_url, time_range, excludes) + super().__init__(root_url, time_range, include) def set_initial_state(self): """ @@ -433,9 +427,6 @@ def _is_folder(self, path): else: return True - def _is_file(self, path): - return path.endswith(self.files_suffixes) - class HTTPPaginatedAPICrawler(Crawler): """Base class for crawlers used on repositories exposing a paginated API over HTTP""" diff --git a/geospaas_harvesting/harvest.yml b/geospaas_harvesting/harvest.yml index 56fa0f97..3d76127c 100644 --- a/geospaas_harvesting/harvest.yml +++ b/geospaas_harvesting/harvest.yml @@ -3,43 +3,61 @@ # dump_on_interruption: False # poll_interval: 600 # harvesters: -# OSISAF: -# class: 'OSISAFHarvester' -# max_fetcher_threads: 30 -# # We exclude "EASE-Grid map projections" and "southern hemispheres" from harvesting process -# excludes: ['ease', '_sh_polstere',] -# max_db_threads: 1 -# urls: -# - 'https://thredds.met.no/thredds/catalog/osisaf/met.no/ice/amsr2_conc/catalog.html' -# - 'https://thredds.met.no/thredds/catalog/osisaf/met.no/ice/conc/catalog.html' -# podaac: -# class: 'PODAACHarvester' -# max_fetcher_threads: 30 -# max_db_threads: 1 -# urls: -# - 'https://opendap.jpl.nasa.gov/opendap/allData/ghrsst/data/GDS2/L2P/VIIRS_NPP/NAVO/v1/2014/005/contents.html' -# - 'https://opendap.jpl.nasa.gov/opendap/allData/ghrsst/data/GDS2/L2P/VIIRS_N20/' -# - 'https://opendap.jpl.nasa.gov/opendap/allData/ghrsst/data/GDS2/L2P/VIIRS_NPP/' -# - 'https://opendap.jpl.nasa.gov/opendap/allData/ghrsst/data/GDS2/L2P/MODIS_A/' -# copernicus_sentinel: -# class: 'CopernicusSentinelHarvester' -# max_fetcher_threads: 30 -# max_db_threads: 1 -# url: 'https://scihub.copernicus.eu/apihub/search' -# search_terms: -# - 'platformname:Sentinel-1 AND NOT L0' -# - 'platformname:Sentinel-2 AND NOT L0' -# - 'platformname:Sentinel-3 AND NOT L0' -# username: 'username' -# # Environment variable name -# password: !ENV 'COPERNICUS_OPEN_HUB_PASSWORD' -# FTP_jaxa: -# class: 'FTPHarvester' -# max_fetcher_threads: 30 -# max_db_threads: 1 -# username: username -# password: !ENV 'JAXA_PASSWORD' -# fileformat: '.h5' -# urls: -# - 'ftp://ftp.gportal.jaxa.jp/standard/GCOM-W/GCOM-W.AMSR2/L3.SST_25/3/2012/07/' +# radarsat_local: +# class: 'LOCALHarvester' +# include: 'RS2_\w+(?!.)' +# max_fetcher_threads: 1 +# max_db_threads: 1 +# paths: +# - "/src/sample/test_multi_nansat" +# FTP_jaxa: +# class: 'FTPHarvester' +# max_fetcher_threads: 1 +# max_db_threads: 1 +# username: username +# password: !ENV 'JAXA_PASSWORD' +# include: '\.h5$' +# urls: +# - 
'ftp://ftp.gportal.jaxa.jp/standard/GCOM-W/GCOM-W.AMSR2/L3.SST_25/3/2012/07/' +# OSISAF: +# class: 'OSISAFHarvester' +# max_fetcher_threads: 1 +# # We include "_nh_polstere" in order to only harvest the northen-hemisphere data +# include: '_nh_polstere' +# max_db_threads: 1 +# #time_range: +# # - !ENV HARVESTING_START_TIME +# # - !ENV HARVESTING_END_TIME +# urls: +# - 'https://thredds.met.no/thredds/catalog/osisaf/met.no/ice/amsr2_conc/catalog.html' +# - 'https://thredds.met.no/thredds/catalog/osisaf/met.no/ice/conc/catalog.html' +# - 'https://thredds.met.no/thredds/catalog/osisaf/met.no/ice/type/catalog.html' +# - 'https://thredds.met.no/thredds/catalog/osisaf/met.no/ice/drift_mr/catalog.html' +# - 'https://thredds.met.no/thredds/catalog/osisaf/met.no/ice/drift_lr/merged/catalog.html' +# +# podaac: +# class: 'PODAACHarvester' +# max_fetcher_threads: 1 +# max_db_threads: 1 +# include: '\.nc$|\.h5$' +# urls: +# - 'https://opendap.jpl.nasa.gov/opendap/allData/ghrsst/data/GDS2/L2P/VIIRS_NPP/NAVO/v1/2014/005/contents.html' +# - 'https://opendap.jpl.nasa.gov/opendap/allData/ghrsst/data/GDS2/L2P/VIIRS_N20/' +# - 'https://opendap.jpl.nasa.gov/opendap/allData/ghrsst/data/GDS2/L2P/VIIRS_NPP/' +# - 'https://opendap.jpl.nasa.gov/opendap/allData/ghrsst/data/GDS2/L2P/MODIS_A/' +# copernicus_sentinel: +# class: 'CopernicusSentinelHarvester' +# max_fetcher_threads: 30 +# max_db_threads: 1 +# include: '.*' +# #time_range: +# # - !ENV HARVESTING_START_TIME +# # - !ENV HARVESTING_END_TIME +# url: 'https://scihub.copernicus.eu/apihub/search' +# search_terms: +# - 'platformname:Sentinel-1 AND NOT L0' +# - 'platformname:Sentinel-2 AND NOT L0' +# - 'platformname:Sentinel-3 AND NOT L0' +# username: "username" +# password: !ENV COPERNICUS_OPEN_HUB_PASSWORD ... diff --git a/geospaas_harvesting/harvesters.py b/geospaas_harvesting/harvesters.py index 0296be7f..74817107 100644 --- a/geospaas_harvesting/harvesters.py +++ b/geospaas_harvesting/harvesters.py @@ -95,10 +95,10 @@ class for harvesting online data sources that rely on webpages (and most of the def __init__(self, **config): super().__init__(**config) - if 'excludes' in config: - if not isinstance(config['excludes'], list): + if 'include' in config: + if not isinstance(config['include'], str): raise HarvesterConfigurationError( - "'excludes' field must be fed with a python list of excluded names ") + "The 'include' field must be fed with a regex matching URLs to include") def _create_crawlers(self): if self.crawler is None: @@ -107,7 +107,7 @@ def _create_crawlers(self): try: return [ self.crawler(url, time_range=(self.get_time_range()), - excludes=self.config.get('excludes', None)) + include=self.config.get('include', None)) for url in self.config['urls'] ] except TypeError as error: @@ -150,9 +150,8 @@ def _create_crawlers(self): root_url=url, username=self.config.get('username', None), password=self.config.get('password'), - files_suffixes=self.config.get('fileformat', None), time_range=(self.get_time_range()), - excludes=self.config.get('excludes', None) + include=self.config.get('include', None) ) for url in self.config['urls'] ] @@ -200,3 +199,17 @@ def _create_ingester(self): if parameter_name in self.config: parameters[parameter_name] = self.config[parameter_name] return ingesters.CreodiasEOFinderIngester(**parameters) + + +class LOCALHarvester(WebDirectoryHarvester): + """ Harvester class for some specific local files """ + def _create_crawlers(self): + return [ + crawlers.LocalDirectoryCrawler( + url, + include = self.config.get('include', None), + 
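+                # 'include' is optional in the config; if it is absent the
+                # crawler's include attribute stays None and _process_folder()
+                # returns no URLs, so LOCALHarvester configs are expected to set it.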
time_range = self.get_time_range() + ) + for url in self.config['paths'] + ] + ingester = ingesters.NansatIngester diff --git a/geospaas_harvesting/ingesters.py b/geospaas_harvesting/ingesters.py index fdcd74be..ea58f796 100644 --- a/geospaas_harvesting/ingesters.py +++ b/geospaas_harvesting/ingesters.py @@ -11,6 +11,7 @@ import uuid import xml.etree.ElementTree as ET from urllib.parse import urlparse +from dateutil.tz import tzutc import dateutil.parser import django.db import django.db.utils @@ -28,7 +29,7 @@ ISOTopicCategory, Location, Parameter, Platform) from nansat import Nansat from metanorm.handlers import GeospatialMetadataHandler - +from metanorm.utils import get_cf_or_wkv_standard_name logging.getLogger(__name__).addHandler(logging.NullHandler()) @@ -520,6 +521,16 @@ def _get_normalized_attributes(self, dataset_info, *args, **kwargs): normalized_attributes = {} n_points = int(kwargs.get('n_points', 10)) nansat_options = kwargs.get('nansat_options', {}) + url_scheme = urlparse(dataset_info).scheme + if not 'http' in url_scheme and not 'ftp' in url_scheme: + normalized_attributes['geospaas_service_name'] = FILE_SERVICE_NAME + normalized_attributes['geospaas_service'] = LOCAL_FILE_SERVICE + elif 'http' in url_scheme and not 'ftp' in url_scheme: + normalized_attributes['geospaas_service_name'] = DAP_SERVICE_NAME + normalized_attributes['geospaas_service'] = OPENDAP_SERVICE + elif 'ftp' in url_scheme: + raise ValueError("LOCALHarvester (which uses NansatIngester) is only for local file" + " addresses or http addresses, not for ftp protocol") # Open file with Nansat nansat_object = Nansat(nansat_filename(dataset_info), **nansat_options) @@ -527,22 +538,13 @@ def _get_normalized_attributes(self, dataset_info, *args, **kwargs): # get metadata from Nansat and get objects from vocabularies n_metadata = nansat_object.get_metadata() - # set service info attributes - url_scheme = urlparse(dataset_info).scheme - if 'http' in url_scheme: - normalized_attributes['geospaas_service_name'] = DAP_SERVICE_NAME - normalized_attributes['geospaas_service'] = OPENDAP_SERVICE - else: - normalized_attributes['geospaas_service_name'] = FILE_SERVICE_NAME - normalized_attributes['geospaas_service'] = LOCAL_FILE_SERVICE - # set compulsory metadata (source) normalized_attributes['entry_title'] = n_metadata.get('entry_title', 'NONE') normalized_attributes['summary'] = n_metadata.get('summary', 'NONE') normalized_attributes['time_coverage_start'] = dateutil.parser.parse( - n_metadata['time_coverage_start']) + n_metadata['time_coverage_start']).replace(tzinfo=tzutc()) normalized_attributes['time_coverage_end'] = dateutil.parser.parse( - n_metadata['time_coverage_end']) + n_metadata['time_coverage_end']).replace(tzinfo=tzutc()) normalized_attributes['platform'] = json.loads(n_metadata['platform']) normalized_attributes['instrument'] = json.loads(n_metadata['instrument']) normalized_attributes['specs'] = n_metadata.get('specs', '') @@ -551,15 +553,30 @@ def _get_normalized_attributes(self, dataset_info, *args, **kwargs): # set optional ForeignKey metadata from Nansat or from defaults normalized_attributes['gcmd_location'] = n_metadata.get( 'gcmd_location', pti.get_gcmd_location('SEA SURFACE')) - normalized_attributes['provider'] = n_metadata.get( - 'data_center', pti.get_gcmd_provider('NERSC')) + normalized_attributes['provider'] = pti.get_gcmd_provider( + n_metadata.get('provider', 'NERSC')) normalized_attributes['iso_topic_category'] = n_metadata.get( 'ISO_topic_category', 
pti.get_iso19115_topic_category('Oceans')) # Find coverage to set number of points in the geolocation - if len(nansat_object.vrt.dataset.GetGCPs()) > 0: + if nansat_object.vrt.dataset.GetGCPs(): nansat_object.reproject_gcps() normalized_attributes['location_geometry'] = GEOSGeometry( - nansat_object.get_border_wkt(nPoints=n_points), srid=4326) + nansat_object.get_border_wkt(n_points=n_points), srid=4326) + + json_dumped_dataset_parameters = n_metadata.get('dataset_parameters', None) + if json_dumped_dataset_parameters: + json_loads_result = json.loads(json_dumped_dataset_parameters) + if isinstance(json_loads_result, list): + normalized_attributes['dataset_parameters'] = [ + get_cf_or_wkv_standard_name(dataset_param) + for dataset_param in json_loads_result + ] + else: + self.LOGGER.error( + "'dataset_parameters' section of metadata is not a json-dumped python list", + exc_info=True) + raise TypeError( + "'dataset_parameters' section of metadata is not a json-dumped python list") return normalized_attributes diff --git a/runtests.py b/runtests.py index 3538cf64..f936fa5c 100644 --- a/runtests.py +++ b/runtests.py @@ -18,6 +18,6 @@ test_module = f".{sys.argv[1]}" if len(sys.argv) >= 2 else '' TestRunner = get_runner(settings) - test_runner = TestRunner() + test_runner = TestRunner(interactive=False) failures = test_runner.run_tests(["tests" + test_module]) sys.exit(bool(failures)) diff --git a/tests/data/nansat/arc_metno_dataset.nc b/tests/data/nansat/arc_metno_dataset.nc deleted file mode 100644 index 406ba2c0..00000000 Binary files a/tests/data/nansat/arc_metno_dataset.nc and /dev/null differ diff --git a/tests/test_crawlers.py b/tests/test_crawlers.py index 148410f9..9d65dceb 100644 --- a/tests/test_crawlers.py +++ b/tests/test_crawlers.py @@ -5,9 +5,10 @@ import json import logging import os -import os.path +import re import unittest import unittest.mock as mock +from unittest.mock import call from datetime import datetime, timezone from urllib.parse import urlparse @@ -56,14 +57,6 @@ def test_is_folder(self): with self.assertRaises(NotImplementedError): crawler._is_folder('') - def test_is_file(self): - """ - A NotImplementedError should be raised if the _is_file() method - is accessed directly on the WebDirectoryCrawler class - """ - crawler = crawlers.WebDirectoryCrawler('') - with self.assertRaises(NotImplementedError): - crawler._is_file('') def test_get_download_url(self): """ @@ -134,33 +127,31 @@ def test_add_folder_to_process(self): def test_process_folder_with_file(self): """_process_folder() should feed the _urls stack - with file paths which are not excluded + with only file paths which are included """ - crawler = crawlers.WebDirectoryCrawler('http://foo/bar') - crawler.excludes = ['.gz'] + crawler = crawlers.WebDirectoryCrawler('http://foo/bar', include='\.nc$') + crawler.EXCLUDE = re.compile(r'\.h5$') crawler.LOGGER = mock.Mock() with mock.patch.object(crawler, '_list_folder_contents') as mock_folder_contents, \ - mock.patch.object(crawler, '_is_file', return_value=True), \ mock.patch.object(crawler, '_is_folder', return_value=False), \ mock.patch.object(crawler, '_add_url_to_return') as mock_add_url: mock_folder_contents.return_value = ['/bar/baz.nc', '/bar/qux.gz'] crawler._process_folder('') - mock_add_url.assert_called_with('/bar/baz.nc') + mock_add_url.assert_called_once_with('/bar/baz.nc') def test_process_folder_with_folder(self): """_process_folder() should feed the _to_process stack with folder paths which are not excluded """ - crawler = 
crawlers.WebDirectoryCrawler('http://foo/bar') - crawler.excludes = ['qux'] + crawler = crawlers.WebDirectoryCrawler('http://foo/bar', include='baz') + crawler.EXCLUDE = re.compile(r'qux') crawler.LOGGER = mock.Mock() with mock.patch.object(crawler, '_list_folder_contents') as mock_folder_contents, \ - mock.patch.object(crawler, '_is_file', return_value=False), \ mock.patch.object(crawler, '_is_folder', return_value=True), \ mock.patch.object(crawler, '_add_folder_to_process') as mock_add_folder: mock_folder_contents.return_value = ['/bar/baz', '/bar/qux'] crawler._process_folder('') - mock_add_folder.assert_called_with('/bar/baz') + mock_add_folder.assert_called_once_with('/bar/baz') def test_get_year_folder_coverage(self): """Get the correct time range from a year folder""" @@ -341,14 +332,6 @@ def test_is_folder(self): with mock.patch('os.path.isdir', return_value=False): self.assertFalse(self.crawler._is_folder(''), "_is_folder() should return False") - def test_is_file(self): - """_is_file() should return True if the path points - to a regular file, False otherwise""" - with mock.patch('os.path.isfile', return_value=True): - self.assertTrue(self.crawler._is_file(''), "_is_file() should return True") - with mock.patch('os.path.isfile', return_value=False): - self.assertFalse(self.crawler._is_file(''), "_is_file() should return False") - class HTMLDirectoryCrawlerTestCase(unittest.TestCase): """Tests for the HTMLDirectoryCrawler crawler""" @@ -498,7 +481,7 @@ def test_process_folder(self): Explore root page and make sure the _url and _to_process attributes of the crawler have the right values """ - crawler = crawlers.OpenDAPCrawler(self.TEST_DATA['root']['urls'][0]) + crawler = crawlers.OpenDAPCrawler(self.TEST_DATA['root']['urls'][0],include='\.nc$') with self.assertLogs(crawler.LOGGER): crawler._process_folder(crawler._to_process.pop()) self.assertListEqual(crawler._urls, [self.TEST_DATA['dataset']['urls'][0]]) @@ -506,7 +489,8 @@ def test_process_folder(self): def test_process_folder_with_duplicates(self): """If the same URL is present twice in the page, it should only be processed once""" - crawler = crawlers.OpenDAPCrawler(self.TEST_DATA['root_duplicates']['urls'][0]) + crawler = crawlers.OpenDAPCrawler(self.TEST_DATA['root_duplicates']['urls'][0], + include='\.nc$') with self.assertLogs(crawler.LOGGER): crawler._process_folder(crawler._to_process.pop()) self.assertListEqual(crawler._urls, [self.TEST_DATA['dataset']['urls'][1]]) @@ -524,7 +508,7 @@ def test_process_folder_with_time_restriction(self): the crawler's time range. 
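As background for the day-of-year restriction exercised here, a rough sketch (hypothetical helper name and simplified pattern, not the crawler's real folder-coverage code) of how a `/YYYY/DDD/` folder can be checked against a time range:

```python
import re
from datetime import datetime, timezone

# Simplified version of the idea behind WebDirectoryCrawler.DAY_OF_YEAR_MATCHER;
# the real pattern and folder-coverage logic live in crawlers.py.
DAY_OF_YEAR = re.compile(r'^.*/(?P<year>\d{4})/(?P<day>\d{3})(/.*)?$')

def folder_overlaps(path, start, end):
    """Return True if a /YYYY/DDD/ folder overlaps the [start, end] range."""
    match = DAY_OF_YEAR.match(path)
    if not match:
        return True  # no date in the path: keep it and let deeper levels decide
    day_start = datetime.strptime(
        f"{match['year']} {match['day']}", '%Y %j').replace(tzinfo=timezone.utc)
    day_end = day_start.replace(hour=23, minute=59, second=59)
    return day_start <= end and start <= day_end

print(folder_overlaps(
    'https://foo/2019/046/',
    datetime(2019, 2, 15, 11, tzinfo=timezone.utc),
    datetime(2019, 2, 15, 13, tzinfo=timezone.utc)))  # True
```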
""" crawler = crawlers.OpenDAPCrawler( - self.TEST_DATA['folder_day_of_year']['urls'][0], + self.TEST_DATA['folder_day_of_year']['urls'][0], include='\.nc$', time_range=(datetime(2019, 2, 15, 11, 0, 0), datetime(2019, 2, 15, 13, 0, 0))) with self.assertLogs(crawler.LOGGER): crawler._process_folder(crawler._to_process.pop()) @@ -540,7 +524,7 @@ def test_process_folder_with_time_restriction(self): def test_iterating(self): """Test the call to the __iter__ method""" crawler = crawlers.OpenDAPCrawler( - self.TEST_DATA['root']['urls'][0], + self.TEST_DATA['root']['urls'][0], include='\.nc$', time_range=(datetime(2019, 2, 14, 0, 0, 0), datetime(2019, 2, 14, 9, 0, 0))) crawler_iterator = iter(crawler) @@ -863,7 +847,7 @@ def emulate_cwd_of_ftp(self, name): def test_ftp_correct_navigation(self, mock_ftp): """check that file URLs and folders paths are added to the right stacks""" - test_crawler = crawlers.FTPCrawler('ftp://foo', files_suffixes='.gz') + test_crawler = crawlers.FTPCrawler('ftp://foo', include='\.gz$') test_crawler.ftp.nlst.return_value = ['file1.gz', 'folder_name', 'file3.bb', 'file2.gz', ] test_crawler.ftp.cwd = self.emulate_cwd_of_ftp test_crawler.ftp.host = '' @@ -883,7 +867,7 @@ def test_ftp_correct_exception(self, mock_ftp): """ test_crawler = crawlers.FTPCrawler( - 'ftp://', username="d", password="d", files_suffixes='.gz') + 'ftp://', username="d", password="d", include='\.gz$') mock_ftp.side_effect = ftplib.error_perm("503") test_crawler.set_initial_state() diff --git a/tests/test_harvesters.py b/tests/test_harvesters.py index 3d5e6088..09f89331 100644 --- a/tests/test_harvesters.py +++ b/tests/test_harvesters.py @@ -1,10 +1,10 @@ """Tests for the harvesters""" #pylint: disable=protected-access +import re import unittest import unittest.mock as mock from datetime import datetime - from geospaas.vocabularies.models import Parameter import geospaas_harvesting.crawlers as crawlers @@ -191,33 +191,17 @@ def test_creodias_harvester(self): self.assertIsInstance(harvester._current_crawler, crawlers.CreodiasEOFinderCrawler) self.assertIsInstance(harvester._ingester, ingesters.CreodiasEOFinderIngester) - def test_osisaf_harvester_extra_excludes(self): - """ extra excludes should have passed by the excludes as a list in configuration file. + def test_osisaf_harvester_include(self): + """ include criteria should have passed by the "includes" as a regex in configuration file. 
Otherwise, accossiated error must be raised """ harvester = harvesters.OSISAFHarvester(urls=[''], max_fetcher_threads=1, max_db_threads=1, - excludes=['ease', '_sh_polstere', ]) - self.assertListEqual(harvester._current_crawler.excludes, - crawlers.ThreddsCrawler.EXCLUDE + ['ease', '_sh_polstere']) + include='ease|_sh_polstere') + self.assertEqual(harvester._current_crawler.include, re.compile('ease|_sh_polstere')) harvester = harvesters.OSISAFHarvester(urls=[''], max_fetcher_threads=1, max_db_threads=1) - self.assertListEqual(harvester._current_crawler.excludes, crawlers.ThreddsCrawler.EXCLUDE) - - with self.assertRaises(HarvesterConfigurationError): + self.assertIsNone(harvester._current_crawler.include) + with self.assertRaises(HarvesterConfigurationError):#incorrectly passsed as a list harvester = harvesters.OSISAFHarvester( - urls=[''], max_fetcher_threads=1, max_db_threads=1, excludes='ease') - - def test_extra_excludes_with_no_CLASS_EXCLUDE(self): - """ shall return the excludes from the config file """ - class TestCrawler(crawlers.WebDirectoryCrawler): - EXCLUDE = None - - class TestHarvester(harvesters.WebDirectoryHarvester): - ingester = ingesters.DDXIngester - crawler = TestCrawler - harvester = TestHarvester(urls=[''], max_fetcher_threads=1, max_db_threads=1, - excludes=['ease', '_sh_polstere', ]) - self.assertListEqual(harvester._current_crawler.excludes, ['ease', '_sh_polstere']) - harvester = TestHarvester(urls=[''], max_fetcher_threads=1, max_db_threads=1,) - self.assertEqual(harvester._current_crawler.excludes, []) + urls=[''], max_fetcher_threads=1, max_db_threads=1, include=['ease']) class HarvesterExceptTestCase(unittest.TestCase): diff --git a/tests/test_ingesters.py b/tests/test_ingesters.py index cba32ed3..f07b07c0 100644 --- a/tests/test_ingesters.py +++ b/tests/test_ingesters.py @@ -18,7 +18,8 @@ from geospaas.catalog.models import Dataset, DatasetURI from geospaas.vocabularies.models import DataCenter, ISOTopicCategory, Parameter import geospaas_harvesting.ingesters as ingesters - +from geospaas.catalog.managers import (DAP_SERVICE_NAME, FILE_SERVICE_NAME, + LOCAL_FILE_SERVICE, OPENDAP_SERVICE) class IngesterTestCase(django.test.TransactionTestCase): """Test the base ingester class""" @@ -739,8 +740,8 @@ def test_get_normalized_attributes(self): """ dataset_info = {'services': {'download': {'url': 'http://something'}}} with mock.patch.object( - self.ingester._metadata_handler, 'get_parameters', return_value={'foo': 'bar'}), \ - mock.patch.object(self.ingester, 'add_url') as mock_add_url: + self.ingester._metadata_handler, 'get_parameters', return_value={'foo': 'bar'}), \ + mock.patch.object(self.ingester, 'add_url') as mock_add_url: self.assertDictEqual( self.ingester._get_normalized_attributes(dataset_info), { @@ -821,21 +822,46 @@ def setUp(self): self.mock_param_count = self.patcher_param_count.start() self.mock_param_count.return_value = 2 + self.patcher_get_metadata = mock.patch('geospaas_harvesting.ingesters.Nansat') + self.mock_get_metadata = self.patcher_get_metadata.start() + + self.mock_get_metadata.return_value.get_border_wkt.return_value = ( + 'POLYGON((24.88 68.08,22.46 68.71,19.96 69.31,17.39 69.87,24.88 68.08))') + def tearDown(self): self.patcher_param_count.stop() + self.patcher_get_metadata.stop() def test_normalize_netcdf_attributes_with_nansat(self): """Test the ingestion of a netcdf file using nansat""" + self.mock_get_metadata.return_value.get_metadata.side_effect = [ + {'bulletin_type': 'Forecast', 'Conventions': 'CF-1.4', 'field_date': 
'2017-05-29', + 'field_type': 'Files based on file type nersc_daily', + 'filename': '/vsimem/343PBWM116.vrt', 'Forecast_range': '10 days', + 'history': '20170521:Created by program hyc2proj, version V0.3', + 'institution': 'MET Norway, Henrik Mohns plass 1, N-0313 Oslo, Norway', + 'instrument': + '{"Category": "In Situ/Laboratory Instruments", "Class": "Data Analysis", ' + '"Type": "Environmental Modeling", "Subtype": "", "Short_Name": "Computer", ' + '"Long_Name": "Computer"}', + 'platform': + '{"Category": "Models/Analyses", "Series_Entity": "", "Short_Name": "MODELS", ' + '"Long_Name": ""}', + 'references': 'http://marine.copernicus.eu', 'source': 'NERSC-HYCOM model fields', + 'time_coverage_end': '2017-05-27T00:00:00', 'time_coverage_start': + '2017-05-18T00:00:00', + 'title': + 'Arctic Ocean Physics Analysis and Forecast, 12.5km daily mean ' + '(dataset-topaz4-arc-myoceanv2-be)', + 'dataset_parameters': '["surface_backwards_scattering_coefficient_of_radar_wave"]'}] ingester = ingesters.NansatIngester() - normalized_attributes = ingester._get_normalized_attributes( - os.path.join(os.path.dirname(__file__), 'data/nansat/arc_metno_dataset.nc')) - + normalized_attributes = ingester._get_normalized_attributes('') self.assertEqual(normalized_attributes['entry_title'], 'NONE') self.assertEqual(normalized_attributes['summary'], 'NONE') self.assertEqual(normalized_attributes['time_coverage_start'], datetime( - year=2017, month=5, day=18, hour=0, minute=0, second=0)) + year=2017, month=5, day=18, hour=0, minute=0, second=0, tzinfo=tzutc())) self.assertEqual(normalized_attributes['time_coverage_end'], datetime( - year=2017, month=5, day=27, hour=0, minute=0, second=0)) + year=2017, month=5, day=27, hour=0, minute=0, second=0, tzinfo=tzutc())) self.assertEqual(normalized_attributes['instrument']['Short_Name'], 'Computer') self.assertEqual(normalized_attributes['instrument']['Long_Name'], 'Computer') @@ -850,16 +876,7 @@ def test_normalize_netcdf_attributes_with_nansat(self): self.assertEqual(normalized_attributes['platform']['Series_Entity'], '') expected_geometry = GEOSGeometry(( - 'POLYGON((' - '20.7042 89.9999,24.9957 89.9999,28.0373 89.9998,30.2939 89.9998,32.0298 89.9998,' - '33.4042 89.9998,34.5181 89.9998,35.4387 89.9997,36.2117 89.9997,36.8699 89.9997,' - '37.6088 89.9997,37.6088 89.9997,33.5816 89.9997,29.6653 89.9996,25.8904 89.9996,' - '22.28 89.9996,18.8504 89.9996,15.611 89.9996,12.5653 89.9996,9.7123 89.9996,' - '7.0469 89.9996,4.1286 89.9995,4.1286 89.9995,1.3791 89.9996,-0.8831 89.9996,' - '-3.3327 89.9996,-5.984 89.9996,-8.85 89.9996,-11.9418 89.9996,-15.2668 89.9997,' - '-18.8277 89.9997,-22.6199 89.9997,-26.6303 89.9997,-26.6303 89.9997,-24.7751 89.9997,' - '-22.9012 89.9997,-20.6677 89.9998,-17.9691 89.9998,-14.6602 89.9998,-10.5392 89.9998,' - '-5.3282 89.9998,1.34 89.9999,9.8983 89.9999,20.7042 89.9999))'), srid=4326) + 'POLYGON((24.88 68.08,22.46 68.71,19.96 69.31,17.39 69.87,24.88 68.08))'), srid=4326) # This fails, which is why string representations are compared. Any explanation is welcome. 
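Ahead of the `dataset_parameters` assertions below, a standalone sketch of the new validation added in `NansatIngester._get_normalized_attributes()`; `parse_dataset_parameters` is a hypothetical helper, and the real code additionally maps each name through `metanorm.utils.get_cf_or_wkv_standard_name`:

```python
import json

# Only a JSON-dumped Python list is accepted, as produced by the mappers.
def parse_dataset_parameters(raw):
    parsed = json.loads(raw)
    if not isinstance(parsed, list):
        raise TypeError(
            "'dataset_parameters' section of metadata is not a json-dumped python list")
    return parsed

print(parse_dataset_parameters(
    '["surface_backwards_scattering_coefficient_of_radar_wave"]'))
# parse_dataset_parameters("{}") raises TypeError, as the exception-handling test expects
```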
# self.assertTrue(normalized_attributes['location_geometry'].equals(expected_geometry)) @@ -877,6 +894,24 @@ def test_normalize_netcdf_attributes_with_nansat(self): self.assertEqual( normalized_attributes['gcmd_location']['Location_Category'], 'VERTICAL LOCATION') self.assertEqual(normalized_attributes['gcmd_location']['Location_Type'], 'SEA SURFACE') + self.assertEqual( + normalized_attributes['dataset_parameters'], + [ + OrderedDict( + [('standard_name', 'surface_backwards_scattering_coefficient_of_radar_wave'), + ('canonical_units', '1'), + ('grib', ''), + ('amip', ''), + ('description', + 'The scattering/absorption/attenuation coefficient is assumed to be an ' + 'integral over all wavelengths, unless a coordinate of ' + 'radiation_wavelength is included to specify the wavelength. Scattering of' + ' radiation is its deflection from its incident path without loss of ' + 'energy. Backwards scattering refers to the sum of scattering into all ' + 'backward angles i.e. scattering_angle exceeding pi/2 radians. A ' + 'scattering_angle should not be specified with this quantity.') + ]) + ]) # TODO: make this work # def test_ingest_dataset_twice_different_urls(self): @@ -893,3 +928,91 @@ def test_normalize_netcdf_attributes_with_nansat(self): # self.assertTrue(logger_cm.records[0].msg.endswith('already exists in the database.')) # self.assertEqual(Dataset.objects.count(), initial_datasets_count + 1) + + def test_exception_handling_of_bad_development_of_mappers(self): + """Test the exception handling of bad development of 'dataset_parameters' of metadata. + ANY mapper should return a python list as 'dataset_parameters' of metadata.""" + self.mock_get_metadata.return_value.get_metadata.side_effect = [ + { + 'time_coverage_end': '2017-05-27T00:00:00', 'time_coverage_start': + '2017-05-18T00:00:00', + 'platform': + '{"Category": "Models/Analyses", "Series_Entity": "", "Short_Name": "MODELS", ' + '"Long_Name": ""}', + 'instrument': + '{"Category": "In Situ/Laboratory Instruments", "Class": "Data Analysis", ' + '"Type": "Environmental Modeling", "Subtype": "", "Short_Name": "Computer", ' + '"Long_Name": "Computer"}', + 'dataset_parameters': "{}"}] + ingester = ingesters.NansatIngester() + with self.assertRaises(TypeError) as err: + normalized_attributes = ingester._get_normalized_attributes('') + self.assertEqual( + err.exception.args[0], + "'dataset_parameters' section of metadata is not a json-dumped python list") + + def test_usage_of_nansat_ingester_with_http_protocol_in_the_OPENDAP_cases(self): + """LOCALHarvester(which uses NansatIngester) can be used for `OPENDAP provided` files """ + ingester = ingesters.NansatIngester() + self.mock_get_metadata.return_value.get_metadata.side_effect = [{ + 'time_coverage_end': '2017-05-27T00:00:00', 'time_coverage_start': + '2017-05-18T00:00:00', + 'platform': + '{"Category": "Models/Analyses", "Series_Entity": "", "Short_Name": "MODELS", ' + '"Long_Name": ""}', + 'instrument': + '{"Category": "In Situ/Laboratory Instruments", "Class": "Data Analysis", ' + '"Type": "Environmental Modeling", "Subtype": "", "Short_Name": "Computer", ' + '"Long_Name": "Computer"}', + }] + normalized_attributes = ingester._get_normalized_attributes('http://') + self.assertEqual(normalized_attributes['geospaas_service_name'], DAP_SERVICE_NAME) + self.assertEqual(normalized_attributes['geospaas_service'], OPENDAP_SERVICE) + + def test_usage_of_nansat_ingester_with_local_file(self): + """LOCALHarvester(which uses NansatIngester) can be used for local files """ + ingester = 
ingesters.NansatIngester() + self.mock_get_metadata.return_value.get_metadata.side_effect = [{ + 'time_coverage_end': '2017-05-27T00:00:00', 'time_coverage_start': + '2017-05-18T00:00:00', + 'platform': + '{"Category": "Models/Analyses", "Series_Entity": "", "Short_Name": "MODELS", ' + '"Long_Name": ""}', + 'instrument': + '{"Category": "In Situ/Laboratory Instruments", "Class": "Data Analysis", ' + '"Type": "Environmental Modeling", "Subtype": "", "Short_Name": "Computer", ' + '"Long_Name": "Computer"}', + }] + normalized_attributes = ingester._get_normalized_attributes('/src/blabla') + self.assertEqual(normalized_attributes['geospaas_service_name'], FILE_SERVICE_NAME) + self.assertEqual(normalized_attributes['geospaas_service'], LOCAL_FILE_SERVICE) + + + def test_exception_handling_of_bad_inputting_of_nansat_ingester_with_ftp_protocol(self): + """LOCALHarvester(which uses NansatIngester) is only for local file addresses""" + ingester = ingesters.NansatIngester() + self.mock_get_metadata.return_value.get_metadata.side_effect = [''] + with self.assertRaises(ValueError) as err: + normalized_attributes = ingester._get_normalized_attributes('ftp://') + self.assertEqual( + err.exception.args[0], + "LOCALHarvester (which uses NansatIngester) is only for local file addresses or http " + "addresses, not for ftp protocol") + + def test_reprojection_based_on_gcps(self): + """Nansat ingester should reproject if there is any GC point in the metadata""" + self.mock_get_metadata.return_value.vrt.dataset.GetGCPs.return_value = True + self.mock_get_metadata.return_value.get_metadata.side_effect = [{ + 'time_coverage_end': '2017-05-27T00:00:00', 'time_coverage_start': + '2017-05-18T00:00:00', + 'platform': + '{"Category": "Models/Analyses", "Series_Entity": "", "Short_Name": "MODELS", ' + '"Long_Name": ""}', + 'instrument': + '{"Category": "In Situ/Laboratory Instruments", "Class": "Data Analysis", ' + '"Type": "Environmental Modeling", "Subtype": "", "Short_Name": "Computer", ' + '"Long_Name": "Computer"}', + }] + ingester = ingesters.NansatIngester() + normalized_attributes = ingester._get_normalized_attributes('') + self.mock_get_metadata.return_value.reproject_gcps.assert_called_once()
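For completeness, a standalone sketch of the URL-scheme dispatch that the service-related tests above exercise. The returned strings are placeholders; the real code assigns the `DAP_SERVICE_NAME` / `OPENDAP_SERVICE` / `FILE_SERVICE_NAME` / `LOCAL_FILE_SERVICE` constants imported from `geospaas.catalog.managers`:

```python
from urllib.parse import urlparse

# Sketch of the scheme dispatch added to NansatIngester._get_normalized_attributes().
def resolve_service(dataset_info):
    scheme = urlparse(dataset_info).scheme
    if 'ftp' in scheme:
        raise ValueError(
            'LOCALHarvester (which uses NansatIngester) is only for local file'
            ' addresses or http addresses, not for ftp protocol')
    if 'http' in scheme:
        return 'DAP_SERVICE_NAME', 'OPENDAP_SERVICE'
    return 'FILE_SERVICE_NAME', 'LOCAL_FILE_SERVICE'


print(resolve_service('/src/blabla'))                                   # local file service
print(resolve_service('https://opendap.jpl.nasa.gov/opendap/file.nc'))  # DAP / OPENDAP
# resolve_service('ftp://foo') raises ValueError, matching the new ftp test
```

The same hunk in ingesters.py also attaches `tzutc()` to `time_coverage_start`/`time_coverage_end`, which is why the expected datetimes in `test_normalize_netcdf_attributes_with_nansat` now carry `tzinfo=tzutc()`.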