diff --git a/lmpy/data_preparation/build_grid.py b/lmpy/data_preparation/build_grid.py index fb67b6c06..a62377bf0 100644 --- a/lmpy/data_preparation/build_grid.py +++ b/lmpy/data_preparation/build_grid.py @@ -152,7 +152,7 @@ def build_grid( "min_y_coordinate": min_y, "max_y_coordinate": max_y, "cell_sides": cell_sides, - "size": cell_size + "cell_size": cell_size } script_name = os.path.splitext(os.path.basename(__file__))[0] if min_x >= max_x or min_y >= max_y: @@ -197,6 +197,8 @@ def build_grid( f"{cell_sides}-sided cells of {cell_size} size and x-extent {min_x} " + f"to {max_x}, y-extent {min_y} to {max_y}.", refname=script_name) + # Note that site_id is 0-based + site_headers = [] shape_id = 0 for cell_wkt in wkt_generator: geom = ogr.CreateGeometryFromWkt(cell_wkt) @@ -208,11 +210,13 @@ def build_grid( feat.SetField(site_x, centroid.GetX()) feat.SetField(site_y, centroid.GetY()) feat.SetField(site_id, shape_id) + site_headers.append((shape_id, centroid.GetX(), centroid.GetY())) layer.CreateFeature(feat) shape_id += 1 feat.Destroy() data_set.Destroy() - report["size"] = shape_id + report["grid_size"] = shape_id + # report["site_headers"] = site_headers if logger is not None: logger.log( f"Wrote {grid_file_name} with {shape_id} sites.", refname=script_name) diff --git a/lmpy/matrix.py b/lmpy/matrix.py index 015270b70..05192120e 100644 --- a/lmpy/matrix.py +++ b/lmpy/matrix.py @@ -235,7 +235,7 @@ def concatenate(cls, mtx_list, axis=0): mtx.set_headers([''], axis=str(axis)) # Cast mtx to Matrix in case it is not mtx = mtx.view(Matrix) - h = mtx.get_headers(axis=str(axis)) + h = deepcopy(mtx.get_headers(axis=str(axis))) if h is None: h = [''] axis_headers.extend(h) @@ -246,8 +246,8 @@ def concatenate(cls, mtx_list, axis=0): # metadata new_mtx = cls( np.concatenate(mtx_objs, axis=axis), - headers=first_mtx.get_headers(), - metadata=first_mtx.get_metadata(), + headers=deepcopy(first_mtx.get_headers()), + metadata=deepcopy(first_mtx.get_metadata()), ) # Set the headers for the new axis new_mtx.set_headers(axis_headers, axis=str(axis)) @@ -593,114 +593,6 @@ def get_report(self): self._report["min"] = self.min().item() return self._report - # # ........................... - # def write_csv(self, flo, *slice_args): - # """Writes the Matrix object to a CSV file-like object. - # - # Args: - # flo (file-like): The file-like object to write to. - # *slice_args: A variable length argument list of iterables to use - # for a slice operation prior to generating CSV content. - # - # Todo: - # Handle header overlap (where the header for one axis is for another - # axis header. - # - # Note: - # Currently only works for 2-D tables. - # """ - # if list(slice_args): - # mtx = self.slice(*slice_args) - # else: - # mtx = self - # - # if mtx.ndim > 2: - # mtx = mtx.flatten_2d() - # - # # ..................... - # # Inner functions - # - # # ..................... - # def already_lists(x): - # """Use this function for processing headers when they are lists. - # - # Args: - # x (:obj:`list`): A list value to return. - # - # Returns: - # list: A list of data. - # """ - # return x - # - # # ..................... - # def make_lists(x): - # """Use this function for processing non-list headers. - # - # Args: - # x (:obj:`object`): A non-list value to modify. - # - # Returns: - # list: A list of data. - # """ - # return [x] - # - # # ..................... - # def csv_generator(): - # """Generator that yields rows of values to be output as CSV. - # - # Yields: - # list: A list of data for a row. - # """ - # try: - # row_headers = mtx.headers['0'] - # except (KeyError, TypeError): - # # No row headers - # row_headers = [[] for _ in range(mtx.shape[0])] - # - # if isinstance(row_headers[0], list): - # listify = already_lists - # else: - # listify = make_lists - # - # # # Start with the header row, if we have one - # # if '1' in mtx.headers and mtx.headers['1']: - # # header_row = [''] * len( - # # stringify(row_headers[0]) if row_headers else "") - # # Start with the header row, if we have one - # if '1' in mtx.headers and mtx.headers['1']: - # # Make column headers lists of lists - # if not isinstance(mtx.headers['1'][0], (tuple, list)): - # header_row = [''] * len( - # listify(row_headers[0]) if row_headers else [] - # ) - # header_row.extend(mtx.headers['1']) - # yield header_row - # else: - # for i in range(len(mtx.headers['1'][0])): - # header_row = [''] * len( - # listify(row_headers[0]) if row_headers else [] - # ) - # header_row.extend( - # [ - # mtx.headers['1'][j][i] - # for j in range(len(mtx.headers['1'])) - # ] - # ) - # yield header_row - # # For each row in the data set - # for i in range(mtx.shape[0]): - # # Add the row headers if exists - # row = [] - # row.extend(listify(row_headers[i])) - # # Get the data from the data array - # row.extend(mtx[i].tolist()) - # yield row - # - # # ..................... - # # Main write_csv function - # for row in csv_generator(): - # flo.write(u"{}\n".format(','.join([str(v) for v in row]))) - # ..................... def _get_header_value_combined(self, x): """Use this function for processing list headers into a single string. @@ -711,7 +603,7 @@ def _get_header_value_combined(self, x): Returns: str: A string. """ - if isinstance(x, list): + if isinstance(x, list) or isinstance(x, tuple): tmp = [str(elt) for elt in x] return " ".join(tmp) else: @@ -727,21 +619,73 @@ def _get_header_as_list_of_string(self, x): Returns: list: A list of data. """ - if isinstance(x, list): + if isinstance(x, list) or isinstance(x, tuple): return [str(elt) for elt in x] else: return [str(x)] + # ..................................................................................... + @classmethod + def get_map_resolution_headers_from_sites(cls, site_headers): + """Creates an empty 2-d matrix to use for mapping. + + Args: + site_headers (list of tuples): A list containing the site headers + (siteid, x, y) for a flattened geospatial matrix containing sites along + the y/0 axis + + Returns: + x_headers: list of x center coordinates for column headers + y_headers: list of y center coordinates for row headers + + Notes: + If the matrix is compressed, with empty rows (sites) and columns removed, + the missing site coordinates will be filled in to the map matrix headers + so the map will represent the geospatial region without holes. If all sites + are missing from an edge (upper, lower, right, left boundary) of the area + of interest, those will not be retained, the extent of the map will be + smaller. + """ + # First and second sites along the upper boundary + site_1, x_1, y_1 = site_headers[0] + site_2, x_2, y_2 = site_headers[1] + # Find resolution in a regular or compressed matrix by dividing + # map distance by number of cells between sites + x_resolution = (x_2 - x_1) / (site_2 - site_1) + + # Extent of matrix, using centroid coordinate values (not cell edges) + minx = maxx = x_1 + miny = maxy = y_1 + for _, x, y in site_headers: + if x < minx: + minx = x + if x > maxx: + maxx = x + if y < miny: + miny = y + if y > maxy: + maxy = y + # Fill in any x or y centroids missing from the input site_headers/matrix + x_centers = list(np.arange(minx, (maxx + x_resolution), x_resolution)) + y_centers = list(np.arange(maxy, (miny - x_resolution), (x_resolution * -1))) + return x_resolution, x_centers, y_centers + # ........................... def write_csv( - self, flo, *slice_args, delimiter=","): - """Writes the Matrix object to a CSV file-like object. + self, filename, *slice_args, delimiter=",", is_pam=False): + """Writes the Matrix object to a CSV file. Args: - flo (file-like): The file-like object to write to. + filename (str): The filename to write to. *slice_args: A variable length argument list of iterables to use for a slice operation prior to generating CSV content. delimiter (str): 1-character delimiter to separate columns + is_pam (bool): If true, input matrix is boolean, and output will be + written as 0/1 + + Raises: + OSError: on open or write error + IOError: on open or write error Note: Currently only works for 2-D tables. @@ -777,21 +721,147 @@ def write_csv( # Assemble column headers, each element will be a row col_headers_listed = [ self._get_header_as_list_of_string(chdr) for chdr in col_headers] - for i in range(col_header_elt_count): - # Start with one empty column for each element in individual row header - # so that column headers correctly head data (not row headers) - header_row = [""] * row_header_elt_count - header_row.extend(elt[i] for elt in col_headers_listed) - # Write column headers as first line - flo.write(u"{}\n".format(delimiter.join(header_row))) - - # Write each line of data, preceded by its header - for r in range(mtx.shape[0]): - # Start with each element in row header - line = self._get_header_as_list_of_string(row_headers[r]) - # Extend with data - line.extend(str(v) for v in mtx[r]) - flo.write(u"{}\n".format(delimiter.join(line))) + + try: + with open(filename, mode='wt') as csv_out: + # Write each line of column headers, one per element + for i in range(col_header_elt_count): + # Start with one empty column for each element in individual row + # header so that column headers are aligned over data columns + header_row = [""] * row_header_elt_count + # Then a row for the ith column header element + header_row.extend(elt[i] for elt in col_headers_listed) + # Write column header line + csv_out.write(u"{}\n".format(delimiter.join(header_row))) + + # Write each line of data, preceded by its header + for r in range(mtx.shape[0]): + # Address the column differently if 1-D + if mtx.ndim == 1: + column = mtx + else: + column = mtx[r] + # Start with each element in row header + line = self._get_header_as_list_of_string(row_headers[r]) + # Extend with data + # TODO: not working for 1d matrix, treat as a vector + if is_pam is True: + for v in column: + line.append("1" if v is True else "0") + else: + line.extend(str(v) for v in column) + csv_out.write(u"{}\n".format(delimiter.join(line))) + except OSError: + raise + except IOError: + raise + + # ..................................................................................... + def is_flattened_geospatial(self): + """Identifies whether matrix has x and y coordinates along 1 and 0 axes. + + Returns: + True if flattened geospatial matrix, + False if x coordinates in columns, y coordinates in rows + """ + row_headers = self.get_row_headers() + # row/site headers in flattened geospatial matrix are tuples of + # (siteid, x_coord, y_coord) + if type(row_headers[0]) in (list, tuple) and len(row_headers[0]) == 3: + return True + else: + return False + + # ..................................................................................... + def is_geospatial(self): + """Identifies whether matrix has x and y coordinates along 1 and 0 axes. + + Returns: + True if flattened geospatial matrix, + False if x coordinates in columns, y coordinates in rows + + Note: + This checks only that headers can be converted to floats, not whether the + values are reasonable geospatial coordinates. + """ + row_headers = self.get_row_headers() + # row/site headers in flattened geospatial matrix are tuples of + # (siteid, x_coord, y_coord) + if type(row_headers[0]) in (list, tuple) and len(row_headers[0]) == 3: + return True + elif type(row_headers) is list and len(row_headers) > 0: + try: + float(row_headers[0]) + except ValueError: + return False + col_headers = self.get_column_headers() + try: + float(col_headers[0]) + except ValueError: + return False + return True + + # ..................................................................................... + def get_coordinate_headers_resolution(self): + """Get coordinate headers from a matrix with coordinates along one or two axes. + + Returns: + x_headers (list): list of x coordinates, centroid of each cell in column + y_headers (list): list of y coordinates, centroid of each cell in row + + Raises: + Exception: on matrix contains < 2 columns + Exception: on matrix contains < 2 rows + + Notes: + Assume that if the matrix is compressed, there are at least one pair of + neighboring rows or columns. + Assumes x and y resolution are equal + """ + row_headers = self.get_row_headers() + if self.is_flattened_geospatial(): + (x_resolution, + x_centers, y_centers) = self.get_map_resolution_headers_from_sites( + row_headers) + else: + # If getting from a map matrix, sites should not be compressed + x_centers = self.get_column_headers() + y_centers = row_headers + x_resolution = x_centers[1] - x_centers[0] + + if len(x_centers) <= 1: + raise Exception( + f"Matrix contains only {len(x_centers)} columns on the x-axis ") + if len(y_centers) <= 1: + raise Exception( + f"Matrix contains only {len(y_centers)} rows on the y-axis") + + return x_centers, y_centers, x_resolution + + # ..................................................................................... + def get_extent_resolution_coords(self): + """Gets x and y extents and resolution of an uncompressed geospatial matrix. + + Returns: + min_x (numeric): The minimum x value of the map extent. + min_y (numeric): The minimum y value of the map extent. + max_x (numeric): The maximum x value of the map extent. + max_y (numeric): The maximum y value of the map extent. + x_res (numeric): The width of each matrix cell. + y_res (numeric): The height of each matrix cell. + height (numeric): the number of rows, axis 0, of the matrix + width (numeric): the number of columns, axis 1, of the matrix + """ + # Headers are coordinate centroids + x_centers, y_centers, resolution = self.get_coordinate_headers_resolution() + + # Extend to edges by 1/2 resolution + min_x = x_centers[0] - resolution/2.0 + min_y = y_centers[-1] - resolution/2.0 + max_x = x_centers[-1] + resolution/2.0 + max_y = y_centers[0] + resolution/2.0 + + return min_x, min_y, max_x, max_y, resolution, x_centers, y_centers # ............................................................................. diff --git a/lmpy/plots/plot.py b/lmpy/plots/plot.py index 8bdc7fd0c..77a257843 100644 --- a/lmpy/plots/plot.py +++ b/lmpy/plots/plot.py @@ -3,8 +3,6 @@ import matplotlib.pyplot as plt import numpy as np -from lmpy.spatial.map import get_extent_resolution_coords_from_matrix - # ..................................................................................... def plot_matrix( @@ -34,8 +32,7 @@ def plot_matrix( vmax = matrix.max() if vmin == -9999: vmin = matrix.min() - min_x, min_y, max_x, max_y, _, _, _, _ = get_extent_resolution_coords_from_matrix( - matrix) + min_x, min_y, max_x, max_y, _, _, _, _ = matrix.get_extent_resolution_coords() extent = (min_x, max_x, min_y, max_y) base_layer_params = {k: v for k, v in dict(extent=extent).items() if v is not None} diff --git a/lmpy/spatial/geojsonify.py b/lmpy/spatial/geojsonify.py index 36c8960bc..86c75ad97 100644 --- a/lmpy/spatial/geojsonify.py +++ b/lmpy/spatial/geojsonify.py @@ -5,7 +5,6 @@ from osgeo import ogr from lmpy.log import logit -from lmpy.spatial.map import get_coordinate_headers_resolution # ..................................................................................... @@ -92,7 +91,7 @@ def geojsonify_matrix(matrix, resolution=None, omit_values=None, logger=None): f"Found {len(row_headers)} sites and {len(column_headers)} taxa in matrix.", refname=refname) - _, _, res = get_coordinate_headers_resolution(matrix) + _, _, res = matrix.get_coordinate_headers_resolution() if resolution is None: resolution = res diff --git a/lmpy/spatial/map.py b/lmpy/spatial/map.py index dca0e03fd..6e5e5978c 100644 --- a/lmpy/spatial/map.py +++ b/lmpy/spatial/map.py @@ -25,7 +25,7 @@ def _create_empty_map_matrix_from_matrix(matrix): axis 1 represents the columns/x coordinate/longitude """ # Headers are coordinate centroids - x_headers, y_headers, resolution = get_coordinate_headers_resolution(matrix) + x_headers, y_headers, resolution = matrix.get_coordinate_headers_resolution() map_matrix = Matrix( np.zeros((len(y_headers), len(x_headers)), dtype=int), headers={ @@ -93,174 +93,105 @@ def _create_empty_map_matrix_from_centroids(x_centers, y_centers, dtype): # ..................................................................................... -def is_flattened_geospatial_matrix(matrix): - """Identifies whether matrix has x and y coordinates along 1 and 0 axes. +def _create_empty_map_1d_matrix_from_centroids( + x_centers, y_centers, dtype, data_label=""): + """Creates an empty geospatial vector with x,y coordinates on the y axis. Args: - matrix (lmpy.matrix.Matrix object): an input 2d geospatial matrix with - y centroids in row headers, x centroids in column headers OR - flattened geospatial matrix with site centroids in row headers. + x_centers (list of numeric): Center coordinate x values. + y_centers (list of numeric): Center coordinate y values. + dtype (numpy.ndarray.dtype): Data type for new matrix + data_label (str): header for the new vector. Returns: - True if flattened geospatial matrix, - False if x coordinates in columns, y coordinates in rows - """ - row_headers = matrix.get_row_headers() - # row/site headers in flattened geospatial matrix are tuples of - # (siteid, x_coord, y_coord) - if type(row_headers[0]) is list and len(row_headers[0]) == 3: - return True - else: - return False - - -# ..................................................................................... -def is_geospatial_matrix(matrix): - """Identifies whether matrix has x and y coordinates along 1 and 0 axes. - - Args: - matrix (lmpy.matrix.Matrix object): an input 2d geospatial matrix with - y centroids in row headers, x centroids in column headers OR - flattened geospatial matrix with site centroids in row headers. + Matrix: A Matrix of zeros for the coordinate centers. - Returns: - True if flattened geospatial matrix, - False if x coordinates in columns, y coordinates in rows + Note: + axis 0 represents the rows/sites with headers = (site_id,x,y) + axis 1 represents the columns/species with headers = data_label """ - row_headers = matrix.get_row_headers() - # row/site headers in flattened geospatial matrix are tuples of - # (siteid, x_coord, y_coord) - if type(row_headers[0]) is list and len(row_headers[0]) == 3: - return True - elif type(row_headers) is list and len(row_headers) > 0: - try: - float(row_headers[0]) - except ValueError: - return False - return True + site_headers = _create_site_headers_from_centroids(x_centers, y_centers) + site_count = len(site_headers) + map_1d_matrix = Matrix( + np.zeros((site_count, 1), dtype=dtype), + headers={ + "0": site_headers, + "1": [data_label] + } + ) + return map_1d_matrix # ..................................................................................... -def get_coordinate_headers_resolution(matrix): - """Get coordinate headers from a matrix with coordinates along one or two axes. +def _create_map_matrix_headers_from_extent(min_x, min_y, max_x, max_y, resolution): + """Gets header values for the x/1 and y/0 axis of a geospatial matrix. Args: - matrix (lmpy.matrix.Matrix object): an input 2d geospatial matrix with - y centroids in row headers, x centroids in column headers - OR - a flattened geospatial matrix with site centroids in row headers. + min_x (numeric): The minimum x value of the map extent. + min_y (numeric): The minimum y value of the map extent. + max_x (numeric): The maximum x value of the map extent. + max_y (numeric): The maximum y value of the map extent. + resolution (numeric): The size of each matrix cell. Returns: - x_headers (list): list of x coordinates, centroid of each cell in column - y_headers (list): list of y coordinates, centroid of each cell in row - - Raises: - Exception: on matrix contains < 2 columns - Exception: on matrix contains < 2 rows + x_headers: list of x center coordinates for column headers + y_headers: list of y center coordinates for row headers - Notes: - Assume that if the matrix is compressed, there are at least one pair of - neighboring rows or columns. - Assumes x and y resolution are equal + Note: + The function holds UL coordinates (min_x and max_y) static, but LR coordinates + (max_x, min_y) may change to accommodate the resolution requested. """ - row_headers = matrix.get_row_headers() - if is_flattened_geospatial_matrix(matrix): - x_resolution, x_centers, y_centers = _get_map_resolution_headers_from_sites( - row_headers) - else: - # If getting from a map matrix, sites should not be compressed - x_centers = matrix.get_column_headers() - y_centers = row_headers - x_resolution = x_centers[1] - x_centers[0] - - if len(x_centers) <= 1: - raise Exception( - f"Matrix contains only {len(x_centers)} columns on the x-axis ") - if len(y_centers) <= 1: - raise Exception( - f"Matrix contains only {len(y_centers)} rows on the y-axis") - - return x_centers, y_centers, x_resolution + # Y upper coordinates, goes from North to south + num_rows = len(np.arange(max_y, min_y, -resolution)) + # X left coordinates + num_cols = len(np.arange(min_x, max_x, resolution)) + # Center coordinates + y_headers = [max_y - (j + .5) * resolution for j in range(num_rows)] + x_headers = [min_x + (i + .5) * resolution for i in range(num_cols)] + return x_headers, y_headers # ..................................................................................... -def get_extent_resolution_coords_from_matrix(matrix): - """Gets x and y extents and resolution of an uncompressed geospatial matrix. +def create_site_headers_from_extent(min_x, min_y, max_x, max_y, resolution): + """Gets header values for the y/0/site axis of a flattened geospatial matrix. Args: - matrix (lmpy.matrix.Matrix object): an input 2d geospatial matrix with - y centroids in row headers, x centroids in column headers OR - flattened geospatial matrix with site centroids in row headers. - - Returns: min_x (numeric): The minimum x value of the map extent. min_y (numeric): The minimum y value of the map extent. max_x (numeric): The maximum x value of the map extent. max_y (numeric): The maximum y value of the map extent. - x_res (numeric): The width of each matrix cell. - y_res (numeric): The height of each matrix cell. - height (numeric): the number of rows, axis 0, of the matrix - width (numeric): the number of columns, axis 1, of the matrix - """ - # Headers are coordinate centroids - x_centers, y_centers, resolution = get_coordinate_headers_resolution(matrix) + resolution (numeric): The size of each matrix cell. - # Extend to edges by 1/2 resolution - min_x = x_centers[0] - resolution/2.0 - min_y = y_centers[-1] - resolution/2.0 - max_x = x_centers[-1] + resolution/2.0 - max_y = y_centers[0] + resolution/2.0 + Returns: + site_headers: list of x and y center coordinates for row headers in a flattened + geospatial matrix + """ + x_headers, y_headers = _create_map_matrix_headers_from_extent( + min_x, min_y, max_x, max_y, resolution) - return min_x, min_y, max_x, max_y, resolution, x_centers, y_centers + site_headers = [] + fid = 1 + for y in y_headers: + for x in x_headers: + site_headers.append((fid, x, y)) + fid += 1 + return site_headers # ..................................................................................... -def _get_map_resolution_headers_from_sites(site_headers): - """Creates an empty 2-d matrix to use for mapping. - - Args: - site_headers (list of tuples): A list containing the site headers (siteid, x, y) - for a flattened geospatial matrix containing sites along the y/0 axis - - Returns: - x_headers: list of x center coordinates for column headers - y_headers: list of y center coordinates for row headers - - Notes: - If the matrix is compressed, with empty rows (sites) and columns removed, the - missing site coordinates will be filled in to the map matrix headers - so the map will represent the geospatial region without holes. If all sites - are missing from an edge (upper, lower, right, left boundary) of the area - of interest, those will not be retained, the extent of the map will be smaller. - """ - # Second and third sites along the upper boundary - first siteid could be 0 - site_2, x_2, y_2 = site_headers[1] - site_3, x_3, y_3 = site_headers[2] - # Find resolution in a regular or compressed matrix by dividing - # map distance by number of cells between sites - x_resolution = (x_3 - x_2)/(site_3 - site_2) - - # Extent of matrix, using centroid coordinate values (not cell edges) - minx = maxx = x_2 - miny = maxy = y_2 - for _, x, y in site_headers: - if x < minx: - minx = x - if x > maxx: - maxx = x - if y < miny: - miny = y - if y > maxy: - maxy = y - # Fill in any x or y centroids missing from the input site_headers/matrix - x_centers = list(np.arange(minx, (maxx + x_resolution), x_resolution)) - y_centers = list(np.arange(maxy, (miny - x_resolution), (x_resolution * -1))) - return x_resolution, x_centers, y_centers +def _create_site_headers_from_centroids(x_centers, y_centers): + site_headers = [] + fid = 1 + for y in y_centers: + for x in x_centers: + site_headers.append((fid, x, y)) + fid += 1 + return site_headers # ..................................................................................... -def _create_map_matrix_headers_from_extent(min_x, min_y, max_x, max_y, resolution): - """Gets header values for the x/1 and y/0 axis of a geospatial matrix. +def _create_site_headers_from_shapegrid(min_x, min_y, max_x, max_y, resolution): + """Gets header values for the y/0/site axis of a flattened geospatial matrix. Args: min_x (numeric): The minimum x value of the map extent. @@ -270,21 +201,19 @@ def _create_map_matrix_headers_from_extent(min_x, min_y, max_x, max_y, resolutio resolution (numeric): The size of each matrix cell. Returns: - x_headers: list of x center coordinates for column headers - y_headers: list of y center coordinates for row headers - - Note: - The function holds UL coordinates (min_x and max_y) static, but LR coordinates - (max_x, min_y) may change to accommodate the resolution requested. + site_headers: list of x and y center coordinates for row headers in a flattened + geospatial matrix """ - # Y upper coordinates, goes from North to south - num_rows = len(np.arange(max_y, min_y, -resolution)) - # X left coordinates - num_cols = len(np.arange(min_x, max_x, resolution)) - # Center coordinates - y_headers = [max_y - (j + .5) * resolution for j in range(num_rows)] - x_headers = [min_x + (i + .5) * resolution for i in range(num_cols)] - return x_headers, y_headers + x_headers, y_headers = _create_map_matrix_headers_from_extent( + min_x, min_y, max_x, max_y, resolution) + + site_headers = _create_site_headers_from_centroids(x_headers, y_headers) + fid = 1 + for y in y_headers: + for x in x_headers: + site_headers.append((fid, x, y)) + fid += 1 + return site_headers # ..................................................................................... @@ -339,6 +268,81 @@ def xy_to_rc_func(x, y): return xy_to_rc_func +# ..................................................................................... +def _get_site_for_x_y_func(resolution, x_centers, y_centers): + """Get a function to return a site/row index for an x, y. + + Args: + resolution (float): cell size for geospatial grid/matrix. + x_centers (list of float): x centroid coordinates geospatial grid/matrix. + y_centers (list of float): y centroid coordinates for a geospatial grid/matrix. + + Returns: + Method: A method to generate column/site index for an x, y. + + Note: + Parallels the construction of shapegrid in lmpy.data_preparation.build_grid + """ + num_rows = len(y_centers) + num_cols = len(x_centers) + # Upper left coordinate + max_y = y_centers[0] + (resolution / 2.0) + min_x = x_centers[0] - (resolution / 2.0) + + # ....................... + def xy_to_site_func(x, y): + """Get the row and column where the point is located. + + Args: + x (numeric): The x value to convert. + y (numeric): The y value to convert. + + Returns: + int: The row where the point is located. + """ + row_calc = int((max_y - y) // resolution) + col_calc = int((x - min_x) // resolution) + + out_of_range = False + if row_calc not in range(0, num_rows) or col_calc not in range(0, num_cols): + out_of_range = True + + if out_of_range: + site = -1 + else: + site = (col_calc * num_rows) + row_calc + + return site + + return xy_to_site_func + + +# ..................................................................................... +def _get_reader_report(reader): + try: + rdr_rpt = { + "type": "DWCA", + "file": reader.archive_filename, + "x_field": reader.x_term, + "y_field": reader.y_term, + "out_of_range": 0, + "count": 0} + if reader.geopoint_term is not None: + rdr_rpt["geopoint_field"] = reader.geopoint_term + except AttributeError: + rdr_rpt = { + "type": "CSV", + "file": reader.filename, + "x_field": reader.x_field, + "y_field": reader.y_field, + "out_of_range": 0, + "count": 0} + if reader.geopoint is not None: + rdr_rpt["geopoint_field"] = reader.geopoint + + return rdr_rpt + + # ..................................................................................... def create_point_heatmap_matrix( readers, min_x, min_y, max_x, max_y, resolution, logger=None): @@ -379,27 +383,7 @@ def create_point_heatmap_matrix( if not isinstance(readers, list): readers = [readers] for reader in readers: - try: - rdr_rpt = { - "type": "DWCA", - "file": reader.archive_filename, - "x_field": reader.x_term, - "y_field": reader.y_term, - "out_of_range": 0, - "count": 0} - if reader.geopoint_term is not None: - rdr_rpt["geopoint_field"] = reader.geopoint_term - except AttributeError: - rdr_rpt = { - "type": "CSV", - "file": reader.filename, - "x_field": reader.x_field, - "y_field": reader.y_field, - "out_of_range": 0, - "count": 0} - if reader.geopoint is not None: - rdr_rpt["geopoint_field"] = reader.geopoint - + rdr_rpt = _get_reader_report(reader) reader.open() for points in reader: for point in points: @@ -425,6 +409,117 @@ def create_point_heatmap_matrix( return heatmap, report +# ..................................................................................... +def create_point_heatmap_vector(readers, site_headers, data_label, logger=None): + """Create a point heatmap matrix. + + Args: + readers (PointReader or list of PointReader): A source of point data for + creating the heatmap. + site_headers (list of tuples): site headers for a flattened geospatial matrix. + data_label (str): species header for this data. + logger (lmpy.log.Logger): An optional local logger to use for logging output + with consistent options + + Returns: + Matrix: A 2-d integer matrix of rows/sites and one column of point density for + the data. + report: report of metadata about the process, inputs, outputs + """ + refname = "create_point_heatmap_vector" + resolution, x_centers, y_centers = Matrix.get_map_resolution_headers_from_sites( + site_headers) + report = { + data_label: { + "input_data": [], + "x_size": len(x_centers), + "y_size": len(y_centers), + "resolution": resolution + } + } + heatmap = _create_empty_map_1d_matrix_from_centroids( + x_centers, y_centers, int, data_label=data_label) + + xy_2_site = _get_site_for_x_y_func(resolution, x_centers, y_centers) + logit( + logger, "Created flattened geospatial matrix, with 2d resolution " + + f"{resolution}, width {len(x_centers)}, height {len(y_centers)}", + refname=refname) + + if not isinstance(readers, list): + readers = [readers] + for reader in readers: + rdr_rpt = _get_reader_report(reader) + + reader.open() + for points in reader: + for point in points: + site_idx = xy_2_site(point.x, point.y) + if site_idx == -1: + rdr_rpt["out_of_range"] += 1 + else: + heatmap[site_idx] += 1 + rdr_rpt["count"] += 1 + reader.close() + logit( + logger, f"Read {rdr_rpt['count']} points within extent, and " + + f"{rdr_rpt['out_of_range']} out of range, from {rdr_rpt['type']} " + + f"file {rdr_rpt['file']}.", refname=refname) + + report[data_label]["input_data"].append(rdr_rpt) + report["min_cell_point_count"] = int(heatmap.min()) + report["max_cell_point_count"] = int(heatmap.max()) + logit( + logger, "Populated map vector with point counts for each cell ranging from " + + f"{report['min_cell_point_count']} to {report['max_cell_point_count']}", + refname=refname) + return heatmap, report + + +# ..................................................................................... +def create_point_pa_vector( + readers, site_headers, data_label, min_points=1, logger=None): + """Create a point heatmap matrix. + + Args: + readers (PointReader or list of PointReader): A source of point data for + creating the heatmap. + site_headers (list of tuples): site headers for a flattened geospatial matrix. + data_label (str): species header for this data. + min_points (int): minimum number of points to be considered present. + logger (lmpy.log.Logger): An optional local logger to use for logging output + with consistent options + + Returns: + Matrix: A 2-d boolean matrix of multiple rows/sites and one column of + presence/absence for the data. + report: report of metadata about the process, inputs, outputs + """ + heatmap, report = create_point_heatmap_vector( + readers, site_headers, data_label, logger=logger) + pav = Matrix( + np.where(heatmap >= min_points, True, False), headers=heatmap.get_headers()) + report["minimum_point_count"] = min_points + return pav, report + + +# ..................................................................................... +def create_point_pa_vector_from_vector(heatmap, min_points=1): + """Create a point heatmap matrix. + + Args: + heatmap (list of tuples): site headers for a flattened geospatial matrix. + min_points (int): minimum number of points to be considered present. + + Returns: + Matrix: A 2-d boolean matrix of multiple rows/sites and one column of + presence/absence for the data. + """ + pav = Matrix( + np.where(heatmap >= min_points, True, False), headers=heatmap.get_headers()) + return pav + + # ..................................................................................... def _get_geotransform(min_x, min_y, max_x, max_y, resolution): """Get the geotransform (described below) required for GDAL for the output raster. @@ -455,6 +550,83 @@ def _get_geotransform(min_x, min_y, max_x, max_y, resolution): return geotransform +# ................................................................................... +def _get_xy_info_from_geo_matrix( + matrix, columns=None, is_pam=False, nodata=-9999, logger=None): + refname = "_get_xy_matrix_from_geo_matrix" + if not matrix.is_geospatial(): + raise Exception("Matrix is not geospatial; cannot be converted to a raster") + # 0 axis represents x,y cell centroids, 1 axis represents a species or statistic + elif matrix.is_flattened_geospatial(): + column_headers = matrix.get_column_headers() + if columns is not None: + for col in columns: + if col not in column_headers: + raise Exception( + f"Column {col} is not present in matrix columns") + else: + columns = column_headers + + # Get geotransform elements and datatype from input matrix, flattened or not + (min_x, min_y, max_x, max_y, resolution, x_centers, + y_centers) = matrix.get_extent_resolution_coords() + logit( + logger, f"Found bounding box {min_x}, {min_y}, {max_x}, {max_y} for matrix", + refname=refname, log_level=logging.DEBUG) + # TODO: handle differing x and y resolutions + rst_type, _ = _get_osgeo_type(matrix, is_pam, is_raster=True) + # Modify the nodata value to fit within a byte + if rst_type == gdal.GDT_Byte: + nodata = 255 + report = { + "min_x": min_x, + "min_y": min_y, + "max_x": max_x, + "max_y": max_y, + "resolution": resolution, + "height": len(y_centers), + "width": len(x_centers), + "nodata": nodata, + "matrix_type": str(matrix.dtype) + } + return x_centers, y_centers, columns, report + + +# ................................................................................... +def create_2d_geospatial_matrix_from_sites_vector( + matrix, column=None, is_pam=False, nodata=-9999, logger=None): + """Create a 2d geospatial matrix from one column in a 2d geospatial matrix. + + Args: + matrix (lmpy.matrix.Matrix object): an input flattened geospatial matrix with + site centroids in row headers. + column (str): header of the columns to fill the new matrix. If None, + the first column will be used. + is_pam (bool): If true, input matrix is binary, and output raster will be + written with values stored as bytes + nodata (numeric): value for nodata in the input matrix and output vector + logger (lmpy.log.Logger): An optional local logger to use for logging output + with consistent options + + Returns: + report (dict): summary dictionary of inputs and outputs. + """ + x_centers, y_centers, columns, report = _get_xy_info_from_geo_matrix( + matrix, columns=None, is_pam=False, nodata=-9999, logger=None) + report["height"] = len(y_centers) + report["width"] = len(x_centers) + report["nodata"] = nodata + report["matrix_type"] = str(matrix.dtype) + report["column"] = column + + empty_map_mtx = _create_empty_map_matrix_from_centroids( + x_centers, y_centers, matrix.dtype) + col_map_mtx = _fill_map_matrix_with_column( + matrix, column, empty_map_mtx, is_pam=is_pam, nodata=nodata) + + return col_map_mtx, report + + # ................................................................................... def rasterize_geospatial_matrix( matrix, out_raster_filename, columns=None, is_pam=False, nodata=-9999, @@ -480,10 +652,10 @@ def rasterize_geospatial_matrix( Exception: on GDAL raster dataset creation """ refname = "rasterize_geospatial_matrix" - if not is_geospatial_matrix(matrix): + if not matrix.is_geospatial(): raise Exception("Matrix is not geospatial; cannot be converted to a raster") # 0 axis represents x,y cell centroids, 1 axis represents a species or statistic - elif is_flattened_geospatial_matrix(matrix): + elif matrix.is_flattened_geospatial(): is_flattened = True column_headers = matrix.get_column_headers() if columns is not None: @@ -501,7 +673,7 @@ def rasterize_geospatial_matrix( # Get geotransform elements and datatype from input matrix, flattened or not (min_x, min_y, max_x, max_y, resolution, x_centers, - y_centers) = get_extent_resolution_coords_from_matrix(matrix) + y_centers) = matrix.get_extent_resolution_coords() logit( logger, f"Found bounding box {min_x}, {min_y}, {max_x}, {max_y} for matrix", refname=refname, log_level=logging.DEBUG) @@ -521,7 +693,128 @@ def rasterize_geospatial_matrix( "width": len(x_centers), "nodata": nodata, "raster_data_type": rst_type_str, - "matrix_type": str(matrix.dtype) + "matrix_type": str(matrix.dtype), + "band_count": band_count, + "out_raster_filename": out_raster_filename + } + + # Create raster dataset + driver = gdal.GetDriverByName("GTiff") + try: + out_ds = driver.Create( + out_raster_filename, len(x_centers), len(y_centers), band_count, rst_type) + out_ds.SetGeoTransform(geotransform) + except Exception as e: + logit( + logger, f"Exception in GDAL function {e}", refname=refname, + log_level=logging.ERROR) + raise + else: + # band indexes start at 1 + band_idx = 1 + # Create band for entire matrix + if not is_flattened: + out_band = out_ds.GetRasterBand(band_idx) + out_band.WriteArray(matrix, 0, 0) + out_band.FlushCache() + out_band.ComputeStatistics(False) + logit( + logger, f"Added band {band_idx}", refname=refname, + log_level=logging.INFO) + else: + # Create band for each column + for col in columns: + empty_map_mtx = _create_empty_map_matrix_from_centroids( + x_centers, y_centers, matrix.dtype) + col_map_mtx = _fill_map_matrix_with_column( + matrix, col, empty_map_mtx, is_pam=is_pam, nodata=nodata) + out_band = out_ds.GetRasterBand(band_idx) + out_band.WriteArray(col_map_mtx, 0, 0) + out_band.FlushCache() + out_band.ComputeStatistics(False) + # Add band/column to metadata and report + out_band.SetMetadata(f"{col}") + # out_band.SetMetadata({f"band {band_idx}": f"{col}"}) + logit( + logger, f"Added band {band_idx} ({col})", refname=refname, + log_level=logging.INFO) + report[f"band {band_idx}"] = col + band_idx += 1 + logit( + logger, f"Wrote raster with {len(columns)} bands to {out_raster_filename}", + refname=refname, log_level=logging.INFO) + return report + + +# ................................................................................... +def rasterize_geospatial_matrix_old( + matrix, out_raster_filename, columns=None, is_pam=False, nodata=-9999, + logger=None): + """Create a geotiff raster file from one or all columns in a 2d geospatial matrix. + + Args: + matrix (lmpy.matrix.Matrix object): an input flattened geospatial matrix with + site centroids in row headers. + out_raster_filename: output filename. + columns (list of str): headers of the columns to be included as bands. If None, + all columns will be included as bands. + is_pam (bool): If true, input matrix is binary, and output raster will be + written with values stored as bytes + nodata (numeric): value for cells with no data in them + logger (lmpy.log.Logger): An optional local logger to use for logging output + with consistent options + + Returns: + report (dict): summary dictionary of inputs and outputs. + + Raises: + Exception: on GDAL raster dataset creation + """ + refname = "rasterize_geospatial_matrix" + if not matrix.is_geospatial(): + raise Exception("Matrix is not geospatial; cannot be converted to a raster") + # 0 axis represents x,y cell centroids, 1 axis represents a species or statistic + elif matrix.is_flattened_geospatial(): + is_flattened = True + column_headers = matrix.get_column_headers() + if columns is not None: + for col in columns: + if col not in column_headers: + raise Exception( + f"Column {col} is not present in matrix columns") + else: + columns = column_headers + band_count = len(columns) + else: + # 0 axis represents y coordinates, 1 axis represents x coordinates + is_flattened = False + band_count = 1 + + # Get geotransform elements and datatype from input matrix, flattened or not + (min_x, min_y, max_x, max_y, resolution, x_centers, + y_centers) = matrix.get_extent_resolution_coords() + logit( + logger, f"Found bounding box {min_x}, {min_y}, {max_x}, {max_y} for matrix", + refname=refname, log_level=logging.DEBUG) + # TODO: handle differing x and y resolutions + geotransform = _get_geotransform(min_x, min_y, max_x, max_y, resolution) + rst_type, rst_type_str = _get_osgeo_type(matrix, is_pam, is_raster=True) + # Modify the nodata value to fit within a byte + if rst_type == gdal.GDT_Byte: + nodata = 255 + report = { + "min_x": min_x, + "min_y": min_y, + "max_x": max_x, + "max_y": max_y, + "resolution": resolution, + "height": len(y_centers), + "width": len(x_centers), + "nodata": nodata, + "raster_data_type": rst_type_str, + "matrix_type": str(matrix.dtype), + "band_count": band_count, + "out_raster_filename": out_raster_filename } # Create raster dataset @@ -559,7 +852,8 @@ def rasterize_geospatial_matrix( out_band.FlushCache() out_band.ComputeStatistics(False) # Add band/column to metadata and report - out_band.SetMetadata({f"band {band_idx}": f"{col}"}) + out_band.SetMetadata(f"{col}") + # out_band.SetMetadata({f"band {band_idx}": f"{col}"}) logit( logger, f"Added band {band_idx} ({col})", refname=refname, log_level=logging.INFO) @@ -573,10 +867,11 @@ def rasterize_geospatial_matrix( # ................................................................................... def _get_osgeo_type(matrix, is_pam, is_raster=True): - if is_pam is True or matrix.dtype in (np.byte, np.intc, np.uintc, np.int_, np.uint): + if (is_pam is True or + matrix.dtype in (np.byte, np.bool, np.intc, np.uintc, np.int_, np.uint)): data_type_str = "ogr.OFTInteger" if is_raster: - if matrix.dtype == np.byte: + if matrix.dtype in (np.byte, np.bool): data_type_str = "gdal.GDT_Byte" else: data_type_str = "gdal.GDT_Int32" @@ -615,7 +910,7 @@ def rasterize_map_matrices(map_matrix_dict, out_raster_filename, logger=None): stat_names = list(map_matrix_dict.keys()) mmtx = map_matrix_dict[stat_names[0]] (min_x, min_y, max_x, max_y, resolution, x_centers, - y_centers) = get_extent_resolution_coords_from_matrix(mmtx) + y_centers) = mmtx.get_extent_resolution_coords() geotransform = _get_geotransform(min_x, min_y, max_x, max_y, resolution) if mmtx.dtype == np.float32: arr_type = gdal.GDT_Float32 @@ -687,13 +982,20 @@ def _fill_map_matrix_with_column( y_centers = map_matrix.get_row_headers() x_centers = map_matrix.get_column_headers() - # Get index of column of interest - orig_col_idx = matrix.get_column_headers().index(col_header) site_headers = matrix.get_row_headers() - # Fill matrix with value for each site in the column - for orig_row_idx, (_, x, y) in enumerate(site_headers): + orig_width = len(matrix.get_column_headers()) + + # Get column of interest + if orig_width == 1: + column = matrix + else: + orig_col_idx = matrix.get_column_headers().index(col_header) + column = matrix[:, orig_col_idx] + + # Fill new matrix with value for each site in the column + for orig_row_idx, (_siteid, x, y) in enumerate(site_headers): # Find the site column value in the original matrix - site_val = matrix[orig_row_idx, orig_col_idx] + site_val = column[orig_row_idx] # Find the x and y coordinates in the map_matrix col = x_centers.index(x) row = y_centers.index(y) @@ -731,7 +1033,7 @@ def _create_map_matrix_for_column(matrix, col_header, is_pam=False, nodata=-9999 map_mtx = _create_empty_map_matrix_from_matrix(matrix) num_cols = map_mtx.shape[1] (min_x, min_y, max_x, max_y, resolution, x_centers, - y_centers) = get_extent_resolution_coords_from_matrix(matrix) + y_centers) = matrix.get_extent_resolution_coords() report = { "min_x": min_x, "min_y": min_y, @@ -829,15 +1131,15 @@ def vectorize_geospatial_matrix( Exception: on non-flattened geospatial matrix """ refname = "vectorize_map_matrix" - if not is_geospatial_matrix(matrix): + if not matrix.is_geospatial(): raise Exception("Matrix is not geospatial; cannot be converted to a shapefile") - if not is_flattened_geospatial_matrix(matrix): + if not matrix.is_flattened_geospatial(): raise Exception("Conversion to shapefile of map matrix is unsupported") ogr_type, ogr_type_str = _get_osgeo_type(matrix, is_pam, is_raster=False) (min_x, min_y, max_x, max_y, resolution, x_centers, - y_centers) = get_extent_resolution_coords_from_matrix(matrix) + y_centers) = matrix.get_extent_resolution_coords() # Standard fields fields = [("site_id", ogr.OFTInteger), ("x", ogr.OFTReal), ("y", ogr.OFTReal)] @@ -927,8 +1229,7 @@ def vectorize_geospatial_matrix( # ..................................................................................... __all__ = [ "create_point_heatmap_matrix", - "get_coordinate_headers_resolution", - "get_extent_resolution_coords_from_matrix", - "is_flattened_geospatial_matrix", + "create_point_pa_vector", + "create_site_headers_from_extent", "rasterize_map_matrices" ] diff --git a/lmpy/statistics/pam_stats.py b/lmpy/statistics/pam_stats.py index b3c9737c2..d973bd45f 100644 --- a/lmpy/statistics/pam_stats.py +++ b/lmpy/statistics/pam_stats.py @@ -601,7 +601,7 @@ def calculate_diversity_statistics(self): f"resulting in {str(diversity_stat_vals)}", refname=self.__class__.__name__) diversity_matrix = Matrix( - np.array([func(self.pam) for _, func in self.diversity_stats]), + np.array(diversity_stat_vals), headers={'0': ['value'], '1': diversity_stat_names}, ) self._report["Diversity"] = diversity_matrix.get_report() diff --git a/lmpy/tools/convert_lmm_to_csv.py b/lmpy/tools/convert_lmm_to_csv.py index 9797e1e85..542cc4654 100644 --- a/lmpy/tools/convert_lmm_to_csv.py +++ b/lmpy/tools/convert_lmm_to_csv.py @@ -125,7 +125,8 @@ def cli(): "Apple Numbers (1,000,000 rows)", log_level=logging.WARNING, refname=script_name) - convert_lmm_to_csv(mtx, args.out_csv_filename) + # convert_lmm_to_csv(mtx, args.out_csv_filename) + mtx.write_csv(args.out_csv_filename) logger.log( f"Wrote matrix {args.in_lmm_filename} to CSV file {args.out_csv_filename}", diff --git a/lmpy/tools/convert_lmm_to_raster.py b/lmpy/tools/convert_lmm_to_raster.py index c3282b58b..cd404fb2a 100644 --- a/lmpy/tools/convert_lmm_to_raster.py +++ b/lmpy/tools/convert_lmm_to_raster.py @@ -6,8 +6,7 @@ from lmpy.log import Logger from lmpy.matrix import Matrix -from lmpy.spatial.map import ( - is_flattened_geospatial_matrix, rasterize_geospatial_matrix) +from lmpy.spatial.map import (rasterize_geospatial_matrix) from lmpy.statistics.pam_stats import PamStats from lmpy.tools._config_parser import _process_arguments, test_files @@ -158,7 +157,7 @@ def cli(): mtx = Matrix.load(args.in_lmm_filename) columns = args.column - if is_flattened_geospatial_matrix(mtx): + if mtx.is_flattened_geospatial(): column_headers = mtx.get_column_headers() if args.column is None: columns = column_headers diff --git a/tests/test_statistics/test_pam_stats.py b/tests/test_statistics/test_pam_stats.py index 5a8c848d5..a3d178071 100644 --- a/tests/test_statistics/test_pam_stats.py +++ b/tests/test_statistics/test_pam_stats.py @@ -107,10 +107,10 @@ class Test_Metrics: def test_covariance_metrics(self): """Test the covariance metrics.""" pam, _ = get_random_pam_and_tree(10, 20, 0.3, 1.0) - sigma_sites = stats.sigma_sites(pam) + sigma_sites, _ = stats.sigma_sites(pam) assert sigma_sites.shape == (20, 20) - sigma_species = stats.sigma_species(pam) + sigma_species, _ = stats.sigma_species(pam) assert sigma_species.shape == (10, 10) # ............................