Skip to content

Commit

Permalink
unit tests and linting
Browse files Browse the repository at this point in the history
  • Loading branch information
tkchafin committed Dec 11, 2023
1 parent 12cade7 commit d7e0e3f
Show file tree
Hide file tree
Showing 22 changed files with 726 additions and 842 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ General options:
-c, --clusterpop: Use DBSCAN algorithm to automatically cluster populations
--reachid_col : Attribute name representing primary key in shapefile [default="HYRIV_ID"]
--length_col : Attribute name giving length in kilometers [default="LENGTH_KM"]
--seed : Seed for RNG
Genetic distance options:
-d, --dist : Use which metric of distance? Options:
Expand Down
Binary file modified autostreamtree/data/test.clusteredPoints.pdf
Binary file not shown.
Binary file modified autostreamtree/data/test.isolationByDistance.pdf
Binary file not shown.
Binary file modified autostreamtree/data/test.obsVersusFittedD.pdf
Binary file not shown.
Binary file modified autostreamtree/data/test.snapDistances.pdf
Binary file not shown.
Binary file modified autostreamtree/data/test.streamTree.gpkg
Binary file not shown.
Binary file modified autostreamtree/data/test.streamsByEdgeID.pdf
Binary file not shown.
Binary file modified autostreamtree/data/test.streamsByFittedD.pdf
Binary file not shown.
Binary file modified autostreamtree/data/test.subGraph.pdf
Binary file not shown.
Binary file not shown.
741 changes: 0 additions & 741 deletions autostreamtree/examples/plots/example.incidenceMatrix.txt

This file was deleted.

40 changes: 0 additions & 40 deletions autostreamtree/examples/plots/example.leastSquaresConstrained.txt

This file was deleted.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file removed autostreamtree/examples/plots/example.snapDist.png
Binary file not shown.
Binary file removed autostreamtree/examples/plots/example.subGraph.pdf
Binary file not shown.
Binary file removed autostreamtree/examples/plots/example.subGraph.png
Binary file not shown.
74 changes: 19 additions & 55 deletions autostreamtree/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import matplotlib.pyplot as plt
import pickle
import mantel
import geopandas as gpd
from math import radians, sin, cos, acos

import autostreamtree.cluster_pops as clust
Expand Down Expand Up @@ -880,7 +881,7 @@ def output_fitted_d(pred, out):
del pred


def fit_least_squares_distances(D, X, iterative, out, weight=None):
def fit_least_squares_distances(D, X, iterative, out, weight="CSE67"):
"""
Computes least-squares branch lengths from a vector of genetic distances D
and incidence matrix X. When iterative=True, negative distances are
Expand All @@ -892,9 +893,7 @@ def fit_least_squares_distances(D, X, iterative, out, weight=None):
iterative (bool): Whether to use an iterative approach to constrain
negative distances.
out (str): Output file prefix.
weight (numpy.ndarray, optional): Weight matrix for weighted least
squares optimization. Defaults to
None.
weight: Weight type. Defaults to CSE67.
Returns:
numpy.ndarray: Least-squared optimized distances.
Expand Down Expand Up @@ -966,15 +965,20 @@ def generate_weights_matrix(d, weight):
W[row, col] = np.ones(len(d))
elif weight.upper() == "BEYER74":
if np.count_nonzero(d == 0) > 0:
print("WARNING: Divide-by-zero in weighted least-squares",
"(weight=1/D).")
W[row, col] = np.divide(1.0, d, out=np.zeros_like(d), where=d != 0)
print(
"WARNING: Divide-by-zero in weighted least-squares."
)
W[row, col] = np.divide(1.0, d, out=np.zeros_like(d, dtype=float),
where=d != 0)

elif weight.upper() == "FM67":
if np.count_nonzero(d == 0) > 0:
print("WARNING: Divide-by-zero in weighted least-squares",
"(weight=1/D^2).")
W[row, col] = np.divide(1.0, np.square(d),
out=np.zeros_like(d), where=d != 0)
print(
"WARNING: Divide-by-zero in weighted least-squares."
)
W[row, col] = np.divide(1.0, np.square(d), out=np.zeros_like(d,
dtype=float), where=d != 0)

else:
print(f"ERROR: Weight option {weight} not recognized. Using ordinary",
"least-squares instead.")
Expand Down Expand Up @@ -1222,49 +1226,6 @@ def extract_minimal_subgraph(subgraph, graph, nodelist, id_col, len_col, path):
continue


def extract_minimal_existing(subgraph, graph, nodelist, id_col, dist_col,
                             path):
    """
    Walk a path through ``graph`` and copy a reduced version of it into
    ``subgraph``, retaining only "stop" nodes: nodes listed in ``nodelist``
    or nodes for which ``graph[node]`` has more than two entries (junctions).

    Every time a stop node is reached, an edge is added to ``subgraph`` from
    the start of the current segment to that stop node.

    Args:
        subgraph (NetworkX Graph): Graph that receives nodes and edges
            (modified in place).
        graph (NetworkX Graph): Graph the path was taken from.
        nodelist (list): Nodes that must always be kept.
        id_col (str): Edge attribute name holding the edge ID.
        dist_col (str): Edge attribute name holding the distance.
        path (list): Ordered nodes describing the path to walk.

    Returns:
        None. ``subgraph`` is updated as a side effect.
    """
    def _is_stop(node):
        # Kept nodes: explicitly requested, or junctions in the source graph
        return node in nodelist or len(graph[node]) > 2

    segment_start = None

    # Visit consecutive node pairs (hops) along the path
    for here, there in zip(path[:-1], path[1:]):
        # First hop of a segment: remember where the segment begins
        segment_start = segment_start or here
        if _is_stop(here):
            subgraph.add_node(here)

        # Attributes attached to a new edge are those of the current hop
        # only; values from earlier hops of the same segment are not kept.
        # NOTE(review): confirm that summing/concatenating across hops was
        # not intended here.
        hop = graph.get_edge_data(here, there)
        hop_attrs = {id_col: hop[id_col], dist_col: hop[dist_col]}

        if _is_stop(there):
            # Close the segment at this stop node and start the next one
            subgraph.add_node(there)
            subgraph.add_edge(segment_start, there, **hop_attrs)
            segment_start = there


def snap_to_node(graph, pos):
"""
Finds the closest node to the given [x, y] coordinates in the graph.
Expand All @@ -1282,15 +1243,18 @@ def snap_to_node(graph, pos):


def write_geodataframe(gdf, output_prefix, output_driver):
    """
    Write a GeoDataFrame to ``<output_prefix><extension>``.

    Args:
        gdf: Object providing ``to_file(path, driver=...)``
            (a GeoDataFrame).
        output_prefix (str): Output path without a file extension.
        output_driver (str): Output format key, matched
            case-insensitively: "SHP", "GPKG" or "GDB". Any other value
            gets a ".gpkg" extension but is still passed on (upper-cased)
            as the driver name.

    Returns:
        None. The file is written as a side effect, and the geopandas I/O
        engine option is set to "pyogrio".
    """
    gpd.options.io_engine = "pyogrio"

    # Normalise once so every comparison below is case-insensitive
    driver_key = output_driver.upper()

    extension = {
        "SHP": ".shp",
        "GPKG": ".gpkg",
        "GDB": ".gdb"
    }.get(driver_key, ".gpkg")  # Default to .gpkg
    output_path = f"{output_prefix}{extension}"

    # "SHP" is a shorthand; the driver is passed under its full name and
    # must not be upper-cased again afterwards.
    driver_name = "ESRI Shapefile" if driver_key == "SHP" else driver_key

    # NOTE(review): "GDB" is forwarded unchanged as the driver name -
    # confirm it is a driver the I/O engine actually recognises.
    if driver_key == "GDB" and not os.path.exists(output_path):
        os.makedirs(output_path)

    # Write exactly once
    gdf.to_file(output_path, driver=driver_name)
3 changes: 1 addition & 2 deletions conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,12 @@ requirements:
- numpy
- pandas >2
- matplotlib
- scikit-learn
- seaborn
- geopandas
- pyogrio
- sortedcontainers
- momepy
- networkx >3
- pyogrio
- mantel
- pysam
- scikit-learn >1
Expand Down
181 changes: 181 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
import os
import sys
import pytest
import tempfile
import subprocess
import autostreamtree
import autostreamtree.cli as cli


@pytest.fixture
def output_directory_path():
    """Yield the path of a fresh temporary directory, removed afterwards."""
    scratch = tempfile.TemporaryDirectory()
    try:
        yield scratch.name
    finally:
        # Runs once the consuming test is done with the directory
        scratch.cleanup()


@pytest.fixture
def shapefile_path():
    """Return the path to ``data/test.shp`` beside the autostreamtree
    package module."""
    package_dir = os.path.dirname(autostreamtree.__file__)
    return os.path.join(package_dir, "data", "test.shp")


@pytest.fixture
def coords_file_path():
    """Return the path to ``data/test.coords`` beside the autostreamtree
    package module."""
    package_dir = os.path.dirname(autostreamtree.__file__)
    return os.path.join(package_dir, "data", "test.coords")


@pytest.fixture
def vcf_file_path():
    """Return the path to ``data/test_sub100.vcf.gz`` beside the
    autostreamtree package module."""
    package_dir = os.path.dirname(autostreamtree.__file__)
    return os.path.join(package_dir, "data", "test_sub100.vcf.gz")


@pytest.fixture
def popmap_file_path():
    """Return the path to ``data/test.popmap`` beside the autostreamtree
    package module."""
    package_dir = os.path.dirname(autostreamtree.__file__)
    return os.path.join(package_dir, "data", "test.popmap")


def test_autostreamtree_outputs(shapefile_path, coords_file_path,
                                vcf_file_path, popmap_file_path):
    """
    Run the CLI with ``-r ALL`` and ``--gdf_out GPKG`` on the bundled test
    data and check that every expected output file is created.

    Args:
        shapefile_path (str): Path to the test shapefile (fixture).
        coords_file_path (str): Path to the test coordinates file (fixture).
        vcf_file_path (str): Path to the test VCF file (fixture).
        popmap_file_path (str): Path to the test popmap file (fixture).
    """
    expected_files = [
        "clusterCentroids.txt", "clusteredPoints.pdf", "full.network",
        "incidenceMatrix.txt", "isolationByDistance.pdf",
        "isolationByDistance.txt", "leastSquaresConstrained.txt", "network",
        "obsVersusFittedD.pdf", "obsVersusFittedD.txt", "pointCoords.txt",
        "popGenDistMat.txt", "reachToEdgeTable.txt", "snapDistances.pdf",
        "snapDistances.txt", "streamDistMat.txt", "streamTree.gpkg",
        "streamTree.txt", "streamsByEdgeID.pdf", "streamsByFittedD.pdf",
        "subGraph.pdf"
    ]

    with tempfile.TemporaryDirectory() as temp_dir:
        output_prefix = os.path.join(temp_dir, "test")
        args = [
            "autostreamtree", "-s", shapefile_path, "-i", coords_file_path,
            "-v", vcf_file_path, "-p", popmap_file_path, "-r", "ALL",
            "--reachid_col", "HYRIV_ID", "--length_col", "LENGTH_KM",
            "-o", output_prefix, "--seed", "1234", "--gdf_out", "GPKG"
        ]

        # cli.main() is called without arguments, so the command line is
        # supplied through sys.argv. Restore it in a finally clause so a
        # failure inside main() cannot leak the patched argv to other tests.
        original_argv = sys.argv
        sys.argv = args
        try:
            cli.main()
        finally:
            sys.argv = original_argv

        # Must run inside the with-block: the directory is deleted on exit
        missing = [
            suffix for suffix in expected_files
            if not os.path.exists(f"{output_prefix}.{suffix}")
        ]
        assert not missing, f"Missing expected output files: {missing}"


def test_autostreamtree_output_shp(shapefile_path, coords_file_path,
                                   vcf_file_path, popmap_file_path):
    """
    Run the CLI with ``-r ALL`` and ``--gdf_out SHP`` on the bundled test
    data and check that every expected output file, including
    ``streamTree.shp``, is created.

    Args:
        shapefile_path (str): Path to the test shapefile (fixture).
        coords_file_path (str): Path to the test coordinates file (fixture).
        vcf_file_path (str): Path to the test VCF file (fixture).
        popmap_file_path (str): Path to the test popmap file (fixture).
    """
    expected_files = [
        "clusterCentroids.txt", "clusteredPoints.pdf", "full.network",
        "incidenceMatrix.txt", "isolationByDistance.pdf",
        "isolationByDistance.txt", "leastSquaresConstrained.txt", "network",
        "obsVersusFittedD.pdf", "obsVersusFittedD.txt", "pointCoords.txt",
        "popGenDistMat.txt", "reachToEdgeTable.txt", "snapDistances.pdf",
        "snapDistances.txt", "streamDistMat.txt", "streamTree.shp",
        "streamTree.txt", "streamsByEdgeID.pdf", "streamsByFittedD.pdf",
        "subGraph.pdf"
    ]

    with tempfile.TemporaryDirectory() as temp_dir:
        output_prefix = os.path.join(temp_dir, "test")
        args = [
            "autostreamtree", "-s", shapefile_path, "-i", coords_file_path,
            "-v", vcf_file_path, "-p", popmap_file_path, "-r", "ALL",
            "--reachid_col", "HYRIV_ID", "--length_col", "LENGTH_KM",
            "-o", output_prefix, "--seed", "1234", "--gdf_out", "SHP"
        ]

        # cli.main() is called without arguments, so the command line is
        # supplied through sys.argv. Restore it in a finally clause so a
        # failure inside main() cannot leak the patched argv to other tests.
        original_argv = sys.argv
        sys.argv = args
        try:
            cli.main()
        finally:
            sys.argv = original_argv

        # Must run inside the with-block: the directory is deleted on exit
        missing = [
            suffix for suffix in expected_files
            if not os.path.exists(f"{output_prefix}.{suffix}")
        ]
        assert not missing, f"Missing expected output files: {missing}"


def test_autostreamtree_outputs_gendist(shapefile_path, coords_file_path,
                                        vcf_file_path, popmap_file_path):
    """
    Run the CLI with ``-r GENDIST`` on the bundled test data, expect it to
    finish via ``SystemExit`` with code 0, and check that the genetic
    distance matrix file is created.

    Args:
        shapefile_path (str): Path to the test shapefile (fixture).
        coords_file_path (str): Path to the test coordinates file (fixture).
        vcf_file_path (str): Path to the test VCF file (fixture).
        popmap_file_path (str): Path to the test popmap file (fixture).
    """
    expected_files = [
        "popGenDistMat.txt"
    ]

    with tempfile.TemporaryDirectory() as temp_dir:
        output_prefix = os.path.join(temp_dir, "test")
        # NOTE(review): "-r" is given twice ("ALL", then "GENDIST");
        # presumably the later value takes effect - confirm against the
        # CLI's option parser.
        args = [
            "autostreamtree", "-s", shapefile_path, "-i", coords_file_path,
            "-v", vcf_file_path, "-p", popmap_file_path, "-r", "ALL",
            "--reachid_col", "HYRIV_ID", "--length_col", "LENGTH_KM",
            "-o", output_prefix, "--seed", "1234", "-r", "GENDIST"
        ]

        # cli.main() is called without arguments, so the command line is
        # supplied through sys.argv. Restore it in a finally clause so a
        # failure cannot leak the patched argv to other tests.
        original_argv = sys.argv
        sys.argv = args
        try:
            with pytest.raises(SystemExit) as exit_info:
                cli.main()
        finally:
            sys.argv = original_argv

        # Checked after the raises-block: statements placed after the
        # raising call inside that block would never execute.
        assert exit_info.value.code == 0, "SystemExit with non-zero exit code"

        # Must run inside the with-block: the directory is deleted on exit
        missing = [
            suffix for suffix in expected_files
            if not os.path.exists(f"{output_prefix}.{suffix}")
        ]
        assert not missing, f"Missing expected output files: {missing}"


def test_autostreamtree_outputs_sdist(shapefile_path, coords_file_path,
                                      vcf_file_path, popmap_file_path):
    """
    Run the CLI with ``-r STREAMDIST`` on the bundled test data, expect it
    to finish via ``SystemExit`` with code 0, and check that the network,
    snap-distance and stream-distance output files are created.

    Args:
        shapefile_path (str): Path to the test shapefile (fixture).
        coords_file_path (str): Path to the test coordinates file (fixture).
        vcf_file_path (str): Path to the test VCF file (fixture).
        popmap_file_path (str): Path to the test popmap file (fixture).
    """
    expected_files = [
        "full.network", "network", "snapDistances.pdf",
        "snapDistances.txt", "streamDistMat.txt", "subGraph.pdf"
    ]

    with tempfile.TemporaryDirectory() as temp_dir:
        output_prefix = os.path.join(temp_dir, "test")
        # NOTE(review): "-r" is given twice ("ALL", then "STREAMDIST");
        # presumably the later value takes effect - confirm against the
        # CLI's option parser.
        args = [
            "autostreamtree", "-s", shapefile_path, "-i", coords_file_path,
            "-v", vcf_file_path, "-p", popmap_file_path, "-r", "ALL",
            "--reachid_col", "HYRIV_ID", "--length_col", "LENGTH_KM",
            "-o", output_prefix, "--seed", "1234", "-r", "STREAMDIST"
        ]

        # cli.main() is called without arguments, so the command line is
        # supplied through sys.argv. Restore it in a finally clause so a
        # failure cannot leak the patched argv to other tests.
        original_argv = sys.argv
        sys.argv = args
        try:
            with pytest.raises(SystemExit) as exit_info:
                cli.main()
        finally:
            sys.argv = original_argv

        # Checked after the raises-block: statements placed after the
        # raising call inside that block would never execute.
        assert exit_info.value.code == 0, "SystemExit with non-zero exit code"

        # Must run inside the with-block: the directory is deleted on exit
        missing = [
            suffix for suffix in expected_files
            if not os.path.exists(f"{output_prefix}.{suffix}")
        ]
        assert not missing, f"Missing expected output files: {missing}"
Loading

0 comments on commit d7e0e3f

Please sign in to comment.