SynBioDex · rkrishnasanka · Mar 27, 2023 · Mar 30, 2023 · Mar 30, 2023 · Mar 30, 2023
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,3 +1,48 @@
+[tool.poetry]
+name = "sbol_utilities"
+description = "SBOL utilities"
+version = "1.0a17"
+readme = "README.md"
+authors = ["Your Name <[email protected]>"]
+
+[tool.poetry.dependencies]
+python = "^3.7"
+sbol3 = "^1.0b11"
+sbol2 = "^1.4"
+rdflib = "^6.2"
+biopython = "^1.79"
+graphviz = "^0.17"
+tyto = "^1.4"
+openpyxl = "^3.0"
+sbol_factory = "^1.0a11"
+
+[tool.poetry.dev-dependencies]
+pytest = "^6.0"
+interrogate = "^1.5"
+
+[tool.poetry.scripts]
+excel-to-sbol = "sbol_utilities.excel_to_sbol:main"
+graph-sbol = "sbol_utilities.graph_sbol:main"
+sbol-expand-derivations = "sbol_utilities.expand_combinatorial_derivations:main"
+sbol-calculate-sequences = "sbol_utilities.calculate_sequences:main"
+sbol-converter = "sbol_utilities.conversion:main"
+sbol2-to-sbol3 = "sbol_utilities.conversion:sbol2to3"
+sbol3-to-sbol2 = "sbol_utilities.conversion:sbol3to2"
+sbol-to-genbank = "sbol_utilities.conversion:sbol2genbank"
+sbol-to-fasta = "sbol_utilities.conversion:sbol2fasta"
+genbank-to-sbol = "sbol_utilities.conversion:genbank2sbol"
+fasta-to-sbol = "sbol_utilities.conversion:fasta2sbol"
+sbol-diff = "sbol_utilities.sbol_diff:main"
+
+[tool.poetry.extras]
+dev = ["pytest", "interrogate"]
+
+
+[build-system]
+requires = ["poetry-core>=1.1.0"]
+build-backend = "poetry.core.masonry.api"
+
+
 [tool.interrogate]
 
 # custom docstring conventions

diff --git a/sbol_utilities/excel_cell_selector.py b/sbol_utilities/excel_cell_selector.py
@@ -0,0 +1,83 @@
+from typing import List, Tuple
+import re
+
+# Matches a sheet-qualified absolute range such as "Sheet1!$A$1:$B$2".
+# Groups: (1) sheet name including the '!', (2) sheet name, (3) start cell, (4) end cell.
+CELL_SELECTOR_REGEX = r'((\w+)!)(\$[A-Z]+\$[\d]+):(\$[A-Z]+\$[\d]+)'
+# Matches a single cell reference (A1, $A1, A$1 or $A$1).
+# Groups: (1) column letters, (2) row number.
+# The column group is limited to upper-case letters: a greedy \w+ also consumes
+# digits, which split "A12" into column "A1" and row "2".
+CELL_LOCATION_REGEX = r'\$?([A-Z]+)\$?(\d+)'
+
+def col_num_to_index(col_num):
+    """Translate Excel-style column letters into a zero-based column index.
+
+    The letters are read as bijective base-26 digits (A=1 ... Z=26), so 'A'
+    gives 0, 'Z' gives 25 and 'AA' gives 26. An empty string gives -1.
+    """
+    one_based = 0
+    # Horner evaluation, most significant letter first
+    for letter in col_num:
+        one_based = one_based * 26 + (ord(letter) - ord('A') + 1)
+    return one_based - 1
+
+def col_index_to_num(col_idx):
+    """Translate a zero-based column index into Excel-style column letters.
+
+    0 gives 'A', 25 gives 'Z', 26 gives 'AA'. A negative index gives an
+    empty string.
+    """
+    letters = []
+    remaining = col_idx
+    while remaining >= 0:
+        quotient, digit = divmod(remaining, 26)
+        letters.append(chr(digit + ord('A')))
+        # bijective numbering has no zero digit, hence the extra -1 per step
+        remaining = quotient - 1
+    # letters were collected least significant first
+    return ''.join(reversed(letters))
+
+def get_area_enumeration(start_cell: str, end_cell: str) -> List[str]:
+    """Generates a list of cells from the start cell to the end cell.
+
+    The two cells are treated as opposite corners of a contiguous rectangular area,
+    which is listed row by row and, within a row, from the start column to the end
+    column. Any ``$`` markers are dropped from the returned cell names.
+
+    Args:
+        start_cell (str): Start cell in the format of A1, $A1, A$1, or $A$1
+        end_cell (str): End cell in the format of A1, $A1, A$1, or $A$1
+
+    Raises:
+        ValueError: If the start or end cell is invalid.
+
+    Returns:
+        List[str]: A list of cells from the start cell to the end cell. The list is
+            empty when the end cell is above or to the left of the start cell.
+    """
+    # Parse the start and end cells. fullmatch (rather than match) rejects trailing
+    # text such as 'A1:B2' instead of silently using only the leading 'A1'.
+    start_cell_match = re.fullmatch(CELL_LOCATION_REGEX, start_cell)
+    end_cell_match = re.fullmatch(CELL_LOCATION_REGEX, end_cell)
+
+    if start_cell_match is None or end_cell_match is None:
+        raise ValueError(f'Invalid start/end cell: {start_cell}, {end_cell}')
+
+    # Group 1 holds the column letters, group 2 the row number
+    start_col = col_num_to_index(start_cell_match[1])
+    start_row = int(start_cell_match[2])
+    end_col = col_num_to_index(end_cell_match[1])
+    end_row = int(end_cell_match[2])
+
+    # Enumerate the cells, both bounds inclusive
+    ret = []
+    for row in range(start_row, end_row + 1):
+        for col in range(start_col, end_col + 1):
+            ret.append(f'{col_index_to_num(col)}{row}')
+
+    return ret
+
+def get_selection(cell_selector_query: str) -> List[Tuple[str, str]]:
+    """Expands a cell selector query into the individual cells it covers.
+
+    Every range of the form ``Sheet!$A$1:$B$2`` found in the query is expanded
+    into its cells, in the order the ranges appear in the query.
+
+    Args:
+        cell_selector_query (str): The query to use to select the cells.
+
+    Raises:
+        ValueError: If the query contains no valid range, or a range has an
+            invalid start or end cell.
+
+    Returns:
+        List[Tuple[str, str]]: (sheet name, cell location) pairs for the cells to return.
+    """
+    # Perform the regex query. re.findall returns an empty list (never None)
+    # when nothing matches, so test for emptiness.
+    matches = re.findall(CELL_SELECTOR_REGEX, cell_selector_query)
+
+    if not matches:
+        raise ValueError(f'Invalid cell selector query: {cell_selector_query}')
+
+    ret = []
+    # Each match is (sheet name with '!', sheet name, start cell, end cell)
+    for _, sheet_name, start_cell, end_cell in matches:
+        for area_location in get_area_enumeration(start_cell, end_cell):
+            ret.append((sheet_name, area_location))
+
+    return ret
diff --git a/sbol_utilities/excel_to_sbol.py b/sbol_utilities/excel_to_sbol.py
@@ -1,3 +1,4 @@
+from typing import Dict, Optional
 import unicodedata
 import warnings
 import logging
@@ -17,7 +18,7 @@
 FINAL_PRODUCTS_COLLECTION = 'FinalProducts'
 
 
-def expand_configuration(values: dict) -> dict:
+def expand_configuration(values: Optional[Dict] = None) -> dict:
     """
     Initialize sheet configuration dictionary
     :param values: Dictionary of overrides for defaults
@@ -91,11 +92,21 @@ def read_metadata(wb: openpyxl.Workbook, doc: sbol3.Document, config: dict):
         cp_name = bp_name
         cp_description = bp_description
 
+    # The collection names and descriptions are wrapped in str() when the
+    # collections are built, which would turn a missing (None) value into the
+    # literal text 'None'; reject missing values explicitly instead.
+    if bp_name is None:
+        raise ValueError('Basic parts collection name is required')
+    if bp_description is None:
+        raise ValueError('Basic parts collection description is required')
+    if cp_name is None:
+        raise ValueError('Composite parts collection name is required')
+    if cp_description is None:
+        raise ValueError('Composite parts collection description is required')
+
     # Make the collections
-    basic_parts = sbol3.Collection(BASIC_PARTS_COLLECTION, name=bp_name, description=bp_description)
+    basic_parts = sbol3.Collection(BASIC_PARTS_COLLECTION, name=str(bp_name), description=str(bp_description))
     doc.add(basic_parts)
 
-    composite_parts = sbol3.Collection(COMPOSITE_PARTS_COLLECTION, name=cp_name, description=cp_description)
+    composite_parts = sbol3.Collection(COMPOSITE_PARTS_COLLECTION, name=str(cp_name), description=str(cp_description))
     doc.add(composite_parts)
 
     linear_products = sbol3.Collection(LINEAR_PRODUCTS_COLLECTION, name='Linear DNA Products',
@@ -207,11 +218,11 @@ def row_to_basic_part(doc: sbol3.Document, row, basic_parts: sbol3.Collection, l
 # form of a sub-component:
 # X: identifies a component or set thereof
 # RC(X): X is reversed
-reverse_complement_pattern = re.compile('RC\(.+\)')
+REVERSE_COMPLEMENT_PATTERN = re.compile(r'RC\(.+\)')
 # Returns sanitized text without optional reverse complement marker
 def strip_RC(name):
     sanitized = name.strip()
-    match = reverse_complement_pattern.match(sanitized)
+    match = REVERSE_COMPLEMENT_PATTERN.match(sanitized)
     return (sanitized[3:-1] if (match and len(match.group())==len(sanitized)) else sanitized)
 # returns true if part is reverse complement
 def is_RC(name):
@@ -255,17 +266,20 @@ def make_composite_component(display_id,part_lists,reverse_complements):
     # return the completed part
     return composite_part
 
-constraint_pattern = re.compile('Part (\d+) (.+) Part (\d+)')
-constraint_dict = {'same as': sbol3.SBOL_VERIFY_IDENTICAL,
-                   'different from': sbol3.SBOL_DIFFERENT_FROM,
-                   'same orientation as': sbol3.SBOL_SAME_ORIENTATION_AS,
-                   'different orientation from': sbol3.SBOL_SAME_ORIENTATION_AS}
-def make_constraint(constraint, part_list):
-    m = constraint_pattern.match(constraint)
+CONSTRAINT_PATTERN = re.compile(r'Part (\d+) (.+) Part (\d+)')
+# Maps the relation text of a "Part X <relation> Part Y" constraint to its SBOL3 restriction
+CONSTRAINT_DICT = {
+    'same as': sbol3.SBOL_VERIFY_IDENTICAL,
+    'different from': sbol3.SBOL_DIFFERENT_FROM,
+    'same orientation as': sbol3.SBOL_SAME_ORIENTATION_AS,
+    # was SBOL_SAME_ORIENTATION_AS, which made this relation identical to the one above
+    'different orientation from': sbol3.SBOL_OPPOSITE_ORIENTATION_AS,
+}
+
+def make_constraint(constraint:str, part_list):
+    m = CONSTRAINT_PATTERN.match(constraint)
     if not m:
         raise ValueError(f'Constraint "{constraint}" does not match pattern "Part X relation Part Y"')
     try:
-        restriction = constraint_dict[m.group(2)]
+        restriction = CONSTRAINT_DICT[m.group(2)]
     except KeyError:
         raise ValueError(f'Do not recognize constraint relation in "{constraint}"')
     x = int(m.group(1))
@@ -278,7 +292,7 @@ def make_constraint(constraint, part_list):
     return sbol3.Constraint(restriction, part_list[x-1], part_list[y-1])
 
 
-def make_combinatorial_derivation(document, display_id,part_lists,reverse_complements,constraints):
+def make_combinatorial_derivation(document, display_id, part_lists, reverse_complements, constraints):
     # Make the combinatorial derivation and its template
     template = sbol3.Component(display_id + "_template", sbol3.SBO_DNA)
     document.add(template)
@@ -420,7 +434,7 @@ def make_composite_part(document, row, composite_parts, linear_products, final_p
         plasmid.constraints.append(sbol3.Constraint(sbol3.SBOL_MEETS, backbone_sub, part_sub))
 
 
-def excel_to_sbol(wb: openpyxl.Workbook, config: dict = None) -> sbol3.Document:
+def excel_to_sbol(wb: openpyxl.Workbook, config: Optional[Dict] = None) -> sbol3.Document:
     """
     Take an open Excel file, return an SBOL document
     :param wb: openpyxl pointer to an Excel file

diff --git a/sbol_utilities/expand_combinatorial_derivations.py b/sbol_utilities/expand_combinatorial_derivations.py
@@ -45,7 +45,7 @@ def collection_values(self, c: sbol3.Collection) -> List[sbol3.Component]:
         assert all(isinstance(find_top_level(x), sbol3.Collection) or isinstance(find_top_level(x), sbol3.Component) for x in c.members)
         values = [find_top_level(x) for x in id_sort(c.members) if isinstance(find_top_level(x), sbol3.Component)] + \
             id_sort(itertools.chain(*([self.collection_values(x) for x in c.members if isinstance(find_top_level(x), sbol3.Collection)])))
-        logging.debug("Found "+str(len(values))+" values in collection "+c.display_id)
+        logging.debug(f"Found {str(len(values))} values in collection {c.display_id}")
         return values
 
     def cd_variable_values(self, v: sbol3.VariableFeature) -> List[sbol3.Component]:
@@ -54,12 +54,12 @@ def cd_variable_values(self, v: sbol3.VariableFeature) -> List[sbol3.Component]:
         :param v: Variable to be flattened
         :return: list of Component values found
         """
-        logging.debug("Finding values for " + find_child(v.variable).name)
+        logging.debug(f"Finding values for {find_child(v.variable).name}")
         sub_cd_collections = [self.derivation_to_collection(find_top_level(d)) for d in id_sort(v.variant_derivations)]
         values = [find_top_level(x) for x in id_sort(v.variants)] + \
                  id_sort(itertools.chain(*[self.collection_values(c) for c in id_sort(v.variant_collections)])) + \
                  id_sort(itertools.chain(*(self.collection_values(c) for c in id_sort(sub_cd_collections))))
-        logging.debug("Found " + str(len(values)) + " total values for " + find_child(v.variable).name)
+        logging.debug(f"Found {str(len(values))} total values for {find_child(v.variable).name}")
         return values
 
     def derivation_to_collection(self, cd: sbol3.CombinatorialDerivation) -> sbol3.Collection:
@@ -76,15 +76,15 @@ def derivation_to_collection(self, cd: sbol3.CombinatorialDerivation) -> sbol3.C
         sort_owned_objects(find_top_level(cd.template)) # TODO: https://github.com/SynBioDex/pySBOL3/issues/231
         # we've already converted this CombinatorialDerivation to a Collection, just return the conversion
         if cd in self.expanded_derivations.keys():
-            logging.debug('Found previous expansion of ' + cd.display_id)
+            logging.debug(f"Found previous expansion of {cd.display_id}")
             return self.expanded_derivations[cd]
         # if it doesn't already exist, we'll build it
-        logging.debug("Expanding combinatorial derivation " + cd.display_id)
+        logging.debug(f"Expanding combinatorial derivation {cd.display_id}")
         # first get all of the values
         values = [id_sort(self.cd_variable_values(v)) for v in id_sort(cd.variable_features)]
         # if this is de facto a collection rather than a CD, just return it directly
         if is_library(cd):
-            logging.debug("Interpreting combinatorial derivation " + cd.display_id + " as library")
+            logging.debug(f"Interpreting combinatorial derivation {cd.display_id} as library")
             derivatives = sbol3.Collection(cd.identity + "_collection")
             doc.add(derivatives)
             derivatives.members += values[0]
@@ -96,7 +96,7 @@ def derivation_to_collection(self, cd: sbol3.CombinatorialDerivation) -> sbol3.C
             for a in assignments:
                 # scratch_doc = sbol3.Document()
                 derived = find_top_level(cd.template).clone(cd_assigment_to_display_id(cd, a))
-                logging.debug("Considering derived combination " + derived.display_id)
+                logging.debug(f"Considering derived combination {derived.display_id}")
                 # scratch_doc.add(derived) # add to the scratch document to enable manipulation of children
                 doc.add(derived)  # add to the scratch document to enable manipulation of children
                 # Replace variables with values
@@ -139,7 +139,7 @@ def expand_derivations(targets: List[sbol3.CombinatorialDerivation]) -> List[sbo
     # Output document will contain the derivative collections for each target
     expander = CombinatorialDerivationExpander()
     for cd in targets:
-        logging.info('Expanding derivation '+cd.display_id)
+        logging.info(f"Expanding derivation {cd.display_id}")
         expander.derivation_to_collection(cd)
         logging.info("Expansion finished, producing "+str(len(expander.expanded_derivations[cd].members))+" designs")
 

diff --git a/sbol_utilities/igem/__init__.py b/sbol_utilities/igem/__init__.py
diff --git a/sbol_utilities/igem/qc_checker.py b/sbol_utilities/igem/qc_checker.py
@@ -0,0 +1,62 @@
+from __future__ import annotations
+from typing import Dict, List, Optional, Tuple
+
+from sbol_utilities.igem.qc_entity import QCEntity
+from sbol_utilities.igem.qc_field import QCFieldQualityScore
+from sbol_utilities.igem.qc_input_data import QCInputData
+
+class QCChecker:
+    """Class to perform the QC check on a package.
+
+    Holds the QC entities keyed by entity id, an overall quality score that is
+    accumulated across checks, and the error messages collected so far.
+    """
+
+    def __init__(self):
+        """Initialize the QC checker with no entities, a fresh score and no errors."""
+        # Key: entity id, Value: QC entity. This must be assigned, not merely
+        # annotated: a bare annotation does not create the attribute, so
+        # from_json and perform_qc_check would fail with AttributeError.
+        self.entities: Dict[str, QCEntity] = {}
+        self.overall_score: QCFieldQualityScore = QCFieldQualityScore()
+        self.errors: List[Dict[str, Optional[str]]] = []  # Key: location, Value: error/warning message
+
+    @staticmethod
+    def from_json(schema_json_dict: Dict) -> QCChecker:
+        """Read the QC JSON dictionary and populate a new QCChecker object.
+
+        Args:
+            schema_json_dict (Dict): Mapping of entity id to the JSON dictionary
+                that QCEntity.from_json turns into an entity.
+
+        Returns:
+            QCChecker: A checker holding one QCEntity per entry of the dictionary.
+        """
+        # Build one entity per top-level entry of the schema
+        ret = QCChecker()
+        for entity_id, entity_dict in schema_json_dict.items():
+            entity = QCEntity.from_json(entity_dict)
+            ret.entities[entity_id] = entity
+
+        return ret
+
+    def perform_qc_check(self, entity_id: str, data_to_validate: QCInputData) -> None:
+        """Perform the QC check on the package data.
+
+        The entity registered under entity_id validates data_to_validate.data; the
+        resulting score, divided by the number of items in that data, is added to
+        overall_score and the returned error messages are appended to errors.
+
+        Package data shape example (as given by the original author;
+        NOTE(review): confirm against QCInputData):
+
+        entity_id = "top_level_entity_1" // Refer to the entity_id in the QC JSON file
+        package_data = {
+            "source_location": "Whatever Excel Location is",
+            "data": { whatever data is }
+        }
+
+        Args:
+            entity_id: Id of the entity (as used in the QC JSON) to validate against.
+            data_to_validate: The package data to check.
+
+        Raises:
+            KeyError: If no entity is registered under entity_id.
+
+        Returns:
+            None.
+        """
+
+        # Pass the package data to the entity to validate
+        entity = self.entities[entity_id]
+        # NOTE(review): an empty data collection makes this 0 and the division
+        # below presumably fails - confirm whether empty input can occur.
+        normalization_factor = len(data_to_validate.data)
+        # Since this can be a collection of matching entity types, the entity validates all of them
+        entity_qc_score, entity_error_messages = entity.validate(data_to_validate.data)
+        # Add the normalized QC score to the overall score
+        self.overall_score += entity_qc_score / normalization_factor
+        # Add the error messages to the collected errors
+        self.errors.append(entity_error_messages)