Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

iGEM Quality Checker System #202

Draft
wants to merge 16 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,016 changes: 1,016 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,48 @@
[tool.poetry]
name = "sbol_utilities"
description = "SBOL utilities"
version = "1.0a17"
readme = "README.md"
authors = ["Your Name <[email protected]>"]

[tool.poetry.dependencies]
python = "^3.7"
sbol3 = "^1.0b11"
sbol2 = "^1.4"
rdflib = "^6.2"
biopython = "^1.79"
graphviz = "^0.17"
tyto = "^1.4"
openpyxl = "^3.0"
sbol_factory = "^1.0a11"

[tool.poetry.dev-dependencies]
pytest = "^6.0"
interrogate = "^1.5"

[tool.poetry.scripts]
excel-to-sbol = "sbol_utilities.excel_to_sbol:main"
graph-sbol = "sbol_utilities.graph_sbol:main"
sbol-expand-derivations = "sbol_utilities.expand_combinatorial_derivations:main"
sbol-calculate-sequences = "sbol_utilities.calculate_sequences:main"
sbol-converter = "sbol_utilities.conversion:main"
sbol2-to-sbol3 = "sbol_utilities.conversion:sbol2to3"
sbol3-to-sbol2 = "sbol_utilities.conversion:sbol3to2"
sbol-to-genbank = "sbol_utilities.conversion:sbol2genbank"
sbol-to-fasta = "sbol_utilities.conversion:sbol2fasta"
genbank-to-sbol = "sbol_utilities.conversion:genbank2sbol"
fasta-to-sbol = "sbol_utilities.conversion:fasta2sbol"
sbol-diff = "sbol_utilities.sbol_diff:main"

[tool.poetry.extras]
dev = ["pytest", "interrogate"]


[build-system]
requires = ["poetry-core>=1.1.0"]
build-backend = "poetry.core.masonry.api"


[tool.interrogate]

# custom docstring conventions
Expand Down
83 changes: 83 additions & 0 deletions sbol_utilities/excel_cell_selector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
from typing import List, Tuple
import re

# Matches an absolute, sheet-qualified range such as ``Sheet1!$A$1:$B$2``.
# Groups: 1 = sheet name including the trailing "!", 2 = sheet name,
#         3 = start cell, 4 = end cell.
CELL_SELECTOR_REGEX = r'((\w+)!)(\$[A-Z]+\$[\d]+):(\$[A-Z]+\$[\d]+)'
# Matches a single cell reference written as A1, $A1, A$1 or $A$1.
# Groups: 1 = column letters, 2 = row number.
# The column group is restricted to letters on purpose: a generic ``\w+`` also
# consumes digits, so "A10" would be split into column "A1" and row "0".
CELL_LOCATION_REGEX = r'\$?([A-Z]+)\$?(\d+)'

def col_num_to_index(col_num):
    """Translate spreadsheet column letters (e.g. 'A', 'AB') into a zero-based index."""
    one_based = 0
    # Horner-style accumulation: every letter is one digit with A=1 ... Z=26
    for letter in col_num:
        one_based = one_based * 26 + (ord(letter) - ord('A') + 1)
    # The accumulated value counts from 1, callers expect counting from 0
    return one_based - 1

def col_index_to_num(col_idx):
    """Translate a zero-based column index into spreadsheet column letters (0 -> 'A', 26 -> 'AA')."""
    letters = []
    remaining = col_idx
    # Letters are produced least-significant first; a negative index yields no letters
    while remaining >= 0:
        quotient, digit = divmod(remaining, 26)
        letters.append(chr(ord('A') + digit))
        # The "- 1" accounts for the numbering having no zero digit ('A' follows 'Z' as 'AA')
        remaining = quotient - 1
    return ''.join(reversed(letters))

def get_area_enumeration(start_cell: str, end_cell: str) -> List[str]:
    """Generate the list of cells in the rectangle spanned by two corner cells.

    The area is assumed to be contiguous. Cells are listed row by row, and
    within each row from the start column to the end column. Returned cell
    names carry no ``$`` markers (e.g. ``B2``). Multi-letter columns are
    handled through ``col_num_to_index`` / ``col_index_to_num``.

    If the end row or end column precedes the start, the result is empty.

    Args:
        start_cell (str): Start cell in the format of A1, $A1, A$1, or $A$1
        end_cell (str): End cell in the format of A1, $A1, A$1, or $A$1

    Raises:
        ValueError: If the start or end cell is not a well-formed cell reference.

    Returns:
        List[str]: A list of cells from the start cell to the end cell.
    """
    # fullmatch (rather than match) so that trailing garbage such as "A1:B2"
    # is rejected instead of being silently truncated to "A1"
    start_match = re.fullmatch(CELL_LOCATION_REGEX, start_cell)
    end_match = re.fullmatch(CELL_LOCATION_REGEX, end_cell)
    if start_match is None or end_match is None:
        raise ValueError(f'Invalid start/end cell: {start_cell}, {end_cell}')

    first_col = col_num_to_index(start_match[1])
    first_row = int(start_match[2])
    last_col = col_num_to_index(end_match[1])
    last_row = int(end_match[2])

    # Column labels do not depend on the row, so compute them only once
    col_labels = [col_index_to_num(col) for col in range(first_col, last_col + 1)]

    return [f'{label}{row}'
            for row in range(first_row, last_row + 1)
            for label in col_labels]

def get_selection(cell_selector_query: str) -> List[Tuple[str, str]]:
    """Expand a cell selector query into the individual cells it covers.

    Every range in the query that matches ``CELL_SELECTOR_REGEX`` is expanded
    with ``get_area_enumeration``; the results of all ranges are concatenated
    in the order the ranges appear in the query.

    Args:
        cell_selector_query (str): The query to use to select the cells.

    Raises:
        ValueError: If the query contains no range matching ``CELL_SELECTOR_REGEX``.

    Returns:
        List[Tuple[str, str]]: ``(sheet_name, cell_location)`` pairs for the cells to return.
    """
    matches = re.findall(CELL_SELECTOR_REGEX, cell_selector_query)

    # re.findall returns an empty list, never None, when nothing matches, so
    # emptiness is the condition that identifies an invalid query
    if not matches:
        raise ValueError(f'Invalid cell selector query: {cell_selector_query}')

    # Each match is (sheet name with "!", sheet name, start cell, end cell)
    return [(sheet_name, location)
            for _, sheet_name, start_cell, end_cell in matches
            for location in get_area_enumeration(start_cell, end_cell)]
44 changes: 29 additions & 15 deletions sbol_utilities/excel_to_sbol.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import Dict, Optional
import unicodedata
import warnings
import logging
Expand All @@ -17,7 +18,7 @@
FINAL_PRODUCTS_COLLECTION = 'FinalProducts'


def expand_configuration(values: dict) -> dict:
def expand_configuration(values: Optional[Dict] = None) -> dict:
"""
Initialize sheet configuration dictionary
:param values: Dictionary of overrides for defaults
Expand Down Expand Up @@ -91,11 +92,21 @@ def read_metadata(wb: openpyxl.Workbook, doc: sbol3.Document, config: dict):
cp_name = bp_name
cp_description = bp_description

if bp_name is None:
raise ValueError('Basic parts collection name is required')
if bp_description is None:
raise ValueError('Basic parts collection description is required')
if cp_name is None:
raise ValueError('Composite parts collection name is required')
if cp_description is None:
raise ValueError('Composite parts collection description is required')

if
# Make the collections
basic_parts = sbol3.Collection(BASIC_PARTS_COLLECTION, name=bp_name, description=bp_description)
basic_parts = sbol3.Collection(BASIC_PARTS_COLLECTION, name=str(bp_name), description=str(bp_description))
doc.add(basic_parts)

composite_parts = sbol3.Collection(COMPOSITE_PARTS_COLLECTION, name=cp_name, description=cp_description)
composite_parts = sbol3.Collection(COMPOSITE_PARTS_COLLECTION, name=str(cp_name), description=str(cp_description))
doc.add(composite_parts)

linear_products = sbol3.Collection(LINEAR_PRODUCTS_COLLECTION, name='Linear DNA Products',
Expand Down Expand Up @@ -207,11 +218,11 @@ def row_to_basic_part(doc: sbol3.Document, row, basic_parts: sbol3.Collection, l
# form of a sub-component:
# X: identifies a component or set thereof
# RC(X): X is reversed
reverse_complement_pattern = re.compile('RC\(.+\)')
REVERSE_COMPLEMENT_PATTERN = re.compile(r'RC\(.+\)')
# Returns sanitized text without optional reverse complement marker
def strip_RC(name):
sanitized = name.strip()
match = reverse_complement_pattern.match(sanitized)
match = REVERSE_COMPLEMENT_PATTERN.match(sanitized)
return (sanitized[3:-1] if (match and len(match.group())==len(sanitized)) else sanitized)
# returns true if part is reverse complement
def is_RC(name):
Expand Down Expand Up @@ -255,17 +266,20 @@ def make_composite_component(display_id,part_lists,reverse_complements):
# return the completed part
return composite_part

constraint_pattern = re.compile('Part (\d+) (.+) Part (\d+)')
constraint_dict = {'same as': sbol3.SBOL_VERIFY_IDENTICAL,
'different from': sbol3.SBOL_DIFFERENT_FROM,
'same orientation as': sbol3.SBOL_SAME_ORIENTATION_AS,
'different orientation from': sbol3.SBOL_SAME_ORIENTATION_AS}
def make_constraint(constraint, part_list):
m = constraint_pattern.match(constraint)
CONSTRAINT_PATTERN = re.compile(r'Part (\d+) (.+) Part (\d+)')
# Maps the relation text of a "Part X relation Part Y" constraint to its SBOL3 restriction
CONSTRAINT_DICT = {
    'same as': sbol3.SBOL_VERIFY_IDENTICAL,
    'different from': sbol3.SBOL_DIFFERENT_FROM,
    'same orientation as': sbol3.SBOL_SAME_ORIENTATION_AS,
    # Previously mapped to SBOL_SAME_ORIENTATION_AS, which made this relation
    # indistinguishable from 'same orientation as'
    'different orientation from': sbol3.SBOL_OPPOSITE_ORIENTATION_AS,
}

def make_constraint(constraint:str, part_list):
m = CONSTRAINT_PATTERN.match(constraint)
if not m:
raise ValueError(f'Constraint "{constraint}" does not match pattern "Part X relation Part Y"')
try:
restriction = constraint_dict[m.group(2)]
restriction = CONSTRAINT_DICT[m.group(2)]
except KeyError:
raise ValueError(f'Do not recognize constraint relation in "{constraint}"')
x = int(m.group(1))
Expand All @@ -278,7 +292,7 @@ def make_constraint(constraint, part_list):
return sbol3.Constraint(restriction, part_list[x-1], part_list[y-1])


def make_combinatorial_derivation(document, display_id,part_lists,reverse_complements,constraints):
def make_combinatorial_derivation(document, display_id, part_lists, reverse_complements, constraints):
# Make the combinatorial derivation and its template
template = sbol3.Component(display_id + "_template", sbol3.SBO_DNA)
document.add(template)
Expand Down Expand Up @@ -420,7 +434,7 @@ def make_composite_part(document, row, composite_parts, linear_products, final_p
plasmid.constraints.append(sbol3.Constraint(sbol3.SBOL_MEETS, backbone_sub, part_sub))


def excel_to_sbol(wb: openpyxl.Workbook, config: dict = None) -> sbol3.Document:
def excel_to_sbol(wb: openpyxl.Workbook, config: Optional[Dict] = None) -> sbol3.Document:
"""
Take an open Excel file, return an SBOL document
:param wb: openpyxl pointer to an Excel file
Expand Down
16 changes: 8 additions & 8 deletions sbol_utilities/expand_combinatorial_derivations.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def collection_values(self, c: sbol3.Collection) -> List[sbol3.Component]:
assert all(isinstance(find_top_level(x), sbol3.Collection) or isinstance(find_top_level(x), sbol3.Component) for x in c.members)
values = [find_top_level(x) for x in id_sort(c.members) if isinstance(find_top_level(x), sbol3.Component)] + \
id_sort(itertools.chain(*([self.collection_values(x) for x in c.members if isinstance(find_top_level(x), sbol3.Collection)])))
logging.debug("Found "+str(len(values))+" values in collection "+c.display_id)
logging.debug(f"Found {str(len(values))} values in collection {c.display_id}")
return values

def cd_variable_values(self, v: sbol3.VariableFeature) -> List[sbol3.Component]:
Expand All @@ -54,12 +54,12 @@ def cd_variable_values(self, v: sbol3.VariableFeature) -> List[sbol3.Component]:
:param v: Variable to be flattened
:return: list of Component values found
"""
logging.debug("Finding values for " + find_child(v.variable).name)
logging.debug(f"Finding values for {find_child(v.variable).name}")
sub_cd_collections = [self.derivation_to_collection(find_top_level(d)) for d in id_sort(v.variant_derivations)]
values = [find_top_level(x) for x in id_sort(v.variants)] + \
id_sort(itertools.chain(*[self.collection_values(c) for c in id_sort(v.variant_collections)])) + \
id_sort(itertools.chain(*(self.collection_values(c) for c in id_sort(sub_cd_collections))))
logging.debug("Found " + str(len(values)) + " total values for " + find_child(v.variable).name)
logging.debug(f"Found {str(len(values))} total values for {find_child(v.variable).name}")
return values

def derivation_to_collection(self, cd: sbol3.CombinatorialDerivation) -> sbol3.Collection:
Expand All @@ -76,15 +76,15 @@ def derivation_to_collection(self, cd: sbol3.CombinatorialDerivation) -> sbol3.C
sort_owned_objects(find_top_level(cd.template)) # TODO: https://github.com/SynBioDex/pySBOL3/issues/231
# we've already converted this CombinatorialDerivation to a Collection, just return the conversion
if cd in self.expanded_derivations.keys():
logging.debug('Found previous expansion of ' + cd.display_id)
logging.debug(f"Found previous expansion of {cd.display_id}")
return self.expanded_derivations[cd]
# if it doesn't already exist, we'll build it
logging.debug("Expanding combinatorial derivation " + cd.display_id)
logging.debug(f"Expanding combinatorial derivation {cd.display_id}")
# first get all of the values
values = [id_sort(self.cd_variable_values(v)) for v in id_sort(cd.variable_features)]
# if this is de facto a collection rather than a CD, just return it directly
if is_library(cd):
logging.debug("Interpreting combinatorial derivation " + cd.display_id + " as library")
logging.debug(f"Interpreting combinatorial derivation {cd.display_id} as library")
derivatives = sbol3.Collection(cd.identity + "_collection")
doc.add(derivatives)
derivatives.members += values[0]
Expand All @@ -96,7 +96,7 @@ def derivation_to_collection(self, cd: sbol3.CombinatorialDerivation) -> sbol3.C
for a in assignments:
# scratch_doc = sbol3.Document()
derived = find_top_level(cd.template).clone(cd_assigment_to_display_id(cd, a))
logging.debug("Considering derived combination " + derived.display_id)
logging.debug(f"Considering derived combination {derived.display_id}")
# scratch_doc.add(derived) # add to the scratch document to enable manipulation of children
doc.add(derived) # add to the scratch document to enable manipulation of children
# Replace variables with values
Expand Down Expand Up @@ -139,7 +139,7 @@ def expand_derivations(targets: List[sbol3.CombinatorialDerivation]) -> List[sbo
# Output document will contain the derivative collections for each target
expander = CombinatorialDerivationExpander()
for cd in targets:
logging.info('Expanding derivation '+cd.display_id)
logging.info(f"Expanding derivation {cd.display_id}")
expander.derivation_to_collection(cd)
logging.info("Expansion finished, producing "+str(len(expander.expanded_derivations[cd].members))+" designs")

Expand Down
Empty file added sbol_utilities/igem/__init__.py
Empty file.
62 changes: 62 additions & 0 deletions sbol_utilities/igem/qc_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from __future__ import annotations
from typing import Dict, List, Optional, Tuple

from sbol_utilities.igem.qc_entity import QCEntity
from sbol_utilities.igem.qc_field import QCFieldQualityScore
from sbol_utilities.igem.qc_input_data import QCInputData

class QCChecker:
    """Class to perform the QC check on a package."""

    def __init__(self):
        """Initialize the QC checker with no entities, a default score and no errors."""
        # Key: entity id, Value: entity used to validate data for that id.
        # Must be assigned here (not merely annotated) so from_json can fill it.
        self.entities: Dict[str, QCEntity] = {}
        self.overall_score: QCFieldQualityScore = QCFieldQualityScore()
        self.errors: List[Dict[str, Optional[str]]] = []  # Key: location, Value: error/warning message

    @staticmethod
    def from_json(schema_json_dict: Dict) -> QCChecker:
        """Read the QC JSON schema and populate a new QCChecker object.

        Args:
            schema_json_dict (Dict): Mapping from entity id to the JSON
                dictionary describing that entity; each value is handed to
                ``QCEntity.from_json``.

        Returns:
            QCChecker: A checker whose ``entities`` holds one QCEntity per
                key of ``schema_json_dict``.
        """
        ret = QCChecker()
        for entity_id, entity_dict in schema_json_dict.items():
            ret.entities[entity_id] = QCEntity.from_json(entity_dict)

        return ret

    def perform_qc_check(self, entity_id: str, data_to_validate: QCInputData) -> None:
        """Perform the QC check on the package data.

        The score returned by the entity is divided by the number of items in
        ``data_to_validate.data`` and added to ``overall_score``; the entity's
        error messages are appended to ``errors``.

        Package data shape example (as described by the original author;
        verify against QCInputData):

            entity_id = "top_level_entity_1" // Refer to the entity_id in the QC JSON file
            package_data = {
                "source_location": "Whatever Excel Location is",
                "data": { whatever data is }
            }

        Args:
            entity_id: Key of the entity in ``self.entities`` to validate against.
            data_to_validate: The package data to check; its ``data``
                attribute is passed to the entity's ``validate``.

        Raises:
            KeyError: If ``entity_id`` is not present in ``self.entities``.

        Returns:
            None.
        """
        # Pass the package data to the entity to validate
        entity = self.entities[entity_id]
        # NOTE(review): an empty data_to_validate.data makes this divisor 0 —
        # confirm how QCFieldQualityScore division should behave in that case
        normalization_factor = len(data_to_validate.data)
        # Since this can be a collection of matching entity types, the score is
        # normalized by the number of items
        entity_qc_score, entity_error_messages = entity.validate(data_to_validate.data)
        # Add the normalized QC score to the running total
        self.overall_score += entity_qc_score/normalization_factor
        # Record the error messages reported by the entity
        self.errors.append(entity_error_messages)
Loading