From 4029e26a7c2d0dfa54eccd84a294d52dce172f9c Mon Sep 17 00:00:00 2001 From: Jake Beal Date: Sun, 26 Mar 2023 16:32:07 -0500 Subject: [PATCH] Patch issue #200: Fuzzy locations were creating invalid SBOL documents by 1) adding a non-standard sbol#locationPosition type, and 2) referencing an unrecognized Location from a SequenceFeature. Temporarily address this by reverting to the prior behavior of truncating fuzzy ranges into ranges. --- sbol_utilities/sbol3_genbank_conversion.py | 11 +-- .../test_location_types.nt | 72 +++++++++---------- test/test_genbank_sbol3_direct.py | 1 + 3 files changed, 36 insertions(+), 48 deletions(-) diff --git a/sbol_utilities/sbol3_genbank_conversion.py b/sbol_utilities/sbol3_genbank_conversion.py index 1287ae41..2f726240 100644 --- a/sbol_utilities/sbol3_genbank_conversion.py +++ b/sbol_utilities/sbol3_genbank_conversion.py @@ -195,13 +195,7 @@ class Location_GenBank_Extension(sbol3.Location): start and end position types (AfterPostion / BeforePosition / ExactPosition). :extends: sbol3.Location class """ - # Use the SBOL3 namespace for the type URI as a workaround for - # a bug in pySBOL3. 
- # TODO: use genbank namespace when the pySBOL3 bug is fixed - # NOTE: pySBOL3 BUG REPORT: https://github.com/SynBioDex/pySBOL3/issues/414 - # NOTE: pySBOL3 BUG FIX : https://github.com/SynBioDex/pySBOL3/pull/415 - # GENBANK_RANGE_NS = "http://www.ncbi.nlm.nih.gov/genbank#locationPosition" - GENBANK_RANGE_NS = sbol3.SBOL3_NS + "locationPosition" + GENBANK_RANGE_NS = "http://www.ncbi.nlm.nih.gov/genbank#locationPosition" def __init__(self, sequence: sbol3.Sequence = sbol3.Sequence("autoCreatedSequence"), *, identity: str = None, type_uri: str = GENBANK_RANGE_NS, @@ -734,7 +728,8 @@ def _handle_features_gb_to_sbol(self, record: SeqRecord, start_position = self.SBOL_LOCATION_POSITION[type(gb_loc.start)] # If both start and end positions are exact positions, the # feature location can be created simply as a range object - if start_position == 1 and end_position == 1: + # Kludge truncation of fuzzy ranges (https://github.com/SynBioDex/SBOL-utilities/issues/200) + if start_position == 1 and end_position == 1 or True: locs = sbol3.Range( sequence = seq, orientation = feat_loc_orientation, diff --git a/test/test_files/sbol3_genbank_conversion/test_location_types.nt b/test/test_files/sbol3_genbank_conversion/test_location_types.nt index 14c4f585..67132b46 100644 --- a/test/test_files/sbol3_genbank_conversion/test_location_types.nt +++ b/test/test_files/sbol3_genbank_conversion/test_location_types.nt @@ -36,14 +36,12 @@ "Direct Submission" . . . - "locationPosition1" . - . - "115"^^ . - "1"^^ . - "1"^^ . - "0"^^ . - . - . + "Range1" . + . + "115"^^ . + "1"^^ . + . + . "Range1" . "2299"^^ . . @@ -128,32 +126,28 @@ "6:KIG36583.1" . "7:MIIGNIHNLQPWLPQELRQAIEHIKAHVTAETPKGKHDIEGNHLFYLISEDMTEPYEARRAEYHARYLDIQIVLKGQEGMTFSTQPAGTPDTDWLADKDIAFLPEGVDEKTVILNEGDFVVFYPGEVHKPLCAVGAPAQVRKAVVKMLMA" . . - "locationPosition1" . - . - "3242"^^ . - "2"^^ . - "3019"^^ . - "1"^^ . - . - . + "Range1" . + . + "3242"^^ . + "3019"^^ . + . + . "SequenceFeature14" . - . + . "_converted_feature_13" . . 
. "0:locus_tag" . "0:PU64_23690" . . - "locationPosition1" . - . - "3242"^^ . - "2"^^ . - "3019"^^ . - "1"^^ . - . - . + "Range1" . + . + "3242"^^ . + "3019"^^ . + . + . "SequenceFeature15" . - . + . "_converted_feature_14" . . . @@ -175,23 +169,21 @@ "7:MSKISGWNFSQNITSADNCKQKNEDLDTWYVGMNDFARIAGGQNSRSNILSPRAFLEFLAKIFTLGYVDFSKRS" . . "SequenceFeature1" . - . + . "_converted_feature_0" . . . "0:locus_tag" . "0:PU64_23660" . . - "locationPosition1" . - . - "115"^^ . - "1"^^ . - "1"^^ . - "0"^^ . - . - . + "Range1" . + . + "115"^^ . + "1"^^ . + . + . "SequenceFeature2" . - . + . "_converted_feature_1" . . . @@ -437,10 +429,10 @@ "16-JAN-2015" . "BioProject:PRJNA266657::BioSample:SAMN03177677" . "BCT" . - . - . - . - . + . + . + . + . "JWYZ01000115.1" . "WGS" . "JWYZ01000115" . diff --git a/test/test_genbank_sbol3_direct.py b/test/test_genbank_sbol3_direct.py index e06f0cc6..7cd60530 100644 --- a/test/test_genbank_sbol3_direct.py +++ b/test/test_genbank_sbol3_direct.py @@ -266,6 +266,7 @@ def test_feature_location_types_ignore_fuzzy(self): sbol3.set_namespace(self.converter.TEST_NAMESPACE) self._test_genbank_to_sbol3(sample_sbol3_file=sbol3_file, sample_genbank_file=genbank_file) + @unittest.skip(reason="Round-tripping blocked by https://github.com/SynBioDex/SBOL-utilities/issues/200") def test_feature_location_types_round_trip_fuzzy(self): """Test ability to correctly round trip genbank test files in the iGEM distribution which have different FeatureLocation types like BeforePosition / AfterPosition / ExactPosition.