Changed file opens to binary mode for to_csv calls. #547

Closed · wants to merge 3 commits
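This PR swaps text-mode handles ('w' with encoding='utf-8') for binary ones ('wb') wherever a handle is fed to pandas to_csv, so every string written directly to the handle becomes a bytes literal. The usual motivation for this pattern is Windows newline handling: a text-mode handle translates each '\n' a second time, so the '\r\n' terminators emitted by the csv layer land on disk as '\r\r\n', while a binary handle is written verbatim. Below is a minimal sketch of the pattern, assuming pandas >= 1.2 (the release where to_csv gained support for binary file handles); the file name and DataFrame are illustrative only, not taken from this diff:

import pandas as pd

# Illustrative section DataFrame; the real code builds these from the ISA model.
df = pd.DataFrame({'Term Source File': ['obi.owl']},
                  index=pd.Index(['OBI'], name='Term Source Name'))

with open('i_investigation.txt', 'wb') as fp:   # binary mode, no encoding arg
    fp.write(b'ONTOLOGY SOURCE REFERENCE\n')    # bytes literal, since fp is binary
    # to_csv encodes the text itself, so the OS text layer never gets a
    # chance to rewrite '\n' on Windows.
    df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
              index_label='Term Source Name')

The alternative fix, open(..., 'w', newline='', encoding='utf-8'), would keep text mode; this PR standardizes on binary handles instead.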
24 changes: 12 additions & 12 deletions isatools/isatab/dump/core.py
@@ -41,7 +41,7 @@ def dump(isa_obj, output_path,
         raise NameError('Investigation file must match pattern i_*.txt, got {}'.format(i_file_name))

     if path.exists(output_path):
-        fp = open(path.join(output_path, i_file_name), 'w', encoding='utf-8')
+        fp = open(path.join(output_path, i_file_name), 'wb')
     else:
         log.debug('output_path=', i_file_name)
         raise FileNotFoundError("Can't find " + output_path)
@@ -55,7 +55,7 @@ def dump(isa_obj, output_path,

     # Write ONTOLOGY SOURCE REFERENCE section
     ontology_source_references_df = _build_ontology_reference_section(investigation.ontology_source_references)
-    fp.write('ONTOLOGY SOURCE REFERENCE\n')
+    fp.write(b'ONTOLOGY SOURCE REFERENCE\n')
     # Need to set index_label as top left cell
     ontology_source_references_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                          index_label='Term Source Name')
@@ -80,7 +80,7 @@ def dump(isa_obj, output_path,
             inv_df_rows.append(comment.value)
     investigation_df.loc[0] = inv_df_rows
     investigation_df = investigation_df.set_index('Investigation Identifier').T
-    fp.write('INVESTIGATION\n')
+    fp.write(b'INVESTIGATION\n')
     investigation_df.to_csv(
         path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
         index_label='Investigation Identifier')
@@ -90,14 +90,14 @@ def dump(isa_obj, output_path,
         prefix='Investigation',
         publications=investigation.publications
     )
-    fp.write('INVESTIGATION PUBLICATIONS\n')
+    fp.write(b'INVESTIGATION PUBLICATIONS\n')
     investigation_publications_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                          index_label='Investigation PubMed ID')

     # Write INVESTIGATION CONTACTS section
     investigation_contacts_df = _build_contacts_section_df(
         contacts=investigation.contacts)
-    fp.write('INVESTIGATION CONTACTS\n')
+    fp.write(b'INVESTIGATION CONTACTS\n')
     investigation_contacts_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                      index_label='Investigation Person Last Name')

@@ -127,40 +127,40 @@ def dump(isa_obj, output_path,
                 study_df_row.append(comment.value)
         study_df.loc[0] = study_df_row
         study_df = study_df.set_index('Study Identifier').T
-        fp.write('STUDY\n')
+        fp.write(b'STUDY\n')
         study_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8', index_label='Study Identifier')
         study_design_descriptors_df = _build_design_descriptors_section(design_descriptors=study.design_descriptors)
-        fp.write('STUDY DESIGN DESCRIPTORS\n')
+        fp.write(b'STUDY DESIGN DESCRIPTORS\n')
         study_design_descriptors_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                            index_label='Study Design Type')

         # Write STUDY PUBLICATIONS section
         study_publications_df = _build_publications_section_df(prefix='Study', publications=study.publications)
-        fp.write('STUDY PUBLICATIONS\n')
+        fp.write(b'STUDY PUBLICATIONS\n')
         study_publications_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                      index_label='Study PubMed ID')

         # Write STUDY FACTORS section
         study_factors_df = _build_factors_section_df(factors=study.factors)
-        fp.write('STUDY FACTORS\n')
+        fp.write(b'STUDY FACTORS\n')
         study_factors_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                 index_label='Study Factor Name')

         study_assays_df = _build_assays_section_df(assays=study.assays)
-        fp.write('STUDY ASSAYS\n')
+        fp.write(b'STUDY ASSAYS\n')
         study_assays_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                index_label='Study Assay File Name')

         # Write STUDY PROTOCOLS section
         study_protocols_df = _build_protocols_section_df(protocols=study.protocols)
-        fp.write('STUDY PROTOCOLS\n')
+        fp.write(b'STUDY PROTOCOLS\n')
         study_protocols_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                   index_label='Study Protocol Name')

         # Write STUDY CONTACTS section
         study_contacts_df = _build_contacts_section_df(
             prefix='Study', contacts=study.contacts)
-        fp.write('STUDY CONTACTS\n')
+        fp.write(b'STUDY CONTACTS\n')
         study_contacts_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                  index_label='Study Person Last Name')

4 changes: 2 additions & 2 deletions isatools/isatab/dump/write.py
@@ -220,7 +220,7 @@ def flatten(current_list):
     DF = DF.replace('', nan)
     DF = DF.dropna(axis=1, how='all')

-    with open(path.join(output_dir, study_obj.filename), 'w') as out_fp:
+    with open(path.join(output_dir, study_obj.filename), 'wb') as out_fp:
         DF.to_csv(
             path_or_buf=out_fp, index=False, sep='\t', encoding='utf-8')

@@ -480,7 +480,7 @@ def pbar(x):
         DF = DF.dropna(axis=1, how='all')

         with open(path.join(
-                output_dir, assay_obj.filename), 'w') as out_fp:
+                output_dir, assay_obj.filename), 'wb') as out_fp:
             DF.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                       encoding='utf-8')

2 changes: 1 addition & 1 deletion isatools/isatab/load/core.py
@@ -390,7 +390,7 @@ def merge_study_with_assay_tables(study_file_path, assay_file_path, target_file_
     log.info("Merging DataFrames...")
     merged_DF = merge(study_DF, assay_DF, on='Sample Name')
     log.info("Writing merged DataFrame to file %s", target_file_path)
-    with open(target_file_path, 'w', encoding='utf-8') as fp:
+    with open(target_file_path, 'wb') as fp:
         merged_DF.to_csv(fp, sep='\t', index=False, header=study_DF.isatab_header + assay_DF.isatab_header[1:])
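The header= argument here concatenates the two stored header lists, slicing off the first entry of the assay header so the shared 'Sample Name' merge key appears only once in the merged file. A small sketch of that slicing, with illustrative column lists (isatab_header is an attribute the loader appears to attach to each parsed DataFrame):

# Illustrative headers; the real lists come from the parsed study and assay tables.
study_header = ['Sample Name', 'Characteristics[organism]']
assay_header = ['Sample Name', 'Extract Name', 'Raw Data File']

# assay_header[1:] drops the duplicated merge key.
merged_header = study_header + assay_header[1:]
# ['Sample Name', 'Characteristics[organism]', 'Extract Name', 'Raw Data File']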


3 changes: 1 addition & 2 deletions isatools/magetab.py
@@ -362,8 +362,7 @@ def write_idf_file(inv_obj, output_path):
     idf_df = idf_df.replace('', np.nan)
     with open(os.path.join(output_path, "{}.idf.txt".format(
             investigation.identifier if investigation.identifier != ""
-            else investigation.filename[2:-3])), "w",
-          encoding='utf-8') as idf_fp:
+            else investigation.filename[2:-3])), "wb") as idf_fp:
         idf_df.to_csv(
             path_or_buf=idf_fp,
             index=True,
4 changes: 2 additions & 2 deletions isatools/utils.py
@@ -733,7 +733,7 @@ def replace_factor_with_source_characteristic(self, factor_name):
         table_file_df.columns = self.clean_isatab_field_names(
             field_names_modified)

-        with open(self.path, 'w') as out_fp:
+        with open(self.path, 'wb') as out_fp:
             table_file_df.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                                  encoding='utf-8')

@@ -856,7 +856,7 @@ def replace_factor_with_protocol_parameter_value(

         with open(os.path.join(
             os.path.dirname(self.path), '{s_filename}.fix'.format(
-                s_filename=os.path.basename(self.path))), 'w') as out_fp:
+                s_filename=os.path.basename(self.path))), 'wb') as out_fp:
             table_file_df.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                                  encoding='utf-8')

2 changes: 1 addition & 1 deletion tests/isatab/test_isatab.py
@@ -440,7 +440,7 @@ def test_isatab_dump_source_sample_char_quant(self):
         s.process_sequence = [sample_collection_process]
         s.samples.append(sample1)
         i.studies = [s]
-        actual = isatab.dumps(i)
+        actual = replace_windows_newlines(isatab.dumps(i))
         expected = """Source Name\tMaterial Type\tCharacteristics[organism]\tTerm Source REF\tTerm Accession Number\tCharacteristics[body weight]\tUnit\tTerm Source REF\tTerm Accession Number\tProtocol REF\tParameter Value[vessel]\tTerm Source REF\tTerm Accession Number\tParameter Value[storage temperature]\tUnit\tTerm Source REF\tTerm Accession Number\tSample Name\tCharacteristics[organism part]\tTerm Source REF\tTerm Accession Number\tCharacteristics[specimen mass]\tUnit\tTerm Source REF\tTerm Accession Number
 source1\tspecimen\tHuman\tNCBITAXON\thttp://purl.bioontology.org/ontology/STY/T016\t72\tkilogram\tUO\thttp://purl.obolibrary.org/obo/UO_0000009\tsample collection\teppendorf tube\tOBI\tpurl.org\t-20\tdegree Celsius\tUO\thttp://purl.obolibrary.org/obo/UO_0000027\tsample1\tliver\tUBERON\thttp://purl.obolibrary.org/obo/UBERON_0002107\t450.5\tmilligram\tUO\thttp://purl.obolibrary.org/obo/UO_0000022"""
         self.assertIn(expected, actual)
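replace_windows_newlines is not defined in this diff; presumably it normalizes the CRLF terminators that the binary-mode dump now emits, so the comparison against the LF-only expected string still holds. A hypothetical sketch of such a helper (the implementation shown is an assumption, not code from this PR):

def replace_windows_newlines(text):
    # Assumed behavior: collapse CRLF back to LF so the dumped output can
    # be compared against LF-only expected fixtures.
    return text.replace('\r\n', '\n')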
6 changes: 3 additions & 3 deletions tests/validators/test_validate_test_data.py
@@ -305,7 +305,7 @@ class TestIsaJsonCreateTestData(unittest.TestCase):

     def setUp(self):
         self._reporting_level = logging.ERROR
-        self.v2_create_schemas_path = pathlib.PurePosixPath(
+        self.v2_create_schemas_path = pathlib.Path(
             pathlib.Path(__file__).parents[0], '..', '..', 'isatools', 'resources', 'schemas',
             'isa_model_version_2_0_schemas', 'create')

Expand All @@ -315,7 +315,7 @@ def test_validate_testdata_sampleassayplan_json(self):
with open(os.path.join(self.v2_create_schemas_path,
'sample_assay_plan_schema.json')) as fp:
sample_assay_plan_schema = json.load(fp)
res_path = pathlib.PurePosixPath("file://", self.v2_create_schemas_path,
res_path = pathlib.Path("file://", self.v2_create_schemas_path,
'sample_assay_plan_schema.json').as_uri()
resolver = RefResolver(res_path, sample_assay_plan_schema)
validator = Draft4Validator(sample_assay_plan_schema,
@@ -342,7 +342,7 @@ def test_validate_testdata_treatment_sequence_json(self):
         with open(os.path.join(self.v2_create_schemas_path,
                                'treatment_sequence_schema.json')) as fp:
             treatment_sequence_schema = json.load(fp)
-        res_path = pathlib.PurePosixPath("file://", self.v2_create_schemas_path,
+        res_path = pathlib.Path("file://", self.v2_create_schemas_path,
                                          'treatment_sequence_schema.json').as_uri()
         resolver = RefResolver(res_path, treatment_sequence_schema)
         validator = Draft4Validator(treatment_sequence_schema,
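Both tests switch from pathlib.PurePosixPath to pathlib.Path, which instantiates the concrete class for the host OS (WindowsPath on Windows, PosixPath elsewhere), so joining and URI conversion behave natively instead of assuming POSIX '/' separators. A small sketch of the distinction, with illustrative paths rather than the test's own:

import pathlib

# Path picks the concrete flavour for the current OS; PurePosixPath keeps
# POSIX '/' semantics regardless of where the code runs.
schema_dir = pathlib.Path(pathlib.Path(__file__).parent, 'schemas')

# as_uri() requires an absolute path, hence resolve(); on Windows this
# yields e.g. file:///C:/.../schemas.
print(schema_dir.resolve().as_uri())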