diff --git a/isatools/isatab/dump/core.py b/isatools/isatab/dump/core.py
index 12b94712..9cb575a8 100644
--- a/isatools/isatab/dump/core.py
+++ b/isatools/isatab/dump/core.py
@@ -41,7 +41,7 @@ def dump(isa_obj, output_path,
         raise NameError('Investigation file must match pattern i_*.txt, got {}'.format(i_file_name))
     if path.exists(output_path):
-        fp = open(path.join(output_path, i_file_name), 'w', encoding='utf-8')
+        fp = open(path.join(output_path, i_file_name), 'wb')
     else:
         log.debug('output_path=', i_file_name)
         raise FileNotFoundError("Can't find " + output_path)
@@ -55,7 +55,7 @@ def dump(isa_obj, output_path,
     # Write ONTOLOGY SOURCE REFERENCE section
     ontology_source_references_df = _build_ontology_reference_section(investigation.ontology_source_references)
-    fp.write('ONTOLOGY SOURCE REFERENCE\n')
+    fp.write(b'ONTOLOGY SOURCE REFERENCE\n')
     # Need to set index_label as top left cell
     ontology_source_references_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                          index_label='Term Source Name')
@@ -80,7 +80,7 @@
             inv_df_rows.append(comment.value)
     investigation_df.loc[0] = inv_df_rows
     investigation_df = investigation_df.set_index('Investigation Identifier').T
-    fp.write('INVESTIGATION\n')
+    fp.write(b'INVESTIGATION\n')
     investigation_df.to_csv(
         path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
         index_label='Investigation Identifier')
@@ -90,14 +90,14 @@
         prefix='Investigation', publications=investigation.publications
     )
-    fp.write('INVESTIGATION PUBLICATIONS\n')
+    fp.write(b'INVESTIGATION PUBLICATIONS\n')
     investigation_publications_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                          index_label='Investigation PubMed ID')

     # Write INVESTIGATION CONTACTS section
     investigation_contacts_df = _build_contacts_section_df(
         contacts=investigation.contacts)
-    fp.write('INVESTIGATION CONTACTS\n')
+    fp.write(b'INVESTIGATION CONTACTS\n')
     investigation_contacts_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                      index_label='Investigation Person Last Name')
@@ -127,40 +127,40 @@
                 study_df_row.append(comment.value)
         study_df.loc[0] = study_df_row
         study_df = study_df.set_index('Study Identifier').T
-        fp.write('STUDY\n')
+        fp.write(b'STUDY\n')
         study_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                         index_label='Study Identifier')

         study_design_descriptors_df = _build_design_descriptors_section(design_descriptors=study.design_descriptors)
-        fp.write('STUDY DESIGN DESCRIPTORS\n')
+        fp.write(b'STUDY DESIGN DESCRIPTORS\n')
         study_design_descriptors_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                            index_label='Study Design Type')

         # Write STUDY PUBLICATIONS section
         study_publications_df = _build_publications_section_df(prefix='Study', publications=study.publications)
-        fp.write('STUDY PUBLICATIONS\n')
+        fp.write(b'STUDY PUBLICATIONS\n')
         study_publications_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                      index_label='Study PubMed ID')

         # Write STUDY FACTORS section
         study_factors_df = _build_factors_section_df(factors=study.factors)
-        fp.write('STUDY FACTORS\n')
+        fp.write(b'STUDY FACTORS\n')
         study_factors_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                 index_label='Study Factor Name')

         study_assays_df = _build_assays_section_df(assays=study.assays)
-        fp.write('STUDY ASSAYS\n')
+        fp.write(b'STUDY ASSAYS\n')
         study_assays_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                index_label='Study Assay File Name')

         # Write STUDY PROTOCOLS section
         study_protocols_df = _build_protocols_section_df(protocols=study.protocols)
-        fp.write('STUDY PROTOCOLS\n')
+        fp.write(b'STUDY PROTOCOLS\n')
         study_protocols_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                   index_label='Study Protocol Name')

         # Write STUDY CONTACTS section
         study_contacts_df = _build_contacts_section_df(
             prefix='Study', contacts=study.contacts)
-        fp.write('STUDY CONTACTS\n')
+        fp.write(b'STUDY CONTACTS\n')
         study_contacts_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                  index_label='Study Person Last Name')
diff --git a/isatools/isatab/dump/write.py b/isatools/isatab/dump/write.py
index 7d7b50fe..b0dab0ae 100644
--- a/isatools/isatab/dump/write.py
+++ b/isatools/isatab/dump/write.py
@@ -220,7 +220,7 @@ def flatten(current_list):
     DF = DF.replace('', nan)
     DF = DF.dropna(axis=1, how='all')

-    with open(path.join(output_dir, study_obj.filename), 'w') as out_fp:
+    with open(path.join(output_dir, study_obj.filename), 'wb') as out_fp:
         DF.to_csv(
             path_or_buf=out_fp, index=False, sep='\t', encoding='utf-8')
@@ -480,7 +480,7 @@ def pbar(x):
         DF = DF.dropna(axis=1, how='all')

         with open(path.join(
-                output_dir, assay_obj.filename), 'w') as out_fp:
+                output_dir, assay_obj.filename), 'wb') as out_fp:
             DF.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                       encoding='utf-8')
diff --git a/isatools/isatab/load/core.py b/isatools/isatab/load/core.py
index d953c385..26400de1 100644
--- a/isatools/isatab/load/core.py
+++ b/isatools/isatab/load/core.py
@@ -390,7 +390,7 @@ def merge_study_with_assay_tables(study_file_path, assay_file_path, target_file_
     log.info("Merging DataFrames...")
     merged_DF = merge(study_DF, assay_DF, on='Sample Name')
     log.info("Writing merged DataFrame to file %s", target_file_path)
-    with open(target_file_path, 'w', encoding='utf-8') as fp:
+    with open(target_file_path, 'wb') as fp:
         merged_DF.to_csv(fp, sep='\t', index=False,
                          header=study_DF.isatab_header + assay_DF.isatab_header[1:])
diff --git a/isatools/magetab.py b/isatools/magetab.py
index 0f3d0d6f..3b0a1d86 100644
--- a/isatools/magetab.py
+++ b/isatools/magetab.py
@@ -362,8 +362,7 @@ def write_idf_file(inv_obj, output_path):
     idf_df = idf_df.replace('', np.nan)
     with open(os.path.join(output_path, "{}.idf.txt".format(
             investigation.identifier if investigation.identifier != ""
-            else investigation.filename[2:-3])), "w",
-            encoding='utf-8') as idf_fp:
+            else investigation.filename[2:-3])), "wb") as idf_fp:
         idf_df.to_csv(
             path_or_buf=idf_fp,
             index=True,
diff --git a/isatools/utils.py b/isatools/utils.py
index 3fb21247..049c7bcb 100644
--- a/isatools/utils.py
+++ b/isatools/utils.py
@@ -733,7 +733,7 @@ def replace_factor_with_source_characteristic(self, factor_name):
         table_file_df.columns = self.clean_isatab_field_names(
             field_names_modified)

-        with open(self.path, 'w') as out_fp:
+        with open(self.path, 'wb') as out_fp:
            table_file_df.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                                 encoding='utf-8')
@@ -856,7 +856,7 @@ def replace_factor_with_protocol_parameter_value(
         with open(os.path.join(
             os.path.dirname(self.path), '{s_filename}.fix'.format(
-                s_filename=os.path.basename(self.path))), 'w') as out_fp:
+                s_filename=os.path.basename(self.path))), 'wb') as out_fp:
             table_file_df.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                                  encoding='utf-8')
diff --git a/tests/isatab/test_isatab.py b/tests/isatab/test_isatab.py
index 25eee0d4..9936febe 100644
--- a/tests/isatab/test_isatab.py
+++ b/tests/isatab/test_isatab.py
@@ -440,7 +440,7 @@ def test_isatab_dump_source_sample_char_quant(self):
         s.process_sequence = [sample_collection_process]
         s.samples.append(sample1)
         i.studies = [s]
-        actual = isatab.dumps(i)
+        actual = replace_windows_newlines(isatab.dumps(i))
         expected = """Source Name\tMaterial Type\tCharacteristics[organism]\tTerm Source REF\tTerm Accession Number\tCharacteristics[body weight]\tUnit\tTerm Source REF\tTerm Accession Number\tProtocol REF\tParameter Value[vessel]\tTerm Source REF\tTerm Accession Number\tParameter Value[storage temperature]\tUnit\tTerm Source REF\tTerm Accession Number\tSample Name\tCharacteristics[organism part]\tTerm Source REF\tTerm Accession Number\tCharacteristics[specimen mass]\tUnit\tTerm Source REF\tTerm Accession Number
 source1\tspecimen\tHuman\tNCBITAXON\thttp://purl.bioontology.org/ontology/STY/T016\t72\tkilogram\tUO\thttp://purl.obolibrary.org/obo/UO_0000009\tsample collection\teppendorf tube\tOBI\tpurl.org\t-20\tdegree Celsius\tUO\thttp://purl.obolibrary.org/obo/UO_0000027\tsample1\tliver\tUBERON\thttp://purl.obolibrary.org/obo/UBERON_0002107\t450.5\tmilligram\tUO\thttp://purl.obolibrary.org/obo/UO_0000022"""
         self.assertIn(expected, actual)
diff --git a/tests/validators/test_validate_test_data.py b/tests/validators/test_validate_test_data.py
index 4d8c1a4e..35fda568 100644
--- a/tests/validators/test_validate_test_data.py
+++ b/tests/validators/test_validate_test_data.py
@@ -305,7 +305,7 @@ class TestIsaJsonCreateTestData(unittest.TestCase):

     def setUp(self):
         self._reporting_level = logging.ERROR
-        self.v2_create_schemas_path = pathlib.PurePosixPath(
+        self.v2_create_schemas_path = pathlib.Path(
             pathlib.Path(__file__).parents[0], '..', '..', 'isatools',
             'resources', 'schemas', 'isa_model_version_2_0_schemas',
             'create')
@@ -315,7 +315,7 @@ def test_validate_testdata_sampleassayplan_json(self):
         with open(os.path.join(self.v2_create_schemas_path,
                                'sample_assay_plan_schema.json')) as fp:
             sample_assay_plan_schema = json.load(fp)
-        res_path = pathlib.PurePosixPath("file://", self.v2_create_schemas_path,
+        res_path = pathlib.Path("file://", self.v2_create_schemas_path,
                                          'sample_assay_plan_schema.json').as_uri()
         resolver = RefResolver(res_path, sample_assay_plan_schema)
         validator = Draft4Validator(sample_assay_plan_schema,
@@ -342,7 +342,7 @@ def test_validate_testdata_treatment_sequence_json(self):
         with open(os.path.join(self.v2_create_schemas_path,
                                'treatment_sequence_schema.json')) as fp:
             treatment_sequence_schema = json.load(fp)
-        res_path = pathlib.PurePosixPath("file://", self.v2_create_schemas_path,
+        res_path = pathlib.Path("file://", self.v2_create_schemas_path,
                                          'treatment_sequence_schema.json').as_uri()
         resolver = RefResolver(res_path, treatment_sequence_schema)
         validator = Draft4Validator(treatment_sequence_schema,
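
Note, for context rather than as part of the patch: the writers above now hand pandas a binary file handle and emit section headers as bytes, so that to_csv applies the utf-8 encoding itself instead of going through a text-mode wrapper, while the dump test normalizes line endings before comparing strings. Below is a minimal sketch of both ideas, assuming a pandas version that accepts binary handles in to_csv (1.2+) and assuming replace_windows_newlines (defined elsewhere in the test module) simply maps CRLF to LF; the file name is invented for illustration.

import pandas as pd


def replace_windows_newlines(text: str) -> str:
    # Assumed behaviour of the test helper: normalize CRLF (and stray CR) to LF
    # so the expected/actual comparison behaves the same on every platform.
    return text.replace('\r\n', '\n').replace('\r', '\n')


# Binary-mode write mirroring the pattern the patch switches to: the section
# header goes out as bytes, then pandas encodes the DataFrame as utf-8 itself.
df = pd.DataFrame({'Term Source Name': ['OBI'], 'Term Source File': ['']})
with open('i_example.txt', 'wb') as fp:
    fp.write(b'ONTOLOGY SOURCE REFERENCE\n')
    df.to_csv(path_or_buf=fp, sep='\t', encoding='utf-8', index=False)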