Changed file opens to binary mode for to_csv calls. #547

Closed · wants to merge 3 commits
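This PR swaps text-mode handles ('w' with encoding='utf-8') for binary ones ('wb') wherever a handle is fed to pandas to_csv, so every string written directly to the handle becomes a bytes literal. The usual motivation for this pattern is Windows newline handling: a text-mode handle translates each '\n' a second time, so the '\r\n' terminators emitted by the csv layer land on disk as '\r\r\n', while a binary handle is written verbatim. Below is a minimal sketch of the pattern, assuming pandas >= 1.2 (the release where to_csv gained support for binary file handles); the file name and DataFrame are illustrative only, not taken from this diff:

import pandas as pd

# Illustrative section DataFrame; the real code builds these from the ISA model.
df = pd.DataFrame({'Term Source File': ['obi.owl']},
                  index=pd.Index(['OBI'], name='Term Source Name'))

with open('i_investigation.txt', 'wb') as fp:   # binary mode, no encoding arg
    fp.write(b'ONTOLOGY SOURCE REFERENCE\n')    # bytes literal, since fp is binary
    # to_csv encodes the text itself, so the OS text layer never gets a
    # chance to rewrite '\n' on Windows.
    df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
              index_label='Term Source Name')

The alternative fix, open(..., 'w', newline='', encoding='utf-8'), would keep text mode; this PR standardizes on binary handles instead.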
24 changes: 12 additions & 12 deletions isatools/isatab/dump/core.py
@@ -41,7 +41,7 @@ def dump(isa_obj, output_path,
         raise NameError('Investigation file must match pattern i_*.txt, got {}'.format(i_file_name))

     if path.exists(output_path):
-        fp = open(path.join(output_path, i_file_name), 'w', encoding='utf-8')
+        fp = open(path.join(output_path, i_file_name), 'wb')
     else:
         log.debug('output_path=', i_file_name)
         raise FileNotFoundError("Can't find " + output_path)
@@ -55,7 +55,7 @@ def dump(isa_obj, output_path,

     # Write ONTOLOGY SOURCE REFERENCE section
     ontology_source_references_df = _build_ontology_reference_section(investigation.ontology_source_references)
-    fp.write('ONTOLOGY SOURCE REFERENCE\n')
+    fp.write(b'ONTOLOGY SOURCE REFERENCE\n')
     # Need to set index_label as top left cell
     ontology_source_references_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                          index_label='Term Source Name')
@@ -80,7 +80,7 @@ def dump(isa_obj, output_path,
             inv_df_rows.append(comment.value)
     investigation_df.loc[0] = inv_df_rows
     investigation_df = investigation_df.set_index('Investigation Identifier').T
-    fp.write('INVESTIGATION\n')
+    fp.write(b'INVESTIGATION\n')
     investigation_df.to_csv(
         path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
         index_label='Investigation Identifier')
@@ -90,14 +90,14 @@ def dump(isa_obj, output_path,
         prefix='Investigation',
         publications=investigation.publications
     )
-    fp.write('INVESTIGATION PUBLICATIONS\n')
+    fp.write(b'INVESTIGATION PUBLICATIONS\n')
     investigation_publications_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                          index_label='Investigation PubMed ID')

     # Write INVESTIGATION CONTACTS section
     investigation_contacts_df = _build_contacts_section_df(
         contacts=investigation.contacts)
-    fp.write('INVESTIGATION CONTACTS\n')
+    fp.write(b'INVESTIGATION CONTACTS\n')
     investigation_contacts_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                      index_label='Investigation Person Last Name')

@@ -127,40 +127,40 @@ def dump(isa_obj, output_path,
                 study_df_row.append(comment.value)
         study_df.loc[0] = study_df_row
         study_df = study_df.set_index('Study Identifier').T
-        fp.write('STUDY\n')
+        fp.write(b'STUDY\n')
         study_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8', index_label='Study Identifier')
         study_design_descriptors_df = _build_design_descriptors_section(design_descriptors=study.design_descriptors)
-        fp.write('STUDY DESIGN DESCRIPTORS\n')
+        fp.write(b'STUDY DESIGN DESCRIPTORS\n')
         study_design_descriptors_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                            index_label='Study Design Type')

         # Write STUDY PUBLICATIONS section
         study_publications_df = _build_publications_section_df(prefix='Study', publications=study.publications)
-        fp.write('STUDY PUBLICATIONS\n')
+        fp.write(b'STUDY PUBLICATIONS\n')
         study_publications_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                      index_label='Study PubMed ID')

         # Write STUDY FACTORS section
         study_factors_df = _build_factors_section_df(factors=study.factors)
-        fp.write('STUDY FACTORS\n')
+        fp.write(b'STUDY FACTORS\n')
         study_factors_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                 index_label='Study Factor Name')

         study_assays_df = _build_assays_section_df(assays=study.assays)
-        fp.write('STUDY ASSAYS\n')
+        fp.write(b'STUDY ASSAYS\n')
         study_assays_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                index_label='Study Assay File Name')

         # Write STUDY PROTOCOLS section
         study_protocols_df = _build_protocols_section_df(protocols=study.protocols)
-        fp.write('STUDY PROTOCOLS\n')
+        fp.write(b'STUDY PROTOCOLS\n')
         study_protocols_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                   index_label='Study Protocol Name')

         # Write STUDY CONTACTS section
         study_contacts_df = _build_contacts_section_df(
             prefix='Study', contacts=study.contacts)
-        fp.write('STUDY CONTACTS\n')
+        fp.write(b'STUDY CONTACTS\n')
         study_contacts_df.to_csv(path_or_buf=fp, mode='a', sep='\t', encoding='utf-8',
                                  index_label='Study Person Last Name')

4 changes: 2 additions & 2 deletions isatools/isatab/dump/write.py
@@ -220,7 +220,7 @@ def flatten(current_list):
     DF = DF.replace('', nan)
     DF = DF.dropna(axis=1, how='all')

-    with open(path.join(output_dir, study_obj.filename), 'w') as out_fp:
+    with open(path.join(output_dir, study_obj.filename), 'wb') as out_fp:
         DF.to_csv(
             path_or_buf=out_fp, index=False, sep='\t', encoding='utf-8')

@@ -480,7 +480,7 @@ def pbar(x):
         DF = DF.dropna(axis=1, how='all')

         with open(path.join(
-                output_dir, assay_obj.filename), 'w') as out_fp:
+                output_dir, assay_obj.filename), 'wb') as out_fp:
             DF.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                       encoding='utf-8')

2 changes: 1 addition & 1 deletion isatools/isatab/load/core.py
@@ -390,7 +390,7 @@ def merge_study_with_assay_tables(study_file_path, assay_file_path, target_file_
     log.info("Merging DataFrames...")
     merged_DF = merge(study_DF, assay_DF, on='Sample Name')
     log.info("Writing merged DataFrame to file %s", target_file_path)
-    with open(target_file_path, 'w', encoding='utf-8') as fp:
+    with open(target_file_path, 'wb') as fp:
         merged_DF.to_csv(fp, sep='\t', index=False, header=study_DF.isatab_header + assay_DF.isatab_header[1:])
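The header= argument here concatenates the two stored header lists, slicing off the first entry of the assay header so the shared 'Sample Name' merge key appears only once in the merged file. A small sketch of that slicing, with illustrative column lists (isatab_header is an attribute the loader appears to attach to each parsed DataFrame):

# Illustrative headers; the real lists come from the parsed study and assay tables.
study_header = ['Sample Name', 'Characteristics[organism]']
assay_header = ['Sample Name', 'Extract Name', 'Raw Data File']

# assay_header[1:] drops the duplicated merge key.
merged_header = study_header + assay_header[1:]
# ['Sample Name', 'Characteristics[organism]', 'Extract Name', 'Raw Data File']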


3 changes: 1 addition & 2 deletions isatools/magetab.py
@@ -362,8 +362,7 @@ def write_idf_file(inv_obj, output_path):
     idf_df = idf_df.replace('', np.nan)
     with open(os.path.join(output_path, "{}.idf.txt".format(
             investigation.identifier if investigation.identifier != ""
-            else investigation.filename[2:-3])), "w",
-          encoding='utf-8') as idf_fp:
+            else investigation.filename[2:-3])), "wb") as idf_fp:
         idf_df.to_csv(
             path_or_buf=idf_fp,
             index=True,
4 changes: 2 additions & 2 deletions isatools/utils.py
@@ -733,7 +733,7 @@ def replace_factor_with_source_characteristic(self, factor_name):
         table_file_df.columns = self.clean_isatab_field_names(
             field_names_modified)

-        with open(self.path, 'w') as out_fp:
+        with open(self.path, 'wb') as out_fp:
             table_file_df.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                                  encoding='utf-8')

@@ -856,7 +856,7 @@ def replace_factor_with_protocol_parameter_value(

         with open(os.path.join(
             os.path.dirname(self.path), '{s_filename}.fix'.format(
-                s_filename=os.path.basename(self.path))), 'w') as out_fp:
+                s_filename=os.path.basename(self.path))), 'wb') as out_fp:
             table_file_df.to_csv(path_or_buf=out_fp, index=False, sep='\t',
                                  encoding='utf-8')

2 changes: 1 addition & 1 deletion tests/isatab/test_isatab.py
@@ -440,7 +440,7 @@ def test_isatab_dump_source_sample_char_quant(self):
         s.process_sequence = [sample_collection_process]
         s.samples.append(sample1)
         i.studies = [s]
-        actual = isatab.dumps(i)
+        actual = replace_windows_newlines(isatab.dumps(i))
         expected = """Source Name\tMaterial Type\tCharacteristics[organism]\tTerm Source REF\tTerm Accession Number\tCharacteristics[body weight]\tUnit\tTerm Source REF\tTerm Accession Number\tProtocol REF\tParameter Value[vessel]\tTerm Source REF\tTerm Accession Number\tParameter Value[storage temperature]\tUnit\tTerm Source REF\tTerm Accession Number\tSample Name\tCharacteristics[organism part]\tTerm Source REF\tTerm Accession Number\tCharacteristics[specimen mass]\tUnit\tTerm Source REF\tTerm Accession Number
 source1\tspecimen\tHuman\tNCBITAXON\thttp://purl.bioontology.org/ontology/STY/T016\t72\tkilogram\tUO\thttp://purl.obolibrary.org/obo/UO_0000009\tsample collection\teppendorf tube\tOBI\tpurl.org\t-20\tdegree Celsius\tUO\thttp://purl.obolibrary.org/obo/UO_0000027\tsample1\tliver\tUBERON\thttp://purl.obolibrary.org/obo/UBERON_0002107\t450.5\tmilligram\tUO\thttp://purl.obolibrary.org/obo/UO_0000022"""
         self.assertIn(expected, actual)
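replace_windows_newlines is not defined in this diff; presumably it normalizes the CRLF terminators that the binary-mode dump now emits, so the comparison against the LF-only expected string still holds. A hypothetical sketch of such a helper (the implementation shown is an assumption, not code from this PR):

def replace_windows_newlines(text):
    # Assumed behavior: collapse CRLF back to LF so the dumped output can
    # be compared against LF-only expected fixtures.
    return text.replace('\r\n', '\n')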
6 changes: 3 additions & 3 deletions tests/validators/test_validate_test_data.py
@@ -305,7 +305,7 @@ class TestIsaJsonCreateTestData(unittest.TestCase):

     def setUp(self):
         self._reporting_level = logging.ERROR
-        self.v2_create_schemas_path = pathlib.PurePosixPath(
+        self.v2_create_schemas_path = pathlib.Path(
             pathlib.Path(__file__).parents[0], '..', '..', 'isatools', 'resources', 'schemas',
             'isa_model_version_2_0_schemas', 'create')

Expand All @@ -315,7 +315,7 @@ def test_validate_testdata_sampleassayplan_json(self):
with open(os.path.join(self.v2_create_schemas_path,
'sample_assay_plan_schema.json')) as fp:
sample_assay_plan_schema = json.load(fp)
res_path = pathlib.PurePosixPath("file://", self.v2_create_schemas_path,
res_path = pathlib.Path("file://", self.v2_create_schemas_path,
'sample_assay_plan_schema.json').as_uri()
resolver = RefResolver(res_path, sample_assay_plan_schema)
validator = Draft4Validator(sample_assay_plan_schema,
@@ -342,7 +342,7 @@ def test_validate_testdata_treatment_sequence_json(self):
         with open(os.path.join(self.v2_create_schemas_path,
                                'treatment_sequence_schema.json')) as fp:
             treatment_sequence_schema = json.load(fp)
-        res_path = pathlib.PurePosixPath("file://", self.v2_create_schemas_path,
+        res_path = pathlib.Path("file://", self.v2_create_schemas_path,
                                          'treatment_sequence_schema.json').as_uri()
         resolver = RefResolver(res_path, treatment_sequence_schema)
         validator = Draft4Validator(treatment_sequence_schema,
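Both tests switch from pathlib.PurePosixPath to pathlib.Path, which instantiates the concrete class for the host OS (WindowsPath on Windows, PosixPath elsewhere), so joining and URI conversion behave natively instead of assuming POSIX '/' separators. A small sketch of the distinction, with illustrative paths rather than the test's own:

import pathlib

# Path picks the concrete flavour for the current OS; PurePosixPath keeps
# POSIX '/' semantics regardless of where the code runs.
schema_dir = pathlib.Path(pathlib.Path(__file__).parent, 'schemas')

# as_uri() requires an absolute path, hence resolve(); on Windows this
# yields e.g. file:///C:/.../schemas.
print(schema_dir.resolve().as_uri())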