Skip to content

Commit

Permalink
Address first iteration of comments.
Browse files: browse the repository at this point in the history
  • Loading branch information
tneymanov committed Feb 10, 2020
1 parent 2895b87 commit 5dd0a66
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
13 changes: 8 additions & 5 deletions gcp_variant_transforms/vcf_to_bq.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ def _run_annotation_pipeline(known_args, pipeline_args):
def _create_sample_info_table(pipeline, # type: beam.Pipeline
pipeline_mode, # type: PipelineModes
known_args, # type: argparse.Namespace,
pipeline_args, # type: List[str]
temp_directory, # str
):
# type: (...) -> None
headers = pipeline_common.read_headers(
Expand All @@ -410,6 +410,8 @@ def run(argv=None):
logging.info('Command: %s', ' '.join(argv or sys.argv))
known_args, pipeline_args = pipeline_common.parse_args(argv,
_COMMAND_LINE_OPTIONS)
if known_args.output_table and '--temp_location' not in pipeline_args:
raise ValueError('--temp_location is required for BigQuery imports.')
if known_args.auto_flags_experiment:
_get_input_dimensions(known_args, pipeline_args)

Expand Down Expand Up @@ -483,9 +485,10 @@ def run(argv=None):
file_to_write.write(schema_json)

for i in range(num_shards):
table_suffix = sharding.get_output_table_suffix(i)
table_name = sample_info_table_schema_generator.compose_table_name(
known_args.output_table, table_suffix)
table_suffix = ''
if sharding and sharding.get_shard_name(i):
table_suffix = '_' + sharding.get_shard_name(i)
table_name = known_args.output_table + table_suffix
_ = (variants[i] | 'VariantToBigQuery' + table_suffix >>
variant_to_bigquery.VariantToBigQuery(
table_name,
Expand All @@ -498,7 +501,7 @@ def run(argv=None):
known_args.null_numeric_value_replacement)))
if known_args.generate_sample_info_table:
_create_sample_info_table(
pipeline, pipeline_mode, known_args, pipeline_args)
pipeline, pipeline_mode, known_args)

if known_args.output_avro_path:
# TODO(bashir2): Add an integration test that outputs to Avro files and
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@
'google-api-python-client>=1.6',
'intervaltree>=2.1.0,<2.2.0',
'mmh3<2.6',
'google-cloud-storage',
# Refer to issue #528
'google-cloud-storage<1.23.0',
'pyfarmhash',
'pyyaml'
]
Expand Down

0 comments on commit 5dd0a66

Please sign in to comment.