Skip to content

Commit

Permalink
refactor: all fields can be missing except the variant itself
Browse files Browse the repository at this point in the history
  • Loading branch information
mkarmona committed Oct 1, 2018
1 parent c594000 commit 6eb0114
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
9 changes: 5 additions & 4 deletions src/main/scala/ot/geckopipe/Main.scala
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,14 @@ class Commands(val ss: SparkSession, val sampleFactor: Double, val c: Configurat
val vIdx = vIdxBuilder.load
val nearests = vIdxBuilder.loadNearestGenes.map( df => {
logger.info("generate variant index LUT with nearest genes (prot-cod and not prot-cod")
vIdx.table.join(df, VariantIndex.variantColumnNames, "left_outer")
.write
.json(c.output.stripSuffix("/").concat("/variant-index-lut/"))
val joint = vIdx.table.join(df, VariantIndex.variantColumnNames, "left_outer")

joint.show(10, false)
joint.write.json(c.output.stripSuffix("/").concat("/variant-index-lut/"))
})

nearests match {
case scala.util.Success(lut) => logger.info("generated variant index LUT")
case scala.util.Success(_) => logger.info("generated variant index LUT")
case scala.util.Failure(ex) => logger.error(ex.getMessage)
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/ot/geckopipe/index/VariantIndex.scala
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ object VariantIndex {

val nearestGenesSchema = StructType(
StructField("varid", StringType, false) ::
StructField("gene_id_prot_coding", StringType, false) ::
StructField("gene_id_prot_coding", StringType) ::
StructField("gene_id_prot_coding_distance", LongType) ::
StructField("gene_id", StringType, false) ::
StructField("gene_id", StringType) ::
StructField("gene_id_distance", LongType) :: Nil)

/** This class builds a VariantIndex based on the Configuration. */
Expand Down

0 comments on commit 6eb0114

Please sign in to comment.