Skip to content

Commit

Permalink
Merge pull request #242 from Ferlab-Ste-Justine/fix/clin-2119
Browse files: browse the repository at this point in the history
fix: CLIN-2119 handle the `mc` column when it contains multiple entries
  • Loading branch information
meek0 authored Oct 21, 2024
2 parents fbb7c58 + f37cada commit 71838f7
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 3 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jobs:
distribution: 'adopt'
java-version: '11'
cache: 'sbt'
- uses: sbt/setup-sbt@v1
- name: Run datalake-commons tests
run: sbt 'project datalake-commons' 'test'
- name: Run datalake-spark3 tests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ case class Clinvar(rc: RuntimeETLContext) extends SimpleETLP(rc) {
)
.withColumn("clndisdbincl", split(concat_ws("", col("clndisdbincl")), "\\|"))
.withColumn("clndnincl", split(concat_ws("", col("clndnincl")), "\\|"))
.withColumn("mc", split(concat_ws("|", col("mc")), "\\|"))
.withColumn("mc", split(array_join(col("mc"), "|"), "\\|"))
.withColumn("inheritance", inheritance_udf(col("origin")))
.drop("clin_sig_original", "clndn")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ case class NormalizedClinvar(chromosome: String = "2",
af_tgp: Double = 0.01118,
clnvc: String = "single_nucleotide_variant",
clnhgvs: List[String] = List("NC_000002.12:g.69359261T>A"),
mc: List[String] = List("SO:0001627", "intron_variant"),
mc: List[String] = List("SO:0001627", "intron_variant", "SO:0001589", "frameshift_variant"),
af_esp: Double = 0.01415,
clndisdbincl: List[String] = List(""),
conditions: List[String] = List("Congenital myasthenic syndrome 12", "not specified", "not provided"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ case class RawClinvar(contigName: String = "2",
INFO_AF_TGP: Double = 0.01118,
INFO_CLNVC: String = "single_nucleotide_variant",
INFO_CLNHGVS: List[String] = List("NC_000002.12:g.69359261T>A"),
INFO_MC: List[String] = List("SO:0001627|intron_variant"),
INFO_MC: List[String] = List("SO:0001627|intron_variant", "SO:0001589|frameshift_variant"),
INFO_CLNSIGCONF: Option[List[String]] = None,
INFO_AF_ESP: Double = 0.01415,
INFO_CLNDISDBINCL: Option[List[String]] = None,
Expand Down

0 comments on commit 71838f7

Please sign in to comment.