Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pre-select samples to speed up JOINs for NA filtering #11061

Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@
<!-- if 'NA' is selected, prepare NA samples -->
<if test="userSelectsNA">
SELECT DISTINCT ${unique_id}
FROM sample_derived sd
FROM (<include refid="selectAllSamplesInSelectedStudies"/>) AS sd
LEFT JOIN (<include refid="selectAllClinicalDataByAttribute"/>) AS categorical_clinical_data
ON
<choose>
Expand Down Expand Up @@ -428,7 +428,7 @@
<sql id="categoricalClinicalDataCountFilter">
(
SELECT ${unique_id}
FROM sample_derived sd
FROM (<include refid="selectAllSamplesInSelectedStudies"/>) AS sd
LEFT JOIN (<include refid="selectAllClinicalDataByAttribute"/>) AS categorical_clinical_data
ON
<choose>
Expand Down Expand Up @@ -462,6 +462,23 @@
)
</sql>

<!--
  Selects the distinct sample_unique_id of every sample in the selected studies
  whose alteration_value matches at least one entry of genomicDataFilter.values.
  The filter value 'NA' is translated into an IS NULL test, i.e. it targets samples
  that have no matching row in the genetic-alteration sub-query.
  Depends on the fragments selectAllSamplesInSelectedStudies and selectAllGeneticAlterations.
-->
<sql id="categoricalGenomicDataFilterForCNA">
    <!-- start from the study-filtered sample list so the LEFT JOIN below works on a reduced set -->
    SELECT DISTINCT sample_query.sample_unique_id
    <!-- LEFT JOIN keeps samples without an alteration row so they can be matched as 'NA' -->
    FROM (<include refid="selectAllSamplesInSelectedStudies"/>) AS sample_query
    LEFT JOIN (<include refid="selectAllGeneticAlterations"/>) AS cna_query
        ON sample_query.sample_unique_id = cna_query.sample_unique_id
    WHERE
    <foreach item="dataFilterValue" collection="genomicDataFilter.values" open="(" separator=" OR " close=")">
        <choose>
            <!-- NA value samples -->
            <when test="dataFilterValue.value == 'NA'">cna_query.alteration_value IS NULL</when>
            <!-- non-NA value samples; standard '=' is used instead of the dialect-specific '==' -->
            <otherwise>cna_query.alteration_value = #{dataFilterValue.value}</otherwise>
        </choose>
    </foreach>
</sql>

<sql id="numericalGenomicDataFilter">
<!-- check if 'NA' is selected -->
<bind name="userSelectsNA" value="false" />
Expand All @@ -478,9 +495,9 @@
</foreach>
<!-- if 'NA' is selected, prepare NA samples -->
<if test="userSelectsNA">
SELECT DISTINCT sd.sample_unique_id
FROM sample_derived sd
LEFT JOIN (<include refid="selectAllNumericalGeneticAlterations"/>) AS genomic_numerical_query ON sd.sample_unique_id = genomic_numerical_query.sample_unique_id
SELECT DISTINCT sample_query.sample_unique_id
FROM (<include refid="selectAllSamplesInSelectedStudies"/>) AS sample_query
LEFT JOIN (<include refid="selectAllGeneticAlterations"/>) AS genomic_numerical_query ON sample_query.sample_unique_id = genomic_numerical_query.sample_unique_id
WHERE alteration_value IS null
</if>
<!-- if both 'NA' and non-NA are selected, union them together -->
Expand All @@ -490,7 +507,7 @@
<!-- if non-NA is selected, prepare non-NA samples -->
<if test="userSelectsNumericalValue">
SELECT DISTINCT sample_unique_id
FROM (<include refid="selectAllNumericalGeneticAlterations"/>) AS genomic_numerical_query
FROM (<include refid="selectAllGeneticAlterations"/>) AS genomic_numerical_query
WHERE
<foreach item="dataFilterValue" collection="genomicDataFilter.values" open="((" separator=") OR (" close="))">
<trim prefix="" prefixOverrides="AND">
Expand Down Expand Up @@ -536,7 +553,7 @@
</if>
</sql>

<sql id="selectAllNumericalGeneticAlterations">
<sql id="selectAllGeneticAlterations">
SELECT sample_unique_id, alteration_value
FROM genetic_alteration_derived
WHERE profile_type = #{genomicDataFilter.profileType}
Expand All @@ -546,6 +563,15 @@
#{studyId}
</foreach>
</sql>

<!--
  Reusable sub-query: the sample_unique_id of every row in sample_derived whose
  cancer_study_identifier is one of studyViewFilterHelper.studyViewFilter.studyIds.
  Other fragments include it as the left side of a LEFT JOIN.
-->
<sql id="selectAllSamplesInSelectedStudies">
    SELECT sample_unique_id
    FROM sample_derived
    WHERE cancer_study_identifier IN
    <!-- one bound parameter per selected study id -->
    <foreach
        item="studyId"
        collection="studyViewFilterHelper.studyViewFilter.studyIds"
        open="("
        separator=","
        close=")">#{studyId}</foreach>
</sql>

<sql id="selectAllGenericAssays">
SELECT sample_unique_id, value, datatype
Expand All @@ -572,7 +598,7 @@
<!-- if 'NA' is selected, prepare NA samples -->
<if test="userSelectsNA">
SELECT DISTINCT sd.sample_unique_id
FROM sample_derived sd
FROM (<include refid="selectAllSamplesInSelectedStudies"/>) AS sd
LEFT JOIN (<include refid="selectAllGenericAssays"/>) AS generic_numerical_query ON sd.sample_unique_id = generic_numerical_query.sample_unique_id
WHERE datatype = 'LIMIT-VALUE'
AND value IS null OR
Expand Down Expand Up @@ -646,7 +672,7 @@

<sql id="categoricalGenericAssayDataCountFilter">
SELECT ${unique_id}
FROM sample_derived sd
FROM (<include refid="selectAllSamplesInSelectedStudies"/>) AS sd
LEFT JOIN (<include refid="selectAllGenericAssays"/>) AS generic_assay_query
ON sd.sample_unique_id = generic_assay_query.sample_unique_id
<where>
Expand Down Expand Up @@ -735,34 +761,4 @@
</foreach>
</if>
</sql>

<!--
  Selects the distinct sample_unique_id of every sample in the selected studies
  whose alteration_value (for the filter's profile type and gene) matches at least
  one entry of genomicDataFilter.values. The filter value 'NA' is translated into an
  IS NULL test, i.e. it targets samples without a matching row in cna_query.
  NOTE(review): a fragment with the same id also appears earlier in this listing;
  presumably only one of the two definitions is meant to remain. Confirm.
-->
<sql id="categoricalGenomicDataFilterForCNA">
    <!-- filter on study to reduce query size in preparation of the following LEFT JOIN -->
    WITH cna_query AS (
        SELECT sample_unique_id, alteration_value
        FROM genetic_alteration_derived
        WHERE profile_type = #{genomicDataFilter.profileType}
            AND hugo_gene_symbol = #{genomicDataFilter.hugoGeneSymbol}
            AND cancer_study_identifier IN
            <foreach item="studyId" collection="studyViewFilterHelper.studyViewFilter.studyIds" open="(" separator="," close=")">
                #{studyId}
            </foreach>
    )
    SELECT DISTINCT sd.sample_unique_id
    <!-- join with sample table to get all 'NA' samples -->
    FROM sample_derived sd
    LEFT JOIN cna_query ON sd.sample_unique_id = cna_query.sample_unique_id
    <!-- cna_query exposes no study column, so the study filter applies to the sample table -->
    WHERE sd.cancer_study_identifier IN
    <foreach item="studyId" collection="studyViewFilterHelper.studyViewFilter.studyIds" open="(" separator="," close=")">
        #{studyId}
    </foreach>
    <foreach item="dataFilterValue" collection="genomicDataFilter.values" open="AND (" separator=" OR " close=")">
        <choose>
            <!-- NA value samples -->
            <when test="dataFilterValue.value == 'NA'">cna_query.alteration_value IS NULL</when>
            <!-- non-NA value samples; standard '=' is used instead of the dialect-specific '==' -->
            <otherwise>cna_query.alteration_value = #{dataFilterValue.value}</otherwise>
        </choose>
    </foreach>
</sql>
</mapper>
Loading