Skip to content

Commit

Permalink
Rfc80/clinical data counts refactor (#11084)
Browse files Browse the repository at this point in the history
* ♻️ Refactor NewStudyViewFilterUtil

* Update SQL to return ClinicalDataCountItem (NA counts not yet implemented)

* Add NA counts to SQL for clinical data counts

* Remove unused methods

* Fix unit tests for clinical data counts

* Remove unused import

* Fix Review Comments

---------

Co-authored-by: alisman <[email protected]>
  • Loading branch information
haynescd and alisman authored Nov 6, 2024
1 parent 9782432 commit 03e0680
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 183 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
import org.cbioportal.model.ClinicalEventTypeCount;
import org.cbioportal.model.CopyNumberCountByGene;
import org.cbioportal.model.GenericAssayDataCountItem;
Expand Down Expand Up @@ -39,7 +40,7 @@ public interface StudyViewRepository {
List<AlterationCountByGene> getStructuralVariantGenes(StudyViewFilterContext studyViewFilterContext);
List<CopyNumberCountByGene> getCnaGenes(StudyViewFilterContext studyViewFilterContext);

List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilterContext studyViewFilterContext, List<String> filteredAttributes);
List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilterContext studyViewFilterContext, List<String> filteredAttributes);

List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilterContext studyViewFilterContext);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import org.cbioportal.model.ClinicalAttribute;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
import org.cbioportal.model.ClinicalEventTypeCount;
import org.cbioportal.model.CopyNumberCountByGene;
import org.cbioportal.model.GenePanelToGene;
Expand Down Expand Up @@ -40,7 +41,7 @@ public interface StudyViewMapper {

List<AlterationCountByGene> getStructuralVariantGenes(StudyViewFilterHelper studyViewFilterHelper, AlterationFilterHelper alterationFilterHelper);

List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilterHelper studyViewFilterHelper, List<String> attributeIds, List<String> filteredAttributeValues);
List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilterHelper studyViewFilterHelper, List<String> attributeIds, List<String> filteredAttributeValues);

List<CaseListDataCount> getCaseListDataCountsPerStudy(@Param("studyViewFilterHelper") StudyViewFilterHelper studyViewFilterHelper);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
import org.cbioportal.model.ClinicalEventTypeCount;
import org.cbioportal.model.GenePanelToGene;
import org.cbioportal.model.GenericAssayDataCountItem;
Expand Down Expand Up @@ -79,7 +80,7 @@ public List<AlterationCountByGene> getStructuralVariantGenes(StudyViewFilterCont
}

@Override
public List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilterContext studyViewFilterContext, List<String> filteredAttributes) {
public List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilterContext studyViewFilterContext, List<String> filteredAttributes) {
return mapper.getClinicalDataCounts(createStudyViewFilterHelper(studyViewFilterContext),
filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,7 @@ public Map<String, ClinicalDataType> getClinicalAttributeDatatypeMap() {
)
@Override
public List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilter studyViewFilter, List<String> filteredAttributes) {
StudyViewFilterContext studyViewFilterContext = createContext(studyViewFilter);
List<ClinicalDataCount> dataCounts = studyViewRepository.getClinicalDataCounts(studyViewFilterContext, filteredAttributes);
List<ClinicalDataCountItem> clinicalDataCountItems = generateDataCountItemsFromDataCounts(dataCounts);

return calculateMissingNaCountsForClinicalDataCountItems(
clinicalDataCountItems,
filteredAttributes.stream().distinct().toList(),
this.getClinicalAttributeDatatypeMap(),
studyViewRepository.getFilteredSamplesCount(studyViewFilterContext),
studyViewRepository.getFilteredPatientCount(studyViewFilterContext)
);
return studyViewRepository.getClinicalDataCounts(createContext(studyViewFilter), filteredAttributes);
}

@Cacheable(
Expand Down Expand Up @@ -311,83 +301,6 @@ private List<ClinicalDataCount> normalizeDataCounts(List<ClinicalDataCount> data
return new ArrayList<>(normalizedDataCounts);
}

/**
 * Post-processes clinical data count items so that every filtered attribute accounts for
 * cases that have no clinical data by means of an "NA" count.
 *
 * <p>For each attribute in {@code filteredAttributes}, the sum of the item's existing counts is
 * subtracted from the filtered sample count (when the attribute's datatype is
 * {@link ClinicalDataType#SAMPLE}) or from the filtered patient count (any other datatype,
 * including an attribute absent from the datatype map). A positive difference is added to the
 * item's "NA" count; the "NA" entry is created when the item does not have one yet.
 *
 * <p>Attributes that have no item in {@code clinicalDataCountItems} get a newly created item,
 * which is included in the result only when its NA count is positive. Such new items are placed
 * before the original items in the returned list.
 *
 * <p>The items passed in are modified in place (their count lists may gain an "NA" entry or have
 * its count increased), so their count lists must be mutable.
 *
 * @param clinicalDataCountItems       items computed from the existing clinical data counts
 * @param filteredAttributes           all attribute ids that were requested, not only those that
 *                                     produced an item
 * @param clinicalAttributeDatatypeMap attribute id to datatype, used to pick the sample or patient total
 * @param filteredSamplesCount         number of samples matching the current filter
 * @param filteredPatientsCount        number of patients matching the current filter
 * @return newly created NA-only items followed by all items of {@code clinicalDataCountItems}
 * @throws IllegalStateException if two input items share the same attribute id
 */
public static List<ClinicalDataCountItem> calculateMissingNaCountsForClinicalDataCountItems(
    List<ClinicalDataCountItem> clinicalDataCountItems,
    List<String> filteredAttributes,
    Map<String, ClinicalDataType> clinicalAttributeDatatypeMap,
    int filteredSamplesCount,
    int filteredPatientsCount
) {
    // Items created here for attributes that had no counts at all; they precede the originals.
    List<ClinicalDataCountItem> combinedClinicalDataCountItems = new ArrayList<>();

    Map<String, ClinicalDataCountItem> clinicalDataCountItemMap = clinicalDataCountItems
        .stream()
        .collect(Collectors.toMap(
            ClinicalDataCountItem::getAttributeId,
            item -> item
        ));

    // Go over all filtered attributes, not just attributes found in clinicalDataCountItems.
    for (String attributeId : filteredAttributes) {
        ClinicalDataCountItem clinicalDataCountItem = clinicalDataCountItemMap.get(attributeId);
        boolean isItemMissing = clinicalDataCountItem == null;

        if (isItemMissing) {
            clinicalDataCountItem = new ClinicalDataCountItem();
            clinicalDataCountItem.setAttributeId(attributeId);
            clinicalDataCountItem.setCounts(new ArrayList<>());
        }

        int totalClinicalDataCount = clinicalDataCountItem
            .getCounts()
            .stream()
            .mapToInt(ClinicalDataCount::getCount)
            .sum();

        // Depending on the clinical data type we use either the filtered sample count
        // or the filtered patient count as the expected total.
        boolean isSampleAttribute =
            clinicalAttributeDatatypeMap.get(clinicalDataCountItem.getAttributeId()) == ClinicalDataType.SAMPLE;
        int filteredCount = isSampleAttribute ? filteredSamplesCount : filteredPatientsCount;
        int casesWithoutClinicalData = filteredCount - totalClinicalDataCount;

        if (casesWithoutClinicalData <= 0) {
            continue;
        }

        // Some attributes may be completely missing from clinicalDataCountItems when the only
        // attribute value is NA; add those items so that they carry an NA count.
        if (isItemMissing) {
            combinedClinicalDataCountItems.add(clinicalDataCountItem);
        }

        // Find the existing "NA" entry. The constant-first comparison tolerates counts whose
        // value is null instead of throwing a NullPointerException.
        ClinicalDataCount naClinicalDataCount = clinicalDataCountItem
            .getCounts()
            .stream()
            .filter(c -> "NA".equals(c.getValue()))
            .findFirst()
            .orElse(null);

        if (naClinicalDataCount == null) {
            // Per the original note, this should only happen when there are multiple studies.
            naClinicalDataCount = new ClinicalDataCount();
            naClinicalDataCount.setAttributeId(attributeId);
            naClinicalDataCount.setValue("NA");
            naClinicalDataCount.setCount(0);
            clinicalDataCountItem.getCounts().add(naClinicalDataCount);
        }

        naClinicalDataCount.setCount(naClinicalDataCount.getCount() + casesWithoutClinicalData);
    }

    combinedClinicalDataCountItems.addAll(clinicalDataCountItems);
    return combinedClinicalDataCountItems;
}

public static List<CaseListDataCount> mergeCaseListCounts(List<CaseListDataCount> counts) {
Map<String, List<CaseListDataCount>> countsPerListType = counts.stream()
.collect((Collectors.groupingBy(CaseListDataCount::getValue)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,12 +208,10 @@ public ResponseEntity<List<ClinicalDataCountItem>> fetchClinicalDataCounts(
StudyViewFilter studyViewFilter = interceptedClinicalDataCountFilter.getStudyViewFilter();

if (attributes.size() == 1) {
NewStudyViewFilterUtil.removeSelfFromFilter(attributes.get(0).getAttributeId(), studyViewFilter);
NewStudyViewFilterUtil.removeClinicalDataFilter(attributes.getFirst().getAttributeId(), studyViewFilter.getClinicalDataFilters());
}
// boolean singleStudyUnfiltered = studyViewFilterUtil.isSingleStudyUnfiltered(studyViewFilter);
List<ClinicalDataCountItem> result = studyViewColumnarService.getClinicalDataCounts(studyViewFilter,
attributes.stream().map(ClinicalDataFilter::getAttributeId).collect(Collectors.toList()));
//studyIds, sampleIds, attributes.stream().map(a -> a.getAttributeId()).collect(Collectors.toList()));
return new ResponseEntity<>(result, HttpStatus.OK);

}
Expand Down Expand Up @@ -576,7 +574,7 @@ public ResponseEntity<List<ClinicalDataCountItem>> fetchCustomDataCounts(
List<ClinicalDataFilter> attributes = interceptedClinicalDataCountFilter.getAttributes();
StudyViewFilter studyViewFilter = interceptedClinicalDataCountFilter.getStudyViewFilter();
if (attributes.size() == 1) {
NewStudyViewFilterUtil.removeSelfCustomDataFromFilter(attributes.get(0).getAttributeId(), studyViewFilter);
NewStudyViewFilterUtil.removeClinicalDataFilter(attributes.getFirst().getAttributeId(), studyViewFilter.getCustomDataFilters());
}

List <SampleIdentifier> filteredSampleIdentifiers = studyViewColumnarService.getFilteredSamples(studyViewFilter).stream().map(sample -> studyViewFilterUtil.buildSampleIdentifier(sample.getCancerStudyIdentifier(), sample.getStableId())).toList();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public static StudyViewFilter removeSelfFromFilter(ClinicalDataBinCountFilter da
StudyViewFilter studyViewFilter = dataBinCountFilter.getStudyViewFilter();

if (attributes.size() == 1) {
NewStudyViewFilterUtil.removeSelfFromFilter(attributes.get(0).getAttributeId(), studyViewFilter);
NewStudyViewFilterUtil.removeClinicalDataFilter(attributes.getFirst().getAttributeId(), studyViewFilter.getClinicalDataFilters());
}

return studyViewFilter;
Expand All @@ -36,7 +36,7 @@ public static StudyViewFilter removeSelfCustomDataFromFilter(ClinicalDataBinCoun
StudyViewFilter studyViewFilter = dataBinCountFilter.getStudyViewFilter();

if (attributes.size() == 1) {
NewStudyViewFilterUtil.removeSelfCustomDataFromFilter(attributes.get(0).getAttributeId(), studyViewFilter);
NewStudyViewFilterUtil.removeClinicalDataFilter(attributes.getFirst().getAttributeId(), studyViewFilter.getCustomDataFilters());
}

return studyViewFilter;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
package org.cbioportal.web.columnar.util;


import org.cbioportal.web.parameter.StudyViewFilter;
import org.cbioportal.web.parameter.ClinicalDataFilter;

public class NewStudyViewFilterUtil {
import java.util.List;

public static void removeSelfFromFilter(String attributeId, StudyViewFilter studyViewFilter) {
if (studyViewFilter!= null && studyViewFilter.getClinicalDataFilters() != null) {
studyViewFilter.getClinicalDataFilters().removeIf(f -> f.getAttributeId().equals(attributeId));
}
}
public class NewStudyViewFilterUtil {

public static void removeSelfCustomDataFromFilter(String attributeId, StudyViewFilter studyViewFilter) {
if (studyViewFilter != null && studyViewFilter.getCustomDataFilters() != null) {
studyViewFilter.getCustomDataFilters().removeIf(f -> f.getAttributeId().equals(attributeId));
public static void removeClinicalDataFilter(String attributeId, List<ClinicalDataFilter> dataFilterList ) {
if (dataFilterList != null) {
dataFilterList.removeIf(f -> f.getAttributeId().equals(attributeId));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,14 @@
</select>

<!-- for /clinical-data-counts/fetch: rows are mapped straight to ClinicalDataCountItem via ClinicalDataCountItemResultMap -->
<select id="getClinicalDataCounts" resultType="org.cbioportal.model.ClinicalDataCount">
<include refid="getClinicalDataCountsQuerySample" />
<select id="getClinicalDataCounts" resultMap="ClinicalDataCountItemResultMap">
<include refid="getClinicalDataCountsQuery">
<property name="type" value="sample"/>
</include>
UNION ALL
<include refid="getClinicalDataCountsQueryPatient" />
<include refid="getClinicalDataCountsQuery">
<property name="type" value="patient"/>
</include>
</select>

<!-- for /molecular-profile-sample-counts/fetch (returns GenomicDataCount) which will then be converted to clinicalDataCountItems -->
Expand Down Expand Up @@ -179,63 +183,63 @@
GROUP BY s.cancer_study_identifier, sl.stable_id, sl.name
</select>


<sql id="getClinicalDataCountsQuerySample">
<sql id="getClinicalDataCountsQuery">
(
WITH clinical_data_query AS (
SELECT
attribute_name as attributeId,
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="attribute_value"/>
</include> as value,
count(value) as count
attribute_name AS attributeId,
upper(attribute_value) AS value,
cast(count(*) AS INTEGER) as count
FROM clinical_data_derived
<where>
type='sample' AND
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'SAMPLE_ID_ONLY'"/>
type='${type}'
AND <!-- Table creation in clickhouse.sql has ensured no NA values but extra caution is always appreciated -->
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="value"/>
</include>
<if test="filteredAttributeValues != null and !filteredAttributeValues.isEmpty()">
AND UPPER(value) NOT IN
<foreach item="filteredAttributeValue" collection="filteredAttributeValues" open="(" separator="," close=")">
#{filteredAttributeValue}
</foreach>
</if>
!= 'NA'
AND
<choose>
<when test="'${type}' == 'sample'">
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'SAMPLE_ID_ONLY'"/>
</include>
</when>
<otherwise>
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'PATIENT_ID_ONLY'"/>
</include>
</otherwise>
</choose>
AND attribute_name IN
<foreach item="attributeId" collection="attributeIds" open="(" separator="," close=")">
#{attributeId}
</foreach>
</where>
GROUP BY attribute_name,
value
</sql>

<sql id="getClinicalDataCountsQueryPatient">
SELECT
attribute_name as attributeId,
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="attribute_value"/>
</include> as value,
count(value) as count
FROM clinical_data_derived
GROUP BY attribute_name, value ),
clinical_data_sum AS (SELECT attributeId, sum(count) AS sum FROM clinical_data_query GROUP BY attributeId)

SELECT * FROM clinical_data_query
UNION ALL
SELECT attributeId,
'NA' AS value,
((
<choose>
<when test="'${type}' == 'sample'">
<include refid="getTotalSampleCount"/>
</when>
<otherwise>
<include refid="getTotalPatientCount"/>
</otherwise>
</choose>
) - clinical_data_sum.sum) AS count
FROM clinical_data_sum
<where>
type='patient' AND
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'PATIENT_ID_ONLY'"/>
</include>
<if test="filteredAttributeValues != null and !filteredAttributeValues.isEmpty()">
AND UPPER(value) NOT IN
<foreach item="filteredAttributeValue" collection="filteredAttributeValues" open="(" separator="," close=")">
#{filteredAttributeValue}
</foreach>
</if>
AND attribute_name IN
<foreach item="attributeId" collection="attributeIds" open="(" separator="," close=")">
#{attributeId}
</foreach>
count > 0
</where>
GROUP BY attribute_name,
value
)
</sql>

<sql id="getTotalSampleCount">
SELECT count(distinct sample_unique_id) as count
FROM sample_derived
Expand Down Expand Up @@ -409,6 +413,15 @@
<result property="count" column="count"/>
</collection>
</resultMap>

<!--
  Result map used by the getClinicalDataCounts select.
  Each result row (columns: attributeId, value, count) populates a ClinicalDataCountItem's
  attributeId and contributes one ClinicalDataCount (attributeId, value, count) to its
  "counts" collection.
  NOTE(review): no <id> element is declared; presumably MyBatis merges rows that share the
  same attributeId into a single ClinicalDataCountItem. Confirm against the MyBatis
  nested-collection grouping rules.
-->
<resultMap id="ClinicalDataCountItemResultMap" type="org.cbioportal.model.ClinicalDataCountItem">
<result property="attributeId" column="attributeId"/>
<collection property="counts" ofType="org.cbioportal.model.ClinicalDataCount">
<result property="attributeId" column="attributeId"/>
<result property="value" column="value"/>
<result property="count" column="count"/>
</collection>
</resultMap>

<sql id="getPatientIdsFromSampleIdFilters">
SELECT patient_unique_id
Expand Down
Loading

0 comments on commit 03e0680

Please sign in to comment.