Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rfc80/clinical data counts refactor #11084

Merged
merged 8 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
import org.cbioportal.model.ClinicalEventTypeCount;
import org.cbioportal.model.CopyNumberCountByGene;
import org.cbioportal.model.GenericAssayDataCountItem;
Expand Down Expand Up @@ -39,7 +40,7 @@ public interface StudyViewRepository {
List<AlterationCountByGene> getStructuralVariantGenes(StudyViewFilterContext studyViewFilterContext);
List<CopyNumberCountByGene> getCnaGenes(StudyViewFilterContext studyViewFilterContext);

List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilterContext studyViewFilterContext, List<String> filteredAttributes);
List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilterContext studyViewFilterContext, List<String> filteredAttributes);

List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilterContext studyViewFilterContext);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import org.cbioportal.model.ClinicalAttribute;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
import org.cbioportal.model.ClinicalEventTypeCount;
import org.cbioportal.model.CopyNumberCountByGene;
import org.cbioportal.model.GenePanelToGene;
Expand Down Expand Up @@ -40,7 +41,7 @@ public interface StudyViewMapper {

List<AlterationCountByGene> getStructuralVariantGenes(StudyViewFilterHelper studyViewFilterHelper, AlterationFilterHelper alterationFilterHelper);

List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilterHelper studyViewFilterHelper, List<String> attributeIds, List<String> filteredAttributeValues);
List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilterHelper studyViewFilterHelper, List<String> attributeIds, List<String> filteredAttributeValues);

List<CaseListDataCount> getCaseListDataCountsPerStudy(@Param("studyViewFilterHelper") StudyViewFilterHelper studyViewFilterHelper);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
import org.cbioportal.model.ClinicalEventTypeCount;
import org.cbioportal.model.GenePanelToGene;
import org.cbioportal.model.GenericAssayDataCountItem;
Expand Down Expand Up @@ -79,7 +80,7 @@ public List<AlterationCountByGene> getStructuralVariantGenes(StudyViewFilterCont
}

@Override
public List<ClinicalDataCount> getClinicalDataCounts(StudyViewFilterContext studyViewFilterContext, List<String> filteredAttributes) {
public List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilterContext studyViewFilterContext, List<String> filteredAttributes) {
return mapper.getClinicalDataCounts(createStudyViewFilterHelper(studyViewFilterContext),
filteredAttributes, FILTERED_CLINICAL_ATTR_VALUES);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,7 @@ public Map<String, ClinicalDataType> getClinicalAttributeDatatypeMap() {
)
@Override
public List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilter studyViewFilter, List<String> filteredAttributes) {
StudyViewFilterContext studyViewFilterContext = createContext(studyViewFilter);
List<ClinicalDataCount> dataCounts = studyViewRepository.getClinicalDataCounts(studyViewFilterContext, filteredAttributes);
List<ClinicalDataCountItem> clinicalDataCountItems = generateDataCountItemsFromDataCounts(dataCounts);

return calculateMissingNaCountsForClinicalDataCountItems(
clinicalDataCountItems,
filteredAttributes.stream().distinct().toList(),
this.getClinicalAttributeDatatypeMap(),
studyViewRepository.getFilteredSamplesCount(studyViewFilterContext),
studyViewRepository.getFilteredPatientCount(studyViewFilterContext)
);
return studyViewRepository.getClinicalDataCounts(createContext(studyViewFilter), filteredAttributes);
}

@Cacheable(
Expand Down Expand Up @@ -311,83 +301,6 @@ private List<ClinicalDataCount> normalizeDataCounts(List<ClinicalDataCount> data
return new ArrayList<>(normalizedDataCounts);
}

/**
 * Post-processes clinical data count items so that every requested attribute accounts for
 * all filtered cases, attributing the cases that have no clinical data to an "NA" count.
 *
 * <p>For each distinct id in {@code filteredAttributes} the counts of the matching item are
 * summed and compared with the filtered sample count (when the attribute's datatype is
 * {@link ClinicalDataType#SAMPLE}) or the filtered patient count (any other datatype,
 * including an attribute absent from the datatype map). A positive difference is added to
 * the item's existing "NA" count, or to a newly appended "NA" count when none exists.
 * Attributes with no item at all get a new item, but only when they have missing cases.
 *
 * <p>The items passed in are modified in place: their count lists may gain an "NA" entry
 * and existing "NA" entries may be incremented.
 *
 * @param clinicalDataCountItems       items to adjust; attribute ids must be unique,
 *                                     otherwise {@code Collectors.toMap} throws
 *                                     {@link IllegalStateException}
 * @param filteredAttributes           attribute ids that were requested; duplicates are ignored
 * @param clinicalAttributeDatatypeMap datatype per attribute id
 * @param filteredSamplesCount         number of samples after filtering
 * @param filteredPatientsCount        number of patients after filtering
 * @return newly created items (for attributes that had no counts) followed by all of the
 *         original items
 */
public static List<ClinicalDataCountItem> calculateMissingNaCountsForClinicalDataCountItems(
    List<ClinicalDataCountItem> clinicalDataCountItems,
    List<String> filteredAttributes,
    Map<String, ClinicalDataType> clinicalAttributeDatatypeMap,
    int filteredSamplesCount,
    int filteredPatientsCount
) {
    // Holds the items created here first; the original items are appended at the end.
    List<ClinicalDataCountItem> combinedClinicalDataCountItems = new ArrayList<>();

    Map<String, ClinicalDataCountItem> clinicalDataCountItemMap = clinicalDataCountItems
        .stream()
        .collect(Collectors.toMap(
            ClinicalDataCountItem::getAttributeId,
            item -> item
        ));

    // Go over all filtered attributes, not just attributes found in clinicalDataCountItems.
    // De-duplicate first: processing the same id twice would add its missing cases to the
    // "NA" count twice (or create two synthetic items for an attribute without counts).
    for (String attributeId : filteredAttributes.stream().distinct().toList()) {
        ClinicalDataCountItem clinicalDataCountItem = clinicalDataCountItemMap.get(attributeId);
        boolean isItemMissing = clinicalDataCountItem == null;

        if (isItemMissing) {
            clinicalDataCountItem = new ClinicalDataCountItem();
            clinicalDataCountItem.setAttributeId(attributeId);
            clinicalDataCountItem.setCounts(new ArrayList<>());
        }

        int totalClinicalDataCount = clinicalDataCountItem
            .getCounts()
            .stream()
            .mapToInt(ClinicalDataCount::getCount)
            .sum();

        // depending on clinical data type we either use filtered sample count or filtered patient count
        int filteredCount =
            clinicalAttributeDatatypeMap.get(clinicalDataCountItem.getAttributeId()) == ClinicalDataType.SAMPLE
                ? filteredSamplesCount
                : filteredPatientsCount;
        int casesWithoutClinicalData = filteredCount - totalClinicalDataCount;

        if (casesWithoutClinicalData <= 0) {
            continue;
        }

        // some of these attributes may be completely missing in clinicalDataCountItems
        // in case the only attribute value is NA.
        // we need to manually add those missing items to make sure we have NA counts.
        if (isItemMissing) {
            combinedClinicalDataCountItems.add(clinicalDataCountItem);
        }

        // find "NA" or else create a new one; constant-first equals tolerates a null value
        Optional<ClinicalDataCount> naClinicalDataCountOptional = clinicalDataCountItem
            .getCounts()
            .stream()
            .filter(c -> "NA".equals(c.getValue()))
            .findFirst();

        ClinicalDataCount naClinicalDataCount;
        if (naClinicalDataCountOptional.isPresent()) {
            naClinicalDataCount = naClinicalDataCountOptional.get();
        } else {
            // this should only happen when there are multiple studies
            naClinicalDataCount = new ClinicalDataCount();
            naClinicalDataCount.setAttributeId(attributeId);
            naClinicalDataCount.setValue("NA");
            naClinicalDataCount.setCount(0);
            // not present yet, so it has to be added to the existing counts
            clinicalDataCountItem.getCounts().add(naClinicalDataCount);
        }

        naClinicalDataCount.setCount(naClinicalDataCount.getCount() + casesWithoutClinicalData);
    }

    combinedClinicalDataCountItems.addAll(clinicalDataCountItems);
    return combinedClinicalDataCountItems;
}

public static List<CaseListDataCount> mergeCaseListCounts(List<CaseListDataCount> counts) {
Map<String, List<CaseListDataCount>> countsPerListType = counts.stream()
.collect((Collectors.groupingBy(CaseListDataCount::getValue)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,12 +208,10 @@ public ResponseEntity<List<ClinicalDataCountItem>> fetchClinicalDataCounts(
StudyViewFilter studyViewFilter = interceptedClinicalDataCountFilter.getStudyViewFilter();

if (attributes.size() == 1) {
NewStudyViewFilterUtil.removeSelfFromFilter(attributes.get(0).getAttributeId(), studyViewFilter);
NewStudyViewFilterUtil.removeClinicalDataFilter(attributes.getFirst().getAttributeId(), studyViewFilter.getClinicalDataFilters());
}
// boolean singleStudyUnfiltered = studyViewFilterUtil.isSingleStudyUnfiltered(studyViewFilter);
List<ClinicalDataCountItem> result = studyViewColumnarService.getClinicalDataCounts(studyViewFilter,
attributes.stream().map(ClinicalDataFilter::getAttributeId).collect(Collectors.toList()));
//studyIds, sampleIds, attributes.stream().map(a -> a.getAttributeId()).collect(Collectors.toList()));
return new ResponseEntity<>(result, HttpStatus.OK);

}
Expand Down Expand Up @@ -576,7 +574,7 @@ public ResponseEntity<List<ClinicalDataCountItem>> fetchCustomDataCounts(
List<ClinicalDataFilter> attributes = interceptedClinicalDataCountFilter.getAttributes();
StudyViewFilter studyViewFilter = interceptedClinicalDataCountFilter.getStudyViewFilter();
if (attributes.size() == 1) {
NewStudyViewFilterUtil.removeSelfCustomDataFromFilter(attributes.get(0).getAttributeId(), studyViewFilter);
NewStudyViewFilterUtil.removeClinicalDataFilter(attributes.getFirst().getAttributeId(), studyViewFilter.getCustomDataFilters());
}

List <SampleIdentifier> filteredSampleIdentifiers = studyViewColumnarService.getFilteredSamples(studyViewFilter).stream().map(sample -> studyViewFilterUtil.buildSampleIdentifier(sample.getCancerStudyIdentifier(), sample.getStableId())).toList();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public static StudyViewFilter removeSelfFromFilter(ClinicalDataBinCountFilter da
StudyViewFilter studyViewFilter = dataBinCountFilter.getStudyViewFilter();

if (attributes.size() == 1) {
NewStudyViewFilterUtil.removeSelfFromFilter(attributes.get(0).getAttributeId(), studyViewFilter);
NewStudyViewFilterUtil.removeClinicalDataFilter(attributes.getFirst().getAttributeId(), studyViewFilter.getClinicalDataFilters());
}

return studyViewFilter;
Expand All @@ -36,7 +36,7 @@ public static StudyViewFilter removeSelfCustomDataFromFilter(ClinicalDataBinCoun
StudyViewFilter studyViewFilter = dataBinCountFilter.getStudyViewFilter();

if (attributes.size() == 1) {
NewStudyViewFilterUtil.removeSelfCustomDataFromFilter(attributes.get(0).getAttributeId(), studyViewFilter);
NewStudyViewFilterUtil.removeClinicalDataFilter(attributes.getFirst().getAttributeId(), studyViewFilter.getCustomDataFilters());
}

return studyViewFilter;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
package org.cbioportal.web.columnar.util;


import org.cbioportal.web.parameter.StudyViewFilter;
import org.cbioportal.web.parameter.ClinicalDataFilter;

public class NewStudyViewFilterUtil {
import java.util.List;

public static void removeSelfFromFilter(String attributeId, StudyViewFilter studyViewFilter) {
if (studyViewFilter!= null && studyViewFilter.getClinicalDataFilters() != null) {
studyViewFilter.getClinicalDataFilters().removeIf(f -> f.getAttributeId().equals(attributeId));
}
}
public class NewStudyViewFilterUtil {

public static void removeSelfCustomDataFromFilter(String attributeId, StudyViewFilter studyViewFilter) {
if (studyViewFilter != null && studyViewFilter.getCustomDataFilters() != null) {
studyViewFilter.getCustomDataFilters().removeIf(f -> f.getAttributeId().equals(attributeId));
public static void removeClinicalDataFilter(String attributeId, List<ClinicalDataFilter> dataFilterList ) {
if (dataFilterList != null) {
dataFilterList.removeIf(f -> f.getAttributeId().equals(attributeId));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,14 @@
</select>

<!-- for /clinical-data-counts/fetch: returns per-attribute value counts (sample and patient attributes) mapped to ClinicalDataCountItem via ClinicalDataCountItemResultMap -->
<select id="getClinicalDataCounts" resultType="org.cbioportal.model.ClinicalDataCount">
<include refid="getClinicalDataCountsQuerySample" />
<select id="getClinicalDataCounts" resultMap="ClinicalDataCountItemResultMap">
<include refid="getClinicalDataCountsQuery">
<property name="type" value="sample"/>
</include>
UNION ALL
<include refid="getClinicalDataCountsQueryPatient" />
<include refid="getClinicalDataCountsQuery">
<property name="type" value="patient"/>
</include>
</select>

<!-- for /molecular-profile-sample-counts/fetch (returns GenomicDataCount) which will then be converted to clinicalDataCountItems -->
Expand Down Expand Up @@ -179,63 +183,63 @@
GROUP BY s.cancer_study_identifier, sl.stable_id, sl.name
</select>


<sql id="getClinicalDataCountsQuerySample">
<sql id="getClinicalDataCountsQuery">
(
WITH clinical_data_query AS (
SELECT
attribute_name as attributeId,
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="attribute_value"/>
</include> as value,
haynescd marked this conversation as resolved.
Show resolved Hide resolved
count(value) as count
attribute_name AS attributeId,
upper(attribute_value) AS value,
cast(count(*) AS INTEGER) as count
FROM clinical_data_derived
<where>
type='sample' AND
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'SAMPLE_ID_ONLY'"/>
type='${type}'
AND <!-- Table creation in clickhouse.sql has ensured no NA values but extra caution is always appreciated -->
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="value"/>
</include>
<if test="filteredAttributeValues != null and !filteredAttributeValues.isEmpty()">
AND UPPER(value) NOT IN
<foreach item="filteredAttributeValue" collection="filteredAttributeValues" open="(" separator="," close=")">
#{filteredAttributeValue}
</foreach>
</if>
!= 'NA'
AND
<choose>
<when test="'${type}' == 'sample'">
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'SAMPLE_ID_ONLY'"/>
</include>
</when>
<otherwise>
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'PATIENT_ID_ONLY'"/>
</include>
</otherwise>
</choose>
AND attribute_name IN
<foreach item="attributeId" collection="attributeIds" open="(" separator="," close=")">
#{attributeId}
</foreach>
</where>
GROUP BY attribute_name,
value
</sql>

<sql id="getClinicalDataCountsQueryPatient">
SELECT
attribute_name as attributeId,
<include refid="normalizeAttributeValue">
<property name="attribute_value" value="attribute_value"/>
</include> as value,
count(value) as count
FROM clinical_data_derived
GROUP BY attribute_name, value ),
clinical_data_sum AS (SELECT attributeId, sum(count) AS sum FROM clinical_data_query GROUP BY attributeId)

SELECT * FROM clinical_data_query
UNION ALL
SELECT attributeId,
'NA' AS value,
((
<choose>
<when test="'${type}' == 'sample'">
<include refid="getTotalSampleCount"/>
</when>
<otherwise>
<include refid="getTotalPatientCount"/>
</otherwise>
</choose>
) - clinical_data_sum.sum) AS count
FROM clinical_data_sum
<where>
type='patient' AND
<include refid="applyStudyViewFilter">
<property name="filter_type" value="'PATIENT_ID_ONLY'"/>
</include>
<if test="filteredAttributeValues != null and !filteredAttributeValues.isEmpty()">
AND UPPER(value) NOT IN
<foreach item="filteredAttributeValue" collection="filteredAttributeValues" open="(" separator="," close=")">
#{filteredAttributeValue}
</foreach>
</if>
AND attribute_name IN
<foreach item="attributeId" collection="attributeIds" open="(" separator="," close=")">
#{attributeId}
</foreach>
count > 0
</where>
GROUP BY attribute_name,
value
)
</sql>

<sql id="getTotalSampleCount">
SELECT count(distinct sample_unique_id) as count
FROM sample_derived
Expand Down Expand Up @@ -409,6 +413,15 @@
<result property="count" column="count"/>
</collection>
</resultMap>

<resultMap id="ClinicalDataCountItemResultMap" type="org.cbioportal.model.ClinicalDataCountItem">
<result property="attributeId" column="attributeId"/>
<collection property="counts" ofType="org.cbioportal.model.ClinicalDataCount">
<result property="attributeId" column="attributeId"/>
<result property="value" column="value"/>
<result property="count" column="count"/>
</collection>
</resultMap>

<sql id="getPatientIdsFromSampleIdFilters">
SELECT patient_unique_id
Expand Down
Loading
Loading