From 6e4d297b91ee73145bc582780eeb9c3bd83f1459 Mon Sep 17 00:00:00 2001 From: alisman Date: Thu, 14 Nov 2024 13:24:58 -0500 Subject: [PATCH] Add back NA counts when corresponding samples are filtered out of study view (#11185) * Fix counting of clinical data na when filtered out * Add caching and cleanup * Fix Sonar issues --- .../persistence/StudyViewRepository.java | 6 +- .../mybatisclickhouse/StudyViewMapper.java | 2 + .../StudyViewMyBatisRepository.java | 5 + .../impl/StudyViewColumnarServiceImpl.java | 43 +++++- .../util/StudyViewColumnarServiceUtil.java | 63 +++++++++ .../StudyViewColumnStoreController.java | 3 +- .../mybatisclickhouse/StudyViewMapper.xml | 18 +++ .../StudyViewColumnarServiceUtilTest.java | 131 ++++++++++++++++++ 8 files changed, 266 insertions(+), 5 deletions(-) create mode 100644 src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java create mode 100644 src/test/java/org/cbioportal/service/util/StudyViewColumnarServiceUtilTest.java diff --git a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java index 5c3ee61a9c3..32ae1444d15 100644 --- a/src/main/java/org/cbioportal/persistence/StudyViewRepository.java +++ b/src/main/java/org/cbioportal/persistence/StudyViewRepository.java @@ -44,13 +44,15 @@ public interface StudyViewRepository { List getMolecularProfileSampleCounts(StudyViewFilterContext studyViewFilterContext); - List getClinicalAttributes(); - Map getClinicalAttributeDatatypeMap(); List getCaseListDataCountsPerStudy(StudyViewFilterContext studyViewFilterContext); Map getTotalProfiledCounts(StudyViewFilterContext studyViewFilterContext, String alterationType); + + List getClinicalAttributes(); + + List getClinicalAttributesForStudies(List studyIds); int getFilteredSamplesCount(StudyViewFilterContext studyViewFilterContext); diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java index 97419bcf06b..fa3a92fd898 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.java @@ -47,6 +47,8 @@ public interface StudyViewMapper { List getClinicalAttributes(); + List getClinicalAttributesForStudies(List studyIds); + List getSampleClinicalDataFromStudyViewFilter(StudyViewFilterHelper studyViewFilterHelper, List attributeIds); List getPatientClinicalDataFromStudyViewFilter(StudyViewFilterHelper studyViewFilterHelper, List attributeIds); diff --git a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java index 74affd393db..46f75d53891 100644 --- a/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java +++ b/src/main/java/org/cbioportal/persistence/mybatisclickhouse/StudyViewMyBatisRepository.java @@ -144,6 +144,11 @@ public Map getClinicalAttributeDatatypeMap() { return attributeDatatypeMap; } + + @Override + public List getClinicalAttributesForStudies(List studyIds) { + return mapper.getClinicalAttributesForStudies(studyIds); + } @Override public List getCaseListDataCountsPerStudy(StudyViewFilterContext studyViewFilterContext) { diff --git a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java index 19713980486..54cdf5e130a 100644 --- a/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java +++ b/src/main/java/org/cbioportal/service/impl/StudyViewColumnarServiceImpl.java @@ -2,6 +2,7 @@ import org.cbioportal.model.AlterationCountByGene; import org.cbioportal.model.CaseListDataCount; +import org.cbioportal.model.ClinicalAttribute; import org.cbioportal.model.ClinicalData; import org.cbioportal.model.ClinicalDataCount; import org.cbioportal.model.ClinicalDataCountItem; @@ -19,6 +20,7 @@ import org.cbioportal.service.StudyViewColumnarService; import org.cbioportal.service.exception.StudyNotFoundException; import org.cbioportal.service.treatment.TreatmentCountReportService; +import org.cbioportal.service.util.StudyViewColumnarServiceUtil; import org.cbioportal.web.parameter.ClinicalDataType; import org.cbioportal.web.parameter.CustomSampleIdentifier; import org.cbioportal.web.parameter.GenericAssayDataBinFilter; @@ -146,14 +148,51 @@ public List getStructuralVariantGenes(StudyViewFilter stu public Map getClinicalAttributeDatatypeMap(StudyViewFilter studyViewFilter) { return studyViewRepository.getClinicalAttributeDatatypeMap(); } - + @Cacheable( cacheResolver = "staticRepositoryCacheOneResolver", condition = "@cacheEnabledConfig.getEnabledClickhouse() && @studyViewFilterUtil.isUnfiltered(#studyViewFilter)" ) @Override public List getClinicalDataCounts(StudyViewFilter studyViewFilter, List filteredAttributes) { - return studyViewRepository.getClinicalDataCounts(createContext(studyViewFilter), filteredAttributes); + + var context = createContext(studyViewFilter); + + var involvedCancerStudies = studyViewFilter.getStudyIds(); + + var result = studyViewRepository.getClinicalDataCounts(context, filteredAttributes); + + // attributes may be missing in result set because they have been filtered out + // e.g. if the filtered samples happen to have no SEX data, they will not appear in the list + // even though the inferred value of those attributes is NA + // the following code restores these counts for missing attributes + if (result.size() != filteredAttributes.size()) { + var attributes = getClinicalAttributesForStudies(involvedCancerStudies) + .stream() + .filter(attribute -> filteredAttributes.contains(attribute.getAttrId())) + .toList(); + + Integer filteredSampleCount = studyViewRepository.getFilteredSamplesCount(createContext(studyViewFilter)); + Integer filteredPatientCount = studyViewRepository.getFilteredPatientCount(createContext(studyViewFilter)); + + result = StudyViewColumnarServiceUtil.addClinicalDataCountsForMissingAttributes( + result, + attributes, + filteredSampleCount, + filteredPatientCount + ); + } + + return StudyViewColumnarServiceUtil.mergeClinicalDataCounts(result); + + } + + @Cacheable( + cacheResolver = "staticRepositoryCacheOneResolver", + condition = "@cacheEnabledConfig.getEnabledClickhouse()" + ) + public List getClinicalAttributesForStudies(List studyIds) { + return studyViewRepository.getClinicalAttributesForStudies(studyIds).stream().toList(); } @Cacheable( diff --git a/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java b/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java new file mode 100644 index 00000000000..4975cb9730b --- /dev/null +++ b/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java @@ -0,0 +1,63 @@ +package org.cbioportal.service.util; + +import org.cbioportal.model.ClinicalAttribute; +import org.cbioportal.model.ClinicalDataCount; +import org.cbioportal.model.ClinicalDataCountItem; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class StudyViewColumnarServiceUtil { + + private StudyViewColumnarServiceUtil() {} + + public static List mergeClinicalDataCounts( + List items + ) { + items.forEach(attr -> { + Map> countsPerType = attr.getCounts().stream() + .collect(Collectors.groupingBy(ClinicalDataCount::getValue)); + List res = countsPerType.entrySet().stream().map(entry -> { + ClinicalDataCount mergedCount = new ClinicalDataCount(); + mergedCount.setAttributeId(attr.getAttributeId()); + mergedCount.setValue(entry.getKey()); + mergedCount.setCount(entry.getValue().stream().mapToInt(ClinicalDataCount::getCount).sum()); + return mergedCount; + }).toList(); + attr.setCounts(res); + }); + return items; + } + + public static List addClinicalDataCountsForMissingAttributes( + List counts, + List attributes, + Integer filteredSampleCount, + Integer filteredPatientCount + ) { + Map map = counts.stream() + .collect(Collectors.toMap(ClinicalDataCountItem::getAttributeId, item -> item)); + + List result = new ArrayList<>(counts); + + attributes.forEach(attr -> { + Integer count = attr.getPatientAttribute().booleanValue() ? filteredPatientCount : filteredSampleCount; + + if (!map.containsKey(attr.getAttrId())) { + ClinicalDataCountItem newItem = new ClinicalDataCountItem(); + newItem.setAttributeId(attr.getAttrId()); + ClinicalDataCount countObj = new ClinicalDataCount(); + countObj.setCount(count); + countObj.setValue("NA"); + countObj.setAttributeId(attr.getAttrId()); + newItem.setCounts(List.of(countObj)); + result.add(newItem); + } + }); + + return result; + } + + +} \ No newline at end of file diff --git a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java index bd360e50f0a..0cef9299e37 100644 --- a/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java +++ b/src/main/java/org/cbioportal/web/columnar/StudyViewColumnStoreController.java @@ -210,7 +210,8 @@ public ResponseEntity> fetchClinicalDataCounts( if (attributes.size() == 1) { NewStudyViewFilterUtil.removeClinicalDataFilter(attributes.getFirst().getAttributeId(), studyViewFilter.getClinicalDataFilters()); } - List result = studyViewColumnarService.getClinicalDataCounts(studyViewFilter, + List result = studyViewColumnarService.getClinicalDataCounts( + studyViewFilter, attributes.stream().map(ClinicalDataFilter::getAttributeId).collect(Collectors.toList())); return new ResponseEntity<>(result, HttpStatus.OK); diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index cd95eb0a2fb..cfab9988d2f 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -443,6 +443,24 @@ JOIN cancer_study cs on cs.cancer_study_id = cam.cancer_study_id + + +