Skip to content

Commit

Permalink
Add back NA counts when corresponding samples are filtered out of study view (#11185)
Browse files Browse the repository at this point in the history

* Fix counting of clinical data NA when filtered out
* Add caching and cleanup
* Fix Sonar issues
  • Loading branch information
alisman authored Nov 14, 2024
1 parent 49d6952 commit 6e4d297
Show file tree
Hide file tree
Showing 8 changed files with 266 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,15 @@ public interface StudyViewRepository {

List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilterContext studyViewFilterContext);

List<ClinicalAttribute> getClinicalAttributes();

Map<String, ClinicalDataType> getClinicalAttributeDatatypeMap();

List<CaseListDataCount> getCaseListDataCountsPerStudy(StudyViewFilterContext studyViewFilterContext);

Map<String, Integer> getTotalProfiledCounts(StudyViewFilterContext studyViewFilterContext, String alterationType);

List<ClinicalAttribute> getClinicalAttributes();

/**
 * Returns the clinical attributes registered for the given studies.
 *
 * @param studyIds cancer study identifiers whose clinical attributes are requested
 * @return the clinical attributes found for those studies
 */
List<ClinicalAttribute> getClinicalAttributesForStudies(List<String> studyIds);

int getFilteredSamplesCount(StudyViewFilterContext studyViewFilterContext);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ public interface StudyViewMapper {

List<ClinicalAttribute> getClinicalAttributes();

/**
 * Looks up the clinical attributes registered for the given studies.
 * Presumably bound to the mapper XML statement with the same id — confirm against the mapper namespace.
 *
 * @param studyIds cancer study identifiers whose clinical attributes are requested
 * @return the clinical attributes found for those studies
 */
List<ClinicalAttribute> getClinicalAttributesForStudies(List<String> studyIds);

List<ClinicalData> getSampleClinicalDataFromStudyViewFilter(StudyViewFilterHelper studyViewFilterHelper, List<String> attributeIds);

List<ClinicalData> getPatientClinicalDataFromStudyViewFilter(StudyViewFilterHelper studyViewFilterHelper, List<String> attributeIds);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ public Map<String, ClinicalDataType> getClinicalAttributeDatatypeMap() {

return attributeDatatypeMap;
}

/**
 * Fetches the clinical attributes of the given studies by delegating to the mapper.
 *
 * @param studyIds cancer study identifiers whose clinical attributes are requested
 * @return whatever the mapper returns for those studies
 */
@Override
public List<ClinicalAttribute> getClinicalAttributesForStudies(List<String> studyIds) {
    final List<ClinicalAttribute> attributesForStudies = mapper.getClinicalAttributesForStudies(studyIds);
    return attributesForStudies;
}

@Override
public List<CaseListDataCount> getCaseListDataCountsPerStudy(StudyViewFilterContext studyViewFilterContext) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalAttribute;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
Expand All @@ -19,6 +20,7 @@
import org.cbioportal.service.StudyViewColumnarService;
import org.cbioportal.service.exception.StudyNotFoundException;
import org.cbioportal.service.treatment.TreatmentCountReportService;
import org.cbioportal.service.util.StudyViewColumnarServiceUtil;
import org.cbioportal.web.parameter.ClinicalDataType;
import org.cbioportal.web.parameter.CustomSampleIdentifier;
import org.cbioportal.web.parameter.GenericAssayDataBinFilter;
Expand Down Expand Up @@ -146,14 +148,51 @@ public List<AlterationCountByGene> getStructuralVariantGenes(StudyViewFilter stu
public Map<String, ClinicalDataType> getClinicalAttributeDatatypeMap(StudyViewFilter studyViewFilter) {
// studyViewFilter is not consulted here: the repository lookup takes no arguments.
return studyViewRepository.getClinicalAttributeDatatypeMap();
}

/**
 * Returns the clinical data counts for the requested attributes under the given filter.
 *
 * <p>An attribute can be absent from the repository result when none of the filtered
 * samples/patients carries data for it (e.g. the filtered samples happen to have no SEX data),
 * even though its inferred value for all of them is NA. When the result holds a different
 * number of items than were requested, the absent attributes are restored as "NA" counts
 * sized by the filtered sample or patient total. Counts sharing a value are then merged.
 *
 * @param studyViewFilter    filter describing the current study view selection
 * @param filteredAttributes IDs of the clinical attributes to count
 * @return one item per attribute, with counts merged per value
 */
@Cacheable(
    cacheResolver = "staticRepositoryCacheOneResolver",
    condition = "@cacheEnabledConfig.getEnabledClickhouse() && @studyViewFilterUtil.isUnfiltered(#studyViewFilter)"
)
@Override
public List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilter studyViewFilter, List<String> filteredAttributes) {
    // Build the filter context once and share it between all repository calls below.
    var context = createContext(studyViewFilter);

    var result = studyViewRepository.getClinicalDataCounts(context, filteredAttributes);

    // A size mismatch means at least one requested attribute produced no rows at all.
    if (result.size() != filteredAttributes.size()) {
        var involvedCancerStudies = studyViewFilter.getStudyIds();

        // Only attributes that were actually requested are candidates for restoration.
        var attributes = getClinicalAttributesForStudies(involvedCancerStudies)
            .stream()
            .filter(attribute -> filteredAttributes.contains(attribute.getAttrId()))
            .toList();

        Integer filteredSampleCount = studyViewRepository.getFilteredSamplesCount(context);
        Integer filteredPatientCount = studyViewRepository.getFilteredPatientCount(context);

        result = StudyViewColumnarServiceUtil.addClinicalDataCountsForMissingAttributes(
            result,
            attributes,
            filteredSampleCount,
            filteredPatientCount
        );
    }

    return StudyViewColumnarServiceUtil.mergeClinicalDataCounts(result);
}

/**
 * Returns the clinical attributes of the given studies as an unmodifiable list.
 *
 * <p>NOTE(review): getClinicalDataCounts calls this method directly on the same instance.
 * If caching is applied through Spring's default proxy mode, such self-invocations would
 * not pass through the cache interceptor — confirm the cache is actually hit on that path.
 *
 * @param studyIds cancer study identifiers whose clinical attributes are requested
 * @return an unmodifiable copy of the repository result
 */
@Cacheable(
    cacheResolver = "staticRepositoryCacheOneResolver",
    condition = "@cacheEnabledConfig.getEnabledClickhouse()"
)
public List<ClinicalAttribute> getClinicalAttributesForStudies(List<String> studyIds) {
    List<ClinicalAttribute> fromRepository = studyViewRepository.getClinicalAttributesForStudies(studyIds);
    // Stream.toList() hands back an unmodifiable copy rather than the repository's own list.
    return fromRepository.stream().toList();
}

@Cacheable(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.cbioportal.service.util;

import org.cbioportal.model.ClinicalAttribute;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Static helpers for post-processing clinical data counts: merging counts that share a value
 * and restoring "NA" counts for attributes that are absent from a result set.
 */
public class StudyViewColumnarServiceUtil {

    private StudyViewColumnarServiceUtil() {}

    /**
     * Collapses, within each item, all counts that share the same value into one count whose
     * count is the sum of the originals.
     *
     * <p>The given items are modified in place (their count lists are replaced) and the same
     * list instance is returned. Merged counts keep the order in which each value first
     * appeared in the item's original count list.
     *
     * @param items items whose counts should be merged
     * @return the same {@code items} list, after merging
     */
    public static List<ClinicalDataCountItem> mergeClinicalDataCounts(
        List<ClinicalDataCountItem> items
    ) {
        items.forEach(item -> {
            // LinkedHashMap keeps first-appearance order so the output is deterministic.
            Map<String, Integer> totalsByValue = item.getCounts().stream()
                .collect(Collectors.groupingBy(
                    ClinicalDataCount::getValue,
                    LinkedHashMap::new,
                    Collectors.summingInt(ClinicalDataCount::getCount)
                ));
            List<ClinicalDataCount> merged = totalsByValue.entrySet().stream()
                .map(entry -> newCount(item.getAttributeId(), entry.getKey(), entry.getValue()))
                .toList();
            item.setCounts(merged);
        });
        return items;
    }

    /**
     * Returns a copy of {@code counts} extended with a single "NA" item for every attribute in
     * {@code attributes} that has no item in {@code counts}.
     *
     * <p>Each attribute ID is added at most once, even when {@code attributes} lists it several
     * times (for example once per study); the first occurrence decides whether the patient or
     * the sample total is used. An attribute whose patient flag is {@code null} is treated as a
     * sample-level attribute. The input list is not modified.
     *
     * @param counts               existing count items
     * @param attributes           attributes that are expected to be represented
     * @param filteredSampleCount  "NA" count to use for sample-level attributes
     * @param filteredPatientCount "NA" count to use for patient-level attributes
     * @return a new list holding the existing items followed by the restored "NA" items
     */
    public static List<ClinicalDataCountItem> addClinicalDataCountsForMissingAttributes(
        List<ClinicalDataCountItem> counts,
        List<ClinicalAttribute> attributes,
        Integer filteredSampleCount,
        Integer filteredPatientCount
    ) {
        // IDs that already have an item; grows as NA items are added so duplicates are skipped.
        Set<String> coveredAttributeIds = new HashSet<>();
        for (ClinicalDataCountItem item : counts) {
            coveredAttributeIds.add(item.getAttributeId());
        }

        List<ClinicalDataCountItem> result = new ArrayList<>(counts);

        for (ClinicalAttribute attr : attributes) {
            // add() returns false when the ID is already covered.
            if (!coveredAttributeIds.add(attr.getAttrId())) {
                continue;
            }

            Integer count = Boolean.TRUE.equals(attr.getPatientAttribute())
                ? filteredPatientCount
                : filteredSampleCount;

            ClinicalDataCountItem newItem = new ClinicalDataCountItem();
            newItem.setAttributeId(attr.getAttrId());
            newItem.setCounts(List.of(newCount(attr.getAttrId(), "NA", count)));
            result.add(newItem);
        }

        return result;
    }

    /** Builds a count object for the given attribute, value and count. */
    private static ClinicalDataCount newCount(String attributeId, String value, Integer count) {
        ClinicalDataCount dataCount = new ClinicalDataCount();
        dataCount.setAttributeId(attributeId);
        dataCount.setValue(value);
        dataCount.setCount(count);
        return dataCount;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,8 @@ public ResponseEntity<List<ClinicalDataCountItem>> fetchClinicalDataCounts(
if (attributes.size() == 1) {
NewStudyViewFilterUtil.removeClinicalDataFilter(attributes.getFirst().getAttributeId(), studyViewFilter.getClinicalDataFilters());
}
List<ClinicalDataCountItem> result = studyViewColumnarService.getClinicalDataCounts(studyViewFilter,
List<ClinicalDataCountItem> result = studyViewColumnarService.getClinicalDataCounts(
studyViewFilter,
attributes.stream().map(ClinicalDataFilter::getAttributeId).collect(Collectors.toList()));
return new ResponseEntity<>(result, HttpStatus.OK);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,24 @@
JOIN cancer_study cs on cs.cancer_study_id = cam.cancer_study_id
</select>

<!-- Returns attribute id, datatype, patient/sample flag and study identifier for every
     clinical_attribute_meta row whose study identifier is one of the given studyIds.
     An attribute defined in several of the studies yields one row per study.
     NOTE(review): an empty studyIds collection would presumably leave a dangling "IN" —
     confirm callers never pass one. -->
<select id="getClinicalAttributesForStudies" resultType="org.cbioportal.model.ClinicalAttribute">
SELECT
attr_id as attrId,
datatype as dataType,
patient_attribute as patientAttribute,
cs.cancer_study_identifier cancerStudyIdentifier
FROM clinical_attribute_meta cammo
JOIN cancer_study cs on cs.cancer_study_id = cammo.cancer_study_id
<where>
cancerStudyIdentifier IN
<foreach item="studyId" collection="studyIds" open="(" separator="," close=")">
#{studyId}
</foreach>
</where>

</select>


<!-- Grab Total Profiled Counts. Currently, this query filters out all samples associated with a Gene Panel WES before
doing a join on gene_panel_to_gene_derived table. This is to prevent unnecessary stress on the db. For every sample
associated with a gene panel WES we multiply that row by the number of genes in the gene table. This could be greater
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package org.cbioportal.service.util;

import org.cbioportal.model.ClinicalAttribute;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
import org.junit.Assert;
import org.junit.Test;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

import static org.junit.Assert.*;

/**
 * Unit tests for {@link StudyViewColumnarServiceUtil}.
 */
public class StudyViewColumnarServiceUtilTest {

    @Test
    public void testMergeClinicalDataCounts() {
        // Five counts for one attribute: value1 twice (1 + 2), value3 twice (6 + 4) and
        // value2 once (4). Counts sharing a value must be summed into a single count.
        ClinicalDataCountItem item = new ClinicalDataCountItem();
        item.setAttributeId("attr1");
        item.setCounts(Arrays.asList(
            newCount("attr1", "value1", 1),
            newCount("attr1", "value1", 2),
            newCount("attr1", "value3", 6),
            newCount("attr1", "value3", 4),
            newCount("attr1", "value2", 4)
        ));

        List<ClinicalDataCountItem> mergedItems =
            StudyViewColumnarServiceUtil.mergeClinicalDataCounts(Collections.singletonList(item));

        Assert.assertEquals(3, mergedTotal(mergedItems, "value1"));
        Assert.assertEquals(4, mergedTotal(mergedItems, "value2"));
        Assert.assertEquals(10, mergedTotal(mergedItems, "value3"));
    }

    @Test
    public void testAddClinicalDataCountsForMissingAttributes() {
        // The incoming result set only covers attr1.
        ClinicalDataCountItem existingItem = new ClinicalDataCountItem();
        existingItem.setAttributeId("attr1");
        existingItem.setCounts(Collections.singletonList(newCount("attr1", "value1", 5)));
        List<ClinicalDataCountItem> counts = Collections.singletonList(existingItem);

        // attr2 (sample-level) and attr3 (patient-level) are absent from the result set and
        // must be restored as NA with the sample (10) and patient (20) totals respectively.
        List<ClinicalAttribute> attributes = Arrays.asList(
            newAttribute("attr2", false),
            newAttribute("attr3", true)
        );

        List<ClinicalDataCountItem> result = StudyViewColumnarServiceUtil.addClinicalDataCountsForMissingAttributes(
            counts, attributes, 10, 20
        );

        assertEquals(3, result.size());
        assertRestoredAsNa(result, "attr2", 10);
        assertRestoredAsNa(result, "attr3", 20);
    }

    /** Asserts that the result holds an item for the attribute with exactly one NA count of the expected size. */
    private static void assertRestoredAsNa(List<ClinicalDataCountItem> result, String attributeId, int expectedCount) {
        Optional<ClinicalDataCountItem> restored = result.stream()
            .filter(candidate -> candidate.getAttributeId().equals(attributeId))
            .findFirst();

        assertTrue(restored.isPresent());
        List<ClinicalDataCount> restoredCounts = restored.get().getCounts();
        assertEquals(1, restoredCounts.size());
        assertEquals("NA", restoredCounts.get(0).getValue());
        assertEquals(expectedCount, restoredCounts.get(0).getCount().intValue());
    }

    /** Returns the merged count of the first item for the given value; fails if the value is absent. */
    private static int mergedTotal(List<ClinicalDataCountItem> mergedItems, String value) {
        Optional<ClinicalDataCount> match = mergedItems.get(0).getCounts().stream()
            .filter(candidate -> candidate.getValue().equals(value))
            .findFirst();
        return match.get().getCount().intValue();
    }

    /** Builds a count for the given attribute, value and count. */
    private static ClinicalDataCount newCount(String attributeId, String value, int count) {
        ClinicalDataCount dataCount = new ClinicalDataCount();
        dataCount.setAttributeId(attributeId);
        dataCount.setValue(value);
        dataCount.setCount(count);
        return dataCount;
    }

    /** Builds a clinical attribute with the given ID and patient/sample level. */
    private static ClinicalAttribute newAttribute(String attributeId, boolean patientLevel) {
        ClinicalAttribute attribute = new ClinicalAttribute();
        attribute.setAttrId(attributeId);
        attribute.setPatientAttribute(patientLevel);
        return attribute;
    }
}

0 comments on commit 6e4d297

Please sign in to comment.