Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add back NA counts when corresponding samples are filtered out of study view #11185

Merged
merged 4 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,15 @@ public interface StudyViewRepository {

/**
 * Molecular profile sample counts for the given filter context.
 * NOTE(review): exact counting semantics are defined by the implementation, not visible here.
 */
List<GenomicDataCount> getMolecularProfileSampleCounts(StudyViewFilterContext studyViewFilterContext);

/**
 * Map keyed by a String (presumably the clinical attribute id - confirm against the
 * implementation) to the {@link ClinicalDataType} of that attribute.
 */
Map<String, ClinicalDataType> getClinicalAttributeDatatypeMap();

/**
 * Case list data counts for the given filter context.
 * NOTE(review): the "per study" grouping is taken from the method name only - confirm.
 */
List<CaseListDataCount> getCaseListDataCountsPerStudy(StudyViewFilterContext studyViewFilterContext);

/**
 * Counts keyed by a String for the given alteration type.
 * NOTE(review): the meaning of the key is not visible from this declaration - confirm.
 */
Map<String, Integer> getTotalProfiledCounts(StudyViewFilterContext studyViewFilterContext, String alterationType);

// Declared exactly once: the earlier copy of this declaration (a leftover of the old
// position in the interface) was removed, since a duplicate signature does not compile.
List<ClinicalAttribute> getClinicalAttributes();

/**
 * Clinical attributes restricted to the given studies.
 *
 * @param studyIds study identifiers to restrict the lookup to
 */
List<ClinicalAttribute> getClinicalAttributesForStudies(List<String> studyIds);

/**
 * Sample count for the given filter context; the service uses it as the "NA" count
 * of sample-level attributes that are missing from a clinical data count result.
 */
int getFilteredSamplesCount(StudyViewFilterContext studyViewFilterContext);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ public interface StudyViewMapper {

// Returns clinical attributes; takes no filter argument.
List<ClinicalAttribute> getClinicalAttributes();

// Backed by the mapper statement with id "getClinicalAttributesForStudies", which
// filters on the given study identifiers.
List<ClinicalAttribute> getClinicalAttributesForStudies(List<String> studyIds);

// NOTE(review): presumably sample-level clinical data for the requested attribute ids,
// restricted by the study view filter - confirm against the mapper XML.
List<ClinicalData> getSampleClinicalDataFromStudyViewFilter(StudyViewFilterHelper studyViewFilterHelper, List<String> attributeIds);

// NOTE(review): presumably the patient-level counterpart of the method above - confirm
// against the mapper XML.
List<ClinicalData> getPatientClinicalDataFromStudyViewFilter(StudyViewFilterHelper studyViewFilterHelper, List<String> attributeIds);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ public Map<String, ClinicalDataType> getClinicalAttributeDatatypeMap() {

return attributeDatatypeMap;
}

/**
 * Looks up the clinical attributes of the given studies by delegating to the mapper.
 *
 * @param studyIds study identifiers handed to the mapper unchanged
 * @return whatever the mapper returns for those studies
 */
@Override
public List<ClinicalAttribute> getClinicalAttributesForStudies(List<String> studyIds) {
    List<ClinicalAttribute> attributesOfStudies = mapper.getClinicalAttributesForStudies(studyIds);
    return attributesOfStudies;
}

@Override
public List<CaseListDataCount> getCaseListDataCountsPerStudy(StudyViewFilterContext studyViewFilterContext) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.cbioportal.model.AlterationCountByGene;
import org.cbioportal.model.CaseListDataCount;
import org.cbioportal.model.ClinicalAttribute;
import org.cbioportal.model.ClinicalData;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
Expand All @@ -19,6 +20,7 @@
import org.cbioportal.service.StudyViewColumnarService;
import org.cbioportal.service.exception.StudyNotFoundException;
import org.cbioportal.service.treatment.TreatmentCountReportService;
import org.cbioportal.service.util.StudyViewColumnarServiceUtil;
import org.cbioportal.web.parameter.ClinicalDataType;
import org.cbioportal.web.parameter.CustomSampleIdentifier;
import org.cbioportal.web.parameter.GenericAssayDataBinFilter;
Expand Down Expand Up @@ -146,14 +148,51 @@ public List<AlterationCountByGene> getStructuralVariantGenes(StudyViewFilter stu
public Map<String, ClinicalDataType> getClinicalAttributeDatatypeMap(StudyViewFilter studyViewFilter) {
    // The repository lookup takes no arguments: studyViewFilter is accepted but never read here.
    Map<String, ClinicalDataType> datatypeByAttribute = studyViewRepository.getClinicalAttributeDatatypeMap();
    return datatypeByAttribute;
}

/**
 * Returns clinical data counts for the requested attributes under the given filter.
 *
 * <p>Attributes can be absent from the repository result because the filtered samples
 * carry no data for them (e.g. none of the remaining samples has a SEX value). The
 * inferred value for such an attribute is "NA" for every filtered sample/patient, so
 * those counts are added back before the per-value counts are merged.
 *
 * @param studyViewFilter    filter describing the current study view selection
 * @param filteredAttributes ids of the clinical attributes to count
 * @return one item per attribute, with counts merged per value
 */
@Cacheable(
    cacheResolver = "staticRepositoryCacheOneResolver",
    condition = "@cacheEnabledConfig.getEnabledClickhouse() && @studyViewFilterUtil.isUnfiltered(#studyViewFilter)"
)
@Override
public List<ClinicalDataCountItem> getClinicalDataCounts(StudyViewFilter studyViewFilter, List<String> filteredAttributes) {
    // Built once and reused for every repository call below.
    var context = createContext(studyViewFilter);

    var result = studyViewRepository.getClinicalDataCounts(context, filteredAttributes);

    // A size mismatch means at least one requested attribute produced no rows at all;
    // restore it as an all-"NA" item.
    if (result.size() != filteredAttributes.size()) {
        // NOTE(review): getStudyIds() is passed through as-is - confirm it cannot be
        // null or empty for filters that are defined by sample identifiers instead.
        var involvedCancerStudies = studyViewFilter.getStudyIds();

        var attributes = getClinicalAttributesForStudies(involvedCancerStudies)
            .stream()
            .filter(attribute -> filteredAttributes.contains(attribute.getAttrId()))
            .toList();

        Integer filteredSampleCount = studyViewRepository.getFilteredSamplesCount(context);
        Integer filteredPatientCount = studyViewRepository.getFilteredPatientCount(context);

        result = StudyViewColumnarServiceUtil.addClinicalDataCountsForMissingAttributes(
            result,
            attributes,
            filteredSampleCount,
            filteredPatientCount
        );
    }

    return StudyViewColumnarServiceUtil.mergeClinicalDataCounts(result);
}

/**
 * Fetches the clinical attributes of the given studies from the repository and returns
 * them as an unmodifiable list.
 *
 * <p>NOTE(review): this method is also called unqualified from within this class. If
 * caching here relies on Spring's default proxy-based interception, such internal calls
 * do not pass through the proxy and would not be cached - confirm.
 *
 * @param studyIds study identifiers handed to the repository unchanged
 */
@Cacheable(
    cacheResolver = "staticRepositoryCacheOneResolver",
    condition = "@cacheEnabledConfig.getEnabledClickhouse()"
)
public List<ClinicalAttribute> getClinicalAttributesForStudies(List<String> studyIds) {
    List<ClinicalAttribute> fromRepository = studyViewRepository.getClinicalAttributesForStudies(studyIds);
    // Stream.toList() yields an unmodifiable copy of the repository result.
    return fromRepository.stream().toList();
}

@Cacheable(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.cbioportal.service.util;

import org.cbioportal.model.ClinicalAttribute;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Static helpers for post-processing lists of {@link ClinicalDataCountItem}.
 */
public class StudyViewColumnarServiceUtil {

    private StudyViewColumnarServiceUtil() {}

    /**
     * Collapses, within each item, all counts that share the same value into a single
     * count whose total is the sum of the originals.
     *
     * <p>The items are modified in place (their count list is replaced) and the same
     * list instance is returned. Merged counts keep the order in which each value was
     * first encountered.
     *
     * @param items items whose counts should be merged per value
     * @return {@code items}, after merging
     */
    public static List<ClinicalDataCountItem> mergeClinicalDataCounts(
        List<ClinicalDataCountItem> items
    ) {
        items.forEach(item -> {
            // LinkedHashMap keeps first-seen value order, so the output is deterministic.
            Map<String, Integer> totalPerValue = item.getCounts().stream()
                .collect(Collectors.groupingBy(
                    ClinicalDataCount::getValue,
                    LinkedHashMap::new,
                    Collectors.summingInt(ClinicalDataCount::getCount)
                ));

            List<ClinicalDataCount> merged = totalPerValue.entrySet().stream()
                .map(entry -> newCount(item.getAttributeId(), entry.getKey(), entry.getValue()))
                .toList();

            item.setCounts(merged);
        });
        return items;
    }

    /**
     * Returns a copy of {@code counts} extended with one "NA"-only item for every
     * attribute in {@code attributes} that has no item yet.
     *
     * <p>The "NA" count is the filtered patient count for patient-level attributes and
     * the filtered sample count otherwise. An attribute id that occurs more than once in
     * {@code attributes} (e.g. the same attribute defined in several studies) is added
     * only once; the first occurrence decides whether it is treated as patient-level.
     *
     * @param counts               existing count items; not modified
     * @param attributes           attributes that are expected to be present in the result
     * @param filteredSampleCount  "NA" count to use for sample-level attributes
     * @param filteredPatientCount "NA" count to use for patient-level attributes
     * @return a new list holding the existing items followed by the added "NA" items
     */
    public static List<ClinicalDataCountItem> addClinicalDataCountsForMissingAttributes(
        List<ClinicalDataCountItem> counts,
        List<ClinicalAttribute> attributes,
        Integer filteredSampleCount,
        Integer filteredPatientCount
    ) {
        // Ids that already have an item; grows as "NA" items are added so that a
        // repeated attribute id cannot produce a second item.
        Set<String> coveredAttributeIds = counts.stream()
            .map(ClinicalDataCountItem::getAttributeId)
            .collect(Collectors.toCollection(HashSet::new));

        List<ClinicalDataCountItem> result = new ArrayList<>(counts);

        for (ClinicalAttribute attribute : attributes) {
            String attributeId = attribute.getAttrId();
            if (!coveredAttributeIds.add(attributeId)) {
                continue;
            }

            Integer naCount = attribute.getPatientAttribute().booleanValue()
                ? filteredPatientCount
                : filteredSampleCount;

            ClinicalDataCountItem naItem = new ClinicalDataCountItem();
            naItem.setAttributeId(attributeId);
            naItem.setCounts(List.of(newCount(attributeId, "NA", naCount)));
            result.add(naItem);
        }

        return result;
    }

    // Builds a single count entry for the given attribute id, value and count.
    private static ClinicalDataCount newCount(String attributeId, String value, Integer count) {
        ClinicalDataCount dataCount = new ClinicalDataCount();
        dataCount.setAttributeId(attributeId);
        dataCount.setValue(value);
        dataCount.setCount(count);
        return dataCount;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,8 @@ public ResponseEntity<List<ClinicalDataCountItem>> fetchClinicalDataCounts(
if (attributes.size() == 1) {
NewStudyViewFilterUtil.removeClinicalDataFilter(attributes.getFirst().getAttributeId(), studyViewFilter.getClinicalDataFilters());
}
List<ClinicalDataCountItem> result = studyViewColumnarService.getClinicalDataCounts(studyViewFilter,
List<ClinicalDataCountItem> result = studyViewColumnarService.getClinicalDataCounts(
studyViewFilter,
attributes.stream().map(ClinicalDataFilter::getAttributeId).collect(Collectors.toList()));
return new ResponseEntity<>(result, HttpStatus.OK);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,24 @@
JOIN cancer_study cs on cs.cancer_study_id = cam.cancer_study_id
</select>

<!-- Clinical attribute metadata (id, datatype, patient/sample level, owning study) for the
     given study identifiers. A null or empty studyIds yields no rows instead of a malformed
     "IN" clause. The filter uses the real column rather than the SELECT alias. -->
<select id="getClinicalAttributesForStudies" resultType="org.cbioportal.model.ClinicalAttribute">
    SELECT
        attr_id as attrId,
        datatype as dataType,
        patient_attribute as patientAttribute,
        cs.cancer_study_identifier cancerStudyIdentifier
    FROM clinical_attribute_meta cammo
    JOIN cancer_study cs on cs.cancer_study_id = cammo.cancer_study_id
    <where>
        <choose>
            <when test="studyIds != null and !studyIds.isEmpty()">
                cs.cancer_study_identifier IN
                <foreach item="studyId" collection="studyIds" open="(" separator="," close=")">
                    #{studyId}
                </foreach>
            </when>
            <otherwise>
                1 = 0
            </otherwise>
        </choose>
    </where>
</select>


<!-- Grab Total Profiled Counts. Currently, this query filters out all samples associated with a Gene Panel WES before
doing a join on gene_panel_to_gene_derived table. This is to prevent unnecessary stress on the db. For every sample
associated with a gene panel WES we multiply that row by the number of genes in the gene table. This could be greater
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package org.cbioportal.service.util;

import org.cbioportal.model.ClinicalAttribute;
import org.cbioportal.model.ClinicalDataCount;
import org.cbioportal.model.ClinicalDataCountItem;
import org.junit.Assert;
import org.junit.Test;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

import static org.junit.Assert.*;

public class StudyViewColumnarServiceUtilTest {

@Test
public void testMergeClinicalDataCounts() {

// first two counts are for same value (value1) and so should be
// merged

ClinicalDataCount count1 = new ClinicalDataCount();
count1.setAttributeId("attr1");
count1.setValue("value1");
count1.setCount(1);

ClinicalDataCount count2 = new ClinicalDataCount();
count2.setAttributeId("attr1");
count2.setValue("value1");
count2.setCount(2);

ClinicalDataCount count3 = new ClinicalDataCount();
count3.setAttributeId("attr1");
count3.setValue("value3");
count3.setCount(6);

ClinicalDataCount count4 = new ClinicalDataCount();
count4.setAttributeId("attr1");
count4.setValue("value3");
count4.setCount(4);

ClinicalDataCount count5 = new ClinicalDataCount();
count5.setAttributeId("attr1");
count5.setValue("value2");
count5.setCount(4);

ClinicalDataCountItem item = new ClinicalDataCountItem();
item.setAttributeId("attr1");
item.setCounts(Arrays.asList(count1, count2, count3, count4, count5));

List<ClinicalDataCountItem> items = Collections.singletonList(item);

// Call the method under test
List<ClinicalDataCountItem> mergedItems = StudyViewColumnarServiceUtil.mergeClinicalDataCounts(items);

// it merged three count items to 2
Optional<ClinicalDataCount> mergedCount=mergedItems.get(0).getCounts().stream()
.filter(count->count.getValue().equals("value1")).findFirst();
Assert.assertEquals(3, mergedCount.get().getCount().intValue());

Optional<ClinicalDataCount> mergedCount2=mergedItems.get(0).getCounts().stream()
.filter(count->count.getValue().equals("value2")).findFirst();
Assert.assertEquals(4, mergedCount2.get().getCount().intValue());

Optional<ClinicalDataCount> mergedCount3=mergedItems.get(0).getCounts().stream()
.filter(count->count.getValue().equals("value3")).findFirst();
Assert.assertEquals(10, mergedCount3.get().getCount().intValue());

}


@Test
public void testAddClinicalDataCountsForMissingAttributes() {
ClinicalDataCountItem existingItem = new ClinicalDataCountItem();
existingItem.setAttributeId("attr1");
ClinicalDataCount existingCount = new ClinicalDataCount();
existingCount.setCount(5);
existingCount.setValue("value1");
existingCount.setAttributeId("attr1");
existingItem.setCounts(Collections.singletonList(existingCount));

List<ClinicalDataCountItem> counts = Collections.singletonList(existingItem);

// we're gonna create two attributes which will not be represented in the passed result set
// test whether addClinicalDataCountsForMissingAttributes restores them

ClinicalAttribute missingAttributeSample = new ClinicalAttribute();
missingAttributeSample.setAttrId("attr2");
missingAttributeSample.setPatientAttribute(false);

ClinicalAttribute missingAttributePatient = new ClinicalAttribute();
missingAttributePatient.setAttrId("attr3");
missingAttributePatient.setPatientAttribute(true);

List<ClinicalAttribute> attributes = Arrays.asList(missingAttributeSample, missingAttributePatient);

List<ClinicalDataCountItem> result = StudyViewColumnarServiceUtil.addClinicalDataCountsForMissingAttributes(
counts, attributes, 10, 20
);

assertEquals(3, result.size());

Optional<ClinicalDataCountItem> addedItemSample = result.stream()
.filter(item -> item.getAttributeId().equals("attr2"))
.findFirst();

assertTrue(addedItemSample.isPresent());
assertEquals(1, addedItemSample.get().getCounts().size());
assertEquals("NA", addedItemSample.get().getCounts().get(0).getValue());
assertEquals(10, addedItemSample.get().getCounts().get(0).getCount().intValue());

Optional<ClinicalDataCountItem> addedItemPatient = result.stream()
.filter(item -> item.getAttributeId().equals("attr3"))
.findFirst();

assertTrue(addedItemPatient.isPresent());
assertEquals(1, addedItemPatient.get().getCounts().size());
assertEquals("NA", addedItemPatient.get().getCounts().get(0).getValue());
assertEquals(20, addedItemPatient.get().getCounts().get(0).getCount().intValue());


}






}
Loading