diff --git a/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java b/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java index 981dbb80cf2..92a8295d725 100644 --- a/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java +++ b/src/main/java/org/cbioportal/service/util/StudyViewColumnarServiceUtil.java @@ -27,11 +27,14 @@ public static List mergeClinicalDataCounts( ) { items.forEach(attr -> { Map> countsPerType = attr.getCounts().stream() - .collect(Collectors.groupingBy(ClinicalDataCount::getValue)); + .collect(Collectors.groupingBy(a -> a.getValue().toLowerCase())); List res = countsPerType.entrySet().stream().map(entry -> { ClinicalDataCount mergedCount = new ClinicalDataCount(); mergedCount.setAttributeId(attr.getAttributeId()); - mergedCount.setValue(entry.getKey()); + // we are just going to choose the value of the first item + // due to failure in data normalization in source files, we may find values + // have inconsistent casing. 
we choose to merge and choose an arbitrary casing + mergedCount.setValue(entry.getValue().stream().findFirst().get().getValue()); mergedCount.setCount(entry.getValue().stream().mapToInt(ClinicalDataCount::getCount).sum()); return mergedCount; }).toList(); diff --git a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml index bfc49eddc67..ce2fd9bf6ed 100644 --- a/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml +++ b/src/main/resources/org/cbioportal/persistence/mybatisclickhouse/StudyViewMapper.xml @@ -190,7 +190,7 @@ WITH clinical_data_query AS ( SELECT attribute_name AS attributeId, - upper(attribute_value) AS value, + attribute_value AS value, cast(count(*) AS INTEGER) as count FROM clinical_data_derived diff --git a/src/test/java/org/cbioportal/service/util/StudyViewColumnarServiceUtilTest.java b/src/test/java/org/cbioportal/service/util/StudyViewColumnarServiceUtilTest.java index 0c9687d6e59..879ef6baade 100644 --- a/src/test/java/org/cbioportal/service/util/StudyViewColumnarServiceUtilTest.java +++ b/src/test/java/org/cbioportal/service/util/StudyViewColumnarServiceUtilTest.java @@ -31,9 +31,11 @@ public void testMergeClinicalDataCounts() { count1.setValue("value1"); count1.setCount(1); + // note that the casing of the value is different + // our merge code should ignore that and choose the first item ClinicalDataCount count2 = new ClinicalDataCount(); count2.setAttributeId("attr1"); - count2.setValue("value1"); + count2.setValue("VALUE1"); count2.setCount(2); ClinicalDataCount count3 = new ClinicalDataCount();