Skip to content

Commit

Permalink
Add more unit tests for clickhouse service related methods (#11262)
Browse files Browse the repository at this point in the history
* add more unit tests for clickhouse service methods

* fix sonar issues

* fix study view service errors

---------

Co-authored-by: Bryan Lai <[email protected]>
  • Loading branch information
gblaih and Bryan Lai authored Dec 13, 2024
1 parent 5008d5f commit 540b57d
Show file tree
Hide file tree
Showing 6 changed files with 693 additions and 132 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import org.cbioportal.model.AlterationType;
import org.cbioportal.model.CopyNumberCountByGene;
import org.cbioportal.model.Gistic;
import org.cbioportal.model.GisticToGene;
import org.cbioportal.model.MolecularProfile;
import org.cbioportal.model.MolecularProfileCaseIdentifier;
import org.cbioportal.model.MutSig;
Expand All @@ -21,6 +20,7 @@
import org.cbioportal.service.SignificantCopyNumberRegionService;
import org.cbioportal.service.SignificantlyMutatedGeneService;
import org.cbioportal.service.exception.StudyNotFoundException;
import org.cbioportal.service.util.AlterationCountServiceUtil;
import org.cbioportal.service.util.AlterationEnrichmentUtil;
import org.cbioportal.web.parameter.Projection;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down Expand Up @@ -51,8 +51,6 @@ public class AlterationCountServiceImpl implements AlterationCountService {
private final SignificantlyMutatedGeneService significantlyMutatedGeneService;
private final StudyViewRepository studyViewRepository;
private final SignificantCopyNumberRegionService significantCopyNumberRegionService;

private static final String WHOLE_EXOME_SEQUENCING = "WES";


@Autowired
Expand Down Expand Up @@ -265,19 +263,19 @@ public Pair<List<CopyNumberCountByGene>, Long> getPatientCnaGeneCounts(List<Mole

/**
 * Returns per-gene mutation counts for the given study view filter context.
 *
 * <p>The raw counts come from {@code studyViewRepository.getMutatedGenes}; entries that share a
 * Hugo gene symbol are merged, the result is passed through {@code populateAlterationCounts}
 * with {@code AlterationType.MUTATION_EXTENDED}, and finally MutSig q-values are applied via
 * {@code populateAlterationCountsWithMutSigQValue}.
 *
 * @param studyViewFilterContext filter context forwarded to the repository and helper calls
 * @return the list produced by {@code populateAlterationCountsWithMutSigQValue}
 * @throws StudyNotFoundException declared by this method; propagated from the helper calls
 */
@Override
public List<AlterationCountByGene> getMutatedGenes(StudyViewFilterContext studyViewFilterContext) throws StudyNotFoundException {
// NOTE(review): the two consecutive `var alterationCountByGenes` lines look like the removed and
// added sides of a diff hunk (the second calls the AlterationCountServiceUtil variant) - confirm
// which one belongs in the real file.
var alterationCountByGenes = populateAlterationCounts(combineAlterationCountsWithConflictingHugoSymbols( studyViewRepository.getMutatedGenes(studyViewFilterContext)),
var alterationCountByGenes = populateAlterationCounts(AlterationCountServiceUtil.combineAlterationCountsWithConflictingHugoSymbols( studyViewRepository.getMutatedGenes(studyViewFilterContext)),
studyViewFilterContext, AlterationType.MUTATION_EXTENDED);
return populateAlterationCountsWithMutSigQValue(alterationCountByGenes, studyViewFilterContext);
}

/**
 * Returns per-gene copy number alteration counts for the given study view filter context.
 *
 * <p>The raw counts come from {@code studyViewRepository.getCnaGenes}; conflicting Hugo symbol
 * entries are merged, the result is passed through {@code populateAlterationCounts} with
 * {@code AlterationType.COPY_NUMBER_ALTERATION}, and q-values are then applied via
 * {@code populateAlterationCountsWithCNASigQValue}.
 *
 * @param studyViewFilterContext filter context forwarded to the repository and helper calls
 * @return the list produced by {@code populateAlterationCountsWithCNASigQValue}
 * @throws StudyNotFoundException declared by this method; propagated from the helper calls
 */
public List<CopyNumberCountByGene> getCnaGenes(StudyViewFilterContext studyViewFilterContext) throws StudyNotFoundException {
// NOTE(review): the two consecutive `var copyNumberAlterationCounts` lines look like the removed
// and added sides of a diff hunk - confirm which one belongs in the real file.
var copyNumberAlterationCounts = populateAlterationCounts(combineCopyNumberCountsWithConflictingHugoSymbols(studyViewRepository.getCnaGenes(studyViewFilterContext)), studyViewFilterContext, AlterationType.COPY_NUMBER_ALTERATION);
var copyNumberAlterationCounts = populateAlterationCounts(AlterationCountServiceUtil.combineCopyNumberCountsWithConflictingHugoSymbols(studyViewRepository.getCnaGenes(studyViewFilterContext)), studyViewFilterContext, AlterationType.COPY_NUMBER_ALTERATION);
return populateAlterationCountsWithCNASigQValue(copyNumberAlterationCounts, studyViewFilterContext);
}

/**
 * Returns per-gene structural variant counts for the given study view filter context.
 *
 * <p>The raw counts come from {@code studyViewRepository.getStructuralVariantGenes}; conflicting
 * Hugo symbol entries are merged, the result is passed through {@code populateAlterationCounts}
 * with {@code AlterationType.STRUCTURAL_VARIANT}, and MutSig q-values are then applied via
 * {@code populateAlterationCountsWithMutSigQValue}.
 *
 * @param studyViewFilterContext filter context forwarded to the repository and helper calls
 * @return the list produced by {@code populateAlterationCountsWithMutSigQValue}
 * @throws StudyNotFoundException declared by this method; propagated from the helper calls
 */
@Override
public List<AlterationCountByGene> getStructuralVariantGenes(StudyViewFilterContext studyViewFilterContext) throws StudyNotFoundException {
// NOTE(review): the two consecutive `var alterationCountByGenes` lines look like the removed and
// added sides of a diff hunk - confirm which one belongs in the real file.
var alterationCountByGenes = populateAlterationCounts(combineAlterationCountsWithConflictingHugoSymbols(studyViewRepository.getStructuralVariantGenes(studyViewFilterContext)),
var alterationCountByGenes = populateAlterationCounts(AlterationCountServiceUtil.combineAlterationCountsWithConflictingHugoSymbols(studyViewRepository.getStructuralVariantGenes(studyViewFilterContext)),
studyViewFilterContext, AlterationType.STRUCTURAL_VARIANT);
return populateAlterationCountsWithMutSigQValue(alterationCountByGenes, studyViewFilterContext);
}
Expand All @@ -297,7 +295,7 @@ private < T extends AlterationCountByGene> List<T> populateAlterationCounts(@Non
Set<String> matchingGenePanelIds = matchingGenePanelIdsMap.get(hugoGeneSymbol) != null ?
matchingGenePanelIdsMap.get(hugoGeneSymbol) : Collections.emptySet();

int alterationTotalProfiledCount = computeTotalProfiledCount(hasGenePanelData(matchingGenePanelIds),
int alterationTotalProfiledCount = AlterationCountServiceUtil.computeTotalProfiledCount(AlterationCountServiceUtil.hasGenePanelData(matchingGenePanelIds),
profiledCountsMap.getOrDefault(hugoGeneSymbol, 0),
sampleProfileCountWithoutGenePanelData, totalProfiledCount);

Expand All @@ -308,107 +306,24 @@ private < T extends AlterationCountByGene> List<T> populateAlterationCounts(@Non
});
return alterationCounts;
}

/**
 * Picks the profiled-case count to report for a gene.
 *
 * <p>The count starts from {@code sampleProfileCountWithoutGenePanelData}; when
 * {@code hasGenePanelData} is true, {@code alterationsProfiledCount} is added to it. If the
 * resulting count is zero, {@code totalProfiledCount} is returned instead.
 *
 * @param hasGenePanelData whether {@code alterationsProfiledCount} should be included
 * @param alterationsProfiledCount profiled count attributed to the alteration's gene
 * @param sampleProfileCountWithoutGenePanelData profiled count of samples lacking gene panel data
 * @param totalProfiledCount fallback used when the computed count is zero
 * @return the computed count, or {@code totalProfiledCount} when that count is zero
 */
private int computeTotalProfiledCount(boolean hasGenePanelData, int alterationsProfiledCount, int sampleProfileCountWithoutGenePanelData, int totalProfiledCount) {
    int profiledCount = sampleProfileCountWithoutGenePanelData;
    if (hasGenePanelData) {
        profiledCount += alterationsProfiledCount;
    }
    if (profiledCount == 0) {
        return totalProfiledCount;
    }
    return profiledCount;
}

/**
 * Applies MutSig q-values to the given alteration counts.
 *
 * <p>The MutSig entries are looked up with {@code getMutSigs}; each count whose Hugo gene symbol
 * is a key of that map gets its q-value set from the matching entry. The input objects are
 * updated in place.
 *
 * @param alterationCountByGenes counts to update
 * @param studyViewFilterContext filter context forwarded to {@code getMutSigs}
 * @return the same list instance that was passed in (pre-change body), or the result of the
 *     {@code AlterationCountServiceUtil} helper (post-change body)
 * @throws StudyNotFoundException declared by this method; propagated from {@code getMutSigs}
 */
private List<AlterationCountByGene> populateAlterationCountsWithMutSigQValue(List<AlterationCountByGene> alterationCountByGenes, StudyViewFilterContext studyViewFilterContext) throws StudyNotFoundException {
final var mutSigs = getMutSigs(studyViewFilterContext);
// Only touch the counts when there is at least one MutSig entry to apply.
if (!mutSigs.isEmpty()) {
// Runs on a parallel stream; each element only mutates itself, mutSigs is only read.
alterationCountByGenes.parallelStream()
.filter(alterationCount -> mutSigs.containsKey(alterationCount.getHugoGeneSymbol()))
.forEach(alterationCount ->
alterationCount.setqValue(mutSigs.get(alterationCount.getHugoGeneSymbol()).getqValue())
);
}
// NOTE(review): two consecutive returns - these look like the removed and added sides of a diff
// hunk (inline logic above vs. delegation to AlterationCountServiceUtil); confirm which applies.
return alterationCountByGenes;
return AlterationCountServiceUtil.updateAlterationCountsWithMutSigQValue(alterationCountByGenes, mutSigs);
}

/**
 * Applies Gistic q-values to the given copy number alteration counts.
 *
 * <p>The Gistic entries are looked up with {@code getGisticMap}, keyed by a pair of Hugo gene
 * symbol and alteration value; each count whose pair is a key of that map gets its q-value set
 * from the matching entry. The input objects are updated in place.
 *
 * @param alterationCountByGenes copy number counts to update
 * @param studyViewFilterContext filter context forwarded to {@code getGisticMap}
 * @return the same list instance that was passed in (pre-change body), or the result of the
 *     {@code AlterationCountServiceUtil} helper (post-change body)
 * @throws StudyNotFoundException declared by this method; propagated from {@code getGisticMap}
 */
private List<CopyNumberCountByGene> populateAlterationCountsWithCNASigQValue(List<CopyNumberCountByGene> alterationCountByGenes, StudyViewFilterContext studyViewFilterContext) throws StudyNotFoundException {
final var gisticMap = getGisticMap(studyViewFilterContext);

// Only touch the counts when there is at least one Gistic entry to apply.
if(!gisticMap.isEmpty()) {
// Runs on a parallel stream; each element only mutates itself, gisticMap is only read.
alterationCountByGenes.parallelStream()
.filter(alterationCount -> gisticMap.containsKey(Pair.create(alterationCount.getHugoGeneSymbol(), alterationCount.getAlteration())))
.forEach(alterationCount -> {
alterationCount.setqValue(gisticMap.get(Pair.create(alterationCount.getHugoGeneSymbol(), alterationCount.getAlteration())).getqValue());
});
}
// NOTE(review): two consecutive returns - these look like the removed and added sides of a diff
// hunk (inline logic above vs. delegation to AlterationCountServiceUtil); confirm which applies.
return alterationCountByGenes;
return AlterationCountServiceUtil.updateAlterationCountsWithCNASigQValue(alterationCountByGenes, gisticMap);
}

/**
 * Returns one molecular profile per cancer study for the given alteration type.
 *
 * <p>Profiles are fetched with
 * {@code studyViewRepository.getFilteredMolecularProfilesByAlterationType} and collected into a
 * map keyed by cancer study identifier; when several profiles share an identifier, the first one
 * encountered is kept. The returned list is built from that map's values, so its order is
 * whatever the collected map yields.
 *
 * <p>NOTE(review): in this diff view the stream pipeline below appears to be the pre-change body
 * of the method - confirm against the post-change file.
 *
 * @param studyViewFilterContext filter context forwarded to the repository
 * @param alterationType alteration type; its {@code toString()} value is passed to the repository
 * @return a list holding one molecular profile per distinct cancer study identifier
 */
private List<MolecularProfile> getFirstMolecularProfileGroupedByStudy(StudyViewFilterContext studyViewFilterContext, AlterationType alterationType) {
final var molecularProfiles = studyViewRepository.getFilteredMolecularProfilesByAlterationType(studyViewFilterContext, alterationType.toString());

return molecularProfiles.stream()
.collect(Collectors.toMap(
MolecularProfile::getCancerStudyIdentifier,
Function.identity(),
(existing, replacement) -> existing // Keep the first occurrence
))
.values()
.stream()
.toList();
}

/**
 * Merges alteration counts that share a Hugo gene symbol into a single entry per symbol.
 *
 * <p>The first entry seen for a symbol is kept and updated in place: the number of altered cases
 * and the total count of every later entry with the same symbol are added to it. Such duplicates
 * show up in the data when genes carry the same Hugo gene symbol but different Entrez ids.
 *
 * <p>The result is built from a {@code HashMap}, so its order is not the input order.
 *
 * @param alterationCounts counts to merge, possibly with repeated gene symbols
 * @return a list with one {@code AlterationCountByGene} per distinct gene symbol
 */
private List<AlterationCountByGene> combineAlterationCountsWithConflictingHugoSymbols(@NonNull List<AlterationCountByGene> alterationCounts) {
    Map<String, AlterationCountByGene> mergedBySymbol = new HashMap<>();
    for (AlterationCountByGene incoming : alterationCounts) {
        AlterationCountByGene kept = mergedBySymbol.putIfAbsent(incoming.getHugoGeneSymbol(), incoming);
        if (kept != null) {
            // Symbol already present: fold this entry's counts into the first one seen.
            kept.setNumberOfAlteredCases(kept.getNumberOfAlteredCases() + incoming.getNumberOfAlteredCases());
            kept.setTotalCount(kept.getTotalCount() + incoming.getTotalCount());
        }
    }
    return mergedBySymbol.values().stream().toList();
}

/**
 * Merges copy number counts that share both a Hugo gene symbol and an alteration value.
 *
 * <p>Unlike the plain alteration variant, the alteration value is part of the key, so the same
 * gene symbol may appear once per alteration value in the result. The first entry seen for a key
 * is kept and updated in place: the number of altered cases and the total count of every later
 * entry with the same key are added to it. Such duplicates show up in the data when genes carry
 * the same Hugo gene symbol but different Entrez ids.
 *
 * <p>The result is built from a {@code HashMap}, so its order is not the input order.
 *
 * @param alterationCounts copy number counts to merge, possibly with repeated keys
 * @return a list with one {@code CopyNumberCountByGene} per distinct (symbol, alteration) pair
 */
private List<CopyNumberCountByGene> combineCopyNumberCountsWithConflictingHugoSymbols(@NonNull List<CopyNumberCountByGene> alterationCounts) {
    Map<Pair<String, Integer>, CopyNumberCountByGene> mergedByKey = new HashMap<>();
    for (CopyNumberCountByGene incoming : alterationCounts) {
        Pair<String, Integer> key = Pair.create(incoming.getHugoGeneSymbol(), incoming.getAlteration());
        CopyNumberCountByGene kept = mergedByKey.putIfAbsent(key, incoming);
        if (kept != null) {
            // Key already present: fold this entry's counts into the first one seen.
            kept.setNumberOfAlteredCases(kept.getNumberOfAlteredCases() + incoming.getNumberOfAlteredCases());
            kept.setTotalCount(kept.getTotalCount() + incoming.getTotalCount());
        }
    }
    return mergedByKey.values().stream().toList();
}

/**
 * Tells whether the given gene panel ids contain at least one id other than
 * {@code WHOLE_EXOME_SEQUENCING}.
 *
 * @param matchingGenePanelIds gene panel ids to inspect
 * @return {@code true} when the set holds an id besides the whole-exome marker, {@code false}
 *     when it is empty or holds only that marker
 */
private boolean hasGenePanelData(@NonNull Set<String> matchingGenePanelIds) {
    if (matchingGenePanelIds.contains(WHOLE_EXOME_SEQUENCING)) {
        // The whole-exome marker alone does not count; another id must be present too.
        return matchingGenePanelIds.size() > 1;
    }
    return !matchingGenePanelIds.isEmpty();
}
return AlterationCountServiceUtil.getFirstMolecularProfileGroupedByStudy(molecularProfiles);
}

private Map<String, MutSig> getMutSigs(StudyViewFilterContext studyViewFilterContext) throws StudyNotFoundException {
var distinctStudyIds = studyViewRepository.getFilteredStudyIds(studyViewFilterContext);
Expand Down Expand Up @@ -440,16 +355,7 @@ private Map<Pair<String, Integer>, Gistic> getGisticMap(StudyViewFilterContext s
null,
null,
null);
for(Gistic gistic : gisticList) {
var amp = (gistic.getAmp()) ? 2 : -2;
for (GisticToGene gene : gistic.getGenes()) {
var key = Pair.create(gene.getHugoGeneSymbol(), amp);
Gistic currentGistic = gisticMap.get(key);
if (currentGistic == null || gistic.getqValue().compareTo(currentGistic.getqValue()) < 0) {
gisticMap.put(key, gistic);
}
}
}
AlterationCountServiceUtil.setupGisticMap(gisticList, gisticMap);
}
return gisticMap;
}
Expand Down Expand Up @@ -487,26 +393,7 @@ private <S extends AlterationCountBase> Pair<List<S>, Long> getAlterationGeneCou
Long studyProfiledCasesCount = includeFrequencyFunction.apply(studyMolecularProfileCaseIdentifiers, studyAlterationCountByGenes);
profiledCasesCount.updateAndGet(v -> v + studyProfiledCasesCount);
}
studyAlterationCountByGenes.forEach(datum -> {
String key = datum.getUniqueEventKey();
if (totalResult.containsKey(key)) {
S alterationCountByGene = totalResult.get(key);
alterationCountByGene.setTotalCount(alterationCountByGene.getTotalCount() + datum.getTotalCount());
alterationCountByGene.setNumberOfAlteredCases(alterationCountByGene.getNumberOfAlteredCases() + datum.getNumberOfAlteredCases());
alterationCountByGene.setNumberOfProfiledCases(alterationCountByGene.getNumberOfProfiledCases() + datum.getNumberOfProfiledCases());
Set<String> matchingGenePanelIds = new HashSet<>();
if (!alterationCountByGene.getMatchingGenePanelIds().isEmpty()) {
matchingGenePanelIds.addAll(alterationCountByGene.getMatchingGenePanelIds());
}
if (!datum.getMatchingGenePanelIds().isEmpty()) {
matchingGenePanelIds.addAll(datum.getMatchingGenePanelIds());
}
alterationCountByGene.setMatchingGenePanelIds(matchingGenePanelIds);
totalResult.put(key, alterationCountByGene);
} else {
totalResult.put(key, datum);
}
});
AlterationCountServiceUtil.setupAlterationGeneCountsMap(studyAlterationCountByGenes, totalResult);
});
alterationCountByGenes = new ArrayList<>(totalResult.values());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,14 +255,7 @@ public List<GenomicDataCountItem> getMutationCountsByGeneSpecific(StudyViewFilte
List<GenomicDataCountItem> genomicDataCountItemList = new ArrayList<>();
for (GenomicDataFilter genomicDataFilter : genomicDataFilters) {
Map<String, Integer> counts = studyViewRepository.getMutationCounts(createContext(studyViewFilter), genomicDataFilter);
List<GenomicDataCount> genomicDataCountList = new ArrayList<>();
if (counts.getOrDefault("mutatedCount", 0) > 0)
genomicDataCountList.add(new GenomicDataCount("Mutated", "MUTATED", counts.get("mutatedCount"), counts.get("mutatedCount")));
if (counts.getOrDefault("notMutatedCount", 0) > 0)
genomicDataCountList.add(new GenomicDataCount("Not Mutated", "NOT_MUTATED", counts.get("notMutatedCount"), counts.get("notMutatedCount")));
if (counts.getOrDefault("notProfiledCount", 0) > 0)
genomicDataCountList.add(new GenomicDataCount("Not Profiled", "NOT_PROFILED", counts.get("notProfiledCount"), counts.get("notProfiledCount")));
genomicDataCountItemList.add(new GenomicDataCountItem(genomicDataFilter.getHugoGeneSymbol(), "mutations", genomicDataCountList));
genomicDataCountItemList.add(StudyViewColumnarServiceUtil.createGenomicDataCountItemFromMutationCounts(genomicDataFilter, counts));
}
return genomicDataCountItemList;
}
Expand Down
Loading

0 comments on commit 540b57d

Please sign in to comment.