Skip to content

Commit

Permalink
[#3] add genetic length (cM) of segments to final .match file (#4)
Browse files Browse the repository at this point in the history
* Added saving of actual segments used for IBD estimation in germline format to IBD estimation step

* Ouput from masking not produced segment with genetic lenght in cM

* Added "min_seg_len" parameter to use for germline and estimation:"
  • Loading branch information
piotrszul authored Oct 10, 2019
1 parent 874b31f commit e46178b
Show file tree
Hide file tree
Showing 9 changed files with 82 additions and 22 deletions.
2 changes: 1 addition & 1 deletion R/tribes.tools/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ Description: More about what it does (maybe more than one line)
License: What license is it under?
Encoding: UTF-8
LazyData: true
Depends: dplyr
Depends: tools,dplyr
22 changes: 15 additions & 7 deletions R/tribes.tools/R/cmd.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
## Highe level interface functions (specific for transformations)
##

germline2ibdEstimate <-function(inputFile, outputFile, minSegLength.cM = 3.0, verbose = FALSE, ...) {
germline2ibdEstimate <-function(inputFile, outputFile, minSegLength.cM = 3.0, verbose = FALSE, segmentsFile = NULL, ...) {

germlineMatches <- read.germline(inputFile)
if (verbose) {
Expand All @@ -15,7 +15,14 @@ germline2ibdEstimate <-function(inputFile, outputFile, minSegLength.cM = 3.0, ve
print("Normalized segments")
print(head(as.data.frame(normalizedSegments)))
}
sharedIBDPerPair <-estimateIBDSharing(normalizedSegments, minLength.cM = minSegLength.cM)
filteredSegments <- filterSegments(normalizedSegments, minLength.cM = minSegLength.cM)
if (!is.null(segmentsFile)) {
if (verbose) {
print(sprintf("Writting segments to: %s", segmentsFile))
}
write.germline(segments2germline(filteredSegments), segmentsFile)
}
sharedIBDPerPair <-estimateIBDSharing(filteredSegments)
if (verbose) {
print("Estimated IBD")
print(head(sharedIBDPerPair))
Expand All @@ -28,7 +35,7 @@ germline2ibdEstimate <-function(inputFile, outputFile, minSegLength.cM = 3.0, ve
write.csv(sharedIBDPerPairWithDegree, outputFile, row.names = FALSE, quote = FALSE)
}

maskSegments <- function(inputFile, outputFile, ersaMask, verbose = FALSE, ...) {
maskSegments <- function(inputFile, outputFile, ersaMask, units = 'bp', verbose = FALSE, ...) {
germlineSegments <- read.germline(inputFile)
if (verbose) {
print("Germline segments")
Expand All @@ -42,12 +49,13 @@ maskSegments <- function(inputFile, outputFile, ersaMask, verbose = FALSE, ...)
}

adjustedSegments <- adjustGermlineSegments(germlineSegments, ersaMask)
if (units == 'cM') {
normalizedSegments <- normalizeSegmentsWithGU(adjustedSegments, GeneticMap.default())
adjustedSegments <- segments2germline(normalizedSegments)
}
if (verbose) {
print("Adjusted segments")
print(head(adjustedSegments))
}

outf <- gzfile(outputFile, 'w')
write.table(adjustedSegments, outf, sep = '\t', col.names = FALSE, row.names = FALSE, quote = FALSE)
close(outf)
write.germline(adjustedSegments, outputFile)
}
10 changes: 10 additions & 0 deletions R/tribes.tools/R/common.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,13 @@ na.replace <-function(x, val) {
x[is.na(x)] <- val
x
}


open.file <- function(filename, ...) {
if (file_ext(filename) == "gz") {
gzfile(filename, ...)
} else {
file(filename, ...)
}
}

7 changes: 1 addition & 6 deletions R/tribes.tools/R/ersa.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,9 @@ adjustGermlineSegments <- function(germlineSegments, ersaMask) {
Unit='bp',
NoMismatch=NA ) %>%
select(-MaskedSegStart, -MaskedSegEnd, -IsTruncated)
class(outputSegments) <- append(class(outputSegments), "germline")
outputSegments
}

#germlineSegments <- read.germline('../../data/SOD1_germline_2_1_wg_3cM_128.match.gz')
#head(germlineSegments)
#ersaMask <- readErsaMask('../../data/ersa.out.msk')
#head(ersaMask)




34 changes: 34 additions & 0 deletions R/tribes.tools/R/germline.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,50 @@ read.germline <- function(filename) {
}


write.germline <-function(germline_df, filename) {
outf <- open.file(filename, 'w')
write.table(germline_df, outf, sep = '\t', col.names = FALSE, row.names = FALSE, quote = FALSE)
close(outf)
}


normalizeSegments.germline <-function(germline_df) {
data.frame(
Chr = germline_df$Chr,
Fam1 = germline_df$Fam1,
Id1 = germline_df$Id1,
Fam2 = germline_df$Fam2,
Id2 = germline_df$Id2,
Pair = paste(germline_df$Id1, germline_df$Id2, sep='_'),
Start.bp = germline_df$SegStart,
End.bp = germline_df$SegEnd,
SnpStart = germline_df$SnpStart,
SnpEnd = germline_df$SnpEnd,
Length.bp = germline_df$SegEnd - germline_df$SegStart,
germline_Length.CM = germline_df$Length,
stringsAsFactors = FALSE
)
}

segments2germline <- function(segmengts) {
segmengts %>% ungroup %>% transmute(
Fam1 = Fam1,
Id1 = Id1,
Fam2 = Fam2,
Id2 = Id2,
Chr = Chr,
SegStart = Start.bp,
SegEnd = End.bp,
SnpStart = SnpStart,
SnpEnd = SnpEnd,
NoSnp = 'NA',
Length = Length.cM,
Unit = 'cM',
NoMismatch = 'NA',
Res1 = 'NA',
Res2 = 'NA'
)
}



5 changes: 2 additions & 3 deletions R/tribes.tools/R/ibd.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,9 @@ filterSegments <-function(segments, chrs=ALL_AUTOSOMES, minLength.cM=0) {
segments[is.element(segments$Chr, chrs) & segments$Length.cM >= minLength.cM, ]
}

estimateIBDSharing <-function(segments, chrs=ALL_AUTOSOMES, minLength.cM=0) {
estimateIBDSharing <-function(segments, chrs=ALL_AUTOSOMES) {
totalLenght.cM <- sum(CHROMOSOMES$Length.cM[is.element(CHROMOSOMES$Chr, chrs)])
filteredSegments <- filterSegments(segments, chrs, minLength.cM)
totalLengthById <- aggregate(filteredSegments$Length.cM, by=list(filteredSegments$Id1, filteredSegments$Id2), sum)
totalLengthById <- aggregate(segments$Length.cM, by=list(segments$Id1, segments$Id2), sum)
data.frame(Id1 = totalLengthById$Group.1, Id2 = totalLengthById$Group.2,
IBD0.cM= 1-totalLengthById$x/totalLenght.cM,
IBD1.cM = NA,
Expand Down
20 changes: 15 additions & 5 deletions relations.smake
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,23 @@
# Detect IBD segemnts with germline
#

DEF_MIN_SEG_LEN=3.0

def min_seg_len():
return config.get('min_seg_len', DEF_MIN_SEG_LEN)


rule germline:
input:
ped="{dir}/{file}.ped",
map="{dir}/{file}_interpolated.map"
output:
"{dir}_GRM/{file}.match"
params:
prefix="{dir}_GRM/{file}"
prefix="{dir}_GRM/{file}",
min_seg=min_seg_len()
shell:
"{BIN_DIR}/germlinew -input {input.ped} {input.map} -output {params.prefix} -bits 128 -min_m 1.5 -err_het 1 -err_hom 2 -g_extend -w_extend"
"{BIN_DIR}/germlinew -input {input.ped} {input.map} -output {params.prefix} -bits 128 -min_m {params.min_seg} -err_het 1 -err_hom 2 -g_extend -w_extend"

#
# Convert vcf to plinke format.
Expand Down Expand Up @@ -46,7 +53,7 @@ rule FPI_filter_population_ibd:
params:
ersa_mask=ref_path('ersa-mask.tsv')
shell:
"{BIN_DIR}/maskSegments -v -i {input} -o {output} -m {params.ersa_mask}"
"{BIN_DIR}/maskSegments -v -i {input} -o {output} -m {params.ersa_mask} -u cM"


#
Expand All @@ -57,9 +64,12 @@ rule estimateIBD0:
input:
"{filename}.match.gz"
output:
"{filename}_IBD.csv"
degrees="{filename}_IBD.csv",
segments="{filename}_IBD-segments.match.gz"
params:
min_seg=min_seg_len()
shell:
"{BIN_DIR}/ibd2degree -v -i {input} -o {output} -m 3.0"
"{BIN_DIR}/ibd2degree -v -i {input} -o {output.degrees} -s {output.segments} -m {params.min_seg}"

#
# Compare estimared degree relatedness vs reported (true) one.
Expand Down
2 changes: 2 additions & 0 deletions scripts/ibd2degree
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ option_list = list(
help="input file name (germline match file)", metavar="character"),
make_option(c("-o", "--output"), dest="outputFile", type="character", default=NULL,
help="output file name [default= %default]", metavar="character"),
make_option(c("-s", "--segments"), dest="segmentsFile", type="character", default=NULL,
help="optionanl segments file name [default= %default]", metavar="character"),
make_option(c("-m", "--min-segment"), dest="minSegmentLength.cM", type="double", default=3.0,
help="min segment lenght in cM [default= %default]", metavar="numeric"),
make_option(c("-v", "--verbose"), dest="verbose",default=FALSE, action="store_true",
Expand Down
2 changes: 2 additions & 0 deletions scripts/maskSegments
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ option_list = list(
help="output file name [default= %default]", metavar="character"),
make_option(c("-m", "--mask"), dest="ersaMask", type="character", default=NULL,
help="ersa mask [default= %default]", metavar="character"),
make_option(c("-u", "--unit"), dest="unit", type="character", default='bp',
help="The unit to use for segment lengths, either `bp` or `cM`. [default= %default]", metavar="character"),
make_option(c("-v", "--verbose"), dest="verbose",default=FALSE, action="store_true",
help="produce verbose output")
);
Expand Down

0 comments on commit e46178b

Please sign in to comment.