Skip to content

Commit

Permalink
Merge pull request #5 from stjude/madetunj
Browse files Browse the repository at this point in the history
converted from python2 to python3
  • Loading branch information
madetunj authored Jan 9, 2020
2 parents 1a0f3e8 + 24251c0 commit 1e5b62a
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 100 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ Executable anywhere as long as the PATHTO is correctly specified.

├── README.md

├── BAM2GFF-call.sh : bash wrapper
├── BAM2GFF-call.sh : bash wrapper script

├── lib

└── utils.py : utilities method
│  

└── src
└── bin

├── BAM2GFF_main.py : calculates density of .bam reads in .gff regions

Expand All @@ -35,7 +35,7 @@ Executable anywhere as long as the PATHTO is correctly specified.

* samtools
* R version > 3.4
* python2
* python3


=================================================================
26 changes: 11 additions & 15 deletions bin/BAM2GFF-call.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,10 @@
#!/bin/bash
#
# BAM TO GFF to detect Read density across gene transcripting regions
# Template wrapper shell script
#
# Edited from Original version 12/11/2019

##############################################################
# ##### Please replace PATHTO with your own directory ###### #
##############################################################
# NOTE: We now assume the PATH and PYTHONPATH has been set up before
# entering this script
#PATHTO=/PATH/TO/BAM2GFF
#PYTHONPATH=$PATHTO/lib
#export PYTHONPATH
#export PATH=$PATH:$PATHTO/src

if [ $# -lt 3 ]; then
echo ""
echo 1>&2 Usage: $0 ["GTF file"] ["feature type"] ["BAM file"] ["CHROM SIZES"] ["SAMPLENAME"]
Expand All @@ -39,7 +30,8 @@ CHROMSIZES=$4

#sample name
SAMPLENAME=$5
SAMPLENAME=${SAMPLENAME:=BAMFILE##*/}
TEMPFILE=${BAMFILE##*/}
SAMPLENAME=${SAMPLENAME:=${TEMPFILE%%.*}}

echo "#############################################"
echo "###### BAM2GFF v1 ######"
Expand All @@ -61,16 +53,20 @@ echo
# BAM TO GFF main code
#
mkdir -p matrix
echo "Working on GeneBody Region\nBAM2GFF_main.py -b $BAMFILE -i annotation/genes.gff -m 100 -o matrix/genebody.txt"
echo "Working on GeneBody Region"
echo "BAM2GFF_main.py -b $BAMFILE -i annotation/genes.gff -m 100 -o matrix/genebody.txt"
BAM2GFF_main.py -b $BAMFILE -i annotation/genes.gff -m 100 -o matrix/genebody.txt
echo
echo "Working on Upstream Region\nBAM2GFF_main.py -b $BAMFILE -i annotation/upstream.gff -m 50 -o matrix/upstream.txt"
echo "Working on Upstream Region"
echo "BAM2GFF_main.py -b $BAMFILE -i annotation/upstream.gff -m 50 -o matrix/upstream.txt"
BAM2GFF_main.py -b $BAMFILE -i annotation/upstream.gff -m 50 -o matrix/upstream.txt
echo
echo "Working on Downstream Region\nBAM2GFF_main.py -b $BAMFILE -i annotation/downstream.gff -m 50 -o matrix/downstream.txt"
echo "Working on Downstream Region"
echo "BAM2GFF_main.py -b $BAMFILE -i annotation/downstream.gff -m 50 -o matrix/downstream.txt"
BAM2GFF_main.py -b $BAMFILE -i annotation/downstream.gff -m 50 -o matrix/downstream.txt
echo
echo "Working on Promoter Region\nBAM2GFF_main.py -b $BAMFILE -i annotation/promoters.gff -m 100 -o matrix/promoters.txt"
echo "Working on Promoter Region"
echo "BAM2GFF_main.py -b $BAMFILE -i annotation/promoters.gff -m 100 -o matrix/promoters.txt"
BAM2GFF_main.py -b $BAMFILE -i annotation/promoters.gff -m 100 -o matrix/promoters.txt
echo

Expand Down
2 changes: 1 addition & 1 deletion bin/BAM2GFF_gtftogenes.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
#get region both left and right of position of interest.

import os
Expand Down
48 changes: 22 additions & 26 deletions bin/BAM2GFF_main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
#bamToGFF.py

# modified for python3 #1/8/20

'''
The MIT License (MIT)
Copyright (c) 2013 Charles Lin
Expand Down Expand Up @@ -64,19 +65,19 @@ def getUniquelyMappingReads(bamFile):
bamName = fullPath.split('/')[-1].split('.')[0]
pathFolder = join(fullPath.split('/')[0:-1],'/')
bamFiles = os.listdir(pathFolder)
statsFile = filter(lambda x: x.count(bamName) ==1 and x.count('stats.') ==1,bamFiles)
statsFile = [x for x in bamFiles if x.count(bamName) ==1 and x.count('stats.') ==1]
if len(statsFile) == 1:
print('USING STATS FILE %s' % (pathFolder+'/'+statsFile[0]))
print(('USING STATS FILE %s' % (pathFolder+'/'+statsFile[0])))
samDict = parseSamHeader(pathFolder+'/'+statsFile[0])
return int(samDict['UniquelyMappingSequenceTags'])
elif len(statsFile) > 1:
statsFile = filter(lambda x: x.count(bamName) ==1 and x.count('stats.concise') ==1,bamFiles)
print('USING STATS FILE %s' % (pathFolder+'/'+statsFile[0]))
statsFile = [x for x in bamFiles if x.count(bamName) ==1 and x.count('stats.concise') ==1]
print(('USING STATS FILE %s' % (pathFolder+'/'+statsFile[0])))
samDict = parseSamHeader(pathFolder+'/'+statsFile[0])
return int(samDict['UniquelyMappingSequenceTags'])

else:
print('no precomputed stats file found for %s.' % (bamFile))
print(('no precomputed stats file found for %s.' % (bamFile)))
return None


Expand Down Expand Up @@ -116,12 +117,7 @@ def mapBamToGFF(bamFile,gff,sense = 'both',unique = 0,extension = 200,floor = 0,
MMR = 1
unique = True



print('using a MMR value of %s' % (MMR))

senseTrans = maketrans('-+.','+-+')

print(('using a MMR value of %s' % (MMR)))

if type(gff) == str:
gff = parseTable(gff,'\t')
Expand Down Expand Up @@ -155,7 +151,7 @@ def mapBamToGFF(bamFile,gff,sense = 'both',unique = 0,extension = 200,floor = 0,
for line in gff:
line = line[0:9]
if ticker%100 == 0:
print ticker
print(ticker)
ticker+=1
gffLocus = Locus(line[0],int(line[3]),int(line[4]),line[6],line[1])
searchLocus = makeSearchLocus(gffLocus,int(extension),int(extension))
Expand All @@ -170,11 +166,11 @@ def mapBamToGFF(bamFile,gff,sense = 'both',unique = 0,extension = 200,floor = 0,
locus = Locus(locus.chr(),locus.start()-extension,locus.end(),locus.sense(),locus.ID())
extendedReads.append(locus)
if gffLocus.sense() == '+' or gffLocus.sense == '.':
senseReads = filter(lambda x:x.sense() == '+' or x.sense() == '.',extendedReads)
antiReads = filter(lambda x:x.sense() == '-',extendedReads)
senseReads = [x for x in extendedReads if x.sense() == '+' or x.sense() == '.']
antiReads = [x for x in extendedReads if x.sense() == '-']
else:
senseReads = filter(lambda x:x.sense() == '-' or x.sense() == '.',extendedReads)
antiReads = filter(lambda x:x.sense() == '+',extendedReads)
senseReads = [x for x in extendedReads if x.sense() == '-' or x.sense() == '.']
antiReads = [x for x in extendedReads if x.sense() == '+']

#at this point can output starts onto the GFF unless density is called

Expand All @@ -195,12 +191,12 @@ def mapBamToGFF(bamFile,gff,sense = 'both',unique = 0,extension = 200,floor = 0,
antiHash[x]+=1

#now apply flooring and filtering for coordinates
keys = uniquify(senseHash.keys()+antiHash.keys())
keys = uniquify(list(senseHash.keys())+list(antiHash.keys()))
if floor > 0:

keys = filter(lambda x: (senseHash[x]+antiHash[x]) > floor,keys)
keys = [x for x in keys if (senseHash[x]+antiHash[x]) > floor]
#coordinate filtering
keys = filter(lambda x: gffLocus.start() < x < gffLocus.end(),keys)
keys = [x for x in keys if gffLocus.start() < x < gffLocus.end()]

if clusterGram or matrix:
clusterLine = [gffLocus.ID(),gffLocus.__str__()]
Expand All @@ -219,15 +215,15 @@ def mapBamToGFF(bamFile,gff,sense = 'both',unique = 0,extension = 200,floor = 0,

while n <nBins:
n+=1
binKeys = filter(lambda x: i < x < i+binSize,keys)
binKeys = [x for x in keys if i < x < i+binSize]
binDen = float(sum([senseHash[x]+antiHash[x] for x in binKeys]))/binSize
clusterLine+=[round(binDen/MMR,4)]
i = i+binSize
else:
i = gffLocus.end()
while n < nBins:
n+=1
binKeys = filter(lambda x: i-binSize < x < i,keys)
binKeys = [x for x in keys if i-binSize < x < i]
binDen = float(sum([senseHash[x]+antiHash[x] for x in binKeys]))/binSize
clusterLine+=[round(binDen/MMR,4)]
i = i-binSize
Expand Down Expand Up @@ -260,7 +256,7 @@ def mapBamToGFF(bamFile,gff,sense = 'both',unique = 0,extension = 200,floor = 0,
elif sense == '+':
readLine = gffLocus.sense()+':'+ join([str(locus.start()) for locus in senseReads],',')
elif sense == '-':
readLine = string.translate(gffLocus.sense(),senseTrans)+':'+ join([str(locus.start()) for locus in antiReads],',')
readLine = gffLocus.sense().maketrans('-+.','+-+')+':'+ join([str(locus.start()) for locus in antiReads],',')
newGFF.append(line+[readLine])
#if not raw and not density gives total
else:
Expand Down Expand Up @@ -342,7 +338,7 @@ def main():
bamFile = options.bam
fullPath = os.path.abspath(bamFile)
bamName = fullPath.split('/')[-1].split('.')[0]
pathFolder = join(fullPath.split('/')[0:-1],'/')
pathFolder = '/'.join(fullPath.split('/')[0:-1])
fileList = os.listdir(pathFolder)
hasBai = False
for fileName in fileList:
Expand Down Expand Up @@ -429,7 +425,7 @@ def main():
NEWcontent[summation] = {}
NEWcontent[summation][linenumber] = line

for each, content in sorted(NEWcontent.items(), reverse=True):
for each, content in sorted(list(NEWcontent.items()), reverse=True):
for key in content:
otherGFF.append(content[key])
unParseTable(otherGFF,output,'\t')
Expand Down
2 changes: 2 additions & 0 deletions bin/BAM2GFF_plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,7 @@ dev.off();
#extrapolate breaks & colz
remainder = round((quantile(as.vector(t(promoters[,3:ncol(promoters)])),.80)),digits=0) %% 2;
finalcount = round((quantile(as.vector(t(promoters[,3:ncol(promoters)])),.80)),digits=0) + remainder;
if (finalcount < 2) { finalcount = 2; } #adjusting for lack of variability in bam density scores
colz=colorRampPalette(c("white", "red"))(finalcount);
breaks=seq(0,finalcount,by=1);

Expand All @@ -505,6 +506,7 @@ dev.off()

remainder = round((quantile(as.vector(t(combined[,3:ncol(combined)])),.80)),digits=0) %% 2
finalcount = round((quantile(as.vector(t(combined[,3:ncol(combined)])),.80) + remainder),digits=0) + remainder
if (finalcount < 2) { finalcount = 2; } #adjusting for lack of variability in bam density scores
colz=colorRampPalette(c("white", "red"))(finalcount)
breaks=seq(0,finalcount,by=1)

Expand Down
Loading

0 comments on commit 1e5b62a

Please sign in to comment.