Skip to content

Commit

Permalink
Merge pull request #72 from sbslee/0.38.0-dev
Browse files Browse the repository at this point in the history
0.38.0 dev
  • Loading branch information
sbslee authored Jun 15, 2024
2 parents 4b84de8 + 18845cd commit 7b0fbfb
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 134 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
Changelog
*********

0.38.0 (2024-06-16)
-------------------

* Update :meth:`pyvcf.has_chr_prefix` method to ignore the HLA contigs for GRCh38.
* :issue:`71`: Deprecate :meth:`common.plot_cytobands` method.

0.37.0 (2023-09-09)
-------------------

Expand Down
133 changes: 0 additions & 133 deletions fuc/api/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import BrokenBarHCollection
import matplotlib.patches as mpatches
import seaborn as sns

Expand Down Expand Up @@ -841,138 +840,6 @@ def extract_sequence(fasta, region):
sequence = ''
return sequence

def plot_cytobands(cytoband, bed, ax=None, figsize=None):
    """
    Create chromosome ideograms along with BED data.

    The method's source code is derived from a Python script (ideograms.py)
    written by Ryan Dale. The original script can be found at:
    https://gist.github.com/daler/c98fc410282d7570efc3#file-ideograms-py

    Parameters
    ----------
    cytoband : str
        Text file containing cytoband ideogram information.
    bed : str
        BED file to be displayed.
    ax : matplotlib.axes.Axes, optional
        Pre-existing axes for the plot. Otherwise, create a new one.
    figsize : tuple, optional
        Width, height in inches. Format: (float, float).

    Returns
    -------
    matplotlib.axes.Axes
        The axes the ideograms and the BED track were drawn on.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> import matplotlib.pyplot as plt
        >>> from fuc import common
        >>> common.load_dataset('cytoband')
        >>> cytoband_file = '~/fuc-data/cytoband/cytoBandIdeo.txt.gz'
        >>> bed_file = '~/fuc-data/cytoband/ucsc_genes.bed.gz'
        >>> common.plot_cytobands(cytoband_file, bed_file, figsize=(10, 8))
    """
    def draw_bars(axes, df, y_positions, height, **kwargs):
        # Draw one horizontal broken-bar track per chromosome.
        #
        # Axes.broken_barh is used instead of instantiating
        # matplotlib.collections.BrokenBarHCollection directly: that class
        # was deprecated in Matplotlib 3.7 and is absent from later
        # releases, while broken_barh builds the same polygons and adds
        # them to the axes itself.
        for chrom, group in df.groupby('chrom'):
            axes.broken_barh(
                group[['start', 'width']].values,
                (y_positions[chrom], height),
                edgecolors=('black',),
                facecolors=group['colors'],
                **kwargs
            )

    # Height of each ideogram
    chrom_height = 1

    # Spacing between consecutive ideograms
    chrom_spacing = 1

    # Height of the gene track. Should be smaller than `chrom_spacing` in
    # order to fit correctly
    gene_height = 0.4

    # Padding between the top of a gene track and its corresponding ideogram
    gene_padding = 0.1

    # Decide which chromosomes to use
    chromosome_list = [
        f'chr{i}' for i in list(range(1, 23)) + ['M', 'X', 'Y']
    ]

    # Keep track of the y positions for ideograms and genes for each
    # chromosome, and the center of each ideogram (which is where we'll put
    # the ytick labels)
    ybase = 0
    chrom_ybase = {}
    gene_ybase = {}
    chrom_centers = {}

    # Iterate in reverse so that items in the beginning of `chromosome_list`
    # will appear at the top of the plot
    for chrom in reversed(chromosome_list):
        chrom_ybase[chrom] = ybase
        chrom_centers[chrom] = ybase + chrom_height / 2.
        gene_ybase[chrom] = ybase - gene_height - gene_padding
        ybase += chrom_height + chrom_spacing

    # Read in ideogram.txt, downloaded from UCSC Table Browser
    ideo = pd.read_table(
        cytoband,
        names=['chrom', 'start', 'end', 'name', 'gieStain']
    )

    # Filter out chromosomes not in our list. The explicit copy makes the
    # column assignments below act on an independent frame rather than on a
    # slice of the parsed table (avoids pandas' SettingWithCopyWarning).
    ideo = ideo[ideo['chrom'].isin(chromosome_list)].copy()

    # Add a new column for width
    ideo['width'] = ideo['end'] - ideo['start']

    # Colors for different chromosome stains
    color_lookup = {
        'gneg': (1., 1., 1.),
        'gpos25': (.6, .6, .6),
        'gpos50': (.4, .4, .4),
        'gpos75': (.2, .2, .2),
        'gpos100': (0., 0., 0.),
        'acen': (.8, .4, .4),
        'gvar': (.8, .8, .8),
        'stalk': (.9, .9, .9),
    }

    # Add a new column for colors. A stain missing from `color_lookup`
    # raises KeyError, exactly as before.
    ideo['colors'] = ideo['gieStain'].apply(lambda x: color_lookup[x])

    # Same thing for genes (only the first four BED columns are read)
    genes = pd.read_table(
        bed,
        names=['chrom', 'start', 'end', 'name'],
        usecols=range(4)
    )
    genes = genes[genes['chrom'].isin(chromosome_list)].copy()
    genes['width'] = genes['end'] - genes['start']
    genes['colors'] = '#2243a8'

    if ax is None:
        fig, ax = plt.subplots(figsize=figsize)

    # Now all we have to do is draw the ideogram data...
    draw_bars(ax, ideo, chrom_ybase, chrom_height)

    # ...and the gene data
    draw_bars(ax, genes, gene_ybase, gene_height, alpha=0.5, linewidths=0)

    # Axes tweaking
    ax.set_yticks([chrom_centers[i] for i in chromosome_list])
    ax.set_yticklabels(chromosome_list)
    ax.axis('tight')

    return ax

def convert_file2list(fn):
"""
Convert a text file to a list of filenames.
Expand Down
4 changes: 4 additions & 0 deletions fuc/api/pyvcf.py
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,8 @@ def has_chr_prefix(file, size=1000):
Return True if all of the sampled contigs from a VCF file have the
(annoying) 'chr' string.
For GRCh38, the HLA contigs will be ignored.
Parameters
----------
file : str
Expand All @@ -779,6 +781,8 @@ def has_chr_prefix(file, size=1000):
vcf = VariantFile(file)
for record in vcf.fetch():
n += 1
if record.chrom.startswith('HLA'):
continue
if 'chr' not in record.chrom:
return False
if n > size:
Expand Down
2 changes: 1 addition & 1 deletion fuc/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.37.0'
__version__ = '0.38.0'

0 comments on commit 7b0fbfb

Please sign in to comment.