#!/usr/bin/env python3
import pandas
from matplotlib import pyplot as plt
from matplotlib.collections import BrokenBarHCollection
from matplotlib.patches import Patch
from cnvfinder.utils import resource_path_or_exit
"""
attribution: this file (ideogram.py) is based on https://gist.github.com/daler/c98fc410282d7570efc3
"""
def chromosome_collections(df: pandas.DataFrame, y_positions: dict, height: float,
                           to_log: bool = False, **kwargs):
    """
    Yield one BrokenBarHCollection per chromosome found in ``df``.

    This is a generator: nothing is computed until it is iterated. The input frame is never
    modified; when it has no 'width' column the widths are derived on a per-chromosome copy.

    :param DataFrame df: must at least have columns ['chrom', 'chromStart', 'chromEnd', 'colors']. If there is no
        column 'width', it is calculated as chromEnd - chromStart
    :param dict y_positions: keys are chromosomes, values are the y-value at which to anchor the
        BrokenBarHCollection
    :param float height: height of each BrokenBarHCollection
    :param bool to_log: whether to print each chromosome name as it is processed
    :param kwargs: are passed to BrokenBarHCollection
    :return: generator of BrokenBarHCollection, one per chromosome group
    :raises KeyError: if a chromosome present in ``df`` has no entry in ``y_positions``
    """
    # NOTE(review): BrokenBarHCollection is deprecated in recent Matplotlib releases -- confirm the
    # supported Matplotlib version range before upgrading the dependency.
    has_width = 'width' in df.columns
    for chrom, group in df.groupby('chrom'):
        if to_log:
            print(chrom)
        yrange = (y_positions[chrom], height)
        if not has_width:
            # assign() returns a new frame, so the caller's DataFrame is left untouched even if
            # the consumer stops iterating early or an error interrupts the generator.
            group = group.assign(width=group['chromEnd'] - group['chromStart'])
        xranges = group[['chromStart', 'width']].values
        yield BrokenBarHCollection(
            xranges, yrange, facecolors=group['colors'], **kwargs)
class Ideogram(object):
    """
    Create ideograms

    Building an instance loads the band table, creates a matplotlib figure and draws one ideogram
    per chromosome. Genomic tracks and a legend can then be added, and the figure saved or shown.

    :param str file: file to load chromosome bands data. Default: 'cytoBand' table from https://genome.ucsc.edu/cgi-bin/hgTables.
    :param list chroms: plot only chromosomes that are in this list. Default: ['chr%s' % i for i in list(range(1, 23)) + ['M', 'X', 'Y']]
    :param float chrom_height: height of each ideogram
    :param float chrom_spacing: spacing between consecutive ideograms
    :param tuple fig_size: width and height in inches. Default: (12, number of chromosomes)
    :param dict colors: colors for different chromosome stains
    :param bool to_log: whether to print log info
    """

    # Packaged band table used when no 'file' is given.
    _PKG_BANDS_FILE = 'data/cytoBand.txt'

    # Track color used by add_data when the caller supplies neither a color nor a 'colors' column.
    _DEFAULT_TRACK_COLOR = '#2243a8'

    # Stain name -> RGB color; copied per instance so one instance's edits never leak to another.
    _DEFAULT_COLORS = {
        'gneg': (1., 1., 1.),
        'gpos25': (.6, .6, .6),
        'gpos50': (.4, .4, .4),
        'gpos75': (.2, .2, .2),
        'gpos100': (0., 0., 0.),
        'acen': (.8, .4, .4),
        'gvar': (.8, .8, .8),
        'stalk': (.9, .9, .9)}

    def __init__(self, file: str = None, chroms: list = None, chrom_height: float = 1,
                 chrom_spacing: float = 1.5, fig_size: tuple = None, colors: dict = None,
                 to_log=False):
        self.to_log = to_log
        self.chrom_height = chrom_height
        self.chrom_spacing = chrom_spacing
        # The property setters replace None with a default. The fig_size default depends on the
        # number of chromosomes, so 'chroms' has to be assigned before 'fig_size'.
        self.colors = colors
        self.chroms = chroms
        self.fig_size = fig_size
        self.file = file
        self.df = self.load_bands()
        # chromosomes
        self.ybase = 0
        self.chrom_ybase = {}
        self.chrom_centers = {}
        self.fig, self.ax = self.add_chromosomes()

    @property
    def colors(self):
        """Mapping from stain name to color."""
        return self._colors

    @colors.setter
    def colors(self, value):
        self._colors = dict(self._DEFAULT_COLORS) if value is None else value

    @property
    def chroms(self):
        """Chromosomes to plot."""
        return self._chroms

    @chroms.setter
    def chroms(self, value):
        if value is None:
            value = ['chr%s' % i for i in list(range(1, 23)) + ['M', 'X', 'Y']]
        self._chroms = value

    @property
    def fig_size(self):
        """Figure width and height in inches."""
        return self._fig_size

    @fig_size.setter
    def fig_size(self, value):
        # Default: fixed width of 12 and a height equal to the number of chromosomes.
        self._fig_size = (12, len(self.chroms)) if value is None else value

    def add_chromosomes(self):
        """
        Add chromosome ideograms

        Assigns a vertical anchor and centre to every chromosome, creates a new figure and draws
        the bands on it. The anchors continue from the current value of ``self.ybase``, which is
        advanced here and not reset.

        :return: fig and ax
        """
        # Iterate in reverse so the first chromosome of the list gets the largest y offset.
        for chrom in self.chroms[::-1]:
            self.chrom_ybase[chrom] = self.ybase
            self.chrom_centers[chrom] = self.ybase + self.chrom_height / 2.
            self.ybase += self.chrom_height + self.chrom_spacing

        fig = plt.figure(figsize=self.fig_size)
        ax = fig.add_subplot(111)
        if self.to_log:
            print("adding ideograms...")
        for collection in chromosome_collections(self.df, self.chrom_ybase, self.chrom_height,
                                                 to_log=self.to_log, edgecolors=(0, 0, 0)):
            ax.add_collection(collection)

        # axes tweaking
        ax.set_yticks([self.chrom_centers[i] for i in self.chroms])
        ax.set_yticklabels(self.chroms)
        ax.xaxis.set_visible(False)
        for side in ('top', 'right', 'bottom'):
            ax.spines[side].set_visible(False)
        return fig, ax

    def save(self, filename: str, **kwargs):
        """
        Save ideograms in a file

        :param str filename: filename
        :param kwargs: are passed to Figure.savefig
        """
        self.ax.axis('tight')
        self.fig.savefig(filename, **kwargs)

    def show(self):
        """
        Show the figure after tightening the axes limits
        """
        self.ax.axis('tight')
        self.fig.show()

    def load_bands(self):
        """
        Load the chromosome bands table

        Reads ``self.file`` (or the packaged band table when it is None), keeps only the rows of
        the chromosomes to plot and adds the 'width' and 'colors' columns.

        :return: DataFrame with columns ['chrom', 'chromStart', 'chromEnd', 'name', 'gieStain', 'width', 'colors']
        :raises KeyError: if a stain found in the table has no entry in ``self.colors``
        """
        if self.file is not None:
            bands_path = self.file
        else:
            bands_path = resource_path_or_exit(self._PKG_BANDS_FILE)
        # The first line of the file is skipped and the column names are supplied explicitly.
        df = pandas.read_table(bands_path, skiprows=1,
                               names=['chrom', 'chromStart', 'chromEnd', 'name', 'gieStain'])
        df = self.filter_by_chroms(df)
        df['width'] = df.chromEnd - df.chromStart
        stain_colors = self.colors
        df['colors'] = df.gieStain.apply(lambda stain: stain_colors[stain])
        return df

    def filter_by_chroms(self, df: pandas.DataFrame) -> pandas.DataFrame:
        """
        Keep only the rows whose 'chrom' value is one of ``self.chroms``

        :param DataFrame df: data with a 'chrom' column
        :return: a filtered copy; the input frame is not modified
        """
        # isin() always yields a boolean mask, so an empty frame keeps its columns.
        return df[df.chrom.isin(self.chroms)].copy()

    def add_data(self, df: pandas.DataFrame, height: float = 0.5, padding: float = 0.1,
                 color: str = '#2243a8', alpha: float = 0.5, linewidths: float = 0, **kwargs):
        """
        Add (genomic) data in the plot

        Each track is anchored at ``chrom_ybase + height + padding`` and drawn with a bar height
        of ``abs(height)``, so a negative height and padding place the track below the ideogram.

        :param DataFrame df: data
        :param float height: height of genomic track. Should be smaller than 'chrom_spacing'
        :param float padding: padding between the top of a genomic track and its corresponding ideogram
        :param str color: track's color. It will be used in case 'colors' not in df.columns. None selects the default
        :param float alpha: alpha value used for blending
        :param float linewidths: line widths
        :param kwargs: are passed to BrokenBarHCollection
        """
        if color is None:
            # add_data_above / add_data_below forward color=None by default; fall back to the
            # default track color instead of filling the 'colors' column with None.
            color = self._DEFAULT_TRACK_COLOR
        df = self.filter_by_chroms(df)
        if 'colors' not in df.columns:
            df['colors'] = color

        offset = height + padding
        data_ybase = {chrom: self.chrom_ybase[chrom] + offset for chrom in self.chroms}

        # Honour a 'to_log' the caller may already pass through kwargs.
        kwargs.setdefault('to_log', self.to_log)
        for collection in chromosome_collections(df, data_ybase, abs(height),
                                                 alpha=alpha, linewidths=linewidths, **kwargs):
            self.ax.add_collection(collection)

    def add_data_above(self, df: pandas.DataFrame, color: str = None):
        """
        Wrapper for adding data above ideograms

        :param DataFrame df: data
        :param str color: bars color
        """
        self.add_data(df, height=0.5, padding=0.6, color=color)

    def add_data_below(self, df: pandas.DataFrame, color: str = None):
        """
        Wrapper for adding data below ideograms

        :param DataFrame df: data
        :param str color: bars color
        """
        self.add_data(df, height=-0.5, padding=-0.1, color=color)

    def add_legend(self, to_patches: list, loc='lower right', **kwargs):
        """
        Create a legend based on to_patches list

        :param list to_patches: list of dict -> {color: color, label: label}
        :param str loc: legend location
        :param kwargs: are passed to Figure.legend
        """
        patches = [Patch(color=entry['color'], label=entry['label']) for entry in to_patches]
        self.fig.legend(handles=patches, loc=loc, **kwargs)