【2.1】箱线图(seaborn-boxplot)

函数说明:

seaborn.boxplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None, orient=None, color=None, palette=None, saturation=0.75, width=0.8, dodge=True, fliersize=5, linewidth=None, whis=1.5, notch=False, ax=None, **kwargs)

案例

案例1

>>> import seaborn as sns
>>> sns.set(style="whitegrid")
>>> tips = sns.load_dataset("tips")
>>> ax = sns.boxplot(x=tips["total_bill"])

案例2

>>> ax = sns.boxplot(x="day", y="total_bill", data=tips)

案例3:按第二个分类变量(hue)嵌套分组

>>> ax = sns.boxplot(x="day", y="total_bill", hue="smoker",
...                  data=tips, palette="Set3")

案例4:嵌套分组中某些分组组合没有数据时

>>> ax = sns.boxplot(x="day", y="total_bill", hue="time",
...                  data=tips, linewidth=2.5)

案例5 排序

>>> ax = sns.boxplot(x="time", y="tip", data=tips,
...                  order=["Dinner", "Lunch"])

案例6 水平boxplot

>>> iris = sns.load_dataset("iris")
>>> ax = sns.boxplot(data=iris, orient="h", palette="Set2")

案例6 不改变box大小

>>> tips["weekend"] = tips["day"].isin(["Sat", "Sun"])
>>> ax = sns.boxplot(x="day", y="total_bill", hue="weekend",
...                  data=tips, dodge=False)

案例7 显示点

>>> ax = sns.boxplot(x="day", y="total_bill", data=tips)
>>> ax = sns.swarmplot(x="day", y="total_bill", data=tips, color=".25")

案例8 图分页

>>> g = sns.catplot(x="sex", y="total_bill",
...                 hue="smoker", col="time",
...                 data=tips, kind="box",
...                 height=4, aspect=.7);

我的案例

案例一:

import seaborn as sns

import datetime
import matplotlib 

# matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
from collections import Counter
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd
from matplotlib import pyplot


# Load the three headerless, tab-separated result tables. Column 0 is used as
# the index, so the plotted value column is addressed by its integer label 1.
# Each table gets a 'category' column naming the chain type it describes.
read_opts = dict(sep='\t', index_col=0, header=None)
df_1 = pd.read_csv('result/heavy_mismatch-2.tsv', **read_opts).assign(category='Heavy')
df_2 = pd.read_csv('result/light_mismatch-2.tsv', **read_opts).assign(category='Light')
df_3 = pd.read_csv('result/pair_mismatch-2.tsv', **read_opts).assign(category='Pair')

# Stack the tables so one plot can compare the three categories.
df = pd.concat((df_1, df_2, df_3))

sns.set(style="whitegrid")

# Figure size in inches (A4, landscape).
a4_dims = (11.7, 8.27)
fig, ax = plt.subplots(figsize=a4_dims)

# Horizontal boxes (value column on x, category on y) with the individual
# observations drawn on top as a semi-transparent swarm.
# Alternative to the boxplot: sns.violinplot(x=1, y="category", data=df, dodge=False)
plot_args = dict(x=1, y="category", data=df)
ax = sns.boxplot(**plot_args)
ax = sns.swarmplot(alpha=0.5, color='0.25', **plot_args)

# Axis labels and title.
ax.set_xlabel('# Mismatch Residue')
ax.set_ylabel('Chain')
ax.set_title('Mismatch Residues Boxplot')

ax.figure.savefig('pic/pair-mismatch-boxplot-2.jpeg')

案例二

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import pyplot

# Load the germline BLAST summary and build one "<heavy>-<light>" pair label
# per row from the H_map and L_map columns.
df_raw = pd.read_table('5.blast_dir/hl_germline_blast_result_cutoff_80-high-resolution-famlily.tsv')
df_raw['HL_map'] = df_raw['H_map'] + '-' + df_raw['L_map']

# Candidate pair labels: the text before the first '_' of every HL_map value.
# NOTE(review): this only matches real HL_map values when they contain no '_';
# confirm against the input data.
germline_pairs_raw = {label.split('_')[0] for label in df_raw['HL_map']}
cut_frequency = 3

# Drop every candidate pair that occurs fewer than `cut_frequency` times.
# Counting once up front selects the same rows as filtering key by key,
# because removing one pair never changes another pair's count.
pair_counts = df_raw['HL_map'].value_counts()
rare_pairs = [key for key in germline_pairs_raw
              if pair_counts.get(key, 0) < cut_frequency]
df = df_raw[~df_raw['HL_map'].isin(rare_pairs)].copy()

# Summary sets over the rows that survived the filter.
pdb_ids = {label.split('_')[0] for label in df['#PDB_chains']}
germline_pairs = {label.split('_')[0] for label in df['HL_map']}

h_chains = {label.split('_')[0] for label in df['H_map']}
l_chains = {label.split('_')[0] for label in df['L_map']}
sorted_h_chains = sorted(h_chains)
sorted_l_chains = sorted(l_chains)

# Order the boxes by heavy chain, then by light chain, keeping only the
# combinations that actually occur in the data.
sorted_pairs = []
for heavy in sorted_h_chains:
    for light in sorted_l_chains:
        pair_hl = '%s-%s' % (heavy, light)
        if pair_hl in germline_pairs:
            sorted_pairs.append(pair_hl)

# Font sizes. These rc settings are picked up when the axes, ticks and labels
# are created, so they must be set BEFORE the figure is built; setting them
# after plotting leaves the current figure unchanged.
plt.rc('xtick', labelsize=24)
plt.rc('ytick', labelsize=24)
plt.rc('axes', labelsize=24)
plt.rc('axes', titlesize=24)
plt.rc('legend', fontsize=24)

# Figure size in inches (the name is historical; this is much larger than A4).
a4_dims = (32, 22)
fig, ax = pyplot.subplots(figsize=a4_dims)

ax = sns.boxplot(x='HL_map', y='Angle', data=df, order=sorted_pairs, ax=ax)
ax.set(xlabel='Pair Germline(Fv Identity Cutoff >80%)', ylabel='Packing Angle')
plt.xticks(rotation=90)

# Annotate each box with its number of observations, just above the median.
medians = []
nobs = []
for pair_label in sorted_pairs:
    angles = df.loc[df['HL_map'] == pair_label, 'Angle']
    medians.append(angles.median())
    nobs.append(len(angles))
for position, (median, count) in enumerate(zip(medians, nobs)):
    if count == 0:
        # No rows for this label: nothing to annotate (median is NaN).
        continue
    ax.text(position, median + 0.03, str(count), horizontalalignment='center',
            size=24, color='black', weight='semibold')

# plt.title('Mismatch Residues Boxplot')

# Summary box in axes-relative coordinates (upper right of the plot area).
ax.text(0.8, 0.9, 'PDB IDs :%s \n Germline Pairs: %s' % (len(pdb_ids), len(germline_pairs)),
        horizontalalignment='left', verticalalignment='center',
        transform=ax.transAxes, fontsize=28, color='red')

ax.figure.savefig('pic/fv_pair-family-germline-cutoff_80-high-resolution.jpeg')
# Function-call form works on both Python 2 and Python 3.
print('Finish!')

参考资料

个人公众号,比较懒,很少更新,可以在上面提问题,如果回复不及时,可发邮件给我: tiehan@sina.cn

Sam avatar
About Sam
专注生物信息 专注转化医学