# 【2.1.1】箱线图（seaborn-boxplot）

## 一、函数说明：

seaborn.boxplot(x=None, y=None, hue=None, data=None, order=None, hue_order=None, orient=None, color=None, palette=None, saturation=0.75, width=0.8, dodge=True, fliersize=5, linewidth=None, whis=1.5, notch=False, ax=None, **kwargs)

## 二、案例

### 案例1

>>> import seaborn as sns
>>> sns.set(style="whitegrid")
>>> ax = sns.boxplot(x=tips["total_bill"])


### 案例2

>>> ax = sns.boxplot(x="day", y="total_bill", data=tips)


### 案例3：分类变量

>>> ax = sns.boxplot(x="day", y="total_bill", hue="smoker",
...                  data=tips, palette="Set3")


### 案例4：当某些数据为空时

>>> ax = sns.boxplot(x="day", y="total_bill", hue="time",
...                  data=tips, linewidth=2.5)


### 案例5 排序

>>> ax = sns.boxplot(x="time", y="tip", data=tips,
...                  order=["Dinner", "Lunch"])


### 案例6 水平boxplot

>>> iris = sns.load_dataset("iris")
>>> ax = sns.boxplot(data=iris, orient="h", palette="Set2")


### 案例6 不改变box大小

>>> tips["weekend"] = tips["day"].isin(["Sat", "Sun"])
>>> ax = sns.boxplot(x="day", y="total_bill", hue="weekend",
...                  data=tips, dodge=False)


### 案例7 显示点

>>> ax = sns.boxplot(x="day", y="total_bill", data=tips)
>>> ax = sns.swarmplot(x="day", y="total_bill", data=tips, color=".25")


### 案例8 图分页

>>> g = sns.catplot(x="sex", y="total_bill",
...                 hue="smoker", col="time",
...                 data=tips, kind="box",
...                 height=4, aspect=.7);


### 案例8 boxplot不显示outlier

# Marker properties for outliers ("fliers"). Defined first because both
# calls below reference it; a marker size of 0.01 makes the points
# practically invisible.
flierprops = dict(markerfacecolor='0.75', markersize=0.01, linestyle='none')

# Option 1: do not draw outliers at all (showfliers=False).
ax = sns.boxplot(
    data=phenotype_info_merge,
    orient="h",
    y="primary disease or tissue",
    x='value',
    hue='SampleType',
    order=sorted_pairs,
    palette=choosed_color,
    flierprops=flierprops,
    hue_order=choosed_sample_sites,
    showfliers=False,
)

# Option 2: keep the outliers but render them with the tiny markers
# configured in ``flierprops``.
ax = sns.boxplot(
    data=phenotype_info_merge,
    orient="h",
    y="primary disease or tissue",
    x='value',
    hue='SampleType',
    order=sorted_pairs,
    palette=choosed_color,
    flierprops=flierprops,
    hue_order=choosed_sample_sites,
)


### 案例9 boxplot的box线条调整颜色

import matplotlib.pyplot as plt
import seaborn as sns

# Apply the seaborn style before creating the figure: the style is stored
# in matplotlib rcParams, so it only affects axes created afterwards.
sns.set_style("whitegrid")

fig, (ax1, ax2) = plt.subplots(2)

# Same plot twice: ax1 keeps the default look, ax2 is restyled below.
sns.boxplot(x="day", y="total_bill", hue="smoker", data=tips, palette="Set1", ax=ax1)
sns.boxplot(x="day", y="total_bill", hue="smoker", data=tips, palette="Set1", ax=ax2)

# NOTE(review): which container holds the box patches depends on the
# matplotlib/seaborn version (``ax2.artists`` here; possibly
# ``ax2.patches`` on newer releases) -- verify for your environment.
for i, artist in enumerate(ax2.artists):
    # Set the linecolor on the artist to the facecolor, and set the facecolor to None
    col = artist.get_facecolor()
    artist.set_edgecolor(col)
    artist.set_facecolor('None')

    # Each box has 6 associated Line2D objects (to make the whiskers, fliers, etc.)
    # Loop over them here, and use the same colour as above
    for j in range(i * 6, i * 6 + 6):
        line = ax2.lines[j]
        line.set_color(col)
        line.set_mfc(col)
        line.set_mec(col)

# Also fix the legend
for legpatch in ax2.get_legend().get_patches():
    col = legpatch.get_facecolor()
    legpatch.set_edgecolor(col)
    legpatch.set_facecolor('None')

plt.show()


# Outlier marker settings: light grey face, near-zero size, no line.
flierprops = {
    'markerfacecolor': '0.75',
    'markersize': 0.01,
    'linestyle': 'none',
}
# Horizontal box plot of ``value`` per tissue, split by sample type.
ax = sns.boxplot(
    data=phenotype_info_merge,
    orient="h",
    y="primary disease or tissue",
    x='value',
    hue='SampleType',
    order=sorted_pairs,
    palette=choosed_color,
    flierprops=flierprops,
    hue_order=choosed_sample_sites,
)


### 案例10 title背景色占满

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# 100 log-normal draws; their cumulative product is plotted as a line.
draws = np.random.lognormal(.001, .01, 100)
s = pd.Series(draws)
ax = s.cumprod().plot()

# White text on a black background box, centred just above the axes.
title_kwargs = dict(
    position=(.5, 1.02),
    backgroundcolor='black',
    color='white',
    verticalalignment="bottom",
    horizontalalignment="center",
)
title = ax.set_title('My Log Normal Example', **title_kwargs)

# Private matplotlib attribute; per the original note this value controls
# the height of the title box.
title._bbox_patch._mutation_aspect = 0.04
plt.tight_layout()
# Save next to this script, using the script path plus ".png".
plt.savefig(__file__ + ".png")
plt.show()


### 案例11 legend放到图外

# Position the box plot inside the figure so that space is left over
# for the legend.
plt.subplots_adjust(
    bottom=.06,
    top=.91,
    left=.50,
    right=.82,
    hspace=0.2,
)


### 案例12 指定box的颜色

# libraries & dataset
import seaborn as sns
import matplotlib.pyplot as plt
# set a grey background (use sns.set_theme() if seaborn version 0.11.0 or above)
sns.set(style="darkgrid")

# Load the iris example dataset; without this line ``df`` is undefined.
df = sns.load_dataset("iris")

# Map each species to a colour: red for "versicolor", blue for the rest.
my_pal = {
    species: "r" if species == "versicolor" else "b"
    for species in df.species.unique()
}
sns.boxplot(x=df["species"], y=df["sepal_length"], palette=my_pal)
plt.show()


## 三、我的案例

import seaborn as sns

import datetime
import matplotlib

# matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.dates import date2num
from collections import Counter
import matplotlib.patches as mpatches
import numpy as np
import pandas as pd
from matplotlib import pyplot

# Tag each input frame with its chain category, then stack them.
for frame, label in ((df_1, 'Heavy'), (df_2, 'Light'), (df_3, 'Pair')):
    frame['category'] = label

df = pd.concat([df_1, df_2, df_3])

sns.set(style="whitegrid")

# Figure size in inches (A4 landscape).
a4_dims = (11.7, 8.27)
fig, ax = pyplot.subplots(figsize=a4_dims)

# Alternative: sns.violinplot(x=1, y="category", data=df, dodge=False)
# Boxes per category for the column labelled 1, with the raw points
# overlaid as a semi-transparent swarm.
ax = sns.boxplot(x=1, y="category", data=df)
ax = sns.swarmplot(
    x=1,
    y="category",
    data=df,
    alpha=0.5,
    color='0.25',
)

# Axis labels and title.
ax.set(
    xlabel='# Mismatch Residue',
    ylabel='Chain',
)
plt.title('Mismatch Residues Boxplot')

ax.figure.savefig('pic/pair-mismatch-boxplot-2.jpeg')


### 案例二

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import pyplot

# Combined heavy/light label of the form "<H_map>-<L_map>".
df_raw['HL_map'] = df_raw['H_map'] + '-' + df_raw['L_map']
germline_pairs_raw = set([iii.split('_')[0] for iii in df_raw['HL_map']])
cut_frequency = 3

# Drop every germline pair that occurs fewer than ``cut_frequency`` times.
df = df_raw.copy()
for one_key in germline_pairs_raw:
    if len(df[df['HL_map'] == one_key]) < cut_frequency:
        df = df[~df['HL_map'].isin([one_key])]

# Summary sets computed on the filtered data (text before the first "_").
pdb_ids = set([iii.split('_')[0] for iii in df['#PDB_chains']])
germline_pairs = set([iii.split('_')[0] for iii in df['HL_map']])

h_chains = set([iii.split('_')[0] for iii in df['H_map']])
l_chains = set([iii.split('_')[0] for iii in df['L_map']])
sorted_h_chains = sorted(h_chains)
sorted_l_chains = sorted(l_chains)

# Order the pairs by heavy chain first, then light chain, keeping only
# the combinations that are actually present after filtering.
sorted_pairs = []
for ii in sorted_h_chains:
    for jj in sorted_l_chains:
        pair_hl = '%s-%s' % (ii, jj)
        if pair_hl in germline_pairs:
            sorted_pairs.append(pair_hl)

# Adjust font sizes. This is done before the figure is created because
# rc settings only apply to text and ticks created afterwards.
plt.rc('xtick', labelsize=24)
plt.rc('ytick', labelsize=24)
plt.rc('axes', labelsize=24)
plt.rc('axes', titlesize=24)
plt.rc('legend', fontsize=24)

a4_dims = (32, 22)
fig, ax = pyplot.subplots(figsize=a4_dims)

ax = sns.boxplot(x='HL_map', y='Angle', data=df, order=sorted_pairs)
ax.set(xlabel='Pair Germline(Fv Identity Cutoff >80%)', ylabel='Packing Angle')
plt.xticks(rotation=90)

# Annotate every box with its number of observations, just above the median.
medians = []
nobs = []
for one_key in sorted_pairs:
    df_select = df[df['HL_map'] == one_key]
    medians.append(df_select['Angle'].median())
    # All rows in df_select share the same HL_map, so the row count is the
    # group size; len() also avoids an IndexError on an empty selection.
    nobs.append(len(df_select))

for tick, (median, count) in enumerate(zip(medians, nobs)):
    ax.text(tick, median + 0.03, str(count), horizontalalignment='center',
            size=24, color='black', weight='semibold')

# plt.title('Mismatch Residues Boxplot')

# Summary text, positioned in axes coordinates via transform=ax.transAxes.
ax.text(0.8, 0.9,
        'PDB IDs :%s \n Germline Pairs: %s' % (len(pdb_ids), len(germline_pairs)),
        horizontalalignment='left', verticalalignment='center',
        transform=ax.transAxes, fontsize=28, color='red')

ax.figure.savefig('pic/fv_pair-family-germline-cutoff_80-high-resolution.jpeg')
# Parenthesised form works on both Python 2 and Python 3.
print('Finish!')