【2.1】条形图(matplotlib-bar)

一、案例

1.1 并排放

import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import datetime

# Three consecutive days, converted to matplotlib's numeric date values so
# the bar positions can be shifted with plain arithmetic.
days = date2num([datetime.datetime(2011, 1, day) for day in (4, 5, 6)])

# One (x shift, heights, colour) triple per series.  The shift equals the bar
# width, so the three bars belonging to one day sit side by side.
bar_width = 0.2
series = (
    (-bar_width, [4, 9, 2], 'b'),
    (0, [1, 2, 3], 'g'),
    (+bar_width, [11, 12, 13], 'r'),
)

ax = plt.subplot(111)
for shift, heights, colour in series:
    ax.bar(days + shift, heights, width=bar_width, color=colour,
           align='center')

# Format the numeric x values as dates on the axis.
ax.xaxis_date()

plt.show()

(此处为上面代码的输出图片:并排条形图,png 格式)

二、水平条形图

代码:

import matplotlib.pyplot as plt
import numpy as np

# Reset matplotlib to its default rc settings and seed NumPy's global random
# generator so the example data is the same on every run.
plt.rcdefaults()
np.random.seed(19680801)

# Example data: one bar per person, each with a random length and a random
# error-bar size (drawn in this order from the seeded generator).
people = ('Tom', 'Dick', 'Harry', 'Slim', 'Jim')
n_people = len(people)
y_pos = np.arange(n_people)
performance = 10 * np.random.rand(n_people) + 3
error = np.random.rand(n_people)

fig, ax = plt.subplots()
ax.barh(
    y_pos,
    performance,
    xerr=error,
    align='center',
    color='green',
    ecolor='black',
)

# Put the person names on the y axis and flip it so that the labels read
# top-to-bottom.
ax.set_yticks(y_pos)
ax.set_yticklabels(people)
ax.invert_yaxis()

ax.set_xlabel('Performance')
ax.set_title('How fast do you want to go today?')

plt.show()

三、我的案例

代码:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.dates import date2num
import datetime
import matplotlib.patches as mpatches

# Two tab-separated frequency tables.  The code below reads an '#AA_Loc'
# column from the first table and one column per amino-acid letter from both.
# NOTE(review): judging by the file names and the 'AbSim | IgSimulator' label
# further down, `aa` is the AbSim data and `bb` the IgSimulator data - confirm.
aa = pd.read_csv("absim_data_loc_mutated_freq.tsv", sep='\t', index_col=False)
bb = pd.read_csv("ig_loc_mutated_freq.tsv", sep='\t', index_col=False)

x = np.array(aa['#AA_Loc'])

# Two side-by-side axes sharing the y axis.  The same bars are drawn on both
# and each axes is then limited to a different x window, which gives the
# figure a "broken" x axis.
f, (ax, ax2) = plt.subplots(1, 2, sharey=True, facecolor='w')

aa_list = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L',
           'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']

NUM_COLORS = len(aa_list)
cm = plt.get_cmap('gist_rainbow')
color_patch = []

# Running top of each stack (one value per bar).  Float arrays are used so
# that accumulating a float column after an integer one cannot fail on an
# in-place dtype cast.
bottom_start = np.zeros(len(aa), dtype=float)
bottom_start2 = np.zeros(len(bb), dtype=float)

for ii, aa_base in enumerate(aa_list):
    # The integer division makes every run of three consecutive letters share
    # one colormap position.
    one_color = cm(ii // 3 * 3.0 / NUM_COLORS)
    color_patch.append(mpatches.Patch(color=one_color, label=aa_base))

    heights = np.asarray(aa[aa_base], dtype=float)
    heights2 = np.asarray(bb[aa_base], dtype=float)

    # Draw identical bars on both halves of the broken axis: the `aa` stack
    # shifted left by one bar width, the `bb` stack at the location itself.
    for axis in (ax, ax2):
        axis.bar(x - 0.2, heights, width=0.2, color=one_color,
                 bottom=bottom_start, align='center')
        axis.bar(x, heights2, width=0.2, color=one_color,
                 bottom=bottom_start2, align='center')

    # `bottom` is where the next layer starts, so add the layer just drawn.
    bottom_start = bottom_start + heights
    bottom_start2 = bottom_start2 + heights2

# Each axes shows a different window of the x range.
ax.set_xlim(24, 37)
ax2.set_xlim(47, 68)

# Hide the spines between ax and ax2 and keep y ticks on the outer sides only.
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
ax.yaxis.tick_left()
# tick_params expects a bool here; the legacy string 'off' is truthy and is
# not interpreted as False by current matplotlib releases.
ax.tick_params(labelright=False)
ax2.yaxis.tick_right()

# Short diagonal marks at the break, drawn in axes coordinates.
d = .015  # size of the diagonal marks, in axes coordinates
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((1 - d, 1 + d), (-d, +d), **kwargs)
ax.plot((1 - d, 1 + d), (1 - d, 1 + d), **kwargs)

kwargs.update(transform=ax2.transAxes)  # switch to the right-hand axes
ax2.plot((-d, +d), (1 - d, 1 + d), **kwargs)
ax2.plot((-d, +d), (-d, +d), **kwargs)

# NOTE(review): the pyplot-level calls below act on the current axes,
# presumably ax2 (the axes created last) - confirm.  The label/title offsets
# are hand-tuned for this two-panel layout.
# prop={'size': 7} sets the legend font size.
plt.legend(bbox_to_anchor=(1.02, 1), loc=2, borderaxespad=0.,
           handles=color_patch, prop={'size': 7})

plt.text(1.8, 0.9, 'AbSim | IgSimulator', horizontalalignment='center',
         verticalalignment='center', transform=ax.transAxes)
plt.xlabel('AA Location of Sequence')
ax.xaxis.set_label_coords(-0.2, -0.1)  # reposition the x axis label
ax2.xaxis.set_label_coords(-0.2, -0.1)

plt.ylabel('AA bases Percent (%)')
ax.yaxis.set_label_coords(-1.4, 0.5)  # reposition the y axis label
ax2.yaxis.set_label_coords(-1.4, 0.5)

plt.title('SHM Mutation Distribution', x=-0.1)  # shift the title along x

# Uncomment to save the figure to disk:
# plt.savefig('shm_mutated_distibution.jpeg', dpi=400)
plt.show()

注:

bottom 参数指定每个 bar 的起始高度(底部位置):每画完一层,就把该层的高度累加到 bottom 上,下一层便从上一层的顶部开始画,从而得到堆叠条形图。

案例2

代码

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.dates import date2num
import datetime
import matplotlib.patches as mpatches

# Input tables, one pair per data source (tab separated):
#   aa*  - has a header row; read below via '#AA_Loc' and the 'A', 'T', 'C',
#          'G' columns
#   bb*  - no header row; column 0 is used as bar position, column 1 as height
aa = pd.read_csv("ig_loc_mutated_nucl_freq.tsv", sep='\t', index_col=False)
bb = pd.read_csv("ig_bases_mutated_nucl_freq.tsv", sep='\t', index_col=False,
                 header=None, skip_blank_lines=True)

aa2 = pd.read_csv("absim_data_loc_mutated_nucl_freq.tsv", sep='\t',
                  index_col=False)
bb2 = pd.read_csv("absim_data_bases_mutated_nucl_freq.tsv", sep='\t',
                  index_col=False, header=None, skip_blank_lines=True)

aa3 = pd.read_csv("shazam_loc_mutated_freq_nucl.tsv", sep='\t',
                  index_col=False)
bb3 = pd.read_csv("shazam_bases_mutated_freq_nucl.tsv", sep='\t',
                  index_col=False, header=None, skip_blank_lines=True)

# Small tick labels so six panels fit into one figure.
plt.rc('xtick', labelsize=6)
plt.rc('ytick', labelsize=6)

# 3 rows x 2 columns: the left column (ax1/ax3/ax5) holds the plain bar plots
# of the bb* tables, the right column (ax2/ax4/ax6) the stacked bars of the
# aa* tables.
fig = plt.figure()
ax1, ax2, ax3, ax4, ax5, ax6 = (fig.add_subplot(3, 2, idx)
                                for idx in range(1, 7))

aa_list = ['A', 'T', 'C', 'G']
cm = ['y', 'b', 'r', 'g']  # one colour per entry of aa_list

# Legend handles: one coloured patch per base.
color_patch = [mpatches.Patch(color=one_color, label=aa_base)
               for aa_base, one_color in zip(aa_list, cm)]


def _stacked_bars(axes, table):
    """Draw the columns named in aa_list as stacked bars at table['#AA_Loc']."""
    positions = np.array(table['#AA_Loc'])
    # Float accumulator: adding a float column onto an integer one in place
    # would otherwise fail on the dtype cast.
    bottom = np.zeros(len(table), dtype=float)
    for aa_base, one_color in zip(aa_list, cm):
        heights = np.asarray(table[aa_base], dtype=float)
        axes.bar(positions, heights, color=one_color, bottom=bottom,
                 align='center')
        # The next layer starts on top of the one just drawn.
        bottom = bottom + heights


def _mark_regions(axes, y_max):
    """Set the y range to [0, y_max] and draw four dashed vertical lines."""
    axes.set_ylim(0, y_max)
    # Red lines at x=76 and x=105, green lines at x=148 and x=198.
    # NOTE(review): presumably the CDR1 / CDR2 boundaries, matching the text
    # labels placed on ax2 - confirm.
    for x_pos, line_color in ((76, 'r'), (105, 'r'), (148, 'g'), (198, 'g')):
        axes.plot([x_pos, x_pos], [0, y_max], '--', picker=5,
                  color=line_color)


# Row 1.
_stacked_bars(ax2, aa)
_mark_regions(ax2, 5)
ax2.text(77, 4, 'CDR1', fontsize=6, color='r')
ax2.text(155, 4, 'CDR2', fontsize=6, color='g')

ax1.set_xlim(0, 20)
ax1.bar(np.array(bb[0]), np.array(bb[1]))
ax1.text(14, 17, 'IgSimulator\nNum:1000000', fontsize=6, color='r')

# Row 2.
_stacked_bars(ax4, aa2)

ax3.bar(np.array(bb2[0]), np.array(bb2[1]))
ax3.set_xlim(0, 60)
ax3.text(44, 4.2, 'AbSim_Data\nNum:100000', fontsize=6, color='r')

_mark_regions(ax4, 75)

# Row 3.
_stacked_bars(ax6, aa3)
_mark_regions(ax6, 7)

# This panel is labelled 'ShaZam', so plot bb3 (the shazam table) here
# rather than bb, which is already shown in ax1.
# NOTE(review): the x limit and text position were tuned while this panel
# still showed `bb`; re-check them against the bb3 value range.
ax5.bar(np.array(bb3[0]), np.array(bb3[1]))
ax5.set_xlim(0, 20)
ax5.text(14, 17, 'ShaZam\nNum:9039753', fontsize=6, color='r')

# Column titles on the top row; shared axis labels on the middle-left and
# bottom panels only.
title_font = {'fontname': 'Arial', 'size': '10', 'color': 'black',
              'weight': 'normal', 'verticalalignment': 'bottom'}
ax1.set_title('Histogram of Mutated Bases Frequency', **title_font)
ax2.set_title('Barplot of Base Mutated Frequency', **title_font)

ax3.set_ylabel('Frequency (%)', size=10)

ax5.set_xlabel('# Mutated Bases', size=10)
ax6.set_xlabel('Base Loc', size=10)

# NOTE(review): plt.legend acts on the current axes, presumably ax6 (the one
# added last); the anchor is given in that axes' coordinates - confirm.
plt.legend(bbox_to_anchor=(0.02, 3.4), loc=2, borderaxespad=0.,
           handles=color_patch, prop={'size': 6})

plt.savefig('shm_mutated_distibution_1.jpeg', dpi=400)
# plt.show()

案例3

代码

import matplotlib 

# matplotlib.use('Agg')
import matplotlib.pyplot as plt
from collections import Counter
import matplotlib.patches as mpatches

plt.rcdefaults()

input_file = 'result/adimab-ident-2.tsv'

# Three views of the hit name found in the second tab-separated column:
#   list1 - the hit name as-is
#   list2 - the part before the first '*'
#   list3 - the part before the first '-' and the first '/', with every 'D'
#           removed (this is the one that gets plotted)
list1 = []
list2 = []
list3 = []

with open(input_file) as data1:
    for each_line in data1:
        # Skip blank lines and the header line(s) starting with 'Query'.
        if each_line.strip() == '' or each_line.startswith('Query'):
            continue
        cnt = each_line.strip().split('\t')
        hit_name = cnt[1]
        list1.append(hit_name)
        list2.append(hit_name.split("*")[0])
        list3.append(hit_name.split("-")[0].split('/')[0].replace('D', ''))

list1_count = Counter(list1)
list2_count = Counter(list2)
list3_count = Counter(list3)

ax = plt.subplot(111)

# (substring to look for, bar colour, legend colour, legend label); a name is
# assigned to the first group whose substring it contains.
groups = (
    ('HV', 'b', 'blue', 'IGHV'),
    ('KV', 'r', 'red', 'IGKV'),
    ('LV', 'g', 'green', 'IGLV'),
)
group_pos = {tag: [] for tag, _, _, _ in groups}
group_cnt = {tag: [] for tag, _, _, _ in groups}

data_x = []
ticks_name = []

# Sort by name, descending.  The key takes the whole (name, count) pair and
# indexes it: tuple-unpacking lambda parameters (`lambda (k, v): k`) are
# Python 2 only and a SyntaxError on Python 3.
sorted_list = sorted(list3_count.items(), key=lambda item: item[0],
                     reverse=True)
for num, (name, count) in enumerate(sorted_list, start=1):
    # Every name gets a tick position, even if it matches no group.
    data_x.append(num)
    ticks_name.append(name)
    for tag, _, _, _ in groups:
        if tag in name:
            group_pos[tag].append(num)
            group_cnt[tag].append(count)
            break

# Horizontal bars, one colour per group.
for tag, bar_color, _, _ in groups:
    ax.barh(group_pos[tag], group_cnt[tag], color=bar_color, align='center')

ax.set_xlabel('# Num')
ax.set_ylabel('Gene name')

# Legend: one coloured patch per group.
plt.legend(handles=[mpatches.Patch(color=legend_color, label=label)
                    for _, _, legend_color, label in groups])

# Label each bar position with its name.
ax.set_yticks(data_x)
ax.set_yticklabels(ticks_name)

fig = plt.gcf()
fig.set_size_inches(12, 9.5)
plt.savefig('pic/family-2.jpeg', dpi=400)
plt.show()

参考资料

个人公众号,比较懒,很少更新,可以在上面提问题,如果回复不及时,可发邮件给我: tiehan@sina.cn

Sam avatar
About Sam
专注生物信息 专注转化医学