# 【2.1.2】带边界的气泡图(Bubble plot with Encircling)

## 一、案例

from matplotlib import patches
from scipy.spatial import ConvexHull
import warnings; warnings.simplefilter('ignore')
sns.set_style("white")

# Step 1: Prepare Data
# NOTE(review): `midwest` (a DataFrame providing the 'category', 'area',
# 'poptotal' and 'dot_size' columns used below) and the names `np`/`plt` are
# not defined in this part of the file -- confirm they come from an earlier cell.

# As many colors as there are unique midwest['category'], sampled evenly from tab10.
categories = np.unique(midwest['category'])
# Guard the divisor so that a single category does not divide by zero.
color_span = max(len(categories) - 1, 1)
colors = [plt.cm.tab10(i / float(color_span)) for i in range(len(categories))]

# Step 2: Draw Scatterplot with unique color for each category
fig = plt.figure(figsize=(16, 10), dpi=80, facecolor='w', edgecolor='k')

for i, category in enumerate(categories):
    # `c` is given as a single-row 2D list: a bare RGBA tuple is ambiguous with
    # an array of values to colormap when a category happens to have 4 rows.
    plt.scatter('area', 'poptotal',
                data=midwest.loc[midwest.category == category, :],
                s='dot_size', c=[colors[i]], label=str(category),
                edgecolors='black', linewidths=.5)

# Step 3: Encircling
# https://stackoverflow.com/questions/44575681/how-do-i-encircle-different-data-sets-in-scatter-plot
def encircle(x, y, ax=None, **kw):
    """Draw the convex hull of the points (x, y) as a polygon patch.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the points to enclose; they are stacked column-wise
        into an (n, 2) array before the hull is computed.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``).
    **kw
        Forwarded to ``plt.Polygon`` (e.g. ``ec``, ``fc``, ``alpha``).

    Returns
    -------
    The polygon patch that was added to ``ax``.

    Notes
    -----
    ``ConvexHull`` raises for degenerate input (for example too few points);
    that error is not caught here.
    """
    if ax is None:
        ax = plt.gca()
    p = np.c_[x, y]
    hull = ConvexHull(p)
    # hull.vertices indexes the rows of p that lie on the hull boundary.
    poly = plt.Polygon(p[hull.vertices, :], **kw)
    # Without this the polygon is created but never shown on the plot.
    ax.add_patch(poly)
    return poly

# Select data to be encircled
midwest_encircle_data = midwest.loc[midwest.state == 'IN', :]

# Draw polygon surrounding vertices: encircle() is called twice on the same
# points, once with a gold fill and once with a firebrick outline.
hull_styles = (
    dict(ec="k", fc="gold", alpha=0.1),
    dict(ec="firebrick", fc="none", linewidth=1.5),
)
for hull_style in hull_styles:
    encircle(midwest_encircle_data.area, midwest_encircle_data.poptotal,
             **hull_style)

# Step 4: Decorations
ax = plt.gca()
ax.set_xlim(0.0, 0.1)
ax.set_ylim(0, 90000)
ax.set_xlabel('Area')
ax.set_ylabel('Population')

plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.title("Bubble Plot with Encircling", fontsize=22)
plt.legend(fontsize=12)
plt.show()


## 二、案例解读

### 2.1 确定边界

import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import ConvexHull, convex_hull_plot_2d

points = np.random.rand(30, 2)   # 30 random points in 2-D
hull = ConvexHull(points)
print(hull)

# All points as dots.
plt.plot(points[:, 0], points[:, 1], 'o')

# Each row of hull.simplices is indexed into `points` to get the two
# endpoints of one black segment.
for simplex in hull.simplices:
    # Formatted explicitly so the output is identical on Python 2 and 3.
    print("{} {}".format(simplex, points[simplex, 0]))
    plt.plot(points[simplex, 0], points[simplex, 1], 'k-')

# Dashed red line through the hull vertices in the order stored in
# hull.vertices, with the first vertex marked by a red dot.
plt.plot(points[hull.vertices, 0], points[hull.vertices, 1], 'r--', lw=2)
plt.plot(points[hull.vertices[0], 0], points[hull.vertices[0], 1], 'ro')
plt.show()


print simplex,points[simplex, 0]

[20 17] [0.22706164 0.00547213]
[20 29] [0.22706164 0.69636975]
...
[27  4] [0.25261135 0.1616203 ]
[27  8] [0.25261135 0.55498599]

print points

[[0.75991856 0.83036497]
[0.83861989 0.72823694]
...
[0.29919865 0.55682239]
[0.25261135 0.86871164]
[0.16625568 0.41353741]
[0.69636975 0.03329878]]

print hull.simplices

[[20 17]
[20 29]
[21 29]
[15 17]
[15  4]
[22 11]
[22 21]
[ 0 11]
[ 0  8]
[27  4]
[27  8]]

print hull.vertices
[17 20 29 21 22 11  0  8 27  4 15]


• hull.vertices：构成凸包（外边界）的各个顶点在原始点数组 points 中的索引；二维情况下按逆时针顺序排列。
• hull.simplices：凸包的每一条边；每一行是该边两个端点在 points 中的索引（注意是 points 的索引，而不是 hull.vertices 的下标）。
• 更多解读，以后用的时候再来补充

### 2.2 关于encircle

import matplotlib.pyplot as plt
import numpy as np; np.random.seed(1)
from scipy.spatial import ConvexHull

# Two Gaussian point clouds: 15 points (loc=5, scale=2) and 13 points
# (loc=8, scale=2.5). Each draw is a (2, n) array split into x and y rows.
first_cloud = np.random.normal(loc=5, scale=2, size=(2, 15))
second_cloud = np.random.normal(loc=8, scale=2.5, size=(2, 13))
x1, y1 = first_cloud
x2, y2 = second_cloud

for xs, ys in ((x1, y1), (x2, y2)):
    plt.scatter(xs, ys)

def encircle(x, y, ax=None, **kw):
    """Draw the convex hull of the points (x, y) as a polygon patch.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the points to enclose; they are stacked column-wise
        into an (n, 2) array before the hull is computed.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``).
    **kw
        Forwarded to ``plt.Polygon`` (e.g. ``ec``, ``fc``, ``alpha``).

    Returns
    -------
    The polygon patch that was added to ``ax``.

    Notes
    -----
    ``ConvexHull`` raises for degenerate input (for example too few points);
    that error is not caught here.
    """
    if ax is None:
        ax = plt.gca()
    p = np.c_[x, y]
    hull = ConvexHull(p)
    # hull.vertices indexes the rows of p that lie on the hull boundary.
    poly = plt.Polygon(p[hull.vertices, :], **kw)
    # Without this the polygon is created but never shown on the plot.
    ax.add_patch(poly)
    return poly

# One encircle() call per cloud: gold fill with black edge for the first,
# orange edge with no fill for the second.
for cloud, hull_style in (
    ((x1, y1), dict(ec="k", fc="gold", alpha=0.2)),
    ((x2, y2), dict(ec="orange", fc="none")),
):
    encircle(*cloud, **hull_style)

plt.show()


import matplotlib.pyplot as plt
import numpy as np; np.random.seed(1)
from scipy.spatial import ConvexHull

# Same two Gaussian clouds as in the hull example: 15 points (loc=5, scale=2)
# and 13 points (loc=8, scale=2.5), each drawn as a (2, n) array.
cloud_a = np.random.normal(loc=5, scale=2, size=(2, 15))
cloud_b = np.random.normal(loc=8, scale=2.5, size=(2, 13))
(x1, y1), (x2, y2) = cloud_a, cloud_b

for xs, ys in ((x1, y1), (x2, y2)):
    plt.scatter(xs, ys)

def encircle2(x, y, ax=None, **kw):
    """Draw a circle enclosing the points (x, y) as a patch.

    The circle is centred on the mean of the points. Its radius is the
    distance from that centre to the farthest point, enlarged by 5% so that
    the outermost point does not sit exactly on the outline.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the points to enclose; they are stacked column-wise
        into an (n, 2) array.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``).
    **kw
        Forwarded to ``plt.Circle`` (e.g. ``ec``, ``fc``, ``alpha``).

    Returns
    -------
    The circle patch that was added to ``ax``.
    """
    if ax is None:
        ax = plt.gca()
    p = np.c_[x, y]
    mean = np.mean(p, axis=0)
    d = p - mean
    # Largest Euclidean distance from the centre to any point.
    r = np.max(np.sqrt(d[:, 0]**2 + d[:, 1]**2))
    # The original stopped after computing r, so nothing was ever drawn.
    circ = plt.Circle(mean, radius=1.05 * r, **kw)
    ax.add_patch(circ)
    return circ

# One encircle2() call per cloud: gold fill with black edge for the first,
# orange edge with no fill for the second.
for cloud, circle_style in (
    ((x1, y1), dict(ec="k", fc="gold", alpha=0.2)),
    ((x2, y2), dict(ec="orange", fc="none")),
):
    encircle2(*cloud, **circle_style)

# Recompute the data limits and rescale the view before showing the figure.
current_axes = plt.gca()
current_axes.relim()
current_axes.autoscale_view()
plt.show()