[Cheat Sheet] Pairplot - 여러 변수 간 산점도
요약 :
import os
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
sns.PairGrid
# Build a reproducible synthetic data set: four numeric columns plus one
# binary label column stored as strings.
N_SAMPLES = 100
np.random.seed(2020)

# The order of the random draws (x1, e, x3, z) determines the generated
# values, so it must stay exactly as it is.
x1 = 20 + np.random.normal(size=N_SAMPLES)
e = np.random.normal(size=N_SAMPLES)  # noise term shared by x2 and x4
x2 = 2 * x1 + 10 + e
x3 = 2 * np.random.normal(size=N_SAMPLES)
x4 = 0.5 * x3 + 0.5 * x2 + 5 * e
# Bernoulli(0.4) draws converted to the strings '0' / '1'.
z = np.random.binomial(n=1, p=0.4, size=N_SAMPLES).astype(str)

columns = {
    'x1': x1,
    'x2': x2,
    'x3': x3,
    'x4': x4,
    'z': z,
}
data = pd.DataFrame(columns)
data.head()
| | x1 | x2 | x3 | x4 | z |
---|---|---|---|---|---|
0 | 18.231154 | 47.013757 | -0.970496 | 25.778870 | 1 |
1 | 20.075552 | 49.000093 | 0.544443 | 19.017209 | 0 |
2 | 18.869370 | 47.493437 | 0.965801 | 23.003100 | 0 |
3 | 19.348570 | 48.527971 | 1.170103 | 24.003192 | 1 |
4 | 19.106884 | 48.755688 | 0.834740 | 27.504812 | 0 |
# Pairwise scatter plots of the four numeric columns, with points coloured
# by the value of column 'z'.
pair_vars = ['x1', 'x2', 'x3', 'x4']
g = sns.PairGrid(
    data,
    vars=pair_vars,
    hue='z',
    palette='RdBu_r',
)
# Draw the same scatter plot on every cell of the grid.
g.map(plt.scatter, alpha=0.8)
g.add_legend()
<seaborn.axisgrid.PairGrid at 0x20c8c1875b0>