天天看点

Seaborn可视化常见操作

主要是Seaborn常见的操作

  • Seaborn 基础用法
import seaborn as sns
import numpy as np

import matplotlib.pyplot as plt


def sinplot(flip=1):
    """Plot six sine curves via ``plt.plot`` for quick style demos.

    Curve ``k`` (1 through 6) is ``sin(x + 0.5 * k)`` scaled by ``7 - k`` and
    then by ``flip``, sampled at 100 evenly spaced points on [0, 14].

    Args:
        flip: Multiplier applied to every curve; ``-1`` mirrors the plot
            about the x-axis.
    """
    xs = np.linspace(0, 14, 100)
    for k in range(1, 7):
        wave = np.sin(xs + k * .5)
        plt.plot(xs, wave * (7 - k) * flip)

# 5种 主题风格
# darkgrid
# whitegrid
# dark
# white
# ticks
# sns.set()
# sinplot()

# sns.set_style('whitegrid')
# 20 rows x 6 columns of standard-normal noise; column j is shifted up by j / 2.
data = np.arange(6) / 2 + np.random.normal(loc=0.0, scale=1.0, size=(20, 6))
#
# sns.boxplot(data = data)
# sns中 ticks 风格, 可以把上面和右边刻度去掉
# sns.set_style('ticks')
# sinplot()
# sns.despine()

# 设置离x轴的距离
# sns.violinplot(data)
# sns.despine(offset=30)

# 保留x轴或者y轴
# sns.set_style('whitegrid')
# data = np.random.normal(size=(20, 6)) + np.arange(6) / 2
# sns.boxplot(data = data, palette='deep')
# sns.despine(left=True)

# 指定风格, 可以使用with
# with sns.axes_style('darkgrid'):
#     plt.subplot(211)
#     sinplot()
# plt.subplot(212)
# sinplot(-1)

# 大小风格
# sns.set()
# sns.set_context('paper')
# plt.figure(figsize=(8, 6))
# sinplot()

# sns.set()
# sns.set_context('talk')
# plt.figure(figsize=(8, 6))
# sinplot()
#
# sns.set()
# sns.set_context('poster')
# plt.figure(figsize=(8, 6))
# sinplot()
# Use the 'notebook' plotting context: font_scale scales the fonts, and the
# rc override sets the line width.
sns.set_context('notebook', font_scale=1.5, rc={'lines.linewidth': 4.5})
sinplot()
# Render the figure; without this nothing is displayed when run as a script.
plt.show()
           
  • 颜色的相关设定
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
# Apply seaborn's theme and set the default figure size to 6 x 6.
sns.set(rc={'figure.figsize': (6, 6)})

# 调色板
# 颜色很重要

# 分类色板, 默认提供六个颜色
# current_palette = sns.color_palette()
# sns.palplot(current_palette)

# 当需要的颜色超过6种时, 可以使用圆形(循环)颜色空间来生成调色板
# 最常用的是hls颜色空间
# sns.palplot(sns.color_palette('hls', 8))
#
# data = np.random.normal(size=(20, 8)) + np.arange(8) / 2
# sns.set()
# sns.boxplot(data=data, palette=sns.color_palette('hls', 8))

# hls_palette() 函数是用来控制亮度和饱和度
# l - 亮度  s- 饱和度
# sns.palplot(sns.hls_palette(8, l=.7, s=.9))
# Show ten colours from the 'Paired' palette as a strip of swatches.
sns.palplot(sns.color_palette('Paired', 10))
# Render the figure; without this nothing is displayed when run as a script.
plt.show()
           
  • 单变量绘图
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats, integrate


# Seaborn default theme with color_codes enabled.
sns.set(color_codes=True)
# Seed NumPy's global RNG with the summed code points of 'distributions'.
np.random.seed(sum(ord(ch) for ch in 'distributions'))

# x = np.random.normal(size=100)
# #sns.distplot(x, kde=False)
# # sns.distplot(x, kde=False, bins=20)
#
# # 数据分布
# x = np.random.gamma(6, size=200)
# sns.distplot(x, kde=False, fit=stats.gamma)
#
# # 根据均值和方差生成数据
# mean, cov = [0, 1],[(1, .5), (.5, 1)]
#
# data = np.random.multivariate_normal(mean, cov, 200)
# df = pd.DataFrame(data, columns=['x', 'y'])
# print(df)
#
# # 观察两个变量之间的关系最好用散点图
# sns.jointplot(x='x', y='y', data=df)
#
# # 另外一种方式
# with sns.axes_style('white'):
#     sns.jointplot(x='x', y='y', kind='hex', data=df)

           
  • 多变量绘图
# iris = sns.load_dataset('iris')
# sns.pairplot(iris)

# 分类数据绘图

import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Seaborn default theme with color_codes enabled.
sns.set(color_codes=True)
# Seed NumPy's global RNG with the summed code points of 'distributions'.
np.random.seed(sum(ord(ch) for ch in 'distributions'))

# Example datasets: two loaded via sns.load_dataset, one read from a local
# tab-separated file.
titanic = sns.load_dataset('titanic')
iris = sns.load_dataset('iris')
tips = pd.read_csv('tips.csv', sep='\t')

# sns.stripplot(x='day', y='total_bill', data=tips)

# 如果数据点重叠较多, 加上 jitter=True 让点沿分类轴随机抖动
# sns.stripplot(x='day', y='total_bill', data=tips, jitter=True)

# sns.swarmplot(x='day', y='total_bill', data=tips)

# 加上属性值

# sns.swarmplot(x='day', y='total_bill', data=tips, hue='sex')
# sns.swarmplot(x='day', y='total_bill', data=tips, hue='time')

# 盒图
# sns.boxplot(x='day', y='total_bill', data=tips, hue='time')
# 小提琴图
# sns.violinplot(x='day', y='total_bill', data=tips, hue='time')
# sns.violinplot(x='day', y='total_bill', data=tips, hue='time', split=True)

# 显示值的集中趋势
# sns.boxplot(x='sex', y='survived', hue='class', data=titanic)

# 点图可以更好的描述变化差异
# sns.pointplot(x='sex', y='survived', hue='class', data=titanic)

# sns.pointplot(x='class', y='survived', hue='sex', data=titanic, palette={'male': 'g', 'female':'m'},
#               markers=["^", "o"], linestyles=['-', '--'])


# 宽型(wide-form)数据
# sns.boxplot(data=iris, orient='h')

# 多层面板分类图
# sns.factorplot(x='day', y='total_bill', hue='smoker', data=tips)
# sns.factorplot(x='day', y='total_bill', hue='smoker', data=tips, kind='swarm')
# sns.factorplot(x='day', y='total_bill', hue='smoker', data=tips, kind='bar')
           
  • FacetGrid绘图使用方法
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Seaborn default theme with color_codes enabled.
sns.set(color_codes=True)
# Seed NumPy's global RNG with the summed code points of 'distributions'.
np.random.seed(sum(ord(ch) for ch in 'distributions'))

# Example datasets: two loaded via sns.load_dataset, one read from a local
# tab-separated file.
titanic = sns.load_dataset('titanic')
iris = sns.load_dataset('iris')
tips = pd.read_csv('tips.csv', sep='\t')
# Preview the first five rows of the tips table.
print(tips.head(5))

# g = sns.FacetGrid(tips, col='time')
# g.map(plt.hist, 'tip')
# g = sns.FacetGrid(tips, col='sex', hue='smoker')
# g.map(plt.scatter, 'total_bill', 'tip', alpha=.7)
# g.add_legend()

# g = sns.FacetGrid(tips, row='smoker', col='time', margin_titles=True)
# g.map(sns.regplot, 'size', 'total_bill', color='.3', fit_reg=False, x_jitter=.1)

# g = sns.FacetGrid(tips, col='day', size=4, aspect=.5)
# g.map(sns.barplot, 'sex', 'total_bill')
# from pandas import Categorical
# ordered_days = tips.day.value_counts().index
# print(ordered_days)
# ordered_days = Categorical(['Thur', 'Fri','Sat', 'Sun'])
# print(ordered_days)
#
# g = sns.FacetGrid(tips, row='day', row_order=ordered_days, size=1.7, aspect=4)
# g.map(sns.boxplot, 'total_bill')

# 使用FacetGrid 绘制多变量
# pal = dict(Lunch = 'seagreen', Dinner='gray')
# g = sns.FacetGrid(tips, hue='time', palette=pal, size=5)
# g.map(plt.scatter, 'total_bill', 'tip', s=50, alpha=.7, linewidth=.5, edgecolor='white')
# g.add_legend()

# g = sns.FacetGrid(tips, hue='sex', palette='Set1', size=5, hue_kws={'marker': ['^', 'v']})
# g.map(plt.scatter, 'total_bill', 'tip', s=100, linewidth=.5, edgecolor='white')
# g.add_legend()

# 改变x轴和y轴的坐标
# with sns.axes_style('white'):
#     g = sns.FacetGrid(tips, row='sex', col='smoker', margin_titles=True, size=2.5)
# g.map(plt.scatter, 'total_bill', 'tip', color='#334488', edgecolor='white', lw=.5)
# g.set_axis_labels('Total bill US Dollars', 'Tip')
# g.set(xticks=[10, 30, 50], yticks=[2, 6, 10])
# g.fig.subplots_adjust(wspace=.02, hspace=.02)


# Pair plots
# NOTE(review): iris was already loaded earlier in this section; this reload
# looks redundant.
iris = sns.load_dataset('iris')
# g = sns.PairGrid(iris)
# g.map(plt.scatter)

# 对角线上画图
# g = sns.PairGrid(iris, hue='species')
# g.map_diag(plt.hist)
# g.map_offdiag(plt.scatter)
# g.add_legend()


# 画部分特征
# print(iris)
# g = sns.PairGrid(iris, vars=['sepal_length', 'sepal_width'], hue='species')
# g.map(plt.scatter)

# Colour the pair grid by the 'size' column using the 'GnBu_d' palette.
g = sns.PairGrid(tips, hue='size', palette='GnBu_d')
g.map(sns.scatterplot, s=50, edgecolor='white')
g.add_legend()
# Render the figure; without this nothing is displayed when run as a script.
plt.show()
           
  • 热力图的绘制
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Default seaborn theme; fixed seed so the demo matrix is reproducible.
sns.set()
np.random.seed(0)
# 3x3 matrix of uniform samples from [0, 1) for the heatmap examples.
uniform_data = np.random.random_sample((3, 3))
print(uniform_data)
# heatmap = sns.heatmap(uniform_data)
# 用vmin和vmax指定颜色映射的最小值和最大值
# heatmap = sns.heatmap(uniform_data, vmin=0.2, vmax=0.5)


# 对于有正有负的数据, 可以用center指定颜色映射的中心值
# normal_data = np.random.randn(3, 3)
# print(normal_data)
# ax = sns.heatmap(normal_data, center=0)

# Load the flights table from a local CSV and preview it.
flights = pd.read_csv('flights.csv')
print(flights.head())

# Reshape to a month x year matrix of passenger counts. Keyword arguments are
# used because pandas 2.0 no longer accepts positional arguments to
# DataFrame.pivot.
flights = flights.pivot(index='month', columns='year', values='passengers')
print(flights)
# ax = sns.heatmap(flights)

# 热力图的一些参数

# ax = sns.heatmap(flights, annot=True, fmt='d')

# ax = sns.heatmap(flights,linewidths=.5)

# 调色盘
# ax = sns.heatmap(flights, cmap='YlGnBu', linewidths=.5)

# Draw the heatmap without its colour bar, then display the figure.
ax = sns.heatmap(data=flights, cbar=False)
plt.show()