• Python 視覺化:Matplotlib、Seaborn、Plotly 熱力圖與互動圖

PS 程式碼後面沒有print()、plt.show(),是因為用jupyter notebook

Matplotlib

基本介紹

Matplotlib 是Python的數據可視化庫,用於創建各種類型的圖形,折線圖、散點圖、直方圖、條形圖、餅圖…

import matplotlib.pyplot as plt

%matplotlib inline   # 如果用jupyter notebook 要加
  • 假設我先創一個 x、y
    import numpy as np

# Eleven evenly spaced sample points on [0, 5] and their squares.
x = np.linspace(0, 5, num=11)
y = np.square(x)

x 為 array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. ])

y 為 array([ 0. , 0.25, 1. , 2.25, 4. , 6.25, 9. , 12.25, 16. , 20.25, 25. ])

繪製基礎圖表

plt.plot(x,y)
plt.show()

創建圖表的三種方式

方法一

# Method one: drive everything through the pyplot state machine.
plt.figure(figsize=(8, 2), dpi=100)  # set up the canvas first

plt.plot(x, y, 'r')    # draw the base line and pick its colour
plt.plot(x+1, y+1, 'b') 

plt.xlabel('X Axis Title Here')     # axis labels
plt.ylabel('Y Axis Title Here')
plt.title('String Title Here')      # figure title

plt.legend(labels=['Line 1', 'Line 2'], loc=1)  # add a legend; loc=1 puts it in the upper-right corner
plt.savefig("filename.png")        # optional: write the figure to a file

plt.show()

方法二

fig, axes = plt.subplots(figsize=(8,2), dpi=100)

axes.plot(x, y, 'r')
axes.plot(x+1, y+1, 'b')

axes.set_xlabel('X Axis Title Here')
axes.set_ylabel('Y Axis Title Here')
axes.set_title('String Title Here')

axes.legend(labels=['Line 1', 'Line 2'], loc=1)
fig.savefig("filename.png")

plt.show()

創建了一個包含軸(axes)的圖表對象(figure),後面都一樣
plt.figure() 用來創建一個圖,但 plt.subplots() 之後可以產生多個子圖

方法三
使用【對象導向】方法,明確地創建了圖形對象和軸對象,並通過添加軸來進行繪圖

[0.1, 0.1, 0.8, 0.8] 軸的左邊緣距離圖形左邊緣10%,底邊緣距離圖形底邊緣10%,寬度佔圖形寬度的80%,高度佔圖形高度的80%

fig = plt.figure(figsize=(8, 2), dpi=100)

axes = fig.add_axes([0.1, 0.1, 0.8, 0.8]) 

axes.plot(x, y, 'r')
axes.plot(x+1, y+1, 'b')
axes.set_xlabel('X Axis Title Here') 
axes.set_ylabel('Y Axis Title Here')
axes.set_title('String Title Here')

axes.legend(labels=['Line 1', 'Line 2'], loc=1)
fig.savefig("filename.png")

plt.show()

創建子圖的三種方式 (改欄/行的數量,就能增加子圖數量)

方法一
使用 plt.figure() 創建圖形,然後使用 plt.subplot() 來創建子圖

plt.subplot() 接受三個參數,(行數, 列數,子圖的索引)

# Two side-by-side subplots created with plt.subplot(rows, columns, index).
plt.figure(figsize=(8, 2), dpi=100)

plt.subplot(1,2,1)  # (1 row, 2 columns, first subplot)
plt.plot(x, y, 'r--')  
plt.plot(x+1, y+1, 'b')
plt.xlabel('X1 Axis Title Here')
plt.ylabel('Y1 Axis Title Here')
plt.title('String Title Here')
plt.legend(labels=['Line 1', 'Line 2'], loc=1)

plt.subplot(1,2,2)  # (1 row, 2 columns, second subplot)
plt.plot(y, x, 'g*-')
plt.plot(y-1, x-1, 'g*-')
plt.xlabel('X2 Axis Title Here')
plt.ylabel('Y2 Axis Title Here')
plt.title('String Title Here')
plt.legend(labels=['Line 1', 'Line 2'], loc=1)

# Saving after both subplots are drawn writes the whole figure to one file.
plt.savefig("filename.png")

plt.show()

方法二
使用 plt.subplots() 創建一個包含兩個子圖的 fig 和 axes(軸)對象陣列

axes 是一個包含兩個軸對象的陣列,分別對應於兩個子圖

fig, axes = plt.subplots(nrows=1, ncols=2,figsize=(8,2), dpi=100)

axes[0].plot(x, y, 'r--' )
axes[0].plot(x+1, y-1, 'b--' )
axes[0].set_xlabel('X1 Axis Title Here')
axes[0].set_ylabel('Y1 Axis Title Here')
axes[0].set_title('String Title Here')
axes[0].legend(labels=['Line 1', 'Line 2'], loc=1)

axes[1].plot(y, x, 'g*-')
axes[1].plot(y-1, x+1, 'g*-')
axes[1].set_xlabel('X2 Axis Title Here')
axes[1].set_ylabel('Y2 Axis Title Here')
axes[1].set_title('String Title Here')
axes[1].legend(labels=['Line 1', 'Line 2'], loc=1)

fig.savefig("filename.png")

plt.show()

方法三
使用【對象導向】方法,明確地創建了圖形對象和軸對象,並通過添加軸來進行繪圖

fig = plt.figure(figsize=(8, 2), dpi=100)

axes1 = fig.add_axes([0.1, 0.1, 0.4, 0.8]) 
axes1.plot(x, y, 'r')
axes1.plot(x+1, y+1, 'b')
axes1.set_xlabel('X1 Axis Title Here') 
axes1.set_ylabel('Y1 Axis Title Here')
axes1.set_title('String Title 1')

axes2 = fig.add_axes([0.6, 0.1, 0.4, 0.8]) 
axes2.plot(y, x, 'g*-')
axes2.plot(y+1, x+1, 'g*-')
axes2.set_xlabel('X2 Axis Title Here') 
axes2.set_ylabel('Y2 Axis Title Here')
axes2.set_title('String Title 2')

plt.savefig("filename.png")

plt.show()

# 補充 / Supplement: build three side-by-side subplots and fill them in one loop over the axes array.
import matplotlib.pyplot as plt

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(10, 4), dpi=100)

x = [1, 2, 3, 4, 5]
y1 = [1, 4, 9, 16, 25]
y2 = [1, 8, 27, 64, 125]
y3 = [1, 16, 81, 256, 625]

for i, ax in enumerate(axes):
    if i == 0:
        # First subplot: the original curve plus a copy shifted by (+1, +3).
        new_x = [val + 1 for val in x]
        new_y1 = [val + 3 for val in y1]
        ax.plot(x, y1, 'r--')
        ax.plot(new_x, new_y1, 'r--')
        ax.set_title('Subplot 1')
        ax.legend(labels=['Line 1', 'Line 2'], loc=1)
    elif i == 1:
        ax.plot(x, y2, 'g*-')
        ax.set_title('Subplot 2')
    else:
        ax.plot(x, y3, 'b.-')
        ax.set_title('Subplot 3')
    # NOTE(review): the source lost its indentation; the axis labels are assumed
    # to apply to every subplot rather than only to the last branch.
    ax.set_xlabel('X_label')
    ax.set_ylabel('Y_label')

# Adjust the spacing between subplots.
plt.tight_layout()
plt.savefig("filename.png")
plt.show()

創建圖中圖的一種方式

先看比較

fig = plt.figure()

axes1 = fig.add_axes([0.1, 0.1, 0.8, 0.8]) 
axes2 = fig.add_axes([0.2, 0.5, 0.4, 0.3])

fig = plt.figure()

axes1 = fig.add_axes([0.1, 0.1, 0.8, 0.8]) 
axes2 = fig.add_axes([0.8, 0.15, 0.4, 0.3])

圖中圖

# Inset plot: a large main axes with a smaller axes drawn inside it.
fig = plt.figure()

# add_axes takes [left, bottom, width, height] as fractions of the figure.
axes1 = fig.add_axes([0.1, 0.1, 0.8, 0.8])  
axes2 = fig.add_axes([0.2, 0.5, 0.4, 0.3])  


axes1.plot(x, y, color="r", alpha=0.8, lw=1.5, ls='--', marker='s', markersize=10, markeredgewidth=1.5, markerfacecolor="yellow")  

# alpha 0.8 (transparency), linewidth 1.5, linestyle '--', square marker; markeredgewidth is the marker outline width
# Hex colours work too, e.g. ax.plot(x, x+1, color="#8B008B", alpha=0.5)
# linestyle can be '-', '--', '-.', ':' ...
# marker can be '+', 'o', '*', 's', '^', ',', '.', '1', '2', '3' ...

axes1.set_xlabel('X1 Axis Title Here')
axes1.set_ylabel('Y1 Axis Title Here') 
axes1.set_title('String Title Here')

axes2.plot(y, x, 'b')
axes2.set_xlabel('X2 Axis Title Here')
axes2.set_ylabel('Y2 Axis Title Here')
axes2.set_title('String Title Here')

plt.savefig("filename.png")

plt.show()

圖中圖練習

fig = plt.figure(figsize=(7,2), dpi=100)

ax = fig.add_axes([0, 0, 0.8, 0.8]) 

ax.plot(x,y, color='purple', lw=4, ls='-.', marker='o', markersize='10', markerfacecolor='yellow',
        alpha=0.3, label="X Squared")

ax.legend(loc=0)

fig = plt.figure(figsize=(7,2), dpi=100)
ax = fig.add_axes([0, 0, 0.8, 0.8]) 
ax.plot(x,y, color='purple', lw=4, ls='-.', marker='o', markersize='10', markerfacecolor='yellow',
        alpha=0.3, label="X Squared")

ax.set_xlim([0,1])  # 設置 x 和 y 軸的範圍
ax.set_ylim([0,2])

散佈圖 scatter

plt.scatter(x,y)

plt.scatter(x=tips['total_bill'], y=tips['tip'])

加上c 分組, s 形狀大小

titanic.plot.scatter(x='fare',y='age',c='survived',cmap='coolwarm')

https://matplotlib.org/stable/plot_types/index
‘default’:默認風格
‘classic’:經典風格,類似於 MATLAB 的風格
‘ggplot’:仿照 ggplot2 的風格
‘seaborn’:仿照 seaborn 的風格(Matplotlib 3.6 起改名為 ‘seaborn-v0_8’)
‘fivethirtyeight’:仿照 FiveThirtyEight 網站的風格
‘bmh’:較簡單的風格,適合科學繪圖


import matplotlib.pyplot as plt
import numpy as np

plt.style.use('_mpl-gallery')  # apply the chosen style sheet

# Generate the data (seeded so the figure is reproducible)
np.random.seed(3)
x = 4 + np.random.normal(0, 2, 24)
y = 4 + np.random.normal(0, 2, len(x))
# Per-point marker sizes and colour values
sizes = np.random.uniform(15, 80, len(x))
colors = np.random.uniform(15, 80, len(x))

fig, ax = plt.subplots()  
# vmin / vmax set the value range that the colour mapping covers
ax.scatter(x, y, s=sizes, c=colors, vmin=0, vmax=100)  
ax.set(xlim=(0, 8), xticks=np.arange(1, 8),
       ylim=(0, 8), yticks=np.arange(1, 8))

plt.show()

帶入鳶尾花數據

import seaborn as sns

iris = sns.load_dataset('iris')
iris.head()

import matplotlib.pyplot as plt

plt.scatter(iris['sepal_length'], iris['sepal_width'])
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.show()

螢幕擷取畫面 2024-05-01 180724

直方圖 hist

from random import sample

data = sample(range(1, 1000), 100)
plt.hist(data)

plt.xlabel('X Label')
plt.ylabel('Frequency')
plt.title('Histogram of y')

plt.show()

加上bin,可以控制柱數

from random import sample

data = sample(range(1, 1000), 100)
plt.hist(data, bins=6)

plt.xlabel('X Label')
plt.ylabel('Frequency')
plt.title('Histogram of y')
plt.show()

加上框線

from random import sample

data = sample(range(1, 1000), 100)
plt.hist(data, bins=6, edgecolor='black', lw= 0.3)


plt.xlabel('X Label')
plt.ylabel('Frequency')
plt.title('Histogram of y')

plt.show()

tips['total_bill'].hist(alpha=0.5,bins=25)
plt.xlabel('total bill')
plt.ylabel('count')

帶入鳶尾花數據

import seaborn as sns

iris = sns.load_dataset('iris')
iris.head()

import matplotlib.pyplot as plt

plt.hist(iris['sepal_length'])
plt.xlabel('Sepal Length')
plt.ylabel('Frequency')
plt.show()

螢幕擷取畫面 2024-05-01 180932

橫條圖 barh

假設已經有一個確診國家排名前20

plt.figure(figsize=(15,10))
plots = plt.barh(top_20['Country'], top_20['Confirmed'],
                    height=0.5, 
                    left=None, 
                    align='center', 
                    color=['lightsteelblue', 
                        'cornflowerblue', 
                        'royalblue', 
                        'midnightblue', 
                        'navy', 
                        'darkblue', 
                        'mediumblue'])

plt.xlabel('Confirmed')
plt.ylabel('Country')
# Title of the top-20 bar chart ("Statistics", matching the line-chart title used further down).
plt.title('Covid 19 Statistics Top 20 Countries')

plt.show()

top_6_total_bill = tips['total_bill'].value_counts().head(6)

top_6_total_bill.plot(kind='barh')

PS 改成 plt.bar,就是直條圖
加下面這行,x 軸刻度旋轉 90 度,避免文字重疊
plt.xticks(rotation=90)

plt.bar(x=tips['total_bill'], height=tips['tip'])

top_6_total_bill = tips['total_bill'].value_counts().head(6)

top_6_total_bill.plot(kind='bar')

折線圖 plot

import matplotlib.pyplot as plt

plt.style.use('_mpl-gallery')  # 使用指定的風格

# 月份
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']

# 每個月的支出金額
expenses = [1500, 1800, 1200, 2000, 1600, 1900]

fig, ax = plt.subplots()
ax.plot(months, expenses, marker='o', linestyle='-', linewidth=2.0)
ax.set(xlabel='Month', ylabel='Expenses', title='Monthly Expenses')

plt.show()

import matplotlib.pyplot as plt
import numpy as np

plt.style.use('_mpl-gallery')  # 使用指定的風格

# 生成資料
x = np.linspace(0, 10, 100)
y = 4 + 2 * np.sin(2 * x)

# 繪製圖表
fig, ax = plt.subplots()   
ax.plot(x, y, linewidth=2.0)  
ax.set(xlim=(0, 8), xticks=np.arange(1, 8),
       ylim=(0, 8), yticks=np.arange(1, 8))

plt.show()

也可以把表格某欄位設為索引

import pandas as pd

# 建立要繪製的 DataFrame
data = top_20[['Country', 'Confirmed', 'Active', 'Recovered', 'Deaths']]

# 設定 Country 欄位為索引
data = data.set_index('Country')

data.plot(kind='line', marker='o')
plt.title('COVID-19 Statistics - Top 20 Countries')
plt.xlabel('Country')
plt.ylabel('Confirmed')

plt.show()

tips['total_bill'].plot()

帶入鳶尾花數據

import seaborn as sns

iris = sns.load_dataset('iris')
iris.head()

import matplotlib.pyplot as plt

plt.plot(iris['sepal_length'], label='Sepal Length')
plt.plot(iris['sepal_width'], label='Sepal Width')
plt.xlabel('Index')
plt.ylabel('Value')
plt.legend()
plt.show()

螢幕擷取畫面 2024-05-01 180844

箱型圖 boxplot

data = [np.random.normal(0, std, 100) for std in range(1, 4)]

# rectangular box plot
boxprops = dict(facecolor='green', color='green')
plt.boxplot(data, vert=True, patch_artist=True, boxprops=boxprops)   # vert 垂直, patch_artist 填充顏色  

plt.xlabel('Data Set')
plt.ylabel('Value')
plt.title('Box Plot with Green Fill Color')

plt.show()

plt.boxplot(tips['total_bill'])

補充: plt.style.use(‘ ‘)

‘default’、‘ggplot’、‘seaborn’(Matplotlib 3.6 起改名為 ‘seaborn-v0_8’)、‘fivethirtyeight’、‘bmh’、‘dark_background’

import matplotlib.pyplot as plt
plt.style.use('bmh')

tips['total_bill'].hist()
plt.xlabel('total bill')
plt.ylabel('count')

面積圖 area

# Area chart of the first 30 rows. Select the two columns straight from `tips`:
# `tips_2` is only created later in the file (heatmap section), so it is not defined here.
tips[['total_bill','tip']][0:30].plot.area(alpha=0.4)

線圖 line

tips.plot.line(x='total_bill',y='tip',figsize=(8,3),lw=1)

六邊形圖 hexbin

tips.plot.hexbin(x='tip',y='total_bill',gridsize=25,cmap='Oranges')

密度圖 density、kde

tips.plot.density()

tips.plot.kde()

tips['total_bill'].plot.kde(lw=5,ls='--')

Matplotlib (Seaborn)

基本介紹

Seaborn 是建立在 Matplotlib 的高級數據可視化庫。簡化創建統計圖形

import seaborn as sns
%matplotlib inline   # 如果用jupyter notebook 要加

可以先預設上下文參數
poster 是相對較大的上下文
poster > talk > notebook > paper

sns.set_context('poster',font_scale=0.6)

seaborn 我最喜歡的部分!
有提供 kaggle 上比較熱門的開源資料: iris, tips, titanic, exercise, flights, anscombe, dots
直接抓取,不需要再下載csv :+1:

tips = sns.load_dataset('tips')
titanic = sns.load_dataset('titanic')
flights = sns.load_dataset('flights')

創建子圖(add_subplot)

fig = plt.figure(figsize=(6,4))

# Add axes to the figure on a 2x2 grid, filling grid positions 1, 2 and 3
ax_1 = fig.add_subplot(2, 2, 1)
ax_2 = fig.add_subplot(2, 2, 2)
ax_3 = fig.add_subplot(2, 2, 3)

螢幕擷取畫面 2024-05-01 184732

帶入鳶尾花數據

散點圖 scatterplot

import seaborn as sns

iris = sns.load_dataset('iris')
iris.head()

sns.scatterplot(x='sepal_length', y='sepal_width', data=iris)
plt.show()

螢幕擷取畫面 2024-05-01 182853

散點圖矩陣 pairplot

sns.pairplot(tips, hue='sex', palette='coolwarm', diag_kind='auto')   
# 使用tips數據,'sex' 分組, 調色板 'coolwarm', 對角線上的圖形類型 'auto'

diag_kind=’auto’ 可以換成 ’hist’、’kde’,或 None(None 不加引號)

線圖 lineplot

帶入鳶尾花數據

import seaborn as sns

iris = sns.load_dataset('iris')
iris.head()

sns.lineplot(data=iris[['sepal_length', 'sepal_width']])
plt.show()

螢幕擷取畫面 2024-05-01 183035

直方圖 histplot (displot 更靈活)

histplot

sns.histplot(tips['total_bill'], kde=False)

sns.histplot(tips['total_bill'], kde=True,bins=20)


螢幕擷取畫面 2024-05-01 184447

帶入鳶尾花數據

import seaborn as sns

iris = sns.load_dataset('iris')
iris.head()

sns.histplot(data=iris, x='sepal_length')
plt.show()

displot

sns.displot(tips['total_bill'], kde=True,bins=20)

PS displot 更靈活

加上不同的x軸

# Use the tips DataFrame: total_bill and sex are its columns, and `data` is an unrelated earlier variable.
sns.displot(data=tips, x="total_bill", hue="sex", kde=True, bins=20)

多層次直方圖

# Stacked histogram per sex; read from the tips DataFrame (`data` is an unrelated earlier variable).
sns.displot(data=tips, x="total_bill", hue="sex", multiple="stack")

加上時間序列,可以做直方圖

time_data = sns.load_dataset("flights")
sns.displot(data=time_data, x="year", binwidth=5)

也可以做

# Bivariate KDE of bill vs. tip; read from the tips DataFrame (`data` is an unrelated earlier variable).
sns.displot(data=tips, x="total_bill", y="tip", kind="kde")

# Bivariate histogram of bill vs. tip; read from the tips DataFrame (`data` is an unrelated earlier variable).
sns.displot(data=tips, x="total_bill", y="tip", kind="hist")

條形圖 barplot

# estimator defaults to the mean; another aggregator (e.g. max, min, or np.std as used here) can be passed instead
sns.barplot(x='sex',y='total_bill',data=tips,estimator=np.std)

計數圖 countplot

sns.countplot(x='sex',data=tips)

聯合圖 jointplot

直條 + 散點圖

sns.jointplot(x='total_bill',y='tip',data=tips,kind='scatter')

直條 + 六邊形圖

sns.jointplot(x='total_bill',y='tip',data=tips,kind='hex')

直條 + 密度圖

sns.jointplot(x='total_bill',y='tip',data=tips,kind='kde', fill=True)

直條 + 線性迴歸散點圖

sns.jointplot(x='total_bill',y='tip',data=tips,kind='reg')

地毯圖 rugplot 、密度估計圖 kdeplot

sns.rugplot(tips['total_bill'])

sns.kdeplot(tips['total_bill'])
sns.rugplot(tips['total_bill'])

箱型圖 boxplot

sns.boxplot(x="day", y="total_bill", data=tips, palette='rainbow')

橫向

sns.boxplot(x="total_bill", y="day", data=tips,palette='rainbow',orient='h')

hue 分組,類似 pandas groupby

sns.boxplot(x="day", y="total_bill", hue="smoker",data=tips, palette="coolwarm")

小提琴圖 violinplot

sns.violinplot(x="day", y="total_bill", data=tips,palette='rainbow')

橫向

sns.violinplot(x="total_bill", y="day", data=tips,palette='rainbow',orient='h')

hue 分組

sns.violinplot(x="day", y="total_bill", data=tips,hue='sex',palette='Set1')

split 切割

sns.violinplot(x="day", y="total_bill", data=tips, hue='sex', split=True, palette='Set1')

帶狀圖 stripplot、swarmplot

點有可能重疊

sns.stripplot(x="day", y="total_bill", data=tips, dodge=True, hue='sex',palette='Set1')

點有可能重疊,hue 分組

sns.stripplot(x="day", y="total_bill", data=tips, dodge=False, hue='sex', palette='Set1')

點不會重疊

sns.swarmplot(x="day", y="total_bill", hue='sex', data=tips, palette="Set1", dodge=True)

點不會重疊,hue 分組

sns.swarmplot(x="day", y="total_bill", hue='sex', data=tips, palette="Set1", dodge=False)

PS 混和

sns.violinplot(x="tip", y="day", data=tips,palette='rainbow')
sns.swarmplot(x="tip", y="day", data=tips,color='black',size=3)

繪製分類數據圖形 catplot

可替換
bar – 長條圖
point – 點圖(對應 sns.pointplot)
strip – 帶狀散點圖
box – 箱形圖
violin – 小提琴圖
swarm – 蜂群圖
boxen – 增強型箱形圖(letter-value plot)
count – 計數圖

sns.catplot(x="sex", y="total_bill", data=tips, kind="box")

sns.catplot(x="sex", y="total_bill", data=tips, kind="boxen")

熱力圖 heatmap

# Correlate only the numeric columns: tips also holds categorical columns
# (sex, smoker, day, time), which make a plain .corr() fail on pandas >= 2.0.
sns.heatmap(tips.corr(numeric_only=True))

PS 如果有不能計算的欄位,先轉換為數字

tips['sex'] = tips['sex'].map({'Female': 0, 'Male': 1})
tips['smoker'] = tips['smoker'].map({'No': 0, 'Yes': 1})

day_mapping = {'Thur': 1, 'Fri': 2, 'Sat': 3, 'Sun': 4}
tips['day'] = tips['day'].map(day_mapping)

time_mapping = {'Lunch': 1, 'Dinner': 2}
tips['time'] = tips['time'].map(time_mapping)

或是只看數字欄位

tips_2 = tips[['total_bill', 'tip', 'size']]

sns.heatmap(tips_2.corr())

加上 相關性係數的標籤

sns.heatmap(tips_2.corr(),cmap='coolwarm',annot=True)

補充: .set_style(‘ ‘)

sns.set_style('white')
sns.countplot(x='sex',data=tips)

圖的四個邊緣,添加座標軸刻度線

sns.set_style('ticks')
sns.countplot(x='sex',data=tips,palette='deep')

.despine() 移除Seaborn繪圖中的上、右邊框線,使得圖形更加清晰

sns.countplot(x='sex',data=tips)
sns.despine()
#  <> sns.despine(left=True)

數據透視表 pivot_table

flights.pivot_table(values='passengers',index='month',columns='year')

加上熱力圖

pvflights = flights.pivot_table(values='passengers',index='month',columns='year')
sns.heatmap(pvflights)

# cmap='magma' selects the colormap
# other options: 'viridis', 'plasma', 'inferno', 'cividis', 'coolwarm', 'RdBu', 'YlGnBu'
sns.heatmap(pvflights, cmap='magma',linecolor='white',linewidths=1)

聚類函數 clustermap

sns.clustermap(pvflights)

關聯性網格圖 sns.FacetGrid

g = sns.FacetGrid(tips, col="time",  row="smoker")
g = g.map(plt.hist, "total_bill")

hue 分組

g = sns.FacetGrid(tips, col="time",  row="smoker",hue='sex')
g = g.map(plt.scatter, "total_bill", "tip")

等同sns.jointplot(x=’total_bill’,y=’tip’,data=tips,kind=’reg’)

# Regression scatter in the centre, histogram + KDE on the margins.
# sns.distplot is deprecated, so the marginals use histplot(kde=True) instead.
g = sns.JointGrid(x="total_bill", y="tip", data=tips)
g = g.plot_joint(sns.regplot)
g = g.plot_marginals(sns.histplot, kde=True)

線性回歸模型 lmplot

sns.lmplot(x='total_bill',y='tip',data=tips)

hue 分組

sns.lmplot(x='total_bill',y='tip',data=tips,hue='sex',palette='coolwarm')

scatter_kws 散點的大小 100

sns.lmplot(x='total_bill',y='tip',data=tips,hue='sex',palette='seismic',
           markers=['o','v'],scatter_kws={'s':100})

分組分圖

sns.lmplot(x='total_bill', y='tip', data=tips, col='sex')

sns.lmplot(x='total_bill', y='tip', data=tips, row='sex')

sns.lmplot(x="total_bill", y="tip", data=tips, row="sex", col="time")

sns.lmplot(x='total_bill', y='tip', data=tips, col='day', hue='sex', palette='coolwarm', aspect=0.6, height=8)

Plotly

初始化

參考
https://plotly.com/python/

申請api key
https://chart-studio.plotly.com/settings/api#/

先安裝

pip install plotly
pip install cufflinks 
pip install chart_studio

導入模組

# 離線使用, jupyter notebook 初始化, html file, jupyter notebook 繪圖
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Pandas DataFrame 轉為 plotly
import cufflinks as cf

# 連接Plotly Chart Studio、上傳 Plotly 圖表到 Plotly Chart Studio
import chart_studio
import chart_studio.plotly as py

導入api,填入前面申請到的 api key

chart_studio.tools.set_credentials_file(username='你的帳號', api_key='your_api_key')

初始化Plotly以進入離線模式

init_notebook_mode(connected=True)

先創兩個dataframe
df

# 100 rows of standard-normal noise in four columns named A-D.
noise = np.random.randn(100, 4)
df = pd.DataFrame(noise, columns=list('ABCD'))
df

df2

# Small categorical frame: one value per category.
df2 = pd.DataFrame(
    list(zip('ABC', [32, 43, 50])),
    columns=['Category', 'Values'],
)
df2.head()

散佈圖 scatter

df.iplot(kind='scatter', x='A', y='B', mode='markers', size=10)

import plotly.offline as pyo
import plotly.graph_objs as go
import numpy as np

np.random.seed(50)
x_values = np.linspace(0, 1, 100)  
y_values = np.random.randn(100)   

trace0 = go.Scatter(
    x = x_values,
    y = y_values+5,
    mode = 'markers',
    name = 'markers'
)
trace1 = go.Scatter(
    x = x_values,
    y = y_values,
    mode = 'lines+markers',
    name = 'lines+markers'
)
trace2 = go.Scatter(
    x = x_values,
    y = y_values-5,
    mode = 'lines',
    name = 'lines'
)
data = [trace0, trace1, trace2] 

layout = go.Layout(
    title = 'three different modes of scatter'
)

fig = go.Figure(data=data,layout=layout)
# pyo.plot(fig, filename='scatter2.html')
pyo.iplot(fig)

import plotly.offline as pyo
import plotly.graph_objs as go
import numpy as np

np.random.seed(42)
random_x = np.random.randint(1,200,100)
random_y = np.random.randint(1,300,100)

# Single markers-only trace with a custom marker style.
data = [go.Scatter(
    x = random_x,
    y = random_y,
    mode = 'markers',
    marker=dict(    # marker style
        size = 12,
        # RGB channels range from 0 to 255; the original green value of 300 was out of range.
        color = 'rgb(50,255,100)',
        symbol = 'pentagon',
        line = dict(
            width = 2,
        )
    )
)]

layout = go.Layout(
    title = 'Random Data Scatterplot',  
    xaxis = dict(title = 'random x-values'), 
    yaxis = dict(title = 'random y-values'),  
    hovermode ='closest'  
)

fig = go.Figure(data=data, layout=layout)

# pyo.plot(fig, filename='scatter2.html')
pyo.iplot(fig)

直方圖 hist

df['A'].iplot(kind='hist',bins=25)

分布圖 distplot

import plotly.offline as pyo
import plotly.figure_factory as ff
import numpy as np

x = np.random.randn(1000)
hist_data = [x]
group_labels = ['distplot']

fig = ff.create_distplot(hist_data, group_labels)
pyo.iplot(fig)

import plotly.offline as pyo
import plotly.figure_factory as ff
import numpy as np

x1 = np.random.randn(200)-2
x2 = np.random.randn(200)
x3 = np.random.randn(200)+2
x4 = np.random.randn(200)+4

hist_data = [x1,x2,x3,x4]
group_labels = ['Group1','Group2','Group3','Group4']

fig = ff.create_distplot(hist_data, group_labels)
pyo.iplot(fig)

import plotly.offline as pyo
import plotly.figure_factory as ff

snodgrass = [.209,.205,.196,.210,.202,.207,.224,.223,.220,.201]
twain = [.225,.262,.217,.240,.230,.229,.235,.217]

hist_data = [snodgrass,twain]
group_labels = ['Snodgrass','Twain']

fig = ff.create_distplot(hist_data, group_labels, bin_size=[.005,.005])
pyo.iplot(fig)

直條圖 barplot

df2.iplot(kind='bar',x='Category',y='Values')

橫條圖 barhplot

df2.iplot(kind='barh', x='Category', y='Values')

import plotly.graph_objs as go
import plotly.offline as pyo

data = [go.Bar(
    x=titanic['alive'].value_counts().index,
    y=titanic['alive'].value_counts().values,
    marker=dict(color='blue')
)]

layout = go.Layout(
    title='Survival Count',
    xaxis=dict(title='Survived'),
    yaxis=dict(title='Count')
)


fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig)

# Count survivors and non-survivors per sex from the titanic DataFrame.
# (The previous version read columns from `df`, the random A-D frame, and
# subtracted per-row flags from the total row count.)
# Assumes titanic['survived'] is a 0/1 indicator, so its sum is the number of survivors.
survival_by_sex = titanic.groupby('sex')['survived'].agg(['sum', 'count'])
survived_count = survival_by_sex['sum']
not_survived_count = survival_by_sex['count'] - survival_by_sex['sum']

trace1 = go.Bar(
    x=survival_by_sex.index,
    y=survived_count,
    name='Survived',
    marker=dict(color='#FFD700')
)

trace2 = go.Bar(
    x=survival_by_sex.index,
    y=not_survived_count,
    name='Not Survived',
    marker=dict(color='#9EA0A1')
)

data = [trace1, trace2]
layout = go.Layout(
    title='Survival by Gender'
)

fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig)

# Count survivors and non-survivors per sex from the titanic DataFrame.
# (The previous version read columns from `df`, the random A-D frame, and
# subtracted per-row flags from the total row count.)
# Assumes titanic['survived'] is a 0/1 indicator, so its sum is the number of survivors.
survival_by_sex = titanic.groupby('sex')['survived'].agg(['sum', 'count'])
survived_count = survival_by_sex['sum']
not_survived_count = survival_by_sex['count'] - survival_by_sex['sum']

trace1 = go.Bar(
    x=survival_by_sex.index,
    y=survived_count,
    name='Survived',
    marker=dict(color='#FFD700')
)

trace2 = go.Bar(
    x=survival_by_sex.index,
    y=not_survived_count,
    name='Not Survived',
    marker=dict(color='#9EA0A1')
)

data = [trace1, trace2]
layout = go.Layout(
    title='Survival by Gender',
    barmode='stack'  # stack the two traces on top of each other
)

fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig)

圓餅圖 pie

import plotly.express as px
import pandas as pd

# 建立交叉表,計算不同性別和生存狀態的人數
# Cross-tabulate sex against the 'alive' column to get head counts per combination
cross_tab = pd.crosstab(titanic['sex'], titanic['alive'])

# Rename the columns
cross_tab.columns = ['Not Survived', 'Survived']

# Fix the slice order and the colours
cross_tab['color'] = ['female', 'male']
colors = ['red', 'blue']


# Pie 1: how the 'Survived' counts split between the sexes
fig1 = px.pie(
    values=cross_tab['Survived'],
    names=cross_tab.index,
    title='Survival by Gender',
    color=cross_tab['color'],
    color_discrete_sequence=colors
)


# Pie 2: the same split for the 'Not Survived' counts
fig2 = px.pie(
    values=cross_tab['Not Survived'],
    names=cross_tab.index,
    title='Not Survival by Gender',
    color=cross_tab['color'],
    color_discrete_sequence=colors 
)

fig1.show()
fig2.show()

箱型圖 boxplot

df.iplot(kind='box')

import plotly.offline as pyo
import plotly.graph_objs as go

snodgrass = [.209,.205,.196,.210,.202,.207,.224,.223,.220,.201]
twain = [.225,.262,.217,.240,.230,.229,.235,.217]

data = [
    go.Box(
        y=snodgrass,
        name='QCS'
    ),
    go.Box(
        y=twain,
        name='MT'
    )
]
layout = go.Layout(
    title = ''
)
fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig)

3D圖

df3 = pd.DataFrame({'x':[1,2,3,4,5],'y':[10,20,30,20,10],'z':[5,4,3,2,1]})
df3.iplot(kind='surface',colorscale='rdylbu')

氣泡圖 bubble

df.iplot(kind='bubble',x='A',y='B',size='C')

面量圖(地區分布地圖) choropleth

import chart_studio.plotly as py
import plotly.graph_objs as go 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

init_notebook_mode(connected=True)

data = dict(type = 'choropleth',
            locations = ['AZ','CA','NY'],
            locationmode = 'USA-states',
            colorscale= 'Portland',
            text= ['text1','text2','text3'],
            z=[1.0,2.0,3.0],
            colorbar = {'title':'Colorbar Title'})
layout = dict(geo = {'scope':'usa'})

choromap = go.Figure(data = [data],layout = layout)
iplot(choromap)

假設有一個df

data = dict(
        type = 'choropleth',
        colorscale = 'viridis',
        locations = df['CODE'],
        z = df['GDP (BILLIONS)'],
        text = df['COUNTRY'],
        colorbar = {'title' : 'GDP Billions US'},
      ) 

layout = dict(
    title = '2014 Global GDP',
    geo = dict(
        scope='world',
        showframe = False,
        projection = {'type':'stereographic'}  # 類型可換 https://plotly.com/python/reference/
    )
)

choromap = go.Figure(data = [data],layout = layout)
iplot(choromap)

熱力圖 heatmap

import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd
import numpy as np


# Reproducible synthetic data: 100 random (weekday, hour) pairs, each with a
# T_HR_AVG value drawn uniformly from [60, 90).
np.random.seed(42)
days = 'Monday Tuesday Wednesday Thursday Friday Saturday Sunday'.split()
times = ['%02d:00' % hour for hour in range(24)]
# Keyword arguments are evaluated left to right, so the random draws happen
# in the order DAY, LST_TIME, T_HR_AVG.
data = dict(
    DAY=np.random.choice(days, size=100),
    LST_TIME=np.random.choice(times, size=100),
    T_HR_AVG=np.random.uniform(60, 90, size=100),
)

df = pd.DataFrame(data)

heatmap_data = [go.Heatmap(
    x=df['DAY'],
    y=df['LST_TIME'],
    z=df['T_HR_AVG'],
    colorscale='Jet'
)]

layout = go.Layout(
    title='Hourly Temperatures, June 1-7, 2010 in<br>\
    Santa Barbara, CA USA'
)
fig = go.Figure(data=heatmap_data, layout=layout)
pyo.iplot(fig)

PS 假設要顯示不同地區的檔案

import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd 

df = sns.load_dataset('flights')

data = [go.Heatmap(
    x=df['month'],
    y=df['year'],
    z=df['passengers']
)]

layout = go.Layout(
    title='Flights'
)

fig = go.Figure(data=data, layout=layout)
pyo.iplot(fig)

Catalina
Catalina

Hi, I’m Catalina!
原本在西語市場做開發業務,2023 年正式轉職資料領域。
目前努力補齊計算機組織、微積分、線性代數與機率論,忙碌中做點筆記提醒自己 🤲

文章: 43

發佈留言

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *