Hung-yi Lee PyTorch 實戰指南：從環境設置到模型訓練與預測

Contents 目錄

colab 環境設置

!pip install torch torchvision
torch.cuda.is_available()
!nvidia-smi

# 連上cpu gpu

# CPU: pick the device, then create the tensor directly on it.
device = torch.device('cpu')
x = torch.ones((3, 3), device=device)
print(x.device)

# GPU: uncomment to run the same example on CUDA.
# device = torch.device('cuda')
# x = torch.ones((3, 3), device=device)
# print(x.device)

連上google drive

# Mount Google Drive at /content/drive.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime — confirm before running this cell elsewhere.
from google.colab import drive
drive.mount('/content/drive')

下載檔案

!pip install gdown  # make sure gdown is installed
!gdown --id 11QTOy1UQxAQzfEjytvsT9pepNF8OqxMH --output train_1.jpg

# Download a whole folder by id, then list the downloaded files:
# !gdown --folder --id 1pJkjKvQqWM_3KvZAfXV_gYiY8Qeq3qj6
# !ls -l /content/

設置自動點擊

// https://stackoverflow.com/questions/57113226/how-can-i-prevent-google-colab-from-disconnecting?newreg=193c21864d6347c8859f7be882622399
// Paste into the browser developer console to click "Connect" automatically.

/**
 * Click Colab's connect button if it is present in the page.
 *
 * When the toolbar element, its shadow root, or the button itself cannot be
 * found, only a message is logged, so the timer callback no longer throws a
 * TypeError on a null lookup.
 */
function ConnectButton(){
  const host = document.querySelector("#top-toolbar > colab-connectbutton");
  const button = host && host.shadowRoot
    ? host.shadowRoot.querySelector("#connect")
    : null;
  if (!button) {
    console.log("Connect button not found");
    return;
  }
  console.log("Connect pushed");
  button.click();
}
// Repeat every 300000 ms (5 minutes).
setInterval(ConnectButton, 300000);

PyTorch Basics

# Concatenate tensors whose shapes differ only along dimension 1.
x = torch.zeros(2, 1, 3)
y = torch.zeros(2, 3, 3)
z = torch.zeros(2, 2, 3)
w = torch.cat((x, y, z), dim=1)  # 1 + 3 + 2 = 6 along dim 1
print(w.shape)
print(w.dtype)

# Lay the same 36 elements out with fewer dimensions (3-D -> 2-D).
reshaped_w = torch.reshape(w, (2, 18))
print(reshaped_w.shape)

viewed_w = w.view(3, -1)  # -1 lets PyTorch infer the 12
print(viewed_w.shape)

# Drop every axis whose size is 1.
squeezed_x = torch.squeeze(x)
print(squeezed_x.shape)

# Insert a new axis of size 1 at position 1: (2, 3) -> (2, 1, 3).
x = torch.zeros(2, 3)
unsqueezed_x = torch.unsqueeze(x, dim=1)
print(unsqueezed_x.shape)

# Swap the first two axes (dim 0 and dim 1) of a 2 x 3 tensor.
z = torch.tensor([[1, 2, 3], [4, 5, 6]])

print("原始 z：")
print(z)
print("z.shape:", z.shape)
print()

z2 = torch.transpose(z, 0, 1)  # (2, 3) -> (3, 2)
print("交換後的 z2：")
print(z2)
print("z2.shape:", z2.shape)

# Remove one specific size-1 axis from a tensor.
# Computer vision (CNN):   (1, C, H, W)        -> squeeze(0) -> (C, H, W)
# NLP (RNN, Transformer):  (batch, 1, seq_len) -> squeeze(1) -> (batch, seq_len)
z4 = torch.tensor([
    [
        [1, 2, 3, 4],
        [5, 1, 7, 8],
        [9, 10, 11, 12],
    ],
])

z5 = torch.squeeze(z4, dim=0)  # (1, 3, 4) -> (3, 4)

# Show each value followed by a blank separator line.
for shown in (z4, z4.shape, z5, z5.shape):
    print(shown)
    print()

# torch.from_numpy() shares memory with the source array instead of copying
# it, so a write through the tensor is visible in the NumPy array as well.
import numpy as np  # needed here: the file's own numpy import only comes later

np_arr = np.array([[1, -1], [-1, 1]], dtype=np.float32)
x2 = torch.from_numpy(np_arr)
x2[0, 0] = 99
print(np_arr)  # element [0, 0] is now 99

# torch.tensor() builds an independent tensor from the given values, so
# modifying it does not touch any other data.
torch_tensor = torch.tensor([[1, -1],
                             [-1, 1]])
torch_tensor[0][0] = 99
print(torch_tensor)

# numpy

import numpy as np

np_arr = np.array(
    [
        [1, -1, 3, 4],
        [-1, 1, 99, 2],
    ],
    dtype=np.float32,
)

# Add a new axis at index 2 (the third dimension): (2, 4) -> (2, 4, 1).
np_arr_2 = np_arr[:, :, np.newaxis]

print(np_arr)
print(np_arr.shape)
print()

print(np_arr_2)
print(np_arr_2.shape)

計算張量梯度

# Gradient of z = sum(x^2) with respect to x.

x = torch.tensor([[1., 0.], [-1., 1.]], requires_grad=True)
print(x)
print()

z0 = x ** 2  # element-wise square
print(z0)
print()

z = torch.sum(x ** 2)  # 1^2 + 0^2 + (-1)^2 + 1^2 = 1 + 0 + 1 + 1 = 3
print(z)
print()

# Backward pass: fills x.grad. backward() itself returns None, which is
# what gets printed for z1.
z1 = z.backward()
print(z1)
print()

print(x.grad)
# dz/dx[i, j] = 2 * x[i, j]
# dz/dx[0,0] = 2 * 1    =  2
# dz/dx[0,1] = 2 * 0    =  0
# dz/dx[1,0] = 2 * (-1) = -2
# dz/dx[1,1] = 2 * 1    =  2

x = torch.tensor([[2., 0.], [-2., 1.]], requires_grad=True)
print(x)
print()

z0 = x ** 4  # element-wise fourth power
print(z0)
print()

z = torch.sum(x ** 4)  # 2^4 + 0^4 + (-2)^4 + 1^4 = 16 + 0 + 16 + 1 = 33
print(z)
print()

# Backward pass: backward() differentiates z with respect to x and stores
# the result in x.grad; the call itself returns None.
z1 = z.backward()
print(z1)
print()

print(x.grad)
# Power rule: dz/dx[i, j] = 4 * x[i, j]^3
# dz/dx[0,0] = 4 * 2^3    =  32
# dz/dx[0,1] = 4 * 0^3    =   0
# dz/dx[1,0] = 4 * (-2)^3 = -32
# dz/dx[1,1] = 4 * 1^3    =   4

Training & Testing Neural Networks

import torch
import torch.nn as nn

# Define a linear (fully connected) layer: 32 input features, 64 output features.
layer = nn.Linear(in_features=32, out_features=64)
print(layer.weight.shape)
print()
print(layer.bias.shape)

# Activation functions turn a linear layer's output into a non-linear one.
# Both demos below are applied to the same sample values.
x = torch.tensor([0.0, 1.0, -1.0, 2.0, -2.0])

# nn.Sigmoid(): used on the output layer of binary classifiers to map the
# model's prediction to a probability between 0 and 1.
sigmoid = nn.Sigmoid()
output = sigmoid(x)
print(output)

# nn.ReLU(): used in hidden layers; speeds up convergence and reduces
# vanishing gradients, and is widely used in deep networks. For inputs below
# 0 the gradient is 0, which can lead to the "dead ReLU" problem.
relu = nn.ReLU()
output = relu(x)
print(output)

# Building a neural-network module.
# Variant 1: wrap the whole layer stack in nn.Sequential.

class MyModel(nn.Module):
    """Small MLP: 3 input columns (a, b, c) -> 16 hidden units -> 3 output columns (d, e, f)."""

    def __init__(self):
        # Run the parent initialiser first so sub-modules can be registered.
        super().__init__()
        stack = [
            nn.Linear(3, 16),  # 3 columns in, 16 hidden features out
            nn.ReLU(),         # non-linearity between the two linear layers
            nn.Linear(16, 3),  # 16 hidden features in, 3 columns out
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Forward pass: feed x through self.net and return the result."""
        out = self.net(x)
        return out

# Variant 2
# Declaring each layer explicitly gives more control when the model needs
# conditional behaviour (skip connections, different activations, ...).

class MyModel(nn.Module):
    """Two-layer network: Linear(10, 32) -> Sigmoid -> Linear(32, 1)."""

    def __init__(self):
        super(MyModel, self).__init__()
        self.layer1 = nn.Linear(10, 32)
        # No trailing comma here: `nn.Sigmoid(),` would store a 1-tuple, which
        # is neither registered as a sub-module nor callable in forward().
        self.layer2 = nn.Sigmoid()
        self.layer3 = nn.Linear(32, 1)

    def forward(self, x):
        """Apply the three layers in order and return the final output."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        return out

# Mean squared error (MSE) loss.
criterion = nn.MSELoss()

# Cross entropy: suited to classification tasks; measures the difference
# between the predicted probability distribution and the true labels.
# NOTE: this assignment replaces the MSE criterion created just above.
criterion = nn.CrossEntropyLoss()

# Compute the loss: once the model output is available, the loss function
# measures the gap between the prediction and the expected value.
# NOTE(review): model_output and expected_value are not defined in this
# snippet — presumably placeholders to be supplied by the caller.
loss = criterion(model_output, expected_value)

P.S. 參考資料：類神經網路訓練不起來怎麼辦 (三)

# 隨機梯度下降 (Stochastic Gradient Descent, SGD)
# torch.optim 中的優化算法可以幫助你有效地訓練神經網絡，通過調整參數來最小化損失
# 類神經網路訓練不起來怎麼辦 (三) https://www.youtube.com/watch?v=HYUXEeh3kwY

import torch.optim as optim

# Assumes the model is already defined as `model`; lr is the learning rate,
# momentum is the momentum factor.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0)

# Reset gradients: optimizer.zero_grad() clears the gradients of the model
# parameters. PyTorch accumulates gradients by default, so skipping this
# step would add new gradients onto old ones and hurt training.

optimizer.zero_grad()

# Backpropagation: loss.backward() computes the gradient of the loss,
# i.e. how much each parameter contributes to the loss value.
# NOTE(review): `loss` is not defined in this snippet — presumably the value
# returned by the criterion.

loss.backward()

# Update parameters: optimizer.step() adjusts the model parameters using the
# computed gradients, following the gradient-descent rule, with the aim of
# reducing the loss.

optimizer.step()

setup

# setup

import torch
import torch.nn as nn
import torch.optim as optim

class MyDataset(torch.utils.data.Dataset):
    """Template dataset yielding (input, target) tensor pairs.

    The class body used to be fully commented out, which left the `class`
    statement without a body (an IndentationError). It is restored here.
    """

    def __init__(self, file):
        # Read the data once at construction time.
        # TODO: replace the `...` placeholder with real loading of `file`;
        # __len__ and __getitem__ expect an indexable sequence of records.
        self.data = ...

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the (x, y) float32 tensors for the record at `idx`."""
        record = self.data[idx]

        # Input features: columns a, b, c.
        x = torch.tensor(
            [record['a'], record['b'], record['c']], dtype=torch.float32
        )
        # Targets: columns c, d, f.
        # NOTE(review): `c` is used as both an input and a target here —
        # confirm the intended target columns.
        y = torch.tensor(
            [record['c'], record['d'], record['f']], dtype=torch.float32
        )
        return x, y


# Load the data file.
# DataLoader was used below without ever being imported.
from torch.utils.data import DataLoader

file = r""
dataset = MyDataset(file)

# Wrap the dataset in a DataLoader: batches of 16, reshuffled at the start
# of every epoch.
tr_set = DataLoader(dataset, batch_size=16, shuffle=True)

# Create the model and move it to the selected device (CPU or CUDA).
# This line must be active: the training code that follows reads `model`.
model = MyModel().to(device)

# Mean squared error (MSE) loss.
criterion = nn.MSELoss()

# Learning-rate search: train a fresh model with each candidate learning
# rate and keep the one with the lowest final-epoch training loss.
import copy

learning_rates = [0.001, 0.01, 0.1]
best_loss = float('inf')
best_lr = None
best_state = None  # weights of the best candidate seen so far

for lr in learning_rates:
    # Start every candidate from newly initialised weights. Reusing a single
    # model would let later learning rates continue from earlier training,
    # which makes their losses incomparable.
    model = MyModel().to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr)

    epoch_loss = float('inf')
    for epoch in range(100):              # train for 100 epochs
        model.train()                     # training mode
        running_loss = 0.0
        seen = 0
        for x, y in tr_set:               # iterate over mini-batches
            x, y = x.to(device), y.to(device)  # cpu / cuda
            optimizer.zero_grad()         # clear gradients from the last step
            pred = model(x)               # forward pass
            loss = criterion(pred, y)
            loss.backward()               # backward pass
            optimizer.step()              # parameter update
            # Weight by batch size so the epoch average is per sample
            # (assumes criterion returns the batch mean).
            running_loss += loss.item() * len(x)
            seen += len(x)
        if seen:
            epoch_loss = running_loss / seen

    # Compare on the average loss of the last epoch rather than on whichever
    # mini-batch happened to come last.
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        best_lr = lr
        best_state = copy.deepcopy(model.state_dict())

# Leave `model` holding the best candidate's weights for the steps that follow.
if best_state is not None:
    model.load_state_dict(best_state)

print(f"最佳學習率: {best_lr}, 最小化的損失: {best_loss}")

計算損失

# Validation loop: average loss over the validation set.

model.eval()

# Running sum of per-sample losses.
total_loss = 0.0

# No gradients are needed: there is no backward pass or parameter update.
with torch.no_grad():
    for x, y in dv_set:
        x, y = x.to(device), y.to(device)
        # Forward pass and loss for this batch.
        pred = model(x)
        loss = criterion(pred, y)
        # Weight by batch size (assumes criterion returns the batch mean).
        total_loss += loss.item() * len(x)

# Compute the average once, after the loop, instead of on every batch.
# An empty validation set yields NaN rather than an undefined name.
num_samples = len(dv_set.dataset)
avg_loss = total_loss / num_samples if num_samples else float('nan')

收集預測結果

# Move each batch of predictions back to the CPU and collect them in `preds`
# for later analysis or evaluation.

model.eval()

# The list must be named `preds`: the original initialised `pred`, then
# appended to the undefined `preds` and overwrote `pred` inside the loop.
preds = []

# Gradients are not needed for inference.
with torch.no_grad():
    for x in tt_set:
        x = x.to(device)
        # Forward pass.
        pred = model(x)
        # Store the batch predictions on the CPU.
        preds.append(pred.cpu())

儲存模型的權重

# torch.save() stores the model weights so the model can be reloaded later.
# model.state_dict() returns a dictionary of the model's learnable parameters.

torch.save(model.state_dict(), 'model_weights.pth')

新資料重新加載模型

# Inference on new data.

# Load the trained weights. map_location remaps the stored tensors onto the
# current device, so weights saved on a GPU can also be loaded on a CPU-only
# runtime.
model.load_state_dict(torch.load('model_weights.pth', map_location=device))
model.eval()

# Dataset for new (unlabelled) data.
class MyDataset(torch.utils.data.Dataset):
    """Inference-time dataset that yields input features only (no targets)."""

    def __init__(self, file):
        # Data is read once at construction time (placeholder).
        self.data = ...

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the float32 tensor [a, b, c] for the record at `idx`."""
        row = self.data[idx]
        features = [row[key] for key in ('a', 'b', 'c')]
        return torch.tensor(features, dtype=torch.float32)


# `inputs` only exists as a local inside MyDataset.__getitem__, so it cannot
# be read at module level. Fetch the samples from a dataset instance instead
# and stack them into one batch tensor.
# NOTE(review): `file` is assumed to be the path of the new data file —
# confirm which file should be loaded here.
new_dataset = MyDataset(file)
new_input_data = [new_dataset[i] for i in range(len(new_dataset))]
new_input_tensor = torch.stack(new_input_data).to(device)

# Predict with gradient tracking disabled.
with torch.no_grad():
    prediction = model(new_input_tensor)

# Move the predictions back to the CPU as a NumPy array.
prediction = prediction.cpu().numpy()

print(prediction)

torch.max()

# max
import torch

# Sample tensor shared by the reductions below.
x = torch.tensor([[1, 5, 3],
                  [7, 2, 8]])

# No dim argument: the single largest element of the whole tensor.
max_value = torch.max(x)
print(max_value)

# dim=1: reduce along each row -> one maximum (and its column index) per row.
max_values, indices = torch.max(x, 1)
print(max_values)  # tensor([5, 8])
print(indices)     # tensor([1, 2])

# dim=0: reduce down each column -> one maximum (and its row index) per column.
max_values, indices = torch.max(x, 0)
print(max_values)  # tensor([7, 5, 8])
print(indices)     # tensor([1, 0, 1])

# Two tensors: element-wise maximum.
y = torch.tensor([[3, 2, 4],
                  [5, 6, 1]])

result = torch.max(x, y)
print(result)
# tensor([[3, 5, 4],
#         [7, 6, 8]])