- Tensors are mutable objects.
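A small illustration of this mutability (values chosen arbitrarily): an in-place modification is visible through every reference to the same tensor.

```python
import torch

a = torch.tensor([1, 2, 3])
b = a          # b refers to the same underlying tensor as a
a[0] = 100     # in-place modification
print(b)       # tensor([100, 2, 3]); the change is visible through b as well
```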
# torch.dot(x, y) -> Tensor
Computes the dot product of two vectors, i.e. the sum of the products of elements at matching positions. The result is the same as **torch.sum(x * y)**.
```python
import torch

x = torch.tensor([1, 2, 3])
y = torch.tensor([1, 2, 3])
print(torch.dot(x, y))
print(x * y)
print(torch.sum(x * y))
```

Output:

```
tensor(14)
tensor([1, 4, 9])
tensor(14)
```
# torch.mv(x, y) -> Tensor
Computes the **matrix-vector product** of the matrix x and the vector y.
```python
import torch

x = torch.tensor([1, 2, 3])
y = torch.arange(15).reshape(5, 3)
print(y)
print(y * x)           # elementwise product with broadcasting
print(torch.mv(y, x))  # matrix-vector product
```

Output:

```
tensor([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11],
        [12, 13, 14]])
tensor([[ 0,  2,  6],
        [ 3,  8, 15],
        [ 6, 14, 24],
        [ 9, 20, 33],
        [12, 26, 42]])
tensor([ 8, 26, 44, 62, 80])
```
# torch.rand() -> Tensor
Returns a tensor filled with random numbers drawn from the uniform distribution on the interval [0, 1).
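A minimal usage sketch (the shapes here are chosen only for illustration):

```python
import torch

r = torch.rand(2, 3)   # 2x3 tensor with values uniformly sampled from [0, 1)
print(r)
print(torch.rand(4))   # 1-D tensor with 4 random values
```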
# Automatic differentiation (autograd)
```python
import torch

x = torch.arange(4.0, requires_grad=True)
print(x)
print(x.grad)  # None by default
y = 2 * torch.dot(x, x)
y.backward()
print(x.grad)
```

Output:

```
tensor([0., 1., 2., 3.], requires_grad=True)
None
tensor([ 0.,  4.,  8., 12.])
```
Here **x = torch.arange(4.0, requires_grad=True)** can equivalently be written as:
```python
x = torch.arange(4.0)
x.requires_grad_(True)
```
When you want to compute the gradient of another function of x:
```python
# PyTorch accumulates gradients by default, so clear the previously stored values first
x.grad.zero_()
y = x.sum()
y.backward()
print(x.grad)
```
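Since y = x.sum(), the gradient of y with respect to each element of x is 1, so this prints tensor([1., 1., 1., 1.]).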
# Multilayer perceptron
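A multilayer perceptron stacks fully connected layers with a nonlinear activation (ReLU here) between them. The two implementations below build the same network with two hidden layers (784 → 256 → 128 → 10) on the image-classification data loaded through the `deep.load_data_mnist` helper.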
# Manual implementation of the multilayer perceptron
```python
import torch
from torch import nn
from util import deep

batch_size = 256
train_iter, test_iter = deep.load_data_mnist(batch_size)

num_inputs, num_outputs, num_hidden1, num_hidden2 = 784, 10, 256, 128

# Small random weights and zero biases for the two hidden layers and the output layer
w1 = nn.Parameter(torch.rand(num_inputs, num_hidden1, requires_grad=True) * 0.01)
b1 = nn.Parameter(torch.zeros(num_hidden1, requires_grad=True))
w2 = nn.Parameter(torch.rand(num_hidden1, num_hidden2, requires_grad=True) * 0.01)
b2 = nn.Parameter(torch.zeros(num_hidden2, requires_grad=True))
w3 = nn.Parameter(torch.rand(num_hidden2, num_outputs, requires_grad=True) * 0.01)
b3 = nn.Parameter(torch.zeros(num_outputs, requires_grad=True))
params = [w1, b1, w2, b2, w3, b3]

def relu(x):
    a = torch.zeros_like(x)
    return torch.max(x, a)

def net(x):
    x = x.reshape(-1, num_inputs)   # flatten each image into a 784-dimensional vector
    h = relu(x @ w1 + b1)
    h2 = relu(h @ w2 + b2)
    return h2 @ w3 + b3

loss = nn.CrossEntropyLoss(reduction='none')
num_epochs, lr = 10, 0.1
updater = torch.optim.SGD(params, lr=lr)

if __name__ == '__main__':
    deep.train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
    deep.predict(net, test_iter)
```
# Concise implementation of the multilayer perceptron
```python
import torch
from torch import nn
from util import deep

net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 10)
)

def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

net.apply(init_weights)

loss = nn.CrossEntropyLoss(reduction='none')
num_epochs, lr, batch_size = 10, 0.1, 256
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train_iter, test_iter = deep.load_data_mnist(batch_size)

if __name__ == '__main__':
    deep.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
    deep.predict(net, test_iter)
```
# Weight decay
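Weight decay regularizes the model by penalizing large weights: instead of minimizing loss(w, b) alone, we minimize loss(w, b) + (lambda/2) * ||w||^2, which is exactly the `lambd * l2_penalty(w)` term added to the loss in the manual implementation below.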
# Manual implementation of weight decay
```python
import torch
from util import deep

n_train, n_test, num_inputs, batch_size = 20, 100, 200, 5
true_w, true_b = torch.ones((num_inputs, 1)) * 0.01, 0.05
train_data = deep.synthetic_data(true_w, true_b, n_train)
train_iter = deep.load_array(train_data, batch_size)
test_data = deep.synthetic_data(true_w, true_b, n_test)
test_iter = deep.load_array(test_data, batch_size, is_train=False)

def init_params():
    w = torch.normal(0, 1, size=(num_inputs, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    return [w, b]

def l2_penalty(w):
    """L2-norm penalty"""
    return torch.sum(w.pow(2)) / 2

def train(lambd):
    w, b = init_params()
    net, loss = lambda X: deep.linreg(X, w, b), deep.squared_loss
    num_epochs, lr = 100, 0.003
    animator = deep.Animator(xlabel='epochs', ylabel='loss', yscale='log',
                             xlim=[5, num_epochs], legend=['train', 'test'])
    for epoch in range(num_epochs):
        for x, y in train_iter:
            l = loss(net(x), y) + lambd * l2_penalty(w)
            l.sum().backward()
            deep.sgd([w, b], lr, batch_size)
        if (epoch + 1) % 5 == 0:
            animator.add(epoch + 1, (deep.evaluate_loss(net, train_iter, loss),
                                     deep.evaluate_loss(net, test_iter, loss)))
    print('L2 norm of w:', torch.norm(w).item())

if __name__ == '__main__':
    train(0)  # train without regularization
    train(3)  # train with weight decay
```
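The loop above never calls `zero_grad` itself; it relies on `deep.sgd` to update the parameters and reset their gradients. A minimal sketch of such a helper, assuming it mirrors the usual d2l-style minibatch SGD, is:

```python
def sgd(params, lr, batch_size):
    """Minibatch SGD: update parameters in place, then reset their gradients."""
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
```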
# Concise implementation of weight decay
```python
import torch
from torch import nn
from util import deep

n_train, n_test, num_inputs, batch_size = 20, 100, 200, 5
true_w, true_b = torch.ones((num_inputs, 1)) * 0.01, 0.05
train_data = deep.synthetic_data(true_w, true_b, n_train)
train_iter = deep.load_array(train_data, batch_size)
test_data = deep.synthetic_data(true_w, true_b, n_test)
test_iter = deep.load_array(test_data, batch_size, is_train=False)

def train_concise(wd):
    net = nn.Sequential(nn.Linear(num_inputs, 1))
    for param in net.parameters():
        param.data.normal_()
    loss = nn.MSELoss(reduction='none')
    num_epochs, lr = 100, 0.003
    # weight decay is applied to the weight only, not to the bias
    trainer = torch.optim.SGD([
        {'params': net[0].weight, 'weight_decay': wd},
        {'params': net[0].bias}
    ], lr=lr)
    animator = deep.Animator(xlabel='epochs', ylabel='loss', yscale='log',
                             xlim=[5, num_epochs], legend=['train', 'test'])
    for epoch in range(num_epochs):
        for x, y in train_iter:
            trainer.zero_grad()
            l = loss(net(x), y)
            l.mean().backward()
            trainer.step()
        if (epoch + 1) % 5 == 0:
            animator.add(epoch + 1, (deep.evaluate_loss(net, train_iter, loss),
                                     deep.evaluate_loss(net, test_iter, loss)))
    print('L2 norm of w:', net[0].weight.norm().item())

if __name__ == '__main__':
    train_concise(0)  # train without regularization
    train_concise(3)  # train with weight decay
```
# Dropout
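During training, dropout zeroes each activation with probability p and scales the surviving activations by 1/(1 - p), so the expected value of every activation is unchanged; `dropout_layer` below implements exactly this (inverted) scheme.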
# Manual implementation of dropout
```python
import torch
from torch import nn
from util import deep

def dropout_layer(x, dropout):
    assert 0 <= dropout <= 1
    if dropout == 1:
        return torch.zeros_like(x)  # drop everything
    if dropout == 0:
        return x                    # keep everything
    mask = (torch.rand(x.shape) > dropout).float()
    return mask * x / (1.0 - dropout)

num_inputs, num_outputs, num_hidden1, num_hidden2 = 784, 10, 256, 256
dropout1, dropout2 = 0.2, 0.5

class Net(nn.Module):
    def __init__(self, num_inputs, num_outputs, num_hidden1, num_hidden2, is_training=True):
        super().__init__()
        self.num_inputs = num_inputs
        self.training = is_training
        self.lin1 = nn.Linear(num_inputs, num_hidden1)
        self.lin2 = nn.Linear(num_hidden1, num_hidden2)
        self.lin3 = nn.Linear(num_hidden2, num_outputs)
        self.relu = nn.ReLU()

    def forward(self, x):
        h1 = self.relu(self.lin1(x.reshape((-1, self.num_inputs))))
        if self.training:          # apply dropout only during training
            h1 = dropout_layer(h1, dropout1)
        h2 = self.relu(self.lin2(h1))
        if self.training:
            h2 = dropout_layer(h2, dropout2)
        out = self.lin3(h2)
        return out

net = Net(num_inputs, num_outputs, num_hidden1, num_hidden2)
num_epochs, lr, batch_size = 10, 0.5, 256
loss = nn.CrossEntropyLoss(reduction='none')
train_iter, test_iter = deep.load_data_mnist(batch_size)
trainer = torch.optim.SGD(net.parameters(), lr=lr)

if __name__ == '__main__':
    deep.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
```
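Note that `Net` reuses nn.Module's `training` attribute, so calling `net.eval()` (as evaluation helpers typically do) sets it to False and disables dropout at test time.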
# Concise implementation of dropout
```python
import torch
from torch import nn
from util import deep

dropout1, dropout2 = 0.2, 0.5

# nn.Dropout replaces the hand-written dropout_layer from the manual implementation
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(dropout1),
    nn.Linear(256, 256),
    nn.ReLU(),
    nn.Dropout(dropout2),
    nn.Linear(256, 10)
)

def init_weight(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)

net.apply(init_weight)

num_epochs, lr, batch_size = 10, 0.5, 256
loss = nn.CrossEntropyLoss(reduction='none')
train_iter, test_iter = deep.load_data_mnist(batch_size)
trainer = torch.optim.SGD(net.parameters(), lr=lr)

if __name__ == '__main__':
    deep.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
```