• Tensors are mutable objects.

# torch.dot(x, y) -> Tensor

Computes the dot product of two vectors, i.e., the sum of the products of elements at corresponding positions. The result is the same as **torch.sum(x * y)**.

import torch
x = torch.tensor([1,2,3])
y = torch.tensor([1,2,3])
print(torch.dot(x, y))
print(x * y)
print(torch.sum(x * y))
tensor(14)
tensor([1, 4, 9])
tensor(14)

# torch.mv(x, y) -> Tensor

Computes the **matrix-vector product** of the matrix x and the vector y (the first argument must be the matrix).

import torch
x = torch.tensor([1,2,3])
y = torch.arange(15).reshape(5,3)
print(y)
print(y * x)
print(torch.mv(y, x))
tensor([[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8],
        [ 9, 10, 11],
        [12, 13, 14]])
tensor([[ 0,  2,  6],
        [ 3,  8, 15],
        [ 6, 14, 24],
        [ 9, 20, 33],
        [12, 26, 42]])
tensor([ 8, 26, 44, 62, 80])
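
Note that each element of the matrix-vector product is the dot product of one row of y with x, so the same result can be obtained with an elementwise product followed by a row-wise sum:

print((y * x).sum(dim=1))
tensor([ 8, 26, 44, 62, 80])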

# torch.rand() -> Tensor

Returns a tensor filled with random numbers drawn from a uniform distribution on the interval [0, 1).
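
A minimal example (the values are random, so only the shape is deterministic):

import torch
z = torch.rand(2, 3)  # a 2x3 tensor with entries drawn uniformly from [0, 1)
print(z.shape)
torch.Size([2, 3])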

# Autograd (automatic differentiation)

import torch
x = torch.arange(4.0, requires_grad=True)
print(x)
print(x.grad)  # None by default
y = 2 * torch.dot(x, x)
y.backward()
print(x.grad)
tensor([0., 1., 2., 3.], requires_grad=True)
None
tensor([ 0.,  4.,  8., 12.])
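
Since y = 2·xᵀx, the gradient is ∂y/∂x = 4x, which is exactly the tensor([ 0.,  4.,  8., 12.]) shown above.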

The line **x = torch.arange(4.0, requires_grad=True)** above can equivalently be written as:

x = torch.arange(4.0)
x.requires_grad_(True)

When you want to compute the gradient of another function of x:

# PyTorch accumulates gradients by default, so clear the values from the previous backward pass
x.grad.zero_()
y = x.sum()
y.backward()
print(x.grad)
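tensor([1., 1., 1., 1.])

Since y is now the sum of the elements of x, every entry of the gradient is 1.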

# Multilayer perceptron
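
A multilayer perceptron inserts one or more hidden layers, each followed by a nonlinear activation function (here ReLU), between the input layer and the output layer.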

# Manual implementation of the multilayer perceptron

import torch
from torch import nn
from util import deep
batch_size = 256
train_iter, test_iter = deep.load_data_mnist(batch_size)
num_inputs, num_outputs, num_hidden1, num_hidden2 = 784, 10, 256, 128
# Initialize the weights with small random (normal) values and the biases with zeros
w1 = nn.Parameter(torch.randn(num_inputs, num_hidden1, requires_grad=True) * 0.01)
b1 = nn.Parameter(torch.zeros(num_hidden1, requires_grad=True))
w2 = nn.Parameter(torch.randn(num_hidden1, num_hidden2, requires_grad=True) * 0.01)
b2 = nn.Parameter(torch.zeros(num_hidden2, requires_grad=True))
w3 = nn.Parameter(torch.randn(num_hidden2, num_outputs, requires_grad=True) * 0.01)
b3 = nn.Parameter(torch.zeros(num_outputs, requires_grad=True))
params = [w1, b1, w2, b2, w3, b3]
def relu(x):
    # ReLU from scratch: elementwise max(x, 0)
    a = torch.zeros_like(x)
    return torch.max(x, a)
def net(x):
    # Flatten each image into a vector, then apply two ReLU hidden layers
    x = x.reshape(-1, num_inputs)
    h = relu(x @ w1 + b1)
    h2 = relu(h @ w2 + b2)
    return h2 @ w3 + b3
loss = nn.CrossEntropyLoss(reduction='none')
num_epochs, lr = 10, 0.1
updater = torch.optim.SGD(params, lr=lr)
if __name__ == '__main__':
    deep.train_ch3(net, train_iter, test_iter, loss, num_epochs, updater)
    deep.predict(net, test_iter)

# Concise implementation of the multilayer perceptron
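
The concise version builds the same architecture with nn.Sequential, using nn.Linear and nn.ReLU in place of the hand-written layers.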

import torch
from torch import nn
from util import deep
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 10)
)
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
net.apply(init_weights)
loss = nn.CrossEntropyLoss(reduction='none')
num_epochs, lr, batch_size = 10, 0.1, 256
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train_iter, test_iter = deep.load_data_mnist(batch_size)
if __name__ == '__main__':
    deep.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
    deep.predict(net, test_iter)

# Weight decay
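
Weight decay (L2 regularization) adds the penalty term (λ/2)·‖w‖² to the loss; the factor 1/2 makes its gradient simply λw, so every update shrinks the weights toward zero. In the manual implementation below this is the term lambd * l2_penalty(w) added to the loss, while the concise implementation passes a weight_decay argument to the optimizer instead.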

# Manual implementation of weight decay

import torch
from util import deep
n_train, n_test, num_inputs, batch_size = 20, 100, 200, 5
true_w, true_b = torch.ones((num_inputs, 1)) * 0.01, 0.05
train_data = deep.synthetic_data(true_w, true_b, n_train)
train_iter = deep.load_array(train_data, batch_size)
test_data = deep.synthetic_data(true_w, true_b, n_test)
test_iter = deep.load_array(test_data, batch_size, is_train=False)
def init_params():
    w = torch.normal(0, 1, size=(num_inputs, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    return [w, b]
def l2_penalty(w):
    """L2 norm penalty"""
    return torch.sum(w.pow(2)) / 2
def train(lambd):
    w, b = init_params()
    net, loss = lambda X: deep.linreg(X, w, b), deep.squared_loss
    num_epochs, lr = 100, 0.003
    animator = deep.Animator(xlabel='epochs', ylabel='loss', yscale='log', xlim=[5, num_epochs],
                             legend=['train', 'test'])
    for epoch in range(num_epochs):
        for x, y in train_iter:
            l = loss(net(x), y) + lambd * l2_penalty(w)  # add the L2 penalty to the loss
            l.sum().backward()
            deep.sgd([w, b], lr, batch_size)
        if (epoch + 1) % 5 == 0:
            animator.add(epoch + 1, (deep.evaluate_loss(net, train_iter, loss),
                                     deep.evaluate_loss(net, test_iter, loss)))
    print('L2 norm of w:', torch.norm(w).item())
if __name__ == '__main__':
    train(0)  # train without weight decay (lambd = 0)
    train(3)  # train with weight decay (lambd = 3)

# Concise implementation of weight decay
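
In the concise version, weight decay is specified through the optimizer's weight_decay argument instead of adding the penalty to the loss by hand; here it is applied only to the weight and not to the bias.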

import torch
from torch import nn
from util import deep
n_train, n_test, num_inputs, batch_size = 20, 100, 200, 5
true_w, true_b = torch.ones((num_inputs, 1)) * 0.01, 0.05
train_data = deep.synthetic_data(true_w, true_b, n_train)
train_iter = deep.load_array(train_data, batch_size)
test_data = deep.synthetic_data(true_w, true_b, n_test)
test_iter = deep.load_array(test_data, batch_size, is_train=False)
def train_concise(wd):
    net = nn.Sequential(nn.Linear(num_inputs, 1))
    for param in net.parameters():
        param.data.normal_()
    loss = nn.MSELoss(reduction='none')
    num_epochs, lr = 100, 0.003
    # Apply weight decay only to the weight, not to the bias
    trainer = torch.optim.SGD([
        {'params': net[0].weight, 'weight_decay': wd},
        {'params': net[0].bias}
    ], lr=lr)
    animator = deep.Animator(xlabel='epochs', ylabel='loss', yscale='log', xlim=[5, num_epochs],
                             legend=['train', 'test'])
    for epoch in range(num_epochs):
        for x, y in train_iter:
            trainer.zero_grad()
            l = loss(net(x), y)
            l.mean().backward()
            trainer.step()
        if (epoch + 1) % 5 == 0:
            animator.add(epoch + 1, (deep.evaluate_loss(net, train_iter, loss),
                                     deep.evaluate_loss(net, test_iter, loss)))
    print('L2 norm of w:', net[0].weight.norm().item())
if __name__ == '__main__':
    train_concise(0)  # train without weight decay (wd = 0)
    train_concise(3)  # train with weight decay (wd = 3)

# Dropout
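
During training, dropout zeroes each activation with probability p and scales the surviving activations by 1/(1 − p), so the expected value of every activation stays the same; at evaluation time no dropout is applied.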

# Manual implementation of dropout

import math
import numpy as np
import torch
from torch import nn
from util import deep
def dropout_layer(x, dropout):
    assert 0 <= dropout <= 1
    if dropout == 1:
        # Drop every element
        return torch.zeros_like(x)
    if dropout == 0:
        # Keep every element
        return x
    # Zero each element with probability `dropout`, scale survivors by 1/(1 - dropout)
    mask = (torch.rand(x.shape) > dropout).float()
    return mask * x / (1.0 - dropout)
num_inputs, num_outputs, num_hidden1, num_hidden2 = 784, 10, 256, 256
dropout1, dropout2 = 0.2, 0.5
class Net(nn.Module):
    def __init__(self, num_inputs, num_outputs, num_hidden1, num_hidden2, is_training=True):
        super().__init__()
        self.num_inputs = num_inputs
        self.training = is_training
        self.lin1 = nn.Linear(num_inputs, num_hidden1)
        self.lin2 = nn.Linear(num_hidden1, num_hidden2)
        self.lin3 = nn.Linear(num_hidden2, num_outputs)
        self.relu = nn.ReLU()
    def forward(self, x):
        h1 = self.relu(self.lin1(x.reshape((-1, self.num_inputs))))
        if self.training:
            # Apply dropout only during training
            h1 = dropout_layer(h1, dropout1)
        h2 = self.relu(self.lin2(h1))
        if self.training:
            h2 = dropout_layer(h2, dropout2)
        out = self.lin3(h2)
        return out
net = Net(num_inputs, num_outputs, num_hidden1, num_hidden2)
num_epochs, lr, batch_size = 10, 0.5, 256
loss = nn.CrossEntropyLoss(reduction='none')
train_iter, test_iter = deep.load_data_mnist(batch_size)
trainer = torch.optim.SGD(net.parameters(), lr=lr)
if __name__ == '__main__':
    deep.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)

# Concise implementation of dropout
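
The built-in nn.Dropout layer only applies dropout when the model is in training mode (net.train()) and is a no-op in evaluation mode (net.eval()), so the manual dropout_layer is not needed here.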

import torch
from torch import nn
from util import deep
num_inputs, num_outputs, num_hidden1, num_hidden2 = 784, 10, 256, 256
dropout1, dropout2 = 0.2, 0.5  # same dropout probabilities as in the manual implementation
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(dropout1),
    nn.Linear(256, 256),
    nn.ReLU(),
    nn.Dropout(dropout2),
    nn.Linear(256, 10)
)
def init_weight(m):
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std=0.01)
net.apply(init_weight)
num_epochs, lr, batch_size = 10, 0.5, 256
loss = nn.CrossEntropyLoss(reduction='none')
train_iter, test_iter = deep.load_data_mnist(batch_size)
trainer = torch.optim.SGD(net.parameters(), lr=lr)
if __name__ == '__main__':
    deep.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)