A Summary of PyTorch Errors

Date: 2022-06-20 18:51:17

I'm currently learning PyTorch and have been writing small examples of my own. Here I record some of the errors I've hit, along with notes on how I resolved them.

1. RuntimeError: Expected object of type torch.FloatTensor but found type torch.cuda.FloatTensor for argument #2 'weight'

Full error message:

    Traceback (most recent call last):
      File "dogvscat-resnet.py", line , in <module>
        outputs = net(inputs)
      File "/home/lzx/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line , in __call__
        result = self.forward(*input, **kwargs)
      File "/home/lzx/anaconda3/envs/pytorch/lib/python3.6/site-packages/torchvision-0.2.1-py3.6.egg/torchvision/models/resnet.py", line , in forward
      File "/home/lzx/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line , in __call__
        result = self.forward(*input, **kwargs)
      File "/home/lzx/anaconda3/envs/pytorch/lib/python3.6/site-packages/torch/nn/modules/conv.py", line , in forward
        self.padding, self.dilation, self.groups)
    RuntimeError: Expected object of type torch.FloatTensor but found type torch.cuda.FloatTensor for argument #2 'weight'
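For context, the error boils down to a device mismatch: the model's weights live on the GPU while the input tensor is still on the CPU. Here is a minimal sketch that triggers the same class of error (assuming a CUDA-capable machine; the exact message wording varies across PyTorch versions):

    import torch
    import torch.nn as nn

    net = nn.Conv2d(3, 6, 5).cuda()   # the conv weights now live on the GPU
    x = torch.randn(1, 3, 32, 32)     # the input still lives on the CPU

    out = net(x)  # RuntimeError: input/weight device mismatch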

Reference: https://github.com/wohlert/semi-supervised-pytorch/issues/7

This error is actually rather subtle: nothing on the first page of Google results was useful, and only the link above pointed me in the right direction.

When training on the GPU, both the model and the data have to be moved there with .cuda(), e.g.:

model.cuda()

For data tensors, however, .cuda() is not an in-place operation: simply appending .cuda() to a variable does nothing to that variable. You must explicitly assign the result back. That is,

data.cuda() by itself does not work, while

data = data.cuda() does.

(For an nn.Module, model.cuda() does move the parameters in place, which is why the model line works without reassignment.) This explicit-assignment detail is very important.
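A short sketch of the difference (again assuming CUDA is available; the variable name is illustrative):

    import torch

    data = torch.randn(4, 3, 32, 32)

    data.cuda()          # returns a NEW cuda tensor, which is thrown away
    print(data.is_cuda)  # False -- the original tensor is untouched

    data = data.cuda()   # assign the result back
    print(data.is_cuda)  # True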

Example code: a LeNet I wrote myself for binary cat-vs-dog classification.

Pay particular attention to the lines that move the model and data onto the GPU: net.to(device), images = images.to(device) / labels = labels.to(device) inside test(), and inputs = inputs.to(device) / labels = labels.to(device) in the training loop.

    import os
    from PIL import Image
    import numpy as np
    import torch
    from torchvision import transforms as T
    from torchvision.datasets import ImageFolder
    from torch.utils.data import DataLoader
    import torch.nn as nn
    import torch.nn.functional as F
    from torch import optim
    from torch.utils import data
    import torchvision as tv
    from torchvision.transforms import ToPILImage

    show = ToPILImage()  # converts a Tensor back to a PIL Image for easy visualization

    transform = T.Compose([
        T.Resize(32),       # scale the image, keeping the aspect ratio, shortest side = 32 px
        T.CenterCrop(32),   # crop a 32x32 patch from the center
        T.ToTensor(),       # convert the PIL Image to a Tensor, normalized to [0, 1]
        T.Normalize(mean=[.5, .5, .5], std=[.5, .5, .5])  # standardize to [-1, 1] with the given mean/std
    ])

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(3, 6, 5)
            self.conv2 = nn.Conv2d(6, 16, 5)
            self.fc1 = nn.Linear(16 * 5 * 5, 120)
            self.fc2 = nn.Linear(120, 84)
            self.fc3 = nn.Linear(84, 2)

        def forward(self, x):
            x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
            x = F.max_pool2d(F.relu(self.conv2(x)), 2)
            x = x.view(x.size()[0], -1)
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x

    net = Net()
    if torch.cuda.is_available():
        print("Using GPU")
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)

    def test():
        correct = 0  # number of correctly classified images
        total = 0    # total number of images
        # no gradients are needed at test time, so disable autograd to save time and memory
        with torch.no_grad():
            for data in testloader:
                images, labels = data
                images = images.to(device)
                labels = labels.to(device)
                outputs = net(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        print('Accuracy in the test dataset: %.1f %%' % (100 * correct / total))

    train_dataset = ImageFolder('/home/lzx/datasets/dogcat/sub-train/', transform=transform)
    test_dataset = ImageFolder('/home/lzx/datasets/dogcat/sub-test/', transform=transform)
    # dataset = DogCat('/home/lzx/datasets/dogcat/sub-train/', transforms=transform)
    # train_dataset = ImageFolder('/Users/lizhixuan/PycharmProjects/pytorch_learning/Chapter5/sub-train/', transform=transform)
    # test_dataset = ImageFolder('/Users/lizhixuan/PycharmProjects/pytorch_learning/Chapter5/sub-test/', transform=transform)

    trainloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=512,
        shuffle=True,
        num_workers=4)
    testloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=512,
        shuffle=False,
        num_workers=4)
    classes = ('cat', 'dog')

    criterion = nn.CrossEntropyLoss()  # cross-entropy loss
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    print("Starting to train")
    torch.set_num_threads(8)
    for epoch in range(1000):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the input data
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward + backward
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            # print("outputs %s labels %s" % (outputs, labels))
            loss.backward()
            # update the parameters
            optimizer.step()
            # log progress; loss is a scalar tensor, so read it with loss.item(), not loss[0]
            running_loss += loss.item()
            print_gap = 10
            if i % print_gap == (print_gap - 1):  # print training status every print_gap batches
                print('[%d, %5d] loss: %.3f'
                      % (epoch + 1, i + 1, running_loss / print_gap))
                running_loss = 0.0
        test()
    print('Finished Training')
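When debugging this kind of mismatch, it also helps to print where things actually live. A small self-contained sketch (the layer and tensor here are illustrative, not from the script above):

    import torch
    import torch.nn as nn

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net = nn.Linear(8, 2).to(device)
    x = torch.randn(1, 8).to(device)

    # both should report the same device, otherwise the forward pass will fail
    print(next(net.parameters()).device)
    print(x.device)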

With that, I finally understand how to run code on the GPU, haha.