ptxas fatal : Unresolved extern function Error 255

时间:2023-03-10 04:59:31
ptxas fatal   : Unresolved extern function  Error 255

This question already has an answer here:

I am trying to understand how to decouple CUDA __device__ code into separate header files.

I have three files.

File: int2.cuh

#ifndef INT2_H_
#define INT2_H_

#include "cuda.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

// Kernel defined in int2.cu: each thread prints k2(threadIdx.x).
__global__ void kernel();

// Device function defined in int2.cu: returns the square of its argument.
// Only the declaration is visible to files that include this header, so
// device code in another .cu file that calls k2() must be built with
// relocatable device code (separate compilation); otherwise ptxas reports
// "Unresolved extern function".
__device__ int k2(int k);

// Host wrapper defined in int2.cu: launches kernel() with `dim` threads
// per block.
int launchKernel(int dim);

#endif /* INT2_H_ */
// File: int2.cu
#include "int2.cuh"
#include "cstdio"

// Each thread prints the square of its thread index within the block,
// computed by the device function k2().
__global__ void kernel() {
    int tid = threadIdx.x;
    printf("%d\n", k2(tid));
}

// Returns i * i. Defined in the same translation unit as kernel(), which is
// why kernel() can call it even without relocatable device code.
__device__ int k2(int i) {
    return i * i;
}

// Launches kernel() with `dim` threads per block, resets the device, and
// returns 0.
int launchKernel(int dim) {
    // NOTE(review): the grid size was missing from the pasted text; a single
    // block is assumed because kernel() only uses threadIdx.x.
    kernel<<<1, dim>>>();
    // Tearing down the context also flushes the device-side printf buffer.
    cudaDeviceReset();
    return 0;
}
// File: CUDASample.cu
#include <stdio.h>
#include <stdlib.h>
#include "int2.cuh"
#include "iostream"

using namespace std;

// Unused in this sample; the build log reports it as declared but never
// referenced.
static const int WORK_SIZE = 256;

// Each thread prints the square of its thread index. The square is computed
// inline because calling k2() (defined in int2.cu) from this translation unit
// fails with "Unresolved extern function" unless relocatable device code is
// enabled.
__global__ void sampleCuda() {
    int tid = threadIdx.x;
    // printf("%d\n", k2(tid)); //Can not call k2
    printf("%d\n", tid * tid);
}

// Exercises three call paths: the host wrapper from int2.cu, the kernel from
// int2.cu launched directly, and the kernel defined in this file.
// NOTE(review): the numeric launch arguments were missing from the pasted
// text; 1 block of 16 threads is an assumed small configuration.
int main(void) {
    // Set but never read; the build log reports this warning as well.
    int var;
    var = launchKernel(16);

    kernel<<<1, 16>>>();
    cudaDeviceReset();

    sampleCuda<<<1, 16>>>();
    cudaDeviceReset();

    return 0;
}

The code works fine. I can call the sampleCuda() kernel (in the same file), call the C function launchKernel() (in the other file), and call kernel() directly (in the other file).

The problem I am facing is calling the __device__ function k2() from the sampleCuda() kernel: when I do, the build shows the following error. However, the same function is callable from kernel().

**** Incremental Build of configuration Debug for project CUDASample ****
make all
Building file: ../src/CUDASample.cu
Invoking: NVCC Compiler
/Developer/NVIDIA/CUDA-6.5/bin/nvcc -G -g -O0 -gencode arch=compute_20,code=sm_20 -odir "src" -M -o "src/CUDASample.d" "../src/CUDASample.cu"
/Developer/NVIDIA/CUDA-6.5/bin/nvcc -G -g -O0 --compile --relocatable-device-code=false -gencode arch=compute_20,code=compute_20 -gencode arch=compute_20,code=sm_20 -x cu -o "src/CUDASample.o" "../src/CUDASample.cu"
../src/CUDASample.cu(): warning: variable "var" was set but never used
../src/CUDASample.cu(): warning: variable "WORK_SIZE" was declared but never referenced
../src/CUDASample.cu(): warning: variable "var" was set but never used
../src/CUDASample.cu(): warning: variable "WORK_SIZE" was declared but never referenced
ptxas fatal : Unresolved extern function '_Z2k2i'
make: *** [src/CUDASample.o] Error 255
Build Finished (took 2s.388ms)


上面这个错误的解决方法是:启用分离编译(separate compilation),即把编译参数 --relocatable-device-code={true,false} 从 false 改为 true(简写为 -rdc=true)。
nsight 的设置的位置为:
In Nsight Eclipse, the option is available as the radio button "Separate compilation" under Project > Properties > Build > Settings > CUDA(需要先切换到 advanced mode),选中它就可以了。
VS 的设置位置为:Project > Properties > Configuration Properties > CUDA C/C++ > Common > Generate Relocatable Device Code,选择 "Yes (-rdc=true)"。Best wishes.