技術の家庭菜園

https://tpcbtw.com/

Adapting rmm-diis.cu in VASP5 to CUDA Version 10

This report was created with the assistance of ChatGPT.

Background and Purpose

VASP5 is a widely used program for electronic structure calculations and was created with CUDA version 8 in mind. However, with the introduction of CUDA 10 and later, changes in the use of texture memory have led to build errors. The purpose of this report is to modify the description in rmm-diis.cu to ensure compatibility with CUDA 10 and later versions.

Environment

VASP version: 5.4.4
OS: WSL2-Docker(nvidia/cuda:12.0.0-cudnn8-devel-ubuntu22.04)
CUDA: 12.0
Compiler: oneAPI 2021.9
GPU: NVIDIA GTX TITAN V
CPU: AMD Ryzen 7 2700X

Build Error Details

The error is caused by the outdated use of CUDA texture memory. As CUDA versions were updated, the usage of texture memory changed, but the old texture memory usage method remained in rmm-diis.cu, causing the build errors.

rmm-diis.cu(45): error: texture is not a template
rmm-diis.cu(46): error: texture is not a template
rmm-diis.cu(293): error: identifier "cudaBindTexture" is undefined
rmm-diis.cu(301): error: identifier "cudaUnbindTexture" is undefined
rmm-diis.cu(386): error: identifier "cudaUnbindTexture" is undefined

Correction

We updated the definition and usage of texture memory to be compatible with CUDA 10 and later versions. The specific changes are as follows.

  1. Change to using texture objects
  2. Discontinue use of cudaBindTexture and cudaUnbindTexture, replacing them with cudaCreateTextureObject and cudaDestroyTextureObject
--- rmm-diis.cu_org
+++ rmm-diis.cu
@@ -42,8 +42,8 @@
 };

 #define LOCAL_CONTRIBV2_THREADS 512
-texture<uint2> texX;
-texture<uint2> texY;
+cudaTextureObject_t texX;
+cudaTextureObject_t texY;

 template<bool, bool>
 __global__ void local_contribution_gld_main (struct localContributionParams parms);
@@ -290,21 +290,31 @@
     //TODO double precision texture
     useTexture = 0;
     if (useTexture) {
-        if ((cudaStat=cudaBindTexture (&texXOfs,texX,W1_CR,sizeX*sizeof(W1_CR[0]))) !=
-            cudaSuccess) {
-            //cublasSetError (ctx, CUBLAS_STATUS_MAPPING_ERROR);
-            printf ("CUBLAS_STATUS_MAPPING_ERROR IN LINE %d\n",__LINE__);
-           exit(-1);
+        cudaResourceDesc resDescX, resDescY;
+        memset(&resDescX, 0, sizeof(resDescX));
+        memset(&resDescY, 0, sizeof(resDescY));
+        resDescX.resType = cudaResourceTypeLinear;
+        resDescY.resType = cudaResourceTypeLinear;
+        resDescX.res.linear.devPtr = W1_CR;
+        resDescY.res.linear.devPtr = W2_CR;
+        resDescX.res.linear.desc = cudaCreateChannelDesc<uint2>();
+        resDescY.res.linear.desc = cudaCreateChannelDesc<uint2>();
+        resDescX.res.linear.sizeInBytes = sizeX*sizeof(W1_CR[0]);
+        resDescY.res.linear.sizeInBytes = sizeY*sizeof(W2_CR[0]);
+
+        cudaTextureDesc texDesc;
+        memset(&texDesc, 0, sizeof(texDesc));
+        texDesc.readMode = cudaReadModeElementType;
+
+        if ((cudaStat=cudaCreateTextureObject(&texX, &resDescX, &texDesc, NULL)) != cudaSuccess) {
+            printf("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
+            exit(-1);
         }
-        if ((cudaStat=cudaBindTexture (&texYOfs,texY,W2_CR,sizeY*sizeof(W2_CR[0]))) !=
-            cudaSuccess) {
-            cudaUnbindTexture (texX);
-            //cublasSetError (ctx, CUBLAS_STATUS_MAPPING_ERROR);
-            printf ("CUBLAS_STATUS_MAPPING_ERROR IN LINE %d\n",__LINE__);
-           exit(-1);
+        if ((cudaStat=cudaCreateTextureObject(&texY, &resDescY, &texDesc, NULL)) != cudaSuccess) {
+            cudaDestroyTextureObject(texX);
+            printf("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
+            exit(-1);
         }
-        texXOfs /= sizeof(W1_CR[0]);
-        texYOfs /= sizeof(W2_CR[0]);
     }
     /* allocate memory to collect results, one per CTA */
     //printf("nbrCtas = %d\n",nbrCtas);
@@ -383,12 +393,12 @@


     if (useTexture) {
-        if ((cudaStat = cudaUnbindTexture (texX)) != cudaSuccess) {
-            printf ("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
+        if ((cudaStat = cudaDestroyTextureObject(texX)) != cudaSuccess) {
+            printf("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
             exit(-1);
         }
-        if ((cudaStat = cudaUnbindTexture (texY)) != cudaSuccess) {
-            printf ("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
+        if ((cudaStat = cudaDestroyTextureObject(texY)) != cudaSuccess) {
+            printf("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
             exit(-1);
         }
     }