技術の家庭菜園

https://tpcbtw.com/

VASP5におけるrmm-diis.cuのCUDAバージョン10への対応

このレポートはChatGPTの支援により作成されました。

背景・目的
環境
ビルドエラー内容
修正

背景・目的

VASP5は電子構造計算に広く使われるプログラムであり、CUDAバージョン8を前提として作成されている。しかし、近年のCUDA10以降、テクスチャメモリの使用方法変更に伴い、ビルドエラーが発生するようになった。本レポートでは、rmm-diis.cuにおける記述の修正を行い、CUDA10以降のバージョンとの互換性を確保することを目的とする。

環境

VASP version: 5.4.4
OS: WSL2-Docker(nvidia/cuda:12.0.0-cudnn8-devel-ubuntu22.04)
CUDA: 12.0
Compiler: oneAPI 2021.9
GPU: NVIDIA GTX TITAN V
CPU: AMD Ryzen 7 2700X

ビルドエラー内容

エラーは、古いCUDAのテクスチャメモリ使用方法が原因で発生している。CUDAバージョンがアップデートされると、テクスチャメモリの使用方法が変わったが、rmm-diis.cuでは旧式のテクスチャメモリ使用方法が残っており、それがビルドエラーの原因である。

rmm-diis.cu(45): error: texture is not a template
rmm-diis.cu(46): error: texture is not a template
rmm-diis.cu(293): error: identifier "cudaBindTexture" is undefined
rmm-diis.cu(301): error: identifier "cudaUnbindTexture" is undefined
rmm-diis.cu(386): error: identifier "cudaUnbindTexture" is undefined

修正

テクスチャメモリの定義と使用方法を更新し、CUDA 10以降のバージョンに対応するように修正した。具体的な変更点は以下の通りである。

テクスチャオブジェクトの使用に変更
cudaBindTextureとcudaUnbindTextureの使用を廃止し、cudaCreateTextureObjectとcudaDestroyTextureObjectに置き換え

--- rmm-diis.cu_org
+++ rmm-diis.cu
@@ -42,8 +42,8 @@
 };

 #define LOCAL_CONTRIBV2_THREADS 512
-texture<uint2> texX;
-texture<uint2> texY;
+cudaTextureObject_t texX;
+cudaTextureObject_t texY;

 template<bool, bool>
 __global__ void local_contribution_gld_main (struct localContributionParams parms);
@@ -290,21 +290,31 @@
     //TODO double precision texture
     useTexture = 0;
     if (useTexture) {
-        if ((cudaStat=cudaBindTexture (&texXOfs,texX,W1_CR,sizeX*sizeof(W1_CR[0]))) !=
-            cudaSuccess) {
-            //cublasSetError (ctx, CUBLAS_STATUS_MAPPING_ERROR);
-            printf ("CUBLAS_STATUS_MAPPING_ERROR IN LINE %d\n",__LINE__);
-           exit(-1);
+        cudaResourceDesc resDescX, resDescY;
+        memset(&resDescX, 0, sizeof(resDescX));
+        memset(&resDescY, 0, sizeof(resDescY));
+        resDescX.resType = cudaResourceTypeLinear;
+        resDescY.resType = cudaResourceTypeLinear;
+        resDescX.res.linear.devPtr = W1_CR;
+        resDescY.res.linear.devPtr = W2_CR;
+        resDescX.res.linear.desc = cudaCreateChannelDesc<uint2>();
+        resDescY.res.linear.desc = cudaCreateChannelDesc<uint2>();
+        resDescX.res.linear.sizeInBytes = sizeX*sizeof(W1_CR[0]);
+        resDescY.res.linear.sizeInBytes = sizeY*sizeof(W2_CR[0]);
+
+        cudaTextureDesc texDesc;
+        memset(&texDesc, 0, sizeof(texDesc));
+        texDesc.readMode = cudaReadModeElementType;
+
+        if ((cudaStat=cudaCreateTextureObject(&texX, &resDescX, &texDesc, NULL)) != cudaSuccess) {
+            printf("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
+            exit(-1);
         }
-        if ((cudaStat=cudaBindTexture (&texYOfs,texY,W2_CR,sizeY*sizeof(W2_CR[0]))) !=
-            cudaSuccess) {
-            cudaUnbindTexture (texX);
-            //cublasSetError (ctx, CUBLAS_STATUS_MAPPING_ERROR);
-            printf ("CUBLAS_STATUS_MAPPING_ERROR IN LINE %d\n",__LINE__);
-           exit(-1);
+        if ((cudaStat=cudaCreateTextureObject(&texY, &resDescY, &texDesc, NULL)) != cudaSuccess) {
+            cudaDestroyTextureObject(texX);
+            printf("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
+            exit(-1);
         }
-        texXOfs /= sizeof(W1_CR[0]);
-        texYOfs /= sizeof(W2_CR[0]);
     }
     /* allocate memory to collect results, one per CTA */
     //printf("nbrCtas = %d\n",nbrCtas);
@@ -383,12 +393,12 @@


     if (useTexture) {
-        if ((cudaStat = cudaUnbindTexture (texX)) != cudaSuccess) {
-            printf ("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
+        if ((cudaStat = cudaDestroyTextureObject(texX)) != cudaSuccess) {
+            printf("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
             exit(-1);
         }
-        if ((cudaStat = cudaUnbindTexture (texY)) != cudaSuccess) {
-            printf ("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
+        if ((cudaStat = cudaDestroyTextureObject(texY)) != cudaSuccess) {
+            printf("CUDA ERROR IN LINE %d  Error: %d %s\n", __LINE__, cudaStat, cudaGetErrorString(cudaStat));
             exit(-1);
         }
     }