nongnu: Add gloo-cuda, python-pytorch-cuda, tensorpipe-cuda.

* nongnu/packages/machine-learning.scm (gloo-cuda, python-pytorch-cuda,
tensorpipe-cuda): New variables.

Parent commit: a719e9e1bc — this commit: 6834158025.
nongnu/packages/machine-learning.scm: new file, 232 lines (@ -0,0 +1,232 @@).
;;; SPDX-License-Identifier: GPL-3.0-or-later
;;; Copyright © 2024 Nicolas Graves <ngraves@ngraves.fr>
;;; Module of non-free machine-learning packages: CUDA-enabled variants of
;;; gloo, tensorpipe and python-pytorch, building on the free versions from
;;; (gnu packages machine-learning) and the NVIDIA stack from
;;; (nongnu packages nvidia).
(define-module (nongnu packages machine-learning)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (guix gexp)
  #:use-module (guix packages)
  #:use-module (guix utils)
  #:use-module (guix build-system cmake)
  #:use-module (guix build-system copy)
  #:use-module (guix build-system gnu)
  #:use-module (guix build-system python)
  #:use-module (guix git-download)
  #:use-module (gnu packages)
  #:use-module (gnu packages check)
  #:use-module (gnu packages cpp)
  #:use-module (gnu packages libevent)
  #:use-module (gnu packages machine-learning)
  #:use-module (gnu packages pkg-config)
  #:use-module (gnu packages python-xyz)
  #:use-module (gnu packages serialization)
  #:use-module (nongnu packages nvidia)
  #:use-module (ice-9 match))
;; CUDA-enabled variant of gloo.  Inherits the free package's inputs and
;; build arguments, adding the CUDA toolkit/NCCL and -DUSE_CUDA=ON.
(define-public gloo-cuda
  ;; Upstream publishes no release tags, so pin a commit and synthesize a
  ;; version with git-version.
  (let ((version "0.0.0")               ; no proper version tag
        (commit "e6d509b527712a143996f2f59a10480efa804f8b")
        (revision "2"))
    (package
      (name "gloo-cuda")
      (version (git-version version revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/facebookincubator/gloo")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "11ywsn1lrd1cpzm1iwvin2c5l962zib5bd852vl54bp12a0w6klj"))))
      (build-system cmake-build-system)
      (native-inputs
       (list googletest))
      ;; Reuse the free gloo's inputs, extended with the CUDA stack.
      (inputs
       (modify-inputs (package-inputs gloo)
         (append cuda-toolkit nvidia-nccl)))
      ;; Inherit gloo's build arguments and switch CUDA support on.
      (arguments
       (substitute-keyword-arguments (package-arguments gloo)
         ((#:configure-flags flags ''())
          #~(cons "-DUSE_CUDA=ON" #$flags))))
      (synopsis "Collective communications library")
      (description
       "Gloo is a collective communications library. It comes with a
number of collective algorithms useful for machine learning applications.
These include a barrier, broadcast, and allreduce.

Note: This package provides NVIDIA GPU support.")
      (home-page "https://github.com/facebookincubator/gloo")
      (license license:bsd-3))))
;; PyTorch release tracked by this module; shared by %python-pytorch-src and
;; python-pytorch-cuda so source and package stay in sync.
(define %python-pytorch-version "2.4.0")
;; PyTorch source shared by the CUDA package: upstream git checkout plus the
;; patches and unbundling snippet mirroring Guix's python-pytorch, except
;; that CUDA kernel sources are kept (see the commented-out deletions below).
(define %python-pytorch-src
  (origin
    (method git-fetch)
    (uri (git-reference
          (url "https://github.com/pytorch/pytorch")
          (commit (string-append "v" %python-pytorch-version))))
    (file-name (git-file-name "python-pytorch" %python-pytorch-version))
    (sha256
     (base32
      "18hdhzr12brj0b7ppyiscax0dbra30207qx0cckw78midfkcn7cn"))
    (patches (search-patches "python-pytorch-system-libraries.patch"
                             "python-pytorch-runpath.patch"
                             "python-pytorch-without-kineto.patch"
                             ;; Some autogeneration scripts depend on the
                             ;; compile PyTorch library. Therefore, we create
                             ;; dummy versions which are regenerated later.
                             "python-pytorch-fix-codegen.patch"))
    (modules '((guix build utils)))
    (snippet
     '(begin
        ;; Bundled or unused code
        (for-each
         (lambda (dir)
           (when (file-exists? dir)
             (delete-file-recursively dir)))
         '("android"
           ;; Kept for CUDA, unlike in the free python-pytorch:
           ;; "aten/src/ATen/native/cuda/cutlass_extensions"
           "aten/src/ATen/native/quantized/cpu/qnnpack"
           "caffe2/mobile/contrib/libopencl-stub"
           "caffe2/mobile/contrib/libvulkan-stub"
           "third_party"))

        ;; Autogenerated files, regenerated during the build (see
        ;; python-pytorch-fix-codegen.patch above).
        (for-each
         delete-file
         '("aten/src/ATen/nnapi/nnapi_wrapper.cpp"
           "aten/src/ATen/nnapi/nnapi_wrapper.h"
           ;; These files contain just lists of floating point values and
           ;; might be as well hand-written.
           ;; "test/cpp/api/init_baseline.h"
           ;; "test/cpp/api/optim_baseline.h"
           "test/mobile/test_upgrader_bytecode_table_example.cpp"
           "torch/csrc/jit/mobile/upgrader_mobile.cpp"
           "torch/csrc/jit/runtime/decomposition_registry_util.cpp"
           "torch/csrc/jit/runtime/serialized_shape_function_registry.cpp"
           "torch/csrc/jit/tensorexpr/external_functions_codegen.cpp"
           "torch/csrc/jit/serialization/mobile_bytecode_generated.h"))
        (delete-file-recursively ".github")
        ;; These files are needed for CUDA, so the deletion done by the free
        ;; python-pytorch is disabled here.
        ;; (for-each
        ;;  (lambda (dir)
        ;;    (for-each
        ;;     delete-file
        ;;     (find-files dir "\\.cu$")))
        ;;  '("aten/src/ATen/native/transformers/cuda/flash_attn/kernels"
        ;;    "aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernels"))
        ))))
;; CUDA-enabled PyTorch.  Reuses python-pytorch's arguments and inputs,
;; adding two phases: one patching the CMake files for the unbundled NVIDIA
;; inputs, and one exporting the CUDA-related environment variables that
;; PyTorch's setup.py reads.
(define-public python-pytorch-cuda
  (package
    (name "python-pytorch-cuda")
    (version %python-pytorch-version)
    (source %python-pytorch-src)
    (build-system python-build-system)
    (arguments
     (substitute-keyword-arguments (package-arguments python-pytorch)
       ((#:phases phases)
        #~(modify-phases #$phases
            (add-after 'cmake-patches 'cuda-cmake-patches
              (lambda _
                ;; XXX: Currently nvidia-cudnn-frontend doesn't install CMake
                ;; configuration files, we must add unbundled nlohmann-json.
                ;; Additionally, it won't work without CUDNN_INCLUDE_DIR.
                (substitute* "cmake/Dependencies.cmake"
                  (("set\\(CUDNN_FRONTEND_INCLUDE_DIR.*$")
                   (format #f "set(CUDNN_FRONTEND_INCLUDE_DIR ~a/include)
target_include_directories(torch::cudnn INTERFACE
${CUDNN_INCLUDE_DIR} ${~a/include}
)~%"
                           #$(this-package-input "nvidia-cudnn-frontend")
                           #$(this-package-input "nlohmann-json"))))
                ;; XXX: Link the right include dir for cutlass.
                (substitute* "aten/src/ATen/CMakeLists.txt"
                  (("\
\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./\\.\\./\\.\\./third_party/cutlass")
                   #$(this-package-input "nvidia-cutlass")))
                ;; XXX: Not linking gtest+gtest_main breaks compilation
                ;; (the regex capture keeps "... gtest_main" and appends
                ;; gtest to the link line).
                (substitute* '("c10/cuda/test/CMakeLists.txt"
                               "caffe2/CMakeLists.txt")
                  (("target_link_libraries\\((.* gtest_main)\\)" all content)
                   (format #f "target_link_libraries(~a gtest)"
                           content)))))
            (add-after 'use-system-libraries 'use-cuda-libraries
              (lambda _
                ;; Environment variables consumed by PyTorch's build system.
                (setenv "USE_CUDA" "1")
                (setenv "CUDA_HOME" #$(this-package-input "cuda-dev"))
                (setenv "CUDA_TOOLKIT_ROOT_DIR"
                        #$(this-package-input "cuda-dev"))
                (setenv "CUDA_USE_STATIC_CUDA_RUNTIME" "0")
                (setenv "CUDA_PROPAGATE_HOST_FLAGS" "0")
                ;; NOTE(review): CUSPARSELT_LIBRARY points at the lib
                ;; directory, not a library file — confirm this is what the
                ;; build expects.
                (setenv "CUSPARSELT_LIBRARY"
                        #$(file-append
                           (this-package-input "cuda-dev") "/lib"))
                (setenv "CUSPARSELT_INCLUDE_DIR"
                        #$(file-append
                           (this-package-input "cuda-dev") "/include"))
                (setenv "USE_CUDNN" "1")
                (setenv "CUDNN_LIB_DIR"
                        #$(file-append
                           (this-package-input "nvidia-cudnn") "/lib"))
                (setenv "CUDNN_INCLUDE_DIR"
                        #$(file-append
                           (this-package-input "nvidia-cudnn") "/include"))
                ;; XXX: 3.5, 5.0 and 9.0a break tests compilation
                ;; See https://github.com/pytorch/pytorch/issues/113948
                (setenv "TORCH_CUDA_ARCH_LIST" "8.0 8.6 8.9 9.0")
                ;; XXX: Current cutlass package doesn't have necessary
                ;; headers to enable this option.
                ;; NOTE(review): the comment above appears to refer to a
                ;; flash-attention option rather than ROCm — confirm which
                ;; setting it was meant for.
                (setenv "USE_ROCM" "0")))))))
    (native-inputs (package-native-inputs python-pytorch))
    ;; Swap the free tensorpipe/gloo for their CUDA variants and add the
    ;; NVIDIA libraries the cuda-cmake-patches phase refers to.
    (inputs
     (modify-inputs (package-inputs python-pytorch)
       (replace "tensorpipe" tensorpipe-cuda)
       (replace "gloo" gloo-cuda)
       (append nvidia-cudnn
               nvidia-cudnn-frontend
               cuda-dev
               nlohmann-json
               nvidia-cutlass
               nvidia-nccl)))
    (propagated-inputs (package-propagated-inputs python-pytorch))
    (home-page "https://pytorch.org/")
    (synopsis "Python library for tensor computation and deep neural networks")
    (description
     "PyTorch is a Python package that provides two high-level features:

@itemize
@item tensor computation (like NumPy) with strong GPU acceleration;
@item deep neural networks (DNNs) built on a tape-based autograd system.
@end itemize

You can reuse Python packages such as NumPy, SciPy, and Cython to extend
PyTorch when needed.

Note: This package provides NVIDIA GPU support.")
    (license license:bsd-3)))
;; CUDA-enabled variant of tensorpipe.  Shares the free package's version
;; and source; only the build flags and the CUDA inputs differ.
(define-public tensorpipe-cuda
  (package
    (name "tensorpipe-cuda")
    (version (package-version tensorpipe))
    (source (package-source tensorpipe))
    (build-system cmake-build-system)
    (arguments
     (list
      ;; There are no tests
      #:tests? #f
      #:configure-flags
      #~(list "-DBUILD_SHARED_LIBS=ON" "-DTP_USE_CUDA=1")))
    (native-inputs
     (list googletest pkg-config pybind11 libnop))
    (inputs
     (list cuda-nvml-dev cuda-toolkit libuv))
    (home-page "https://github.com/pytorch/tensorpipe")
    (synopsis "Tensor-aware point-to-point communication primitive for
machine learning")
    (description "TensorPipe provides a tensor-aware channel to transfer
rich objects from one process to another while using the fastest transport for
the tensors contained therein.
Note: This version includes NVIDIA CUDA API and headers.")
    (license license:bsd-3)))