;;; SPDX-License-Identifier: GPL-3.0-or-later
;;; Copyright © 2024 Nicolas Graves
;;;
;;; This module provides CUDA-enabled variants of machine-learning
;;; packages (Gloo, TensorPipe, PyTorch).  Each variant reuses the source
;;; and, where possible, the inputs/arguments of its free counterpart
;;; from (gnu packages machine-learning), adding the NVIDIA toolchain
;;; pieces from (nongnu packages nvidia).

(define-module (nongnu packages machine-learning)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (guix gexp)
  #:use-module (guix packages)
  #:use-module (guix utils)
  #:use-module (guix build-system cmake)
  #:use-module (guix build-system copy)
  #:use-module (guix build-system gnu)
  #:use-module (guix build-system python)
  #:use-module (guix git-download)
  #:use-module (gnu packages)
  #:use-module (gnu packages check)
  #:use-module (gnu packages cpp)
  #:use-module (gnu packages libevent)
  #:use-module (gnu packages machine-learning)
  #:use-module (gnu packages pkg-config)
  #:use-module (gnu packages python-xyz)
  #:use-module (gnu packages serialization)
  #:use-module (nongnu packages nvidia)
  #:use-module (ice-9 match))

;; CUDA-enabled variant of Gloo: same source as upstream, but extends the
;; inputs of the free `gloo' package with cuda-toolkit and nvidia-nccl and
;; enables CUDA at configure time.
(define-public gloo-cuda
  (let ((version "0.0.0") ; no proper version tag
        (commit "e6d509b527712a143996f2f59a10480efa804f8b")
        (revision "2"))
    (package
      (name "gloo-cuda")
      (version (git-version version revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/facebookincubator/gloo")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "11ywsn1lrd1cpzm1iwvin2c5l962zib5bd852vl54bp12a0w6klj"))))
      (build-system cmake-build-system)
      (native-inputs (list googletest))
      (inputs
       ;; Inherit gloo's inputs and add the NVIDIA bits.
       (modify-inputs (package-inputs gloo)
         (append cuda-toolkit nvidia-nccl)))
      (arguments
       ;; Inherit gloo's build arguments, only prepending the CUDA switch
       ;; to its #:configure-flags.
       (substitute-keyword-arguments (package-arguments gloo)
         ((#:configure-flags flags ''())
          #~(cons "-DUSE_CUDA=ON" #$flags))))
      (synopsis "Collective communications library")
      (description "Gloo is a collective communications library. It comes with a number of collective algorithms useful for machine learning applications. These include a barrier, broadcast, and allreduce.
Note: This package provides NVIDIA GPU support.")
      (home-page "https://github.com/facebookincubator/gloo")
      (license license:bsd-3))))

;; PyTorch release packaged by python-pytorch-cuda below.
(define %python-pytorch-version "2.4.0")

;; Source for python-pytorch-cuda.  Mirrors the cleanup done for the free
;; python-pytorch package (unbundling third-party code, deleting generated
;; files so they are regenerated at build time), except that the CUDA
;; kernel sources are kept — see the commented-out deletions at the end of
;; the snippet.
(define %python-pytorch-src
  (origin
    (method git-fetch)
    (uri (git-reference
          (url "https://github.com/pytorch/pytorch")
          (commit (string-append "v" %python-pytorch-version))))
    (file-name (git-file-name "python-pytorch" %python-pytorch-version))
    (sha256
     (base32
      "18hdhzr12brj0b7ppyiscax0dbra30207qx0cckw78midfkcn7cn"))
    (patches (search-patches "python-pytorch-system-libraries.patch"
                             "python-pytorch-runpath.patch"
                             "python-pytorch-without-kineto.patch"
                             ;; Some autogeneration scripts depend on the
                             ;; compiled PyTorch library.  Therefore, we
                             ;; create dummy versions which are regenerated
                             ;; later.
                             "python-pytorch-fix-codegen.patch"))
    (modules '((guix build utils)))
    (snippet
     '(begin
        ;; Bundled or unused code
        (for-each
         (lambda (dir)
           (when (file-exists? dir)
             (delete-file-recursively dir)))
         '("android"
           ;; "aten/src/ATen/native/cuda/cutlass_extensions"
           "aten/src/ATen/native/quantized/cpu/qnnpack"
           "caffe2/mobile/contrib/libopencl-stub"
           "caffe2/mobile/contrib/libvulkan-stub"
           "third_party"))
        ;; Autogenerated files
        (for-each
         delete-file
         '("aten/src/ATen/nnapi/nnapi_wrapper.cpp"
           "aten/src/ATen/nnapi/nnapi_wrapper.h"
           ;; These files contain just lists of floating point values and
           ;; might be as well hand-written.
           ;; "test/cpp/api/init_baseline.h"
           ;; "test/cpp/api/optim_baseline.h"
           "test/mobile/test_upgrader_bytecode_table_example.cpp"
           "torch/csrc/jit/mobile/upgrader_mobile.cpp"
           "torch/csrc/jit/runtime/decomposition_registry_util.cpp"
           "torch/csrc/jit/runtime/serialized_shape_function_registry.cpp"
           "torch/csrc/jit/tensorexpr/external_functions_codegen.cpp"
           "torch/csrc/jit/serialization/mobile_bytecode_generated.h"))
        (delete-file-recursively ".github")
        ;; These files are needed for CUDA.
        ;; (for-each
        ;;  (lambda (dir)
        ;;    (for-each
        ;;     delete-file
        ;;     (find-files dir "\\.cu$")))
        ;;  '("aten/src/ATen/native/transformers/cuda/flash_attn/kernels"
        ;;    "aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernels"))
        ))))

;; CUDA-enabled PyTorch.  Inherits the build arguments of the free
;; python-pytorch package and adds two phases: one patching the CMake
;; files for the unbundled NVIDIA libraries, and one exporting the
;; environment variables that switch the build over to CUDA.
(define-public python-pytorch-cuda
  (package
    (name "python-pytorch-cuda")
    (version %python-pytorch-version)
    (source %python-pytorch-src)
    (build-system python-build-system)
    (arguments
     (substitute-keyword-arguments (package-arguments python-pytorch)
       ((#:phases phases)
        #~(modify-phases #$phases
            (add-after 'cmake-patches 'cuda-cmake-patches
              (lambda _
                ;; XXX: Currently nvidia-cudnn-frontend doesn't install CMake
                ;; configuration files, we must add unbundled nlohmann-json.
                ;; Additionally, it won't work without CUDNN_INCLUDE_DIR.
                (substitute* "cmake/Dependencies.cmake"
                  (("set\\(CUDNN_FRONTEND_INCLUDE_DIR.*$")
                   (format #f "set(CUDNN_FRONTEND_INCLUDE_DIR ~a/include) target_include_directories(torch::cudnn INTERFACE ${CUDNN_INCLUDE_DIR} ${~a/include} )~%"
                           #$(this-package-input "nvidia-cudnn-frontend")
                           #$(this-package-input "nlohmann-json"))))
                ;; XXX: Link the right include dir for cutlass.
                (substitute* "aten/src/ATen/CMakeLists.txt"
                  (("\
\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./\\.\\./\\.\\./third_party/cutlass")
                   #$(this-package-input "nvidia-cutlass")))
                ;; XXX: Not linking gtest+gtest_main breaks compilation
                (substitute* '("c10/cuda/test/CMakeLists.txt"
                               "caffe2/CMakeLists.txt")
                  (("target_link_libraries\\((.* gtest_main)\\)" all content)
                   (format #f "target_link_libraries(~a gtest)" content)))))
            (add-after 'use-system-libraries 'use-cuda-libraries
              (lambda _
                ;; Environment variables consumed by PyTorch's build system
                ;; to locate and enable the CUDA/cuDNN stack.
                (setenv "USE_CUDA" "1")
                (setenv "CUDA_HOME" #$(this-package-input "cuda-dev"))
                (setenv "CUDA_TOOLKIT_ROOT_DIR"
                        #$(this-package-input "cuda-dev"))
                (setenv "CUDA_USE_STATIC_CUDA_RUNTIME" "0")
                (setenv "CUDA_PROPAGATE_HOST_FLAGS" "0")
                (setenv "CUSPARSELT_LIBRARY"
                        #$(file-append (this-package-input "cuda-dev")
                                       "/lib"))
                (setenv "CUSPARSELT_INCLUDE_DIR"
                        #$(file-append (this-package-input "cuda-dev")
                                       "/include"))
                (setenv "USE_CUDNN" "1")
                (setenv "CUDNN_LIB_DIR"
                        #$(file-append (this-package-input "nvidia-cudnn")
                                       "/lib"))
                (setenv "CUDNN_INCLUDE_DIR"
                        #$(file-append (this-package-input "nvidia-cudnn")
                                       "/include"))
                ;; XXX: 3.5, 5.0 and 9.0a break tests compilation
                ;; See https://github.com/pytorch/pytorch/issues/113948
                (setenv "TORCH_CUDA_ARCH_LIST" "8.0 8.6 8.9 9.0")
                ;; XXX: Current cutlass package doesn't have necessary
                ;; headers to enable this option.
                (setenv "USE_ROCM" "0")))))))
    (native-inputs (package-native-inputs python-pytorch))
    (inputs
     ;; Swap the CPU-only tensorpipe/gloo for the CUDA variants defined in
     ;; this module, and add the NVIDIA libraries referenced by the phases
     ;; above (labels must match the `this-package-input' calls).
     (modify-inputs (package-inputs python-pytorch)
       (replace "tensorpipe" tensorpipe-cuda)
       (replace "gloo" gloo-cuda)
       (append nvidia-cudnn
               nvidia-cudnn-frontend
               cuda-dev
               nlohmann-json
               nvidia-cutlass
               nvidia-nccl)))
    (propagated-inputs (package-propagated-inputs python-pytorch))
    (home-page "https://pytorch.org/")
    (synopsis "Python library for tensor computation and deep neural networks")
    (description "PyTorch is a Python package that provides two high-level features: @itemize @item tensor computation (like NumPy) with strong GPU acceleration; @item deep neural networks (DNNs) built on a tape-based autograd system. @end itemize You can reuse Python packages such as NumPy, SciPy, and Cython to extend PyTorch when needed. Note: This package provides NVIDIA GPU support.")
    (license license:bsd-3)))

;; CUDA-enabled TensorPipe.  Reuses the free tensorpipe package's source
;; and version verbatim, enabling CUDA support via -DTP_USE_CUDA=1.
(define-public tensorpipe-cuda
  (package
    (name "tensorpipe-cuda")
    (version (package-version tensorpipe))
    (source (package-source tensorpipe))
    (build-system cmake-build-system)
    (arguments
     (list #:configure-flags
           ''("-DBUILD_SHARED_LIBS=ON" "-DTP_USE_CUDA=1")
           ;; There are no tests
           #:tests? #f))
    (inputs (list cuda-nvml-dev cuda-toolkit libuv))
    (native-inputs (list googletest pkg-config pybind11 libnop))
    (home-page "https://github.com/pytorch/tensorpipe")
    (synopsis "Tensor-aware point-to-point communication primitive for machine learning")
    (description "TensorPipe provides a tensor-aware channel to transfer rich objects from one process to another while using the fastest transport for the tensors contained therein. Note: This version includes NVIDIA CUDA API and headers.")
    (license license:bsd-3)))