* nongnu/packages/machine-learning.scm (gloo-cuda, python-pytorch-cuda, tensorpipe-cuda): New variables.
;;; SPDX-License-Identifier: GPL-3.0-or-later
;;; Copyright © 2024 Nicolas Graves <ngraves@ngraves.fr>

(define-module (nongnu packages machine-learning)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (guix gexp)
  #:use-module (guix packages)
  #:use-module (guix utils)
  #:use-module (guix build-system cmake)
  #:use-module (guix build-system copy)
  #:use-module (guix build-system gnu)
  #:use-module (guix build-system python)
  #:use-module (guix git-download)
  #:use-module (gnu packages)
  #:use-module (gnu packages check)
  #:use-module (gnu packages cpp)
  #:use-module (gnu packages libevent)
  #:use-module (gnu packages machine-learning)
  #:use-module (gnu packages pkg-config)
  #:use-module (gnu packages python-xyz)
  #:use-module (gnu packages serialization)
  #:use-module (nongnu packages nvidia)
  #:use-module (ice-9 match))

(define-public gloo-cuda
  (let ((version "0.0.0") ; no proper version tag
        (commit "e6d509b527712a143996f2f59a10480efa804f8b")
        (revision "2"))
    (package
      (name "gloo-cuda")
      (version (git-version version revision commit))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/facebookincubator/gloo")
               (commit commit)))
         (file-name (git-file-name name version))
         (sha256
          (base32
           "11ywsn1lrd1cpzm1iwvin2c5l962zib5bd852vl54bp12a0w6klj"))))
      (build-system cmake-build-system)
      (native-inputs
       (list googletest))
      (inputs
       (modify-inputs (package-inputs gloo)
         (append cuda-toolkit nvidia-nccl)))
      (arguments
       (substitute-keyword-arguments (package-arguments gloo)
         ((#:configure-flags flags ''())
          #~(cons "-DUSE_CUDA=ON" #$flags))))
      (synopsis "Collective communications library")
      (description
       "Gloo is a collective communications library. It comes with a
number of collective algorithms useful for machine learning applications.
These include a barrier, broadcast, and allreduce.

Note: This package provides NVIDIA GPU support.")
      (home-page "https://github.com/facebookincubator/gloo")
      (license license:bsd-3))))

(define %python-pytorch-version "2.4.0")

(define %python-pytorch-src
  (origin
    (method git-fetch)
    (uri (git-reference
          (url "https://github.com/pytorch/pytorch")
          (commit (string-append "v" %python-pytorch-version))))
    (file-name (git-file-name "python-pytorch" %python-pytorch-version))
    (sha256
     (base32
      "18hdhzr12brj0b7ppyiscax0dbra30207qx0cckw78midfkcn7cn"))
    (patches (search-patches "python-pytorch-system-libraries.patch"
                             "python-pytorch-runpath.patch"
                             "python-pytorch-without-kineto.patch"
                             ;; Some autogeneration scripts depend on the
                             ;; compiled PyTorch library. Therefore, we create
                             ;; dummy versions which are regenerated later.
                             "python-pytorch-fix-codegen.patch"))
    (modules '((guix build utils)))
    (snippet
     '(begin
        ;; Bundled or unused code
        (for-each
         (lambda (dir)
           (when (file-exists? dir)
             (delete-file-recursively dir)))
         '("android"
           ;; "aten/src/ATen/native/cuda/cutlass_extensions"
           "aten/src/ATen/native/quantized/cpu/qnnpack"
           "caffe2/mobile/contrib/libopencl-stub"
           "caffe2/mobile/contrib/libvulkan-stub"
           "third_party"))

        ;; Autogenerated files
        (for-each
         delete-file
         '("aten/src/ATen/nnapi/nnapi_wrapper.cpp"
           "aten/src/ATen/nnapi/nnapi_wrapper.h"
           ;; These files contain just lists of floating point values
           ;; and might as well be hand-written.
           ;; "test/cpp/api/init_baseline.h"
           ;; "test/cpp/api/optim_baseline.h"
           "test/mobile/test_upgrader_bytecode_table_example.cpp"
           "torch/csrc/jit/mobile/upgrader_mobile.cpp"
           "torch/csrc/jit/runtime/decomposition_registry_util.cpp"
           "torch/csrc/jit/runtime/serialized_shape_function_registry.cpp"
           "torch/csrc/jit/tensorexpr/external_functions_codegen.cpp"
           "torch/csrc/jit/serialization/mobile_bytecode_generated.h"))
        (delete-file-recursively ".github")
        ;; These files are needed for CUDA.
        ;; (for-each
        ;;  (lambda (dir)
        ;;    (for-each
        ;;     delete-file
        ;;     (find-files dir "\\.cu$")))
        ;;  '("aten/src/ATen/native/transformers/cuda/flash_attn/kernels"
        ;;    "aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernels"))
        ))))

(define-public python-pytorch-cuda
  (package
    (name "python-pytorch-cuda")
    (version %python-pytorch-version)
    (source %python-pytorch-src)
    (build-system python-build-system)
    (arguments
     (substitute-keyword-arguments (package-arguments python-pytorch)
       ((#:phases phases)
        #~(modify-phases #$phases
            (add-after 'cmake-patches 'cuda-cmake-patches
              (lambda _
                ;; XXX: Currently nvidia-cudnn-frontend doesn't install CMake
                ;; configuration files, so we must add the unbundled
                ;; nlohmann-json. Additionally, it won't work without
                ;; CUDNN_INCLUDE_DIR.
                (substitute* "cmake/Dependencies.cmake"
                  (("set\\(CUDNN_FRONTEND_INCLUDE_DIR.*$")
                   (format #f "set(CUDNN_FRONTEND_INCLUDE_DIR ~a/include)
target_include_directories(torch::cudnn INTERFACE
  ${CUDNN_INCLUDE_DIR} ${~a/include}
)~%"
                           #$(this-package-input "nvidia-cudnn-frontend")
                           #$(this-package-input "nlohmann-json"))))
                ;; XXX: Link the right include dir for cutlass.
                (substitute* "aten/src/ATen/CMakeLists.txt"
                  (("\
\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./\\.\\./\\.\\./third_party/cutlass")
                   #$(this-package-input "nvidia-cutlass")))
                ;; XXX: Not linking gtest+gtest_main breaks compilation.
                (substitute* '("c10/cuda/test/CMakeLists.txt"
                               "caffe2/CMakeLists.txt")
                  (("target_link_libraries\\((.* gtest_main)\\)" all content)
                   (format #f "target_link_libraries(~a gtest)"
                           content)))))
            (add-after 'use-system-libraries 'use-cuda-libraries
              (lambda _
                (setenv "USE_CUDA" "1")
                (setenv "CUDA_HOME" #$(this-package-input "cuda-dev"))
                (setenv "CUDA_TOOLKIT_ROOT_DIR"
                        #$(this-package-input "cuda-dev"))
                (setenv "CUDA_USE_STATIC_CUDA_RUNTIME" "0")
                (setenv "CUDA_PROPAGATE_HOST_FLAGS" "0")
                (setenv "CUSPARSELT_LIBRARY"
                        #$(file-append
                           (this-package-input "cuda-dev") "/lib"))
                (setenv "CUSPARSELT_INCLUDE_DIR"
                        #$(file-append
                           (this-package-input "cuda-dev") "/include"))
                (setenv "USE_CUDNN" "1")
                (setenv "CUDNN_LIB_DIR"
                        #$(file-append
                           (this-package-input "nvidia-cudnn") "/lib"))
                (setenv "CUDNN_INCLUDE_DIR"
                        #$(file-append
                           (this-package-input "nvidia-cudnn") "/include"))
                ;; XXX: 3.5, 5.0 and 9.0a break test compilation.
                ;; See https://github.com/pytorch/pytorch/issues/113948
                (setenv "TORCH_CUDA_ARCH_LIST" "8.0 8.6 8.9 9.0")
                ;; XXX: Current cutlass package doesn't have necessary
                ;; headers to enable this option.
                (setenv "USE_ROCM" "0")))))))
    (native-inputs (package-native-inputs python-pytorch))
    (inputs
     (modify-inputs (package-inputs python-pytorch)
       (replace "tensorpipe" tensorpipe-cuda)
       (replace "gloo" gloo-cuda)
       (append nvidia-cudnn
               nvidia-cudnn-frontend
               cuda-dev
               nlohmann-json
               nvidia-cutlass
               nvidia-nccl)))
    (propagated-inputs (package-propagated-inputs python-pytorch))
    (home-page "https://pytorch.org/")
    (synopsis "Python library for tensor computation and deep neural networks")
    (description
     "PyTorch is a Python package that provides two high-level features:

@itemize
@item tensor computation (like NumPy) with strong GPU acceleration;
@item deep neural networks (DNNs) built on a tape-based autograd system.
@end itemize

You can reuse Python packages such as NumPy, SciPy, and Cython to extend
PyTorch when needed.

Note: This package provides NVIDIA GPU support.")
    (license license:bsd-3)))

(define-public tensorpipe-cuda
  (package
    (name "tensorpipe-cuda")
    (version (package-version tensorpipe))
    (source (package-source tensorpipe))
    (build-system cmake-build-system)
    (arguments
     (list
      #:configure-flags
      ''("-DBUILD_SHARED_LIBS=ON" "-DTP_USE_CUDA=1")
      ;; There are no tests
      #:tests? #f))
    (inputs (list cuda-nvml-dev cuda-toolkit libuv))
    (native-inputs (list googletest pkg-config pybind11 libnop))
    (home-page "https://github.com/pytorch/tensorpipe")
    (synopsis "Tensor-aware point-to-point communication primitive for
machine learning")
    (description "TensorPipe provides a tensor-aware channel to transfer
rich objects from one process to another while using the fastest transport for
the tensors contained therein.
Note: This version includes NVIDIA CUDA API and headers.")
    (license license:bsd-3)))
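
;;; The lines below are a rough usage sketch, not part of the package
;;; definitions.  Assuming the channel providing (nongnu packages ...) is
;;; configured and the host has a working NVIDIA driver, building and
;;; smoke-testing the CUDA-enabled PyTorch variant could look like:
;;;
;;;   guix build python-pytorch-cuda
;;;   guix shell python python-pytorch-cuda -- \
;;;     python3 -c "import torch; print(torch.cuda.is_available())"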