diff --git a/guix/import/cuda.scm b/guix/import/cuda.scm
new file mode 100644
index 0000000..55cf0ed
--- /dev/null
+++ b/guix/import/cuda.scm
@@ -0,0 +1,219 @@
+;;; SPDX-License-Identifier: GPL-3.0-or-later
+;;; Copyright © 2025 Nicolas Graves
+
+;;; This file is not part of GNU Guix but requires this naming scheme
+;;; so that the %cuda-updater is properly read when using
+;;; `guix refresh -L$(pwd) cuda-cccl' in nonguix root.
+
+(define-module (guix import cuda)
+  #:use-module (gcrypt hash)
+  #:use-module (gnu packages)
+  #:use-module (guix base16)
+  #:use-module (guix base32)
+  #:use-module (guix http-client)
+  #:use-module (guix import json)
+  #:use-module (guix import utils)
+  #:use-module (guix memoization)
+  #:use-module (guix packages)
+  #:use-module (guix records)
+  #:use-module (guix upstream)
+  #:use-module (guix utils)
+  #:use-module (ice-9 match)
+  #:use-module (ice-9 regex)
+  #:use-module (ice-9 textual-ports)
+  #:use-module (json)
+  #:use-module (nonguix build-system cuda)
+  #:use-module (srfi srfi-1)
+  #:export (%cuda-updater))
+
+(define %cuda-repository-url
+  "https://developer.download.nvidia.com/compute/cuda/redist/")
+
+(define (cuda-system->guix-system system)
+  "Return the Guix system string matching SYSTEM, a platform name used in the
+NVIDIA manifests, or #f when SYSTEM is not one of the known platforms."
+  (match system
+    ("linux-x86_64" "x86_64-linux")
+    ("linux-aarch64" "aarch64-linux")
+    ("linux-ppc64le" "powerpc64le-linux")
+    (_ #f)))
+
+(define (valid-version? version-string)
+  "Return the result of (version-prefix VERSION-STRING 3), or #f when that
+call raises an exception."
+  (false-if-exception (version-prefix version-string 3)))
+
+;; One component of the CUDA redistributable manifest: its name, its version
+;; and the list of per-system hashes.
+;; NOTE(review): VALID-VERSION? and CUDA-HASH-INFO? are given as bare field
+;; properties; confirm that 'define-record-type*' takes them into account.
+(define-record-type* <cuda-package>
+  cuda-package make-cuda-package
+  cuda-package? this-cuda-package
+  (name cuda-package-name)
+  (version cuda-package-version valid-version?)
+  (hash-info cuda-package-hash-info cuda-hash-info?))
+
+;; Hash of the archive of a component for one Guix system.
+(define-record-type* <cuda-hash-info>
+  cuda-hash-info make-cuda-hash-info
+  cuda-hash-info? this-cuda-hash-info
+  (system cuda-hash-info-system)
+  (sha256 cuda-hash-info-sha256))
+
+(define (cuda-toolkit-latest-version)
+  "Return the highest version for which the index page of
+%CUDA-REPOSITORY-URL lists a redistrib_VERSION.json manifest, or #f when it
+lists none."
+  (let* ((url (string-append %cuda-repository-url "index.html"))
+         (port (http-fetch url #:text? #t)) ; FIXME no error management
+         ;; Close PORT even when reading from it fails.
+         (html (dynamic-wind
+                 (const #t)
+                 (lambda () (get-string-all port))
+                 (lambda () (close-port port))))
+         ;; The dot before "json" is escaped and the version is captured as
+         ;; a submatch instead of being cut out of the file name by hand.
+         (versions (fold-matches "redistrib_([0-9.]+)\\.json" html '()
+                                 (lambda (matching result)
+                                   (cons (match:substring matching 1)
+                                         result)))))
+    ;; Pick the greatest version rather than the last one listed.
+    (reduce (lambda (candidate best)
+              (if (version>? candidate best) candidate best))
+            #f
+            versions)))
+
+(define (cuda-json-pkg-alist->cuda-package cuda-pkg-alist)
+  "Return the <cuda-package> described by CUDA-PKG-ALIST, a manifest entry
+whose first element is the component name.  A <cuda-hash-info> is created for
+each of the three supported platforms that the entry has a key for."
+  (make-cuda-package
+   (snake-case (first cuda-pkg-alist))
+   (assoc-ref cuda-pkg-alist "version")
+   (filter-map
+    (lambda (system)
+      (and=> (assoc-ref cuda-pkg-alist system)
+             (lambda (inner-alist)
+               (make-cuda-hash-info
+                (cuda-system->guix-system system)
+                ;; Re-encode the base16 digest of the manifest as nix-base32.
+                (bytevector->nix-base32-string
+                 (base16-string->bytevector
+                  (assoc-ref inner-alist "sha256")))))))
+    (list "linux-x86_64" "linux-aarch64" "linux-ppc64le"))))
+
+(define cuda-db-fetch
+  ;; Memoized procedure taking a toolkit version and returning the list of
+  ;; <cuda-package> records of its manifest, or the empty list when
+  ;; 'json-fetch' returns #f.
+  (memoize
+   (lambda (toolkit-version)
+     (map cuda-json-pkg-alist->cuda-package
+          ;; Only the manifest entries that are lists are converted.
+          (filter list?
+                  (or (json-fetch
+                       (string-append %cuda-repository-url
+                                      "redistrib_" toolkit-version ".json"))
+                      '()))))))
+
+(define (cuda-fetch name tk-version)
+  "Return the <cuda-package> named NAME in the manifest of toolkit version
+TK-VERSION, or #f when there is none."
+  (find (lambda (pkg) (equal? (cuda-package-name pkg) name))
+        (cuda-db-fetch tk-version)))
+
+(define* (latest-release package #:key (version #f))
+  "Return an <upstream-source> for the latest-release of PACKAGE, or #f when
+no toolkit version or no component named like PACKAGE is found.  VERSION, when
+true, is used as the toolkit version instead of the latest one."
+  (let* ((name (package-name package))
+         (version (or version (cuda-toolkit-latest-version)))
+         (package (and version (cuda-fetch name version)))
+         (version (and=> package cuda-package-version)))
+    (and version
+         (upstream-source
+          (package name)
+          (version version)
+          (urls (list (cuda-module-url name version)))))))
+
+(define (make-cuda-sexp cuda-package)
+  "Return the 'define-public' S-expression of a package for CUDA-PACKAGE, a
+<cuda-package>.  Its source hash is chosen by a 'match' on the system with one
+clause per <cuda-hash-info>; synopsis and description are left as #f."
+  `(define-public ,(string->symbol (cuda-package-name cuda-package))
+     (package
+       (name ,(cuda-package-name cuda-package))
+       (version ,(cuda-package-version cuda-package))
+       (source
+        (origin
+          (method url-fetch)
+          (uri (cuda-module-url name version))
+          (sha256
+           (base32
+            (match (or (%current-target-system) (%current-system))
+              ,@(map
+                 (lambda (info)
+                   (list (cuda-hash-info-system info)
+                         (cuda-hash-info-sha256 info)))
+                 (cuda-package-hash-info cuda-package)))))))
+       (build-system cuda-build-system)
+       (synopsis #f)
+       (description #f)
+       (home-page "https://developer.nvidia.com/cuda-toolkit")
+       (license (cuda-license name)))))
+
+(define (guix-name->cuda-name package)
+  "Return PACKAGE, a string, with its dashes replaced by underscores."
+  (string-join (string-split package #\-) "_"))
+
+;; NOTE(review): this procedure has the same name as the predicate of the
+;; <cuda-package> record type above and supersedes it in this module.
+(define (cuda-package? package)
+  "Return true if PACKAGE is a CUDA Toolkit package."
+  (eq? (package-build-system package) cuda-build-system))
+
+(define %cuda-updater
+  (upstream-updater
+   (name 'cuda)
+   (description "Updater for Cuda packages")
+   (pred cuda-package?)
+   (import latest-release)))
+
+;; The issue with guix native importer is that it will only update
+;; the x86_64 hash, but we do have different sources based on
+;; (%current-target-system).
+
+;; To update all hashes of a package, use:
+;; (make-cuda-sexp (cuda-fetch "cuda-profiler-api" "12.1.1"))
+
+;; To update all hashes of all packages, use:
+;; (use-modules (ice-9 pretty-print))
+;; (for-each
+;;  (lambda (name)
+;;    (pretty-print (make-cuda-sexp (cuda-fetch name "12.1.1"))))
+;;  '("cuda-cccl"
+;;    "cuda-cudart"
+;;    "cuda-cuobjdump"
+;;    "cuda-cuxxfilt"
+;;    "cuda-cupti"
+;;    "cuda-gdb"
+;;    "cuda-nvcc"
+;;    "cuda-nvml-dev"
+;;    "cuda-nvdisasm"
+;;    "cuda-nvprune"
+;;    "cuda-nvrtc"
+;;    "cuda-nvtx"
+;;    "cuda-opencl"
+;;    "cuda-sanitizer-api"
+;;    "libcublas"
+;;    "libcufft"
+;;    "libcurand"
+;;    "libcusolver"
+;;    "libcusparse"
+;;    ;; "libnvfatbin"
+;;    "libnvjitlink"
+;;    "libnvjpeg"
+;;    "libnpp"))
+
+;; cuda.scm ends here.
diff --git a/nongnu/packages/machine-learning.scm b/nongnu/packages/machine-learning.scm
new file mode 100644
index 0000000..9c411fd
--- /dev/null
+++ b/nongnu/packages/machine-learning.scm
@@ -0,0 +1,239 @@
+;;; SPDX-License-Identifier: GPL-3.0-or-later
+;;; Copyright © 2024 Nicolas Graves
+
+(define-module (nongnu packages machine-learning)
+  #:use-module ((guix licenses) #:prefix license:)
+  #:use-module (guix gexp)
+  #:use-module (guix packages)
+  #:use-module (guix utils)
+  #:use-module (guix build-system cmake)
+  #:use-module (guix build-system copy)
+  #:use-module (guix build-system gnu)
+  #:use-module (guix build-system python)
+  #:use-module (guix git-download)
+  #:use-module (gnu packages)
+  #:use-module (gnu packages check)
+  #:use-module (gnu packages cpp)
+  #:use-module (gnu packages libevent)
+  #:use-module (gnu packages machine-learning)
+  #:use-module (gnu packages pkg-config)
+  #:use-module (gnu packages python-xyz)
+  #:use-module (gnu packages serialization)
+  #:use-module (nongnu packages nvidia)
+  #:use-module (ice-9 match))
+
+;; Variant of 'gloo' configured with -DUSE_CUDA=ON, with cuda-toolkit and
+;; nvidia-nccl added to its inputs.
+(define-public gloo-cuda
+  (let ((version "0.0.0") ; no proper version tag
+        (commit "e6d509b527712a143996f2f59a10480efa804f8b")
+        (revision "2"))
+    (package
+      (name "gloo-cuda")
+      (version (git-version version revision commit))
+      (source
+       (origin
+         (method git-fetch)
+         (uri (git-reference
+               (url "https://github.com/facebookincubator/gloo")
+               (commit commit)))
+         (file-name (git-file-name name version))
+         (sha256
+          (base32
+           "11ywsn1lrd1cpzm1iwvin2c5l962zib5bd852vl54bp12a0w6klj"))))
+      (build-system cmake-build-system)
+      (native-inputs
+       (list googletest))
+      (inputs
+       (modify-inputs (package-inputs gloo)
+         (append cuda-toolkit nvidia-nccl)))
+      (arguments
+       (substitute-keyword-arguments (package-arguments gloo)
+         ((#:configure-flags flags ''())
+          #~(cons "-DUSE_CUDA=ON" #$flags))))
+      (synopsis "Collective communications library")
+      (description
+       "Gloo is a collective communications library. It comes with a
+number of collective algorithms useful for machine learning applications.
+These include a barrier, broadcast, and allreduce.
+
+Note: This package provides NVIDIA GPU support.")
+      (home-page "https://github.com/facebookincubator/gloo")
+      (license license:bsd-3))))
+
+(define %python-pytorch-version "2.4.0")
+
+;; Source of python-pytorch-cuda.  The snippet removes bundled code and
+;; autogenerated files but keeps the .cu kernels, which CUDA builds need.
+(define %python-pytorch-src
+  (origin
+    (method git-fetch)
+    (uri (git-reference
+          (url "https://github.com/pytorch/pytorch")
+          (commit (string-append "v" %python-pytorch-version))))
+    (file-name (git-file-name "python-pytorch" %python-pytorch-version))
+    (sha256
+     (base32
+      "18hdhzr12brj0b7ppyiscax0dbra30207qx0cckw78midfkcn7cn"))
+    (patches (search-patches "python-pytorch-system-libraries.patch"
+                             "python-pytorch-runpath.patch"
+                             "python-pytorch-without-kineto.patch"
+                             ;; Some autogeneration scripts depend on the
+                             ;; compiled PyTorch library. Therefore, we create
+                             ;; dummy versions which are regenerated later.
+                             "python-pytorch-fix-codegen.patch"))
+    (modules '((guix build utils)))
+    (snippet
+     '(begin
+        ;; Bundled or unused code
+        (for-each
+         (lambda (dir)
+           (when (file-exists? dir)
+             (delete-file-recursively dir)))
+         '("android"
+           ;; "aten/src/ATen/native/cuda/cutlass_extensions"
+           "aten/src/ATen/native/quantized/cpu/qnnpack"
+           "caffe2/mobile/contrib/libopencl-stub"
+           "caffe2/mobile/contrib/libvulkan-stub"
+           "third_party"))
+
+        ;; Autogenerated files
+        (for-each
+         delete-file
+         '("aten/src/ATen/nnapi/nnapi_wrapper.cpp"
+           "aten/src/ATen/nnapi/nnapi_wrapper.h"
+           ;; These files contain just lists of floating point values and
+           ;; might be as well hand-written.
+           ;; "test/cpp/api/init_baseline.h"
+           ;; "test/cpp/api/optim_baseline.h"
+           "test/mobile/test_upgrader_bytecode_table_example.cpp"
+           "torch/csrc/jit/mobile/upgrader_mobile.cpp"
+           "torch/csrc/jit/runtime/decomposition_registry_util.cpp"
+           "torch/csrc/jit/runtime/serialized_shape_function_registry.cpp"
+           "torch/csrc/jit/tensorexpr/external_functions_codegen.cpp"
+           "torch/csrc/jit/serialization/mobile_bytecode_generated.h"))
+        (delete-file-recursively ".github")
+        ;; These files are needed for CUDA.
+        ;; (for-each
+        ;;  (lambda (dir)
+        ;;    (for-each
+        ;;     delete-file
+        ;;     (find-files dir "\\.cu$")))
+        ;;  '("aten/src/ATen/native/transformers/cuda/flash_attn/kernels"
+        ;;    "aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernels"))
+        ))))
+
+(define-public python-pytorch-cuda
+  (package
+    (name "python-pytorch-cuda")
+    (version %python-pytorch-version)
+    (source %python-pytorch-src)
+    (build-system python-build-system)
+    (arguments
+     (substitute-keyword-arguments (package-arguments python-pytorch)
+       ((#:phases phases)
+        #~(modify-phases #$phases
+            (add-after 'cmake-patches 'cuda-cmake-patches
+              (lambda _
+                ;; XXX: Currently nvidia-cudnn-frontend doesn't install CMake
+                ;; configuration files, we must add unbundled nlohmann-json.
+                ;; Additionally, it won't work without CUDNN_INCLUDE_DIR.
+                ;; Both directories are spliced in as plain store paths:
+                ;; wrapping one in ${} would make CMake expand it to nothing.
+                (substitute* "cmake/Dependencies.cmake"
+                  (("set\\(CUDNN_FRONTEND_INCLUDE_DIR.*$")
+                   (format #f "set(CUDNN_FRONTEND_INCLUDE_DIR ~a/include)
+  target_include_directories(torch::cudnn INTERFACE
+      ${CUDNN_INCLUDE_DIR} ~a/include
+  )~%"
+                           #$(this-package-input "nvidia-cudnn-frontend")
+                           #$(this-package-input "nlohmann-json"))))
+                ;; XXX: Link the right include dir for cutlass.
+                (substitute* "aten/src/ATen/CMakeLists.txt"
+                  (("\
+\\$\\{CMAKE_CURRENT_SOURCE_DIR\\}/\\.\\./\\.\\./\\.\\./third_party/cutlass")
+                   #$(this-package-input "nvidia-cutlass")))
+                ;; XXX: Not linking gtest+gtest_main breaks compilation
+                (substitute* '("c10/cuda/test/CMakeLists.txt"
+                               "caffe2/CMakeLists.txt")
+                  (("target_link_libraries\\((.* gtest_main)\\)" all content)
+                   (format #f "target_link_libraries(~a gtest)"
+                           content)))))
+            (add-after 'use-system-libraries 'use-cuda-libraries
+              (lambda _
+                (setenv "USE_CUDA" "1")
+                (setenv "CUDA_HOME" #$(this-package-input "cuda-dev"))
+                (setenv "CUDA_TOOLKIT_ROOT_DIR"
+                        #$(this-package-input "cuda-dev"))
+                (setenv "CUDA_USE_STATIC_CUDA_RUNTIME" "0")
+                (setenv "CUDA_PROPAGATE_HOST_FLAGS" "0")
+                (setenv "CUSPARSELT_LIBRARY"
+                        #$(file-append
+                           (this-package-input "cuda-dev") "/lib"))
+                (setenv "CUSPARSELT_INCLUDE_DIR"
+                        #$(file-append
+                           (this-package-input "cuda-dev") "/include"))
+                (setenv "USE_CUDNN" "1")
+                (setenv "CUDNN_LIB_DIR"
+                        #$(file-append
+                           (this-package-input "nvidia-cudnn") "/lib"))
+                (setenv "CUDNN_INCLUDE_DIR"
+                        #$(file-append
+                           (this-package-input "nvidia-cudnn") "/include"))
+                ;; XXX: 3.5, 5.0 and 9.0a break tests compilation
+                ;; See https://github.com/pytorch/pytorch/issues/113948
+                (setenv "TORCH_CUDA_ARCH_LIST" "8.0 8.6 8.9 9.0")
+                ;; XXX: Current cutlass package doesn't have necessary
+                ;; headers to enable this option.
+                (setenv "USE_ROCM" "0")))))))
+    (native-inputs (package-native-inputs python-pytorch))
+    (inputs
+     (modify-inputs (package-inputs python-pytorch)
+       (replace "tensorpipe" tensorpipe-cuda)
+       (replace "gloo" gloo-cuda)
+       (append nvidia-cudnn
+               nvidia-cudnn-frontend
+               cuda-dev
+               nlohmann-json
+               nvidia-cutlass
+               nvidia-nccl)))
+    (propagated-inputs (package-propagated-inputs python-pytorch))
+    (home-page "https://pytorch.org/")
+    (synopsis "Python library for tensor computation and deep neural networks")
+    (description
+     "PyTorch is a Python package that provides two high-level features:
+
+@itemize
+@item tensor computation (like NumPy) with strong GPU acceleration;
+@item deep neural networks (DNNs) built on a tape-based autograd system.
+@end itemize
+
+You can reuse Python packages such as NumPy, SciPy, and Cython to extend
+PyTorch when needed.
+
+Note: This package provides NVIDIA GPU support.")
+    (license license:bsd-3)))
+
+;; Variant of 'tensorpipe' configured with -DTP_USE_CUDA=1.
+(define-public tensorpipe-cuda
+  (package
+    (name "tensorpipe-cuda")
+    (version (package-version tensorpipe))
+    (source (package-source tensorpipe))
+    (build-system cmake-build-system)
+    (arguments
+     (list
+      #:configure-flags
+      ''("-DBUILD_SHARED_LIBS=ON" "-DTP_USE_CUDA=1")
+      ;; There are no tests
+      #:tests? #f))
+    (inputs (list cuda-nvml-dev cuda-toolkit libuv))
+    (native-inputs (list googletest pkg-config pybind11 libnop))
+    (home-page "https://github.com/pytorch/tensorpipe")
+    (synopsis "Tensor-aware point-to-point communication primitive for
+machine learning")
+    (description "TensorPipe provides a tensor-aware channel to transfer
+rich objects from one process to another while using the fastest transport for
+the tensors contained therein.
+Note: This version includes NVIDIA CUDA API and headers.")
+    (license license:bsd-3)))
diff --git a/nongnu/packages/nvidia.scm b/nongnu/packages/nvidia.scm
index bbdebed..959a274 100644
--- a/nongnu/packages/nvidia.scm
+++ b/nongnu/packages/nvidia.scm
@@ -7,6 +7,7 @@
 ;;; Copyright © 2022, 2023 Petr Hodina
 ;;; Copyright © 2022 Alexey Abramov
 ;;; Copyright © 2022, 2023, 2024 Hilton Chain
+;;; Copyright © 2024 Nicolas Graves
 
 (define-module (nongnu packages nvidia)
   #:use-module (guix packages)
@@ -14,6 +15,7 @@
   #:use-module (guix download)
   #:use-module (guix gexp)
   #:use-module (guix git-download)
+  #:use-module (guix build utils)
   #:use-module (guix utils)
   #:use-module ((guix licenses) #:prefix license-gnu:)
   #:use-module ((nonguix licenses) #:prefix license:)
@@ -22,24 +24,36 @@
   #:use-module (guix build-system copy)
   #:use-module (guix build-system gnu)
   #:use-module (guix build-system meson)
+  #:use-module (guix build-system pyproject)
   #:use-module (guix build-system python)
   #:use-module (guix build-system trivial)
+  #:use-module (nonguix build-system cuda)
+  #:use-module (gnu packages)
   #:use-module (gnu packages base)
   #:use-module (gnu packages bash)
   #:use-module (gnu packages bootstrap)
   #:use-module (gnu packages check)
+  #:use-module (gnu packages cmake)
   #:use-module (gnu packages compression)
+  #:use-module (gnu packages cpp)
   #:use-module (gnu packages elf)
   #:use-module (gnu packages freedesktop)
   #:use-module (gnu packages gawk)
   #:use-module (gnu packages gcc)
   #:use-module (gnu packages gl)
   #:use-module (gnu packages glib)
+  #:use-module (gnu packages graphviz)
   #:use-module (gnu packages gtk)
   #:use-module (gnu packages linux)
   #:use-module (gnu packages m4)
   #:use-module (gnu packages lsof)
+  #:use-module (gnu packages machine-learning)
+  #:use-module (gnu packages multiprecision)
   #:use-module (gnu packages pkg-config)
+  #:use-module (gnu packages python)
+  #:use-module (gnu packages python-build)
+  #:use-module (gnu packages python-check)
+  #:use-module (gnu packages 
python-science)
   #:use-module (gnu packages python-xyz)
   #:use-module (gnu packages qt)
   #:use-module (gnu packages tls)
@@ -50,7 +64,8 @@
   #:use-module (gnu packages xml)
   #:use-module (gnu packages xorg)
   #:use-module (nongnu packages linux)
-  #:use-module (ice-9 match))
+  #:use-module (ice-9 match)
+  #:use-module (gcrypt base16))
 
 (define-public %nvidia-environment-variable-regexps
   '("^__GL_" ; NVIDIA OpenGL settings.
@@ -856,6 +871,1310 @@ variables @code{__GLX_VENDOR_LIBRARY_NAME=nvidia} and
 
 (define-public replace-mesa
   (package-input-rewriting `((,mesa . ,mesa/fake))))
+
+
+;;;
+;;; CUDA packages (the source hash of each one is selected per system)
+;;;
+
+
+(define-public cuda-cccl
+  (package
+    (name "cuda-cccl")
+    (version "12.1.109")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url name version))
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system))
+           ("x86_64-linux"
+            "1ahvk632nh05m3mmjk8mhkxgkmry1ipq89dycw98kd617png6kmq")
+           ("aarch64-linux"
+            "1yc5irxn35ii0qal1qi8v6gq25ws4a7axjnmc5b20g0ypzxdlc2n")
+           ("powerpc64le-linux"
+            "0s6zidp5ajsqh519x3c38ihip4m1hkdzhrsdq04pybk8sfjh7z2l"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list #:install-plan ''(("include" "include")
+                             ("lib" "lib"))))
+    (synopsis
+     "C++ Core Compute Libraries for the CUDA language")
+    (description
+     "This package provides the CUDA C++ developers with building blocks that
+make it easier to write safe and efficient code. It unifies three essential former
+CUDA C++ libraries into a single repository:
+@itemize
+@item Thrust (former repo)
+@item CUB (former repo)
+@item libcudacxx (former repo)
+@end itemize")
+    (home-page "https://developer.nvidia.com/cuda-toolkit")
+    (license (cuda-license name))))
+
+(define-public cuda-cudart
+  (package
+    (name "cuda-cudart")
+    (version "12.1.105")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url name version))
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system))
+           ("x86_64-linux"
+            "1nbbmd3x0dm3qpyr99cdmbw2gwffvvr9qvlwsdc34i4cij3yr5k0")
+           ("aarch64-linux"
+            "1q8mrsvj5w4v81w7fs73jq1z0ilishkfg5pq5ncb85yjg345hwya")
+           ("powerpc64le-linux"
+            "1ffqr6d28rpwzx9swmwj8s6p8llfvwrzpnnjcgjgskqygf5lfl2y"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list #:install-plan ''(("include" "include")
+                             ("lib" "lib")
+                             ("pkg-config" "share/pkg-config"))
+           #:phases
+           #~(modify-phases %standard-phases
+               (delete 'install-static) ; NOTE(review): presumably because no "static" output is declared here
+               (add-after 'install 'add-symlink
+                 (lambda _
+                   (with-directory-excursion
+                       (string-append #$output "/lib/stubs")
+                     (symlink "libcuda.so" "libcuda.so.1"))))))) ; lib/stubs/libcuda.so.1 -> libcuda.so
+    (inputs (list cuda-nvrtc `(,gcc "lib") glibc))
+    (synopsis "CUDA runtime")
+    (description
+     "This package provides the CUDA run-time support libraries for NVIDIA
+GPUs, all of which are proprietary.")
+    (home-page "https://developer.nvidia.com/cuda-toolkit")
+    (license (cuda-license name))))
+
+(define-public cuda-cuobjdump
+  (package
+    (name "cuda-cuobjdump")
+    (version "12.1.111")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url name version))
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system))
+           ("x86_64-linux"
+            "0lnsmz06kim978lcfbyl1n58883wq76wjri7kazrdr1bmj6vb60h")
+           ("aarch64-linux"
+            "0dqis4m2wlplp5hzjn92q65vs8gshn4nc7200gyvdr7midqcw0xz")
+           ("powerpc64le-linux"
+            "118ipzj28i4668jpr3svnzw5r3hgmwvg618s6y3axfn5picv4f4q"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list #:install-plan ''(("bin" "bin"))))
+    (synopsis "Extract information from CUDA binary files")
+    (description
+     "This binary extracts information from CUDA binary files (both standalone
+and those embedded in host binaries) and presents them in human readable
+format. The output of @code{cuobjdump} includes CUDA assembly code for each
+kernel, CUDA ELF section headers, string tables, relocators and other CUDA
+specific sections. It also extracts embedded ptx text from host binaries.")
+    (home-page "https://docs.nvidia.com/cuda/\
+cuda-binary-utilities/index.html#cuobjdump")
+    (license (cuda-license name))))
+
+(define-public cuda-cuxxfilt
+  (package
+    (name "cuda-cuxxfilt")
+    (version "12.1.105")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url name version))
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system))
+           ("x86_64-linux"
+            "0va13gfay4as0fnc23n0gqhnylyhykp5cmmxjhlminfi735zki0x")
+           ("aarch64-linux"
+            "15jbqssx0nzi8l411m41393jpzc8kbd2qa0jri22cp5c4cnls9bz")
+           ("powerpc64le-linux"
+            "0m3nmsl59r2apd1dpm3a8ch788kq2krrl1x50agqk3z2wl8zhy1p"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list #:install-plan ''(("bin" "bin")
+                             ("include" "include")
+                             ("lib" "lib"))))
+    (synopsis "Decodes low-level CUDA C++ identifiers into readable names")
+    (description
+     "This package decodes (demangles) low-level identifiers that have been
+mangled by CUDA C++ into user readable names. For every input alphanumeric
+word, the output of cu++filt is either the demangled name if the name decodes
+to a CUDA C++ name, or the original name itself.")
+    (home-page "https://docs.nvidia.com/cuda/\
+cuda-binary-utilities/index.html#cu-filt")
+    (license (cuda-license name))))
+
+(define-public cuda-cupti
+  (package
+    (name "cuda-cupti")
+    (version "12.1.105")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url name version))
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system))
+           ("x86_64-linux"
+            "0qy3pvqkvr16xp2l0jb202xxvgq1pxdwkqfrpm4ag6k102i98x9r")
+           ("aarch64-linux"
+            "14j7kb6izvvgmla92lxyhlw482v7hxqsfpcl4gvpg6nspa0p6vbs")
+           ("powerpc64le-linux"
+            "0rfkvvv0i8450bpmanbq72cg98grpskxdrwswj7zch9gwkh4qyhr"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list #:install-plan ''(("include" "include")
+                             ("doc" "share/doc")
+                             ("lib" "lib")
+                             ("samples" "share/samples"))))
+    (inputs (list `(,gcc "lib") glibc))
+    (outputs (list "out" "static"))
+    (synopsis "CUDA Profiling Tools Interface")
+    (description
+     "This package enables the creation of profiling and tracing tools that
+target CUDA applications and give insight into the CPU and GPU behavior of
+CUDA applications. It provides the following APIs:
+@itemize
+@item the Activity API,
+@item the Callback API,
+@item the Event API,
+@item the Metric API,
+@item the Profiling API,
+@item the PC Sampling API,
+@item the Checkpoint API.
+@end itemize")
+    (home-page "https://docs.nvidia.com/cuda/cupti/index.html")
+    (license (cuda-license name))))
+
+(define-public cuda-gdb
+  (package
+    (name "cuda-gdb")
+    (version "12.1.105")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url name version))
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system))
+           ("x86_64-linux"
+            "0205f2ix06ry404l0ymrwx23k3nsnvhm1clg52hsnxmzqplfmgn4")
+           ("aarch64-linux"
+            "1v8cprz20yqjy8g1s9rbrvly1dr5icfam7c8rzqvzs25l8dcynjw")
+           ("powerpc64le-linux"
+            "1l2gl6pcvmdqcvd45513in915ij9cf9ljii5vfgh1y13apnk8ykz"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list #:install-plan
+           ``(("bin" "bin")
+              ("extras/Debugger/include" "include")
+              ("extras/Debugger/lib64" "lib")
+              ("share/gdb/python"
+               ,,(string-append "lib/python" ; target directory follows the version of the 'python' package
+                                (version-major+minor (package-version python))
+                                "/site-packages/gdb")))
+           #:strip-binaries? #f ; FIXME breaks 'validate-runpath
+           #:patchelf-inputs ''("gcc" "glibc" "gmp")))
+    (inputs (list `(,gcc "lib") glibc gmp))
+    (synopsis "Tool for debugging CUDA applications")
+    (description
+     "This package provides the NVIDIA tool for debugging CUDA applications
+running. CUDA-GDB is an extension to GDB, the GNU Project debugger. The tool
+provides developers with a mechanism for debugging CUDA applications running
+on actual hardware. This enables developers to debug applications without the
+potential variations introduced by simulation and emulation environments.")
+    (home-page "https://docs.nvidia.com/cuda/cuda-gdb/index.html")
+    (license (cuda-license name))))
+
+;; This package must be defined before cuda-nvcc for inheritance.
+(define-public libnvvm
+  (package
+    (name "libnvvm")
+    (version "12.1.105")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url "cuda-nvcc" version)) ; the URL is computed for "cuda-nvcc", not for this package's name
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system))
+           ("x86_64-linux"
+            "0fq8w5jq2drckjwn2i30m7arybnffhy4j2qb2yysp23pw7pgg18b")
+           ("aarch64-linux"
+            "0di51rdd08fwg6as1fqixkw7g052qv3sx9f9y06dkdbq0i563y0n")
+           ("powerpc64le-linux"
+            "1830cvqpmjsv83wk1lfjpjlc8j3wdpaiyvvc03crqh241v4c9qp6"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list
+      #:strip-binaries? #f ; XXX: breaks 'validate-runpath phase
+      #:install-plan ''(("nvvm/bin" "/bin") ; NOTE(review): targets start with "/" here, unlike the other install plans
+                        ("nvvm/include" "/include")
+                        ("nvvm/lib64" "/lib")
+                        ;; nvvm prefix is necessary for cmake
+                        ("nvvm/libdevice" "nvvm/libdevice"))))
+    (inputs (list cuda-cudart `(,gcc-12 "lib") glibc))
+    (synopsis
+     "Generate CUDA PTX code from binary or text inputs")
+    (description
+     "This package provides an interface for generating PTX code from both
+binary and text NVVM IR inputs.")
+    (home-page "https://docs.nvidia.com/cuda/libnvvm-api/index.html")
+    (license (cuda-license name))))
+
+(define-public cuda-nvcc
+  (package
+    (inherit libnvvm) ; version, source and build system come from libnvvm
+    (name "cuda-nvcc")
+    (arguments
+     (list
+      #:strip-binaries? #f ; XXX: breaks 'validate-runpath phase
+      #:patchelf-inputs ''("gcc" "glibc" "libnvvm")
+      #:install-plan ''(("bin" "bin")
+                        ("include" "include")
+                        ("lib" "lib"))
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'unpack 'patch-nvcc.profile
+            (lambda _
+              (define (append-to-file name body) ; write BODY at the end of the file NAME
+                (let ((file (open-file name "a")))
+                  (display body file)
+                  (close-port file)))
+
+              (substitute* "bin/nvcc.profile" ; replace the $(TOP)-relative paths with store paths
+                (("\\$\\(TOP\\)/\\$\\(_NVVM_BRANCH_\\)")
+                 #$(this-package-input "libnvvm"))
+                (("\\$\\(TOP\\)/lib")
+                 (string-append #$output "/lib"))
+                (("\\$\\(TOP\\)/nvvm")
+                 (string-append #$output "/nvvm"))
+                (("\\$\\(TOP\\)/\\$\\(_TARGET_DIR_\\)/include")
+                 (string-append #$output "/include")))
+              (append-to-file
+               "bin/nvcc.profile"
+               (string-join
+                (list
+                 (string-append "PATH += " #$(this-package-input "gcc") "/bin")
+                 (string-append
+                  "LIBRARIES =+ -L"
+                  #$(this-package-input "cuda-cudart") "/lib -L"
+                  #$(this-package-input "cuda-cudart") "/lib/stubs -L"
+                  #$(this-package-input "libnvvm") "/lib")
+                 (string-append
+                  "INCLUDES =+ -I"
+                  #$(this-package-input "cuda-cudart") "/include -I"
+                  #$(this-package-input "libnvvm") "/include\n"))
+                "\n")))))))
+    (inputs (list cuda-cudart `(,gcc "lib") glibc libnvvm))
+    (synopsis
+     "Compiler for the CUDA language and associated run-time support")
+    (description
+     "This package provides the CUDA compiler and the CUDA run-time support
+libraries for NVIDIA GPUs, all of which are proprietary.")
+    (home-page "https://docs.nvidia.com/cuda/\
+cuda-compiler-driver-nvcc/index.html")
+    (license (cuda-license name))))
+
+(define-public cuda-nvml-dev
+  (package
+    (name "cuda-nvml-dev")
+    (version "12.1.105")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url name version))
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system))
+           ("x86_64-linux"
+            "0zyp4c4zf4kjjdw0dzjncclyamazlg5z4lncl7y0g8bq3idpgbi0")
+           ("aarch64-linux"
+            "0wal0bjvhd9wr4cnvr4s9m330awj2mqqvpq0rh6wzaykas40zmcx")
+           ("powerpc64le-linux"
+            "1zjh6mmp5nl3s5wm5jwfzh9bazzhl2vr76c9cdfrjjryyd2pkr92"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list #:install-plan ''(("include" "include")
+                             ("lib" "lib")
+                             ("nvml/example" "share/example")
+                             ("pkg-config" "share/pkg-config"))))
+    (inputs (list `(,gcc "lib") glibc))
+    (outputs (list "out" "static"))
+    (synopsis "NVIDIA Management Library Headers")
+    (description
+     "The NVIDIA Management Library Headers (NVML) is a C-based API for
+monitoring and managing various states of the NVIDIA GPU devices. It provides
+a direct access to the queries and commands exposed via @code{nvidia-smi}.")
+    (home-page "https://developer.nvidia.com/management-library-nvml")
+    (license (cuda-license name))))
+
+(define-public cuda-nvdisasm
+  (package
+    (name "cuda-nvdisasm")
+    (version "12.1.105")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url name version))
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system))
+           ("x86_64-linux"
+            "1sd9wqf5y4xvz70yh58mdxxddwnkyfjfaj6nrykpvqrry79vyz7l")
+           ("aarch64-linux"
+            "0pnk1x1c7msz93r5kgkb218akf02ymjar2dz8s3sx08hicaslff2")
+           ("powerpc64le-linux"
+            "04xjcjj055ffs58gkf86jzryyzxia8c995g8xpj5nf2zhaw030hw"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list #:install-plan ''(("bin" "bin"))))
+    (synopsis "Extract information from CUDA cubin files")
+    (description "This binary extracts information from standalone cubin files
+and presents them in human readable format. The output of @code{nvdisasm}
+includes CUDA assembly code for each kernel, listing of ELF data sections and
+other CUDA specific sections. Output style and options are controlled through
+nvdisasm command-line options. @code{nvdisasm} also does control flow
+analysis to annotate jump/branch targets and makes the output easier to
+read.")
+    (home-page "https://docs.nvidia.com/cuda/\
+cuda-binary-utilities/index.html#nvdisasm")
+    (license (cuda-license name))))
+
+(define-public cuda-nvprof
+  (package
+    (name "cuda-nvprof")
+    (version "12.1.105")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url name version))
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system)) ; NOTE(review): no "aarch64-linux" clause, unlike the other packages -- confirm this is intended
+           ("x86_64-linux"
+            "18z522w0rnrqbqymigsd88rscz29z9fg3bf5w6ri4yjr8a1ycdg9")
+           ("powerpc64le-linux"
+            "1sd9wbb2zdc29jx7m3m5qs29s67ww71g659228y2045nr340qjc4"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list
+      #:strip-binaries? #f ; XXX: breaks 'validate-runpath phase
+      #:install-plan ''(("bin" "bin")
+                        ("lib" "lib")
+                        ("pkg-config" "share/pkg-config"))
+      #:patchelf-inputs
+      ''(("cuda-cudart" "/lib/stubs") "cuda-cupti" "gcc" "glibc")))
+    (inputs (list cuda-cudart cuda-cupti `(,gcc "lib") glibc))
+    (synopsis "Command-line NVIDIA GPU profiler")
+    (description
+     "This package provides a command-line tool to profile CUDA kernels. It
+enables the collection of a timeline of CUDA-related activities on both CPU
+and GPU, including kernel execution, memory transfers, memory set and CUDA API
+calls and events or metrics for CUDA kernels.")
+    (home-page "https://developer.nvidia.com/cuda-toolkit")
+    (license (cuda-license name))))
+
+(define-public cuda-nvprune
+  (package
+    (name "cuda-nvprune")
+    (version "12.1.105")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url name version))
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system))
+           ("x86_64-linux"
+            "0qrisahad4n2g8n40i0gpq986ni8qjg53fd23vycmmmkggvb3wxa")
+           ("aarch64-linux"
+            "1hdih73ph80iwmjmz7dywz995626x64jkqfaybw7a908nxkjalpy")
+           ("powerpc64le-linux"
+            "0n92fcp5qms6dvg5hq1wl29wmh32wjfkykccjpqd8c40qrmd9ngh"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list #:install-plan ''(("bin" "bin"))))
+    (synopsis "Prune host NVIDIA binaries for the specified target")
+    (description
+     "This package provides a binary that prunes host object files and
+libraries to only contain device code for the specified targets.")
+    (home-page "https://docs.nvidia.com/cuda/\
+cuda-binary-utilities/index.html#nvprune")
+    (license (cuda-license name))))
+
+(define-public cuda-nvrtc
+  (package
+    (name "cuda-nvrtc")
+    (version "12.1.105")
+    (source
+     (origin
+       (method url-fetch)
+       (uri (cuda-module-url name version))
+       (sha256
+        (base32
+         (match (or (%current-target-system) (%current-system))
+           ("x86_64-linux"
+            "0yriv3gcb4kpvpav3ilv8zyhravmz0blb0gv1c7pfq37r9m705dv")
+           ("aarch64-linux"
+            "0amp7qg64i6rfkqnjinizh9vhpajvqdpyan4jda9vqr7ckrdfq31")
+           ("powerpc64le-linux"
+            "10dwwhk2pfz6dcqpgjp2dryg5qb08ghnbxvbk4mfhvsajj9ik4wv"))))))
+    (build-system cuda-build-system)
+    (arguments
+     (list #:install-plan ''(("include" "include")
+                             ("lib" "lib")
+                             ("pkg-config" "share/pkg-config"))))
+    (inputs (list `(,gcc "lib") glibc))
+    (outputs (list "out" "static"))
+    (synopsis "Runtime compilation library for CUDA C++")
+    (description
+
"This package accepts CUDA C++ source code in character string form and +creates handles that can be used to obtain the CUDA PTX, for further +instrumentation with the CUDA Toolkit. It allows to shrink compilation +overhead and simplify application deployment.") + (home-page "https://docs.nvidia.com/cuda/nvrtc/index.html") + (license (cuda-license name)))) + +(define-public cuda-nvtx + (package + (name "cuda-nvtx") + (version "12.1.105") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "1hpibjs9hpc1qhbxihgcpsf298cjwxh7qqsk0shhrwbv4hncg8lc") + ("aarch64-linux" + "1j841pl7n2waal2nclz076yxmzsibxssy8gnkb14yyc8sj657ajp") + ("powerpc64le-linux" + "1p0ml8p8dpzwp2kkgvv0yr4f61if33srpzbj1mjpzc70a0l55a31")))))) + (build-system cuda-build-system) + (arguments + (list #:install-plan ''(("include" "include") + ("lib" "lib") + ("pkg-config" "share/pkg-config")))) + (inputs (list `(,gcc "lib") glibc)) + (synopsis "NVIDIA Tools Extension Library") + (description + "This package provides a cross-platform API for annotating source code to +provide contextual information to developer tools.") + (home-page "https://docs.nvidia.com/nvtx/index.html") + (license (cuda-license name)))) + +(define-public cuda-opencl + (package + (name "cuda-opencl") + (version "12.1.105") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "1k4ab28kg5plr0nn83amr6j7cqg54vpis00am9dpiy4kgj2izgcx")))))) + (build-system cuda-build-system) + (arguments + (list #:install-plan ''(("include" "include") + ("lib" "lib") + ("pkg-config" "share/pkg-config")))) + (synopsis "CUDA OpenCL API") + (description + "OpenCL (Open Computing Language) is a multi-vendor open standard for +general-purpose parallel programming of heterogeneous systems that include +CPUs, GPUs and other 
processors. This package provides the API to use OpenCL +on NVIDIA GPUs.") + (home-page "https://developer.nvidia.com/cuda-toolkit") + (license (cuda-license name)))) + +(define-public cuda-profiler-api + (package + (name "cuda-profiler-api") + (version "12.1.105") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "187dngq2p66jz3yd5l6klqgcvjl6fkcjdjjz1dmzj10fxfv6rzrz") + ("aarch64-linux" + "1zq8qrh13ibm9c2km8lj4fmddc8smgh75ajpwb0l7rfg12dajnpr") + ("powerpc64le-linux" + "0mhk9cgac2jc4dmqic5ym34cwpz15b0qk824230bhgmwarjwzhiz")))))) + (build-system cuda-build-system) + (arguments + (list #:install-plan ''(("include" "include")))) + (synopsis "Low-level CUDA profiling API") + (description + "This package provides a minimal low-level profiling API for CUDA.") + (home-page "https://developer.nvidia.com/cuda-toolkit") + (license (cuda-license name)))) + +(define-public cuda-sanitizer-api + (package + (name "cuda-sanitizer-api") + (version "12.1.105") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "00m6mw9vw8xgjbm8xzbpgirw8xcrdb13bgwkp4hxayy313d13afz") + ("aarch64-linux" + "01iv9qawabr2llq7nwcrpc1fb03yp9a311p08bafhbakk272nwwq") + ("powerpc64le-linux" + "1hp1kd7q5dj8adyv4haaz119qcmmc5gqs3g8zqik5rnmck6qk3p3")))))) + (build-system cuda-build-system) + (arguments + (list #:install-plan + ''(("compute-sanitizer" "compute-sanitizer") + ("bin" "bin")))) + (synopsis "Functional correctness checking suite for CUDA") + (description + "This package provides a functional correctness checking suite included in +the CUDA toolkit. This suite contains multiple tools that can perform +different type of checks. 
The @code{memcheck} tool is capable of precisely +detecting and attributing out of bounds and misaligned memory access errors in +CUDA applications, and can also report hardware exceptions encountered by the +GPU. The @code{racecheck} tool can report shared memory data access hazards +that can cause data races. The @code{initcheck} tool can report cases where +the GPU performs uninitialized accesses to global memory. The +@code{synccheck} tool can report cases where the application is attempting +invalid usages of synchronization primitives.") + (home-page "https://docs.nvidia.com/cuda/compute-sanitizer/index.html") + (license (cuda-license name)))) + +(define-public libcublas + (package + (name "libcublas") + (version "12.1.3.1") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "1323rg663fvjl73j5ny249ndnii2qbrfc7qccz5k6ky4v1x4s14h") + ("aarch64-linux" + "1bzzxzppz3ypx6q3gg7w6sfnwnypl974ppmbxh0j2jafvwy5nf9f") + ("powerpc64le-linux" + "1wgrgkn9mvh9k1d58ka92gbq11ckl8pyhz7za8lsrhjpw6c8iw15")))))) + (build-system cuda-build-system) + (arguments + (list + #:install-plan ''(("include" "include") + ("lib" "lib") + ("pkg-config" "share/pkg-config") + ("src" "share/src")))) + (inputs (list `(,gcc "lib") glibc)) + (outputs (list "out" "static")) + (synopsis + "GPU-accelerated library for accelerating AI and HPC applications") + (description + "This package provides the NVIDIA cuBLAS library. It includes several +API extensions for providing drop-in industry standard BLAS APIs and GEMM APIs +with support for fusions that are highly optimized for NVIDIA GPUs. 
The +cuBLAS library also contains extensions for batched operations, execution +across multiple GPUs, and mixed- and low-precision execution with additional +tuning for the best performance.") + (home-page "https://developer.nvidia.com/cublas") + (license (cuda-license name)))) + +(define-public libcufft + (package + (name "libcufft") + (version "11.0.2.54") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "053vgq3lavrydna1gl7lry0lp78nby6iqh1gvclvq7vx5kac2dki") + ("aarch64-linux" + "0kmyxk9420vgm0ipr8a6fx1kcw19h8awy21l92lg4h7nzp58ig76") + ("powerpc64le-linux" + "02kklsdi43fvs2bi9s534rniqh43hqj9aq4i1m01yq6ya1cqqz1c")))))) + (build-system cuda-build-system) + (arguments + (list #:install-plan ''(("include" "include") + ("lib" "lib") + ("pkg-config" "share/pkg-config")))) + (inputs (list `(,gcc "lib") glibc)) + (outputs (list "out" "static")) + (synopsis "CUDA Fast Fourier Transform library") + (description + "This package provides cuFFT, the NVIDIA® CUDA® Fast Fourier Transform +(FFT) product. It consists of two separate libraries: cuFFT and cuFFTW. The +cuFFT library is designed to provide high performance on NVIDIA GPUs. The +cuFFTW library is provided as a porting tool to enable users of FFTW to start +using NVIDIA GPUs with a minimum amount of effort. + +The FFT is a divide-and-conquer algorithm for efficiently computing discrete +Fourier transforms of complex or real-valued data sets. It is one of the most +important and widely used numerical algorithms in computational physics and +general signal processing. The cuFFT library provides a simple interface for +computing FFTs on an NVIDIA GPU, which allows users to quickly leverage the +floating-point power and parallelism of the GPU in a highly optimized and +tested FFT library. 
The cuFFTW library provides the FFTW3 API to facilitate +porting of existing FFTW applications.") + (home-page "https://docs.nvidia.com/cuda/cufft/index.html") + (license (cuda-license name)))) + +(define-public libcurand + (package + (name "libcurand") + (version "10.3.2.106") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "1pk4ngmqdhigg2889h3521kzxvvp3m1yxlnvf9hrwh9dmmpj2hcr") + ("aarch64-linux" + "0lw53j57g1094bzlx43dyq7iwwpljdkg17dnl8lk7n5vyrvjk4j3") + ("powerpc64le-linux" + "05r8fcam75m9zv853vl0zzp67jy0yacq09q8xx5ymxx7pcj58g7s")))))) + (build-system cuda-build-system) + (arguments + (list #:install-plan ''(("include" "include") + ("lib" "lib") + ("pkg-config" "share/pkg-config")))) + (inputs (list `(,gcc "lib") glibc)) + (outputs (list "out" "static")) + (synopsis "CUDA random number generation library") + (description + "This package provides facilities that focus on the simple and efficient +generation of high-quality pseudorandom and quasirandom numbers. A +pseudorandom sequence of numbers satisfies most of the statistical properties +of a truly random sequence but is generated by a deterministic algorithm. 
A +quasirandom sequence of n-dimensional points is generated by a deterministic +algorithm designed to fill an n-dimensional space evenly.") + (home-page "https://docs.nvidia.com/cuda/curand/index.html") + (license (cuda-license name)))) + +(define-public libcusolver + (package + (name "libcusolver") + (version "11.4.5.107") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "1y34wk7xx9h0kj13rxb504yx5vchkapk1237ya7vs7z70409fsbi") + ("aarch64-linux" + "0wr8xa4hqay94gc1b9jzig24f7q3s2ykakppxv42pxp86dbjyp0q") + ("powerpc64le-linux" + "12jkky40g1xpjr1lkz925q93zbc84g559mhv94x70i4dmy6b4rj3")))))) + (build-system cuda-build-system) + (arguments + (list + #:install-plan ''(("include" "include") + ("lib" "lib") + ("pkg-config" "share/pkg-config")) + #:patchelf-inputs + ''("gcc" "glibc" "libcublas" "libcusparse" "libnvjitlink"))) + (inputs (list `(,gcc "lib") glibc + libcublas libcusparse libnvjitlink)) + (outputs (list "out" "static")) + (synopsis + "GPU-accelerated library for decompositions and linear system solutions") + (description + "This package provides a high-level library based on the cuBLAS and +cuSPARSE libraries. It consists of two modules corresponding to two sets of +API: the cuSolver API on a single GPU; and the cuSolverMG API on a single node +multiGPU. Each of these can be used independently or in concert with other +toolkit libraries. The intent of cuSolver is to provide useful LAPACK-like +features, such as common matrix factorization and triangular solve routines +for dense matrices, a sparse least-squares solver and an eigenvalue solver. 
+In addition, cuSolver provides a new refactorization library useful for +solving sequences of matrices with a shared sparsity pattern.") + (home-page "https://docs.nvidia.com/cuda/cusolver/index.html") + (license (cuda-license name)))) + +(define-public libcusparse + (package + (name "libcusparse") + (version "12.1.0.106") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "01rrz1wdsfmpz9wbvir7nwvlpdrqk6i1j987wdbb2lx7d96n07xf") + ("aarch64-linux" + "1vxmiw9qzg67sr4m9mpzhcy392z8vx2m09yl5h2bhb8kjxrdljik") + ("powerpc64le-linux" + "13ji6dlipzahlrri5sp00qyrfa3wgp9z5mv3075qksmnjhi7wxkv")))))) + (build-system cuda-build-system) + (arguments + (list + #:install-plan ''(("include" "include") + ("lib" "lib") + ("pkg-config" "share/pkg-config") + ("src" "share/src")) + #:patchelf-inputs ''("gcc" "glibc" "libnvjitlink"))) + (inputs (list `(,gcc "lib") glibc libnvjitlink)) + (outputs (list "out" "static")) + (synopsis "CUDA sparse matrix library") + (description + "This package provides a set of GPU-accelerated basic linear algebra +subroutines used for handling sparse matrices that perform significantly +faster than CPU-only alternatives. Depending on the specific operation, the +library targets matrices with sparsity ratios in the range between 70%-99.9%.") + (home-page "https://docs.nvidia.com/cuda/cusparse/index.html") + (license (cuda-license name)))) + +;; XXX: This library is introduced in a later version of cuda-toolkit. 
+;; (define-public libnvfatbin +;; (package +;; (name "libnvfatbin") +;; (version "12.4.127") +;; (source +;; (origin +;; (method url-fetch) +;; (uri (cuda-module-url name version)) +;; (sha256 +;; (base32 +;; (match (or (%current-target-system) (%current-system)) +;; ("x86_64-linux" +;; "03mfxy8k07ks3srqmwwbhmr6961w0djsdgy0qdwaxl9favvgay0j") +;; ("aarch64-linux" +;; "0b6kamwgg424yibcb1f0pqmmd7jgxlnsxd37drj4fh7823glf4i7") +;; ("powerpc64le-linux" +;; "1jg4z8h2wrldxb1cfzbrw69sjw4h2hxja82jqkxp19aacbdcs7h7")))))) +;; (build-system cuda-build-system) +;; (outputs (list "out" "static")) +;; (synopsis "Combine multiple CUDA objects into one CUDA fatbin") +;; (description +;; "This package provides a set of APIs which can be used at runtime to +;; combine multiple CUDA objects into one CUDA fat binary (fatbin). The APIs +;; accept inputs in multiple formats, either device cubins, PTX, or LTO-IR. The +;; output is a fatbin that can be loaded by @code{cuModuleLoadData} of the CUDA +;; Driver API. The functionality in this library is similar to the +;; @code{fatbinary} offline tool in the CUDA toolkit, with the following +;; advantages: +;; @itemize +;; @item Support for runtime fatbin creation. +;; @item The clients get fine grain control over the input process. +;; @item Supports direct input from memory, rather than requiring inputs be +;; written to files. 
+;; @end itemize") +;; (home-page "https://docs.nvidia.com/cuda/nvfatbin/index.html") +;; (license (cuda-license name)))) + +(define-public libnvjitlink + (package + (name "libnvjitlink") + (version "12.1.105") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "1d5ngmf10l37rm7814jlghgfpa0xjyqiis8vqg0y22cmrw365vi1") + ("aarch64-linux" + "15fbd3ygk41wbsjyzsharncd94pzn0ikwhq5fq5x7lyh9g0frkfz") + ("powerpc64le-linux" + "1gq93cp68x0nivajz9bh7mvykfzcfhim5l907lg1kp2jb3rnrssg")))))) + (build-system cuda-build-system) + (arguments + (list #:install-plan ''(("lib" "lib") + ("pkg-config" "share/pkg-config") + ("include" "include")))) + (inputs (list `(,gcc "lib") glibc)) + (outputs (list "out" "static")) + (synopsis "Link GPU device code at runtime") + (description + "This package provides a set of APIs which can be used at runtime to link +together GPU device code. It supports Link Time Optimization.") + (home-page "https://docs.nvidia.com/cuda/nvjitlink/index.html") + (license (cuda-license name)))) + +(define-public libnvjpeg + (package + (name "libnvjpeg") + (version "12.2.0.2") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "0xbzbhf7s7gsilr7gx4r7g2j1sxj977wr5zf7jjqg31ch9x2d4yj") + ("powerpc64le-linux" + "1z90kf95045s6q44rm2da3g31icb3hyh3jmv9a5s5bvx6flfs4lk")))))) + (build-system cuda-build-system) + (arguments + (list #:install-plan ''(("include" "include") + ("lib" "lib") + ("pkg-config" "share/pkg-config")))) + (inputs (list `(,gcc "lib") glibc)) + (outputs (list "out" "static")) + (synopsis "GPU-accelerated JPEG codec library") + (description + "This package provides a high-performance, GPU accelerated JPEG decoding +functionality for image formats commonly used in deep learning and hyperscale +multimedia 
applications. The library offers single and batched JPEG decoding +capabilities which efficiently utilize the available GPU resources for optimum +performance; and the flexibility for users to manage the memory allocation +needed for decoding. + +The nvJPEG library enables the following functions: use the JPEG image data +stream as input; retrieve the width and height of the image from the data +stream, and use this retrieved information to manage the GPU memory allocation +and the decoding. A dedicated API is provided for retrieving the image +information from the raw JPEG image data stream. + +The encoding functions of the nvJPEG library perform GPU-accelerated +compression of user’s image data to the JPEG bitstream. User can provide input +data in a number of formats and colorspaces, and control the encoding process +with parameters. Encoding functionality will allocate temporary buffers using +user-provided memory allocator.") + (home-page "https://docs.nvidia.com/cuda/nvjpeg/index.html") + (license (cuda-license name)))) + +(define-public libnpp + (package + (name "libnpp") + (version "12.1.0.40") + (source + (origin + (method url-fetch) + (uri (cuda-module-url name version)) + (sha256 + (base32 + (match (or (%current-target-system) (%current-system)) + ("x86_64-linux" + "1lcb8hdqv2h3i33iinfj6nljh6bhlvy4c3pgis5wy7lnqwr2xi2j") + ("aarch64-linux" + "048blkq0qibj54a70pwn49w4y525if35djkfqx7l7p7ibm47qx3h") + ("powerpc64le-linux" + "140w44a5q5pcfzkn0dl5ibkhshd3pb7jczgddpklqv2a5pkngd2y")))))) + (build-system cuda-build-system) + (arguments + (list #:install-plan ''(("include" "include") + ("lib" "lib") + ("pkg-config" "share/pkg-config")))) + (inputs (list `(,gcc "lib") glibc)) + (outputs (list "out" "static")) + (synopsis + "NVIDIA 2D Image and Signal Processing Performance Primitives") + (description + "This package provides a library of functions for performing CUDA +accelerated 2D image and signal processing. 
+ +The primary library focuses on image processing and is widely applicable for +developers in these areas. NPP will evolve over time to encompass more of the +compute heavy tasks in a variety of problem domains. The NPP library is +written to maximize flexibility, while maintaining high performance.") + (home-page "https://docs.nvidia.com/cuda/npp/index.html") + (license (cuda-license name)))) + +(define-public cuda-toolkit + (package + (name "cuda-toolkit") + (version "12.1.1") + (source #f) + (build-system trivial-build-system) + (arguments + '(#:modules ((guix build union)) + #:builder + (begin + (use-modules (ice-9 match) + (guix build union)) + (match %build-inputs + (((names . directories) ...) + (union-build (assoc-ref %outputs "out") + directories)))))) + (inputs + (list cuda-cccl + ;; FIXME: cuda-compat is only used for aarch64 for this version + cuda-cudart + cuda-nvcc + cuda-nvml-dev + cuda-nvtx + cuda-nvrtc + libcublas + ;; libcudla seems very specialized for now + libcufft + libcurand + libcusolver + libcusparse + libnpp + ;; libnvfatbin is introduced in a later version + ;; libnvidia-nscq seems very specialized for now + libnvjitlink + libnvjpeg + libnvvm)) + ;; TODO Add nsight suite, probably in a new metapackage. + (synopsis "Metapackage for CUDA") + (description + "This package provides the CUDA compiler and the CUDA run-time support +libraries for NVIDIA GPUs, all of which are proprietary.") + (home-page "https://developer.nvidia.com/cuda-toolkit") + (license (package-license cuda-cudart)))) + +(define-public cuda-dev + (package + (name "cuda-dev") + (version "12.1.1") + (source #f) + (build-system trivial-build-system) + (arguments + '(#:modules ((guix build union)) + #:builder + (begin + (use-modules (ice-9 match) + (guix build union)) + (match %build-inputs + (((names . directories) ...) 
+ (union-build (assoc-ref %outputs "out") + directories)))))) + (inputs + (list cuda-toolkit + cuda-cuobjdump + cuda-cupti + cuda-cuxxfilt + cuda-gdb + cuda-nvdisasm + cuda-nvprof + cuda-nvprune + ;; cuda-nvvp will be deprecated soon + cuda-profiler-api + ;; fabricmanager seems very specialized + ;; imex is poorly documented + cuda-sanitizer-api)) + (synopsis "Metapackage for CUDA development") + (description + "This package provides the CUDA compiler and the CUDA run-time support +libraries for NVIDIA GPUs, all of which are proprietary.") + (home-page "https://developer.nvidia.com/cuda-toolkit") + (license (package-license cuda-cudart)))) + +(define-public cuda-python + (package + (name "cuda-python") + (version "12.1.0") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/NVIDIA/cuda-python") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "0i0wvx5kxckphsf1n02rr86hrnc2r6p8wlrvq1n1w9c3l6m24d13")))) + (build-system pyproject-build-system) + (arguments + (list + #:tests? #f ; FIXME: most tests fail. 
+ #:phases + #~(modify-phases %standard-phases + (add-after 'unpack 'fix-setup.py + (lambda _ + (substitute* "setup.py" + (("import versioneer" all) + (format #f "~a~%import pyparsing" all))))) + (add-before 'build 'set_cuda_paths + (lambda _ + (setenv "CUDA_HOME" + #$(this-package-input "cuda-dev")) + (setenv "PARALLEL_LEVEL" + (number->string (parallel-job-count)))))))) + (native-inputs (list python-cython + python-numpy + python-pytest + python-pytest-benchmark + python-setuptools + python-wheel)) + (inputs (list cuda-dev)) + (propagated-inputs (list python-pyclibrary)) + (home-page "https://github.com/NVIDIA/cuda-python") + (synopsis "CUDA Python low-level bindings") + (description "This package provides Python low-level bindings for NVIDIA +CUDA toolkit.") + (license + (license:nonfree + "https://github.com/NVIDIA/cuda-python/blob/main/LICENSE")))) + +(define (nvidia-cudnn-samples system version) + (origin + (method url-fetch) + (uri + (format #f + "https://developer.download.nvidia.com/compute/cudnn/redist\ +/cudnn_samples/~a/cudnn_samples-~a-~a_cuda12-archive.tar.xz" + system + system + version)) + (sha256 + (base32 "01drxcyj8r4zsrc7i9cwczd185dcacxgwllipf9w612byzrs9afk")))) + +(define-public nvidia-cudnn + (package + (name "nvidia-cudnn") + (version "8.9.7.29") + (source + (origin + (method url-fetch) + (uri (let ((system (cuda-current-system))) + (format #f + "https://developer.download.nvidia.com/compute/cudnn/redist\ +/cudnn/~a/cudnn-~a-~a_cuda12-archive.tar.xz" + system + system + version))) + (sha256 + (base32 "1fz345pgngn1v4f0i80s7g4k0vhhd98ggcm07jpsfhkybii36ls7")))) + (build-system cuda-build-system) + (arguments + (list + #:install-plan ''(("include" "include") + ("lib" "lib")) + #:patchelf-inputs ''("gcc" "glibc" "out" "zlib") + #:modules '((nonguix build cuda-build-system) + ((guix build gnu-build-system) #:prefix gnu:) + (guix build union) + (guix build utils) + (ice-9 ftw)) + #:imported-modules `(,@%cuda-build-system-modules + (guix build 
gnu-build-system) + (guix build union)) + #:phases + #~(modify-phases %standard-phases + (add-after 'install 'prepare-tests + (lambda* (#:key outputs #:allow-other-keys) + (mkdir "tests") + (with-directory-excursion "tests" + ((assoc-ref gnu:%standard-phases 'unpack) + #:source #$(nvidia-cudnn-samples + (cuda-current-system) + (package-version this-package)))) + (chdir "tests") + (chdir (caddr (scandir "."))) + (union-build + "cuda+cudnn" + (list (assoc-ref outputs "out") + '#$(this-package-native-input "cuda-toolkit"))) + (setenv "CUDA_PATH" (canonicalize-path "cuda+cudnn")) + (chdir "src/cudnn_samples_v8"))) + (add-after 'prepare-tests 'check + (lambda _ + (for-each + (lambda (dir) + (format #t "Building ~a...~%" dir) + (with-directory-excursion dir + ((assoc-ref gnu:%standard-phases 'build)))) ; invoke phase + (cdr (find-files "." (lambda (file stat) + (eq? 'directory (stat:type stat))) + #:directories? #t)))))))) + (native-inputs (list cuda-toolkit)) + (inputs (list `(,gcc "lib") glibc zlib)) + (outputs (list "out" "static")) + (synopsis "NVIDIA CUDA Deep Neural Network library (cuDNN)") + (description + "This package provides a GPU-accelerated library of primitives for deep +neural networks, with highly tuned implementations for standard routines such +as forward and backward convolution, attention, matmul, pooling, and +normalization.") + (home-page "https://developer.nvidia.com/cudnn") + (license + (license:nonfree "https://developer.download.nvidia.com/\ +compute/cudnn/redist/cudnn/LICENSE.txt")))) + +(define-public nvidia-cudnn-frontend + (package + (name "nvidia-cudnn-frontend") + (version "1.5.2") + (source + (origin + (method git-fetch) + (uri (git-reference + (url "https://github.com/NVIDIA/cudnn-frontend") + (commit (string-append "v" version)))) + (file-name (git-file-name name version)) + (sha256 + (base32 "04aglaxh4mgm94qwia293gqn7gmlw5w6mk8nky4k6l1m2615swyd")) + (modules '((guix build utils))) + (snippet + #~(begin + (delete-file-recursively 
"include/cudnn_frontend/thirdparty") + (substitute* (find-files "include" "\\.(cpp|h|hpp)") + (("\"cudnn_frontend/thirdparty/nlohmann/json\\.hpp\"") + "")))) + (patches + (parameterize + ((%patch-path + (map + (lambda (directory) + (string-append directory "/nongnu/packages/patches")) + %load-path))) + (search-patches "nvidia-cudnn-frontend_find_built_dlpack.patch" + "nvidia-cudnn-frontend_find_nlohmann_json.patch" + "nvidia-cudnn-frontend_use_store_so.patch"))))) + (build-system pyproject-build-system) + (arguments + (list + #:modules '((guix build pyproject-build-system) + (guix build union) + (guix build utils)) + #:imported-modules `(,@%pyproject-build-system-modules + (guix build union)) + #:phases + #~(modify-phases %standard-phases + (add-before 'build 'set_cuda_paths + (lambda _ + (substitute* "python/cudnn/__init__.py" + (("@store-cudnn\\.so-path@") + (format #f "\"~a/lib/libcudnn.so\"" + #$(this-package-input "nvidia-cudnn")))) + (setenv "CUDA_PATH" + #$(this-package-input "cuda-toolkit")) + (setenv "CUDNN_PATH" + #$(this-package-input "nvidia-cudnn")) + (setenv "CUDNN_FRONTEND_FETCH_PYBINDS_IN_CMAKE" "0") + (setenv "CMAKE_BUILD_PARALLEL_LEVEL" + (number->string (parallel-job-count))))) + (add-after 'install 'post-install + (lambda _ + (union-build + (string-append #$output "/include") + (find-files + (string-append #$output "/lib") + (lambda (file stat) + (string-suffix? "include" file)) + #:directories? #t))))))) + (native-inputs (list cmake dlpack pybind11 python-setuptools python-wheel)) + (inputs (list cuda-toolkit nlohmann-json nvidia-cudnn)) + (home-page "https://github.com/NVIDIA/cudnn-frontend") + (synopsis "cuDNN API header-only library") + (description "This package provides a C++ header-only library that wraps +the NVIDIA CUDA Deep Neural Network library (cuDNN) C backend API. 
This entry
+point to the same API is less verbose (without loss of control), and adds
+functionality on top of the backend API, such as errata filters and
+autotuning.")
+    (license license-gnu:expat)))
+
+(define-public nvidia-cutlass
+  (package
+    (name "nvidia-cutlass")
+    (version "3.2.2")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/NVIDIA/cutlass")
+             (commit (string-append "v" version))))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "0qyxkp3pmndlzm3aw9xwrx57znj9p4xlvqahavgzq8c1nd7bj3wp"))
+       (patches
+        (parameterize
+            ((%patch-path
+              (map
+               (lambda (directory)
+                 (string-append directory "/nongnu/packages/patches"))
+               %load-path)))
+          (search-patches "nvidia-cutlass-3.2.2_disable_static_lib.patch")))))
+    (build-system cmake-build-system)
+    (arguments
+     (list
+      ;; XXX: Cutlass is incredibly heavy to build when not specifying target
+      ;; GPU architecture (4G), avoid tests, examples and static library.
+      ;; Setting the contrary often runs out of RAM even on beefy laptops.
+      #:configure-flags ''("-DCUTLASS_ENABLE_TESTS=OFF"
+                           "-DCUTLASS_INSTALL_TESTS=OFF"
+                           "-DCUTLASS_BUILD_STATIC_LIBRARY=OFF"
+                           "-DCUTLASS_ENABLE_EXAMPLES=OFF"
+                           "-DCUTLASS_UNITY_BUILD_ENABLED=ON")
+      #:phases
+      #~(modify-phases %standard-phases
+          ;; XXX: This phase is not necessary on earlier versions.
+          ;; Remove it when updating.
+          (add-after 'unpack 'fix-cuda-build
+            (lambda _
+              (substitute* "CMakeLists.txt"
+                (("--user")
+                 (string-append "--prefix=" #$output)))
+              (setenv "PYTHONPATH"
+                      (string-append (getcwd) "/python"))))
+          (add-before 'build 'set_cuda_paths
+            (lambda _
+              (setenv "CUDACXX"
+                      #$(file-append (this-package-input "cuda-toolkit")
+                                     "/bin/nvcc"))))
+          (add-after 'install 'cleanup
+            (lambda _
+              (delete-file-recursively
+               (string-append #$output "/test")))))))
+    (native-inputs (list python python-setuptools))
+    (inputs (list cuda-toolkit))
+    (propagated-inputs (list cuda-python
+                             python-networkx
+                             python-numpy
+                             python-pydot
+                             python-scipy
+                             python-treelib))
+    (home-page "https://developer.nvidia.com/blog/cutlass-linear-algebra-cuda")
+    (synopsis "CUDA Templates for Linear Algebra Subroutines")
+    (description
+     "This package provides a collection of CUDA C++ template abstractions for
+implementing high-performance matrix-matrix multiplication (GEMM) and related
+computations at all levels and scales within CUDA. It incorporates strategies
+for hierarchical decomposition and data movement similar to those used to
+implement cuBLAS and cuDNN. CUTLASS decomposes these moving parts into
+reusable, modular software components abstracted by C++ template
+classes. Primitives for different levels of a conceptual parallelization
+hierarchy can be specialized and tuned via custom tiling sizes, data types,
+and other algorithmic policy. The resulting flexibility simplifies their use
+as building blocks within custom kernels and applications.")
+    (license
+     (license:nonfree
+      "https://github.com/NVIDIA/cutlass/blob/main/LICENSE.txt"))))
+
+
 ;;;
 ;;; Other packages
@@ -1003,6 +2322,84 @@ laptops.")
 nvidia-smi.")
     (license license-gnu:bsd-3)))
 
+(define nvidia-nccl-tests
+  (let* ((name "nvidia-nccl-tests")
+         (revision "0")
+         ;; Commit at the date of the version of nvidia-nccl
+         (commit "e98ef24bc03bef33054c3bc690ce622576c803b6")
+         (version (git-version "2.18.1" revision commit)))
+    (origin
+      (method git-fetch)
+      (uri (git-reference
+            (url "https://github.com/nvidia/nccl-tests")
+            (commit commit)))
+      (file-name (git-file-name name version))
+      (sha256
+       (base32 "07z26jivpc7iwx8dirs520g6db3b3r0rckqq1g47242f312f5h1s")))))
+
+(define-public nvidia-nccl
+  (package
+    (name "nvidia-nccl")
+    (version "2.18.1")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/NVIDIA/nccl")
+                    (commit (string-append "v" version "-1"))))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32 "10w5gkfac5jdi2dlavvlb7v6fq1cz08bs943kjvqy0sa2kjcwbk6"))))
+    (build-system gnu-build-system)
+    (arguments
+     (list #:modules '((guix build gnu-build-system)
+                       (guix build utils)
+                       (nonguix build utils))
+           #:imported-modules `(,@%default-gnu-imported-modules
+                                (guix build utils)
+                                (nonguix build utils))
+           #:test-target "all"
+           #:phases
+           #~(modify-phases %standard-phases
+               (replace 'configure
+                 (lambda _
+                   (setenv "CUDA_HOME"
+                           #$(this-package-input "cuda-toolkit"))
+                   (setenv "PREFIX" #$output)
+                   ;; Install the pkg-config file under share/pkgconfig, which
+                   ;; pkg-config searches; share/pkg-config is not searched.
+                   (substitute* "src/Makefile"
+                     (("\\$\\(PREFIX\\)/lib/pkgconfig")
+                      "$(PREFIX)/share/pkgconfig"))))
+               (add-after 'install 'install-static install-static-output)
+               ;; Unpack the nccl-tests checkout under tests/ and enter it, so
+               ;; the 'check phase runs "make all" there against NCCL_HOME.
+               (add-after 'build 'prepare-tests
+                 (lambda _
+                   (mkdir "tests")
+                   (with-directory-excursion "tests"
+                     ((assoc-ref %standard-phases 'unpack)
+                      #:source #$nvidia-nccl-tests))
+                   (setenv "NCCL_HOME" (canonicalize-path "build"))
+                   (chdir "tests/source")))
+               (add-after 'check 'step-out-of-tests
+                 (lambda _
+                   (chdir "../.."))))))
+    (native-inputs (list which))
+    (inputs (list cuda-toolkit))
+    (outputs (list "out" "static"))
+    (home-page "https://developer.nvidia.com/nccl")
+    (synopsis "NVIDIA Collective Communications Library (NCCL)")
+    (description "The NVIDIA Collective Communication Library (NCCL)
+implements multi-GPU and multi-node communication primitives optimized for
+NVIDIA GPUs and Networking. NCCL provides routines such as all-gather,
+all-reduce, broadcast, reduce, reduce-scatter as well as point-to-point send
+and receive that are optimized to achieve high bandwidth and low latency over
+PCIe and NVLink high-speed interconnects within a node and over NVIDIA
+Mellanox Network across nodes.")
+    (license
+     (license:nonfree
+      "https://github.com/NVIDIA/nccl/blob/master/LICENSE.txt"))))
+
 (define-public nvidia-nvml
   (package
     (name "nvidia-nvml")
diff --git a/nongnu/packages/patches/nvidia-cudnn-frontend_find_built_dlpack.patch b/nongnu/packages/patches/nvidia-cudnn-frontend_find_built_dlpack.patch
new file mode 100644
index 0000000..2aaf0ec
--- /dev/null
+++ b/nongnu/packages/patches/nvidia-cudnn-frontend_find_built_dlpack.patch
@@ -0,0 +1,43 @@
+From 1b73d8d74b3ec7949e21d926d28385543c202dc7 Mon Sep 17 00:00:00 2001
+From: Nicolas Graves
+Date: Thu, 25 Jul 2024 14:33:24 +0200
+Subject: [PATCH] Find dlpack package instead of building it.
+ +--- + python/CMakeLists.txt | 13 +++---------- + 1 file changed, 3 insertions(+), 10 deletions(-) + +diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt +index cdfbf55..4168411 100644 +--- a/python/CMakeLists.txt ++++ b/python/CMakeLists.txt +@@ -2,15 +2,8 @@ cmake_minimum_required(VERSION 3.18) + + Include(FetchContent) + +-# Fetch and build dlpack +-set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) +-set(BUILD_MOCK OFF) +-FetchContent_Declare( +- dlpack +- GIT_REPOSITORY https://github.com/dmlc/dlpack +- GIT_TAG v0.8 +-) +-FetchContent_MakeAvailable(dlpack) ++# Find dlpack ++find_package(dlpack CONFIG REQUIRED) + + # Find python + find_package(Python 3.8 COMPONENTS Interpreter Development.Module REQUIRED) +@@ -60,7 +53,7 @@ target_compile_definitions(_compiled_module PRIVATE NV_CUDNN_FRONTEND_USE_DYNAMI + target_link_libraries( + _compiled_module + +- PRIVATE dlpack ++ PRIVATE dlpack::dlpack + ) + + set_target_properties( +-- +2.45.2 + diff --git a/nongnu/packages/patches/nvidia-cudnn-frontend_find_nlohmann_json.patch b/nongnu/packages/patches/nvidia-cudnn-frontend_find_nlohmann_json.patch new file mode 100644 index 0000000..cd8e664 --- /dev/null +++ b/nongnu/packages/patches/nvidia-cudnn-frontend_find_nlohmann_json.patch @@ -0,0 +1,36 @@ +From 3f7a23cc5a84af36442c4035db78e616d884b540 Mon Sep 17 00:00:00 2001 +From: Nicolas Graves +Date: Thu, 25 Jul 2024 16:43:12 +0200 +Subject: [PATCH] Find unbundled nlohmann-json package. 
+ +--- + CMakeLists.txt | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index adf22fc..8211fcd 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -16,6 +16,11 @@ endif() + + add_library(cudnn_frontend INTERFACE) + ++# Find the nlohmann_json package ++if(NOT CUDNN_FRONTEND_SKIP_NLOHMANN_JSON) ++ find_package(nlohmann_json CONFIG REQUIRED) ++endif() ++ + target_compile_definitions( + cudnn_frontend INTERFACE + $<$:CUDNN_FRONTEND_SKIP_JSON_LIB> +@@ -25,6 +30,7 @@ target_include_directories( + cudnn_frontend INTERFACE + $ + $ ++ $<$>:$> + ) + + # Find the cuda compiler +-- +2.45.2 + diff --git a/nongnu/packages/patches/nvidia-cudnn-frontend_use_store_so.patch b/nongnu/packages/patches/nvidia-cudnn-frontend_use_store_so.patch new file mode 100644 index 0000000..acceced --- /dev/null +++ b/nongnu/packages/patches/nvidia-cudnn-frontend_use_store_so.patch @@ -0,0 +1,39 @@ +From 0c16ed53cae242b02069a1f6fed463dc819526e0 Mon Sep 17 00:00:00 2001 +From: Nicolas Graves +Date: Thu, 25 Jul 2024 14:58:42 +0200 +Subject: [PATCH] Use absolute store cudnn.so path. + +--- + python/cudnn/__init__.py | 16 +--------------- + 1 file changed, 1 insertion(+), 15 deletions(-) + +diff --git a/python/cudnn/__init__.py b/python/cudnn/__init__.py +index 35eb883..39dc047 100644 +--- a/python/cudnn/__init__.py ++++ b/python/cudnn/__init__.py +@@ -137,21 +137,7 @@ pygraph.execute_plan_at_index = _execute_plan_at_index + + + def _dlopen_cudnn(): +- # First look at python site packages +- lib_path = glob.glob( +- os.path.join( +- sysconfig.get_path("purelib"), "nvidia/cudnn/lib/libcudnn.so.*[0-9]" +- ) +- ) +- +- if lib_path: +- assert ( +- len(lib_path) == 1 +- ), f"Found {len(lib_path)} libcudnn.so.x in nvidia-cudnn-cuXX." 
+- lib = ctypes.CDLL(lib_path[0]) +- else: # Fallback +- lib = ctypes.CDLL("libcudnn.so") +- ++ lib = ctypes.CDLL(@store-cudnn.so-path@) + handle = ctypes.cast(lib._handle, ctypes.c_void_p).value + _compiled_module._set_dlhandle_cudnn(handle) + +-- +2.45.2 + diff --git a/nongnu/packages/patches/nvidia-cutlass-3.2.2_disable_static_lib.patch b/nongnu/packages/patches/nvidia-cutlass-3.2.2_disable_static_lib.patch new file mode 100644 index 0000000..8c693d6 --- /dev/null +++ b/nongnu/packages/patches/nvidia-cutlass-3.2.2_disable_static_lib.patch @@ -0,0 +1,73 @@ +From 7ee9ec4c2636cca833761d3466df27edc4e3f952 Mon Sep 17 00:00:00 2001 +From: Nicolas Graves +Date: Tue, 30 Jul 2024 14:13:09 +0200 +Subject: [PATCH] Add CUTLASS_BUILD_STATIC_LIBRARY option + +--- + tools/library/CMakeLists.txt | 22 ++++++++++++++++++---- + 1 file changed, 18 insertions(+), 4 deletions(-) + +diff --git a/tools/library/CMakeLists.txt b/tools/library/CMakeLists.txt +index a11ebcf6..79f7ccd1 100644 +--- a/tools/library/CMakeLists.txt ++++ b/tools/library/CMakeLists.txt +@@ -34,6 +34,7 @@ include(GNUInstallDirs) + + set(CUTLASS_BUILD_MONO_LIBRARY OFF CACHE BOOL + "Determines whether the cutlass library is generated as a single file or multiple files.") ++option(CUTLASS_BUILD_STATIC_LIBRARY "Build static libary for CUTLASS" ON) + + ################################################################################ + +@@ -126,7 +127,9 @@ function(cutlass_add_cutlass_library) + # simply link the generated object files to the default library. 
+ + target_link_libraries(${DEFAULT_NAME} PRIVATE $) +- target_link_libraries(${DEFAULT_NAME}_static PRIVATE $) ++ if (CUTLASS_BUILD_STATIC_LIBRARY) ++ target_link_libraries(${DEFAULT_NAME}_static PRIVATE $) ++ endif() + + else() + +@@ -152,7 +155,7 @@ function(cutlass_add_cutlass_library) + ) + + set_target_properties(${__NAME} PROPERTIES DEBUG_POSTFIX "${CUTLASS_LIBRARY_DEBUG_POSTFIX}") +- ++ if (CUTLASS_BUILD_STATIC_LIBRARY) + cutlass_add_library( + ${__NAME}_static + STATIC +@@ -189,6 +192,15 @@ function(cutlass_add_cutlass_library) + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) ++ else() ++ install( ++ TARGETS ${__NAME} ++ EXPORT NvidiaCutlass ++ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ++ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ++ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ++ ) ++ endif() + + if (__SUFFIX) + +@@ -197,8 +209,10 @@ function(cutlass_add_cutlass_library) + # commands to pull in all kernels by default. + + target_link_libraries(${DEFAULT_NAME} INTERFACE ${__NAME}) +- target_link_libraries(${DEFAULT_NAME}_static INTERFACE ${__NAME}_static) +- ++ if (CUTLASS_BUILD_STATIC_LIBRARY) ++ target_link_libraries(${DEFAULT_NAME}_static INTERFACE ${__NAME}_static) ++ endif() ++ + endif() + + endif() +-- +2.45.2 + diff --git a/nongnu/packages/patches/nvidia-cutlass-3.4.0_disable_static_lib.patch b/nongnu/packages/patches/nvidia-cutlass-3.4.0_disable_static_lib.patch new file mode 100644 index 0000000..8fab0c1 --- /dev/null +++ b/nongnu/packages/patches/nvidia-cutlass-3.4.0_disable_static_lib.patch @@ -0,0 +1,82 @@ +From ce4a14ae4041d6cfb69987fef5a65c50754c89b6 Mon Sep 17 00:00:00 2001 +From: Nicolas Graves +Date: Sun, 28 Jul 2024 16:57:16 +0200 +Subject: [PATCH] Add option CUTLASS_BUILD_STATIC_LIBRARY + +--- + tools/library/CMakeLists.txt | 26 +++++++++++++++++++++----- + 1 file changed, 20 insertions(+), 4 deletions(-) + +diff --git a/tools/library/CMakeLists.txt b/tools/library/CMakeLists.txt +index 
60a6cca5..f096c84d 100644 +--- a/tools/library/CMakeLists.txt ++++ b/tools/library/CMakeLists.txt +@@ -34,6 +34,7 @@ include(GNUInstallDirs) + + set(CUTLASS_BUILD_MONO_LIBRARY OFF CACHE BOOL + "Determines whether the cutlass library is generated as a single file or multiple files.") ++option(CUTLASS_BUILD_STATIC_LIBRARY "Build static libary for CUTLASS" ON) + + ################################################################################ + +@@ -126,7 +127,9 @@ function(cutlass_add_cutlass_library) + # simply link the generated object files to the default library. + + target_link_libraries(${DEFAULT_NAME} PRIVATE $) +- target_link_libraries(${DEFAULT_NAME}_static PRIVATE $) ++ if (CUTLASS_BUILD_STATIC_LIBRARY) ++ target_link_libraries(${DEFAULT_NAME}_static PRIVATE $) ++ endif() + + else() + +@@ -154,7 +157,7 @@ function(cutlass_add_cutlass_library) + ) + + set_target_properties(${__NAME} PROPERTIES DEBUG_POSTFIX "${CUTLASS_LIBRARY_DEBUG_POSTFIX}") +- ++ if (CUTLASS_BUILD_STATIC_LIBRARY) + cutlass_add_library( + ${__NAME}_static + STATIC +@@ -193,6 +196,15 @@ function(cutlass_add_cutlass_library) + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) ++ else() ++ install( ++ TARGETS ${__NAME} ++ EXPORT NvidiaCutlass ++ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ++ LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ++ ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ++ ) ++ endif() + + if (__SUFFIX) + +@@ -201,7 +213,9 @@ function(cutlass_add_cutlass_library) + # commands to pull in all kernels by default. 
+
+       target_link_libraries(${DEFAULT_NAME} PUBLIC ${__NAME})
+-      target_link_libraries(${DEFAULT_NAME}_static PUBLIC ${__NAME}_static)
++      if (CUTLASS_BUILD_STATIC_LIBRARY)
++        target_link_libraries(${DEFAULT_NAME}_static PUBLIC ${__NAME}_static)
++      endif()
+
+     endif()
+
+@@ -250,7 +264,9 @@ cutlass_add_cutlass_library(
+
+ # For backward compatibility with the old name
+ add_library(cutlass_lib ALIAS cutlass_library)
+-add_library(cutlass_lib_static ALIAS cutlass_library_static)
++if (CUTLASS_BUILD_STATIC_LIBRARY)
++  add_library(cutlass_lib_static ALIAS cutlass_library_static)
++endif()
+
+ ################################################################################
+
+--
+2.45.2
+
diff --git a/nonguix/build-system/cuda.scm b/nonguix/build-system/cuda.scm
new file mode 100644
index 0000000..de6dac5
--- /dev/null
+++ b/nonguix/build-system/cuda.scm
@@ -0,0 +1,198 @@
+;;; SPDX-License-Identifier: GPL-3.0-or-later
+;;; Copyright © 2024 Nicolas Graves
+
+(define-module (nonguix build-system cuda)
+  #:use-module (gnu packages gcc)
+  #:use-module (guix store)
+  #:use-module (guix utils)
+  #:use-module (guix gexp)
+  #:use-module (guix monads)
+  #:use-module (guix derivations)
+  #:use-module (guix search-paths)
+  #:use-module (guix build-system)
+  #:use-module (guix build-system gnu)
+  #:use-module (guix packages)
+  #:use-module (ice-9 match)
+  #:use-module (srfi srfi-1)
+  #:use-module (nonguix build-system binary)
+  #:use-module (nonguix utils)
+  #:use-module ((nonguix licenses) #:prefix license:)
+  #:export (cuda-license
+            cuda-current-system
+            cuda-module-url
+            guix-system->cuda-system
+
+            %cuda-build-system-modules
+            lower
+            cuda-build
+            cuda-build-system))
+
+;; Commentary:
+;;
+;; Standard build procedure for Cuda binary packages. This is
+;; implemented as an extension of `binary-build-system'.
+;;
+;; Code:
+
+(define %cuda-build-system-modules
+  ;; Build-side modules imported by default.
+  `((nonguix build cuda-build-system)
+    (nonguix build utils)
+    ,@%binary-build-system-modules))
+
+;; NOTE(review): nothing in this module calls `build-patchelf-plan' and it is
+;; not exported -- confirm whether it can be dropped.
+(define (build-patchelf-plan wrapper-plan inputs)
+  #~(let ((patchelf-inputs
+           (list #$@(map car inputs))))
+      (map (lambda (file)
+             (cons file (cons* "out" patchelf-inputs)))
+           #$wrapper-plan)))
+
+(define (cuda-redist-name name)
+  "Return NAME with every hyphen replaced by an underscore, the spelling used
+for component directories and archives in the redist URLs built below."
+  (string-join (string-split name #\-) "_"))
+
+(define (cuda-license name)
+  "Return the nonfree license object for the redistributable component NAME,
+pointing at the LICENSE.txt published in that component's redist directory."
+  (license:nonfree
+   (format #f "\
+https://developer.download.nvidia.com/compute/cuda/redist/~a/LICENSE.txt"
+           (cuda-redist-name name))))
+
+(define (guix-system->cuda-system system)
+  "Return NVIDIA's platform string for the Guix system string SYSTEM, or #f
+when SYSTEM has no known equivalent."
+  (match system
+    ("x86_64-linux" "linux-x86_64")
+    ("aarch64-linux" "linux-aarch64")
+    ("powerpc64le-linux" "linux-ppc64le")
+    (_ #f)))
+
+(define (cuda-current-system)
+  "Return NVIDIA's platform string for the current target system, falling
+back to the current system when not cross-compiling; #f if unsupported."
+  (guix-system->cuda-system
+   (or (%current-target-system) (%current-system))))
+
+(define (cuda-module-url name version)
+  "Return the URL of the redistributable archive of component NAME at VERSION
+for the current system."
+  (let ((system (cuda-current-system))
+        (snake-name (cuda-redist-name name)))
+    (format #f
+            "https://developer.download.nvidia.com/compute/cuda/redist\
+/~a/~a/~a-~a-~a-archive.tar.xz"
+            snake-name
+            system
+            snake-name
+            system
+            version)))
+
+(define* (lower name
+                #:key source inputs native-inputs outputs system target
+                (patchelf (default-patchelf))
+                (glibc (default-glibc))
+                #:allow-other-keys
+                #:rest arguments)
+  "Return a bag for NAME."
+  (define private-keywords
+    '(#:target #:patchelf #:inputs #:native-inputs))
+  (define host-inputs
+    `(,@(if source
+            `(("source" ,source))
+            '())
+
+      ("gcc:lib" ,gcc "lib")
+      ("glibc" ,glibc)
+
+      ,@inputs
+      ;; Keep the standard inputs of 'gnu-build-system'.
+      ,@(standard-packages)))
+
+  (and (not target)                     ;XXX: no cross-compilation
+       (bag
+         (name name)
+         (system system)
+         (host-inputs host-inputs)
+         (build-inputs `(("patchelf" ,patchelf)
+                         ,@native-inputs
+                         ;; If current system is i686, the *32 packages will be the
+                         ;; same as the non-32, but that's OK.
+                         ("libc32" ,(to32 glibc))))
+         (outputs outputs)
+         (build cuda-build)
+         (arguments (append
+                     (strip-keyword-arguments private-keywords arguments)
+                     (list #:wrap-inputs (alist-delete "source" host-inputs)))))))
+
+(define* (cuda-build name inputs
+                     #:key
+                     guile source wrap-inputs
+                     (outputs '("out"))
+                     (patchelf-inputs ''("gcc" "glibc"))
+                     (patchelf-plan ''())
+                     (install-plan ''(("." "./")))
+                     (search-paths '())
+                     (out-of-source? #t)
+                     (validate-runpath? #t)
+                     (patch-shebangs? #t)
+                     (strip-binaries? #t)
+                     (strip-flags ''("--strip-debug"))
+                     (strip-directories ''("lib" "lib64" "libexec"
+                                           "bin" "sbin"))
+                     (phases '(@ (nonguix build cuda-build-system)
+                                 %standard-phases))
+                     (system (%current-system))
+                     (imported-modules %cuda-build-system-modules)
+                     (modules '((nonguix build cuda-build-system)
+                                (guix build utils)
+                                (nonguix build utils)))
+                     (substitutable? #t)
+                     allowed-references
+                     disallowed-references)
+  "Return a derivation that builds SOURCE with the build-side `cuda-build'
+procedure of (nonguix build cuda-build-system).  WRAP-INPUTS is accepted for
+compatibility with `lower' but is not forwarded to the builder."
+  (define builder
+    (with-imported-modules imported-modules
+      #~(begin
+          (use-modules #$@modules)
+
+          #$(with-build-variables inputs outputs
+              #~(cuda-build #:source #+source
+                            #:system #$system
+                            #:outputs %outputs
+                            #:inputs %build-inputs
+                            #:patchelf-inputs #$patchelf-inputs
+                            #:patchelf-plan #$patchelf-plan
+                            #:install-plan #$install-plan
+                            #:search-paths '#$(map search-path-specification->sexp
+                                                   search-paths)
+                            #:phases #$phases
+                            #:out-of-source? #$out-of-source?
+                            #:validate-runpath? #$validate-runpath?
+                            #:patch-shebangs? #$patch-shebangs?
+                            #:strip-binaries? #$strip-binaries?
+                            #:strip-flags #$strip-flags
+                            #:strip-directories #$strip-directories)))))
+
+  (mlet %store-monad ((guile (package->derivation (or guile (default-guile))
+                                                  system #:graft? #f)))
+    (gexp->derivation name builder
+                      #:system system
+                      #:target #f
+                      #:substitutable? substitutable?
+                      #:allowed-references allowed-references
+                      #:disallowed-references disallowed-references
+                      #:guile-for-build guile)))
+
+(define cuda-build-system
+  (build-system
+    (name 'cuda)
+    (description "The Cuda build system")
+    (lower lower)))
+
+;;; cuda.scm ends here
diff --git a/nonguix/build/binary-build-system.scm b/nonguix/build/binary-build-system.scm
index ccfc3eb..24f146f 100644
--- a/nonguix/build/binary-build-system.scm
+++ b/nonguix/build/binary-build-system.scm
@@ -3,6 +3,7 @@
 ;;; Copyright © 2022 Attila Lendvai
 ;;; Copyright © 2023 Giacomo Leidi
 ;;; Copyright © 2024 Ashish SHUKLA
+;;; Copyright © 2024 Nicolas Graves
 
 (define-module (nonguix build binary-build-system)
   #:use-module ((guix build gnu-build-system) #:prefix gnu:)
@@ -11,6 +12,7 @@
   #:use-module (ice-9 match)
   #:use-module (srfi srfi-1)
   #:export (%standard-phases
+            autopatchelf
             binary-build))
 
 ;; Commentary:
@@ -140,6 +142,28 @@ The inputs are optional when the file is an executable."
                patchelf-plan)))
   #t)
 
+(define* (autopatchelf #:key inputs outputs patchelf-plan patchelf-inputs
+                       #:allow-other-keys)
+  "Automatically build patchelf-plan if not defined, then run patchelf phase.
+
+The automatic plan pairs every ELF file found below the current directory
+(symlinks excepted) with \"out\" followed by PATCHELF-INPUTS."
+  (define (default-plan)
+    (map (lambda (file)
+           (list file (cons* "out" patchelf-inputs)))
+         (find-files
+          "." (lambda (name stat)
+                (and (elf-file? name)
+                     (not (eq? 'symlink (stat:type stat))))))))
+
+  (let* ((automatic? (null? patchelf-plan))
+         (plan (if automatic? (default-plan) patchelf-plan)))
+    (when automatic?
+      (format #t "Applying patchelf-plan: ~a~%" plan))
+    (patchelf #:inputs inputs
+              #:outputs outputs
+              #:patchelf-plan plan)))
+
 (define (deb-file? binary-file)
   (string-suffix?
".deb" binary-file))
 
diff --git a/nonguix/build/cuda-build-system.scm b/nonguix/build/cuda-build-system.scm
new file mode 100644
index 0000000..8b874f7
--- /dev/null
+++ b/nonguix/build/cuda-build-system.scm
@@ -0,0 +1,85 @@
+;;; SPDX-License-Identifier: GPL-3.0-or-later
+;;; Copyright © 2024 Nicolas Graves
+
+(define-module (nonguix build cuda-build-system)
+  #:use-module ((guix build gnu-build-system) #:prefix gnu:)
+  #:use-module ((nonguix build binary-build-system) #:prefix binary:)
+  #:use-module (guix build utils)
+  #:use-module (nonguix build utils)
+  #:use-module (ice-9 ftw)
+  #:use-module (ice-9 match)
+  #:export (%standard-phases
+            cuda-build))
+
+;; Commentary:
+;;
+;; Builder-side code of the Cuda binary build procedure.
+;;
+;; Code:
+
+;;; XXX: Copied from upstream guix in tests/store-deduplication.scm
+(define (cartesian-product . lst)
+  "Return the Cartesian product of all the given lists."
+  (match lst
+    ((head)
+     (map list head))
+    ((head . rest)
+     (let ((others (apply cartesian-product rest)))
+       (apply append
+              (map
+               (lambda (init)
+                 (map (lambda (lst)
+                        (cons init lst))
+                      others))
+               head))))
+    (()
+     '())))
+
+(define* (install-pkg-config-files #:key outputs #:allow-other-keys)
+  "Install the pkg-config files shipped in ./pkg-config into every output.
+
+For each output, point the cudaroot, libdir and includedir variables at that
+output, install the file under share/pkgconfig (share/pkg-config is not on
+pkg-config's search path) and symlink it under its unversioned name."
+  (define (unversioned-name file)
+    ;; E.g. "./cudart-12.1.pc" -> "cudart.pc"; #f when the base name of FILE
+    ;; has no hyphen, in which case there is nothing to symlink.
+    (let ((base (basename file)))
+      (and=> (string-index base #\-)
+             (lambda (dash)
+               (string-append (string-take base dash) ".pc")))))
+
+  (if (directory-exists? "pkg-config")
+      (with-directory-excursion "pkg-config"
+        (for-each
+         (match-lambda
+           ((output file)
+            (substitute* file
+              (("^cudaroot=.*")
+               (string-append "cudaroot=" output "\n"))
+              (("^libdir=.*")
+               (string-append "libdir=" output "/lib\n"))
+              (("^includedir=.*")
+               (string-append "includedir=" output "/include\n")))
+            (let ((pc-directory (string-append output "/share/pkgconfig"))
+                  (link (unversioned-name file)))
+              (install-file file pc-directory)
+              (when link
+                (with-directory-excursion pc-directory
+                  (symlink (basename file) link))))))
+         (cartesian-product (map cdr outputs) (find-files "." "\\.pc$"))))
+      (format #t "pkg-config directory doesn't exist, nothing to be done.~%")))
+
+(define %standard-phases
+  (modify-phases binary:%standard-phases
+    (replace 'patchelf binary:autopatchelf)
+    (add-after 'install 'install-static install-static-output)
+    (add-after 'install-static 'install-pkg-config-files
+      install-pkg-config-files)))
+
+(define* (cuda-build #:key inputs (phases %standard-phases)
+                     #:allow-other-keys #:rest args)
+  "Build the given package, applying all of PHASES in order."
+  (apply gnu:gnu-build #:inputs inputs #:phases phases args))
+
+;;; cuda-build-system.scm ends here
diff --git a/nonguix/build/utils.scm b/nonguix/build/utils.scm
index 4de2ac2..3cf2ad4 100644
--- a/nonguix/build/utils.scm
+++ b/nonguix/build/utils.scm
@@ -12,7 +12,8 @@
   #:export (64-bit?
             make-wrapper
             concatenate-files
-            build-paths-from-inputs))
+            build-paths-from-inputs
+            install-static-output))
 
 (define (64-bit? file)
   "Return true if ELF file is in 64-bit format, false otherwise.
@@ -97,3 +98,36 @@ contents:
     (call-with-output-file result
       (lambda (port)
         (for-each (cut dump <> port) files))))
+
+(define* (install-static-output #:key outputs #:allow-other-keys)
+  "Move the static archives of the \"out\" output to the \"static\" output.
+
+Each *.a file found under OUT/lib is moved to STATIC/lib, symlinks being
+recreated with the base name of their target, and every file found under
+OUT/include is copied to STATIC/include.  Do nothing but print a message
+when OUTPUTS has no \"static\" entry."
+  (let ((out (assoc-ref outputs "out"))
+        (static (assoc-ref outputs "static")))
+    (if static
+        (let ((static-lib (string-append static "/lib")))
+          (for-each
+           (lambda (file)
+             (if (eq? 'symlink (stat:type (lstat file)))
+                 (begin
+                   ;; Unlike `install-file', entering the directory does not
+                   ;; create it, and no regular archive may have been
+                   ;; installed there yet.
+                   (mkdir-p static-lib)
+                   (with-directory-excursion static-lib
+                     (symlink (basename (readlink file))
+                              (basename file))))
+                 (install-file file static-lib))
+             (delete-file file))
+           (find-files (string-append out "/lib") "\\.a$"))
+          ;; NOTE(review): `install-file' drops the directory part, so
+          ;; headers in sub-directories of OUT/include end up flattened --
+          ;; confirm this is intended.
+          (for-each
+           (cute install-file <> (string-append static "/include"))
+           (find-files (string-append out "/include"))))
+        (format #t "no static output, nothing to be done~%"))))