Configure TRITON_PTXAS_PATH for GPUs that may not be supported by Triton's bundled ptxas (e.g., Jetson Thor sm_110a, DGX Spark sm_121a).
Triton bundles a ptxas binary (currently CUDA 12.8) that may not support the newest GPU architectures. When running on such GPUs, Triton kernel compilation fails with errors like: ptxas fatal: Value 'sm_121a' is not defined for option 'gpu-name'
This function uses Triton's native GPU detection to check the architecture and configures Triton to use the system's CUDA toolkit ptxas instead, which typically has broader architecture support (e.g., CUDA 13.0+).
Source code in vllm/triton_utils/importing.py
def _find_system_ptxas():
    """Locate an executable system `ptxas` binary, or return None.

    Candidates, in priority order: ``$CUDA_HOME/bin``, the default CUDA
    install prefix, and anything on ``$PATH``.
    """
    # Use `or` rather than a .get() default so an *empty* CUDA_HOME still
    # falls back to the default prefix instead of yielding the bogus
    # relative path "bin/ptxas".
    cuda_home = os.environ.get("CUDA_HOME") or "/usr/local/cuda"
    candidates = [
        os.path.join(cuda_home, "bin", "ptxas"),
        "/usr/local/cuda/bin/ptxas",
        shutil.which("ptxas"),  # Check PATH
    ]
    for path in candidates:
        if path and os.path.isfile(path) and os.access(path, os.X_OK):
            return path
    return None


def _detect_triton_gpu_arch():
    """Return the current GPU's Triton arch (e.g. 121 for sm_121a), or None.

    Uses Triton's own NVIDIA backend driver so the detected target matches
    whatever Triton itself would compile for. Returns None when the NVIDIA
    backend is missing or its driver is unavailable/inactive.
    """
    from triton.backends import backends

    nvidia_backend = backends.get("nvidia")
    if nvidia_backend is None or nvidia_backend.driver is None:
        return None
    if not nvidia_backend.driver.is_active():
        return None
    # Instantiate the driver and ask it for the active compilation target.
    driver_instance = nvidia_backend.driver()
    return driver_instance.get_current_target().arch


def _configure_triton_ptxas_for_new_gpus():
    """
    Configure TRITON_PTXAS_PATH for GPUs that may not be supported by
    Triton's bundled ptxas (e.g., Jetson Thor sm_110a, DGX Spark sm_121a).

    Triton bundles a ptxas binary (currently CUDA 12.8) that may not support
    the newest GPU architectures. When running on such GPUs, Triton kernel
    compilation fails with errors like:
        ptxas fatal: Value 'sm_121a' is not defined for option 'gpu-name'

    This function uses Triton's native GPU detection to check the architecture
    and configures Triton to use the system's CUDA toolkit ptxas instead,
    which typically has broader architecture support (e.g., CUDA 13.0+).
    """
    # Don't override if already set by user.
    if os.environ.get("TRITON_PTXAS_PATH"):
        return

    system_ptxas = _find_system_ptxas()
    if system_ptxas is None:
        # No system ptxas found, can't help.
        return

    try:
        arch = _detect_triton_gpu_arch()
        # Only GPUs with arch >= 110 (compute capability >= 11.0) may need
        # the system ptxas:
        #   - arch 110: Jetson Thor (sm_110a, CC 11.0)
        #   - arch 120: Blackwell B100/B200 (sm_120, CC 12.0)
        #   - arch 121: DGX Spark GB10 (sm_121a, CC 12.1)
        if arch is None or arch < 110:
            return

        # Smoke-test the system ptxas before pointing Triton at it.
        try:
            result = subprocess.run(
                [system_ptxas, "--version"],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
            logger.debug("Cannot use system ptxas: %s", e)
            return

        if result.returncode == 0:
            # System ptxas is available and functional, use it.
            os.environ["TRITON_PTXAS_PATH"] = system_ptxas
            # arch encodes CC as major*10 + minor (e.g. 121 -> 12.1).
            major, minor = divmod(arch, 10)
            logger.info(
                "Detected GPU with compute capability %d.%d (arch=%d). "
                "Configuring TRITON_PTXAS_PATH=%s to ensure "
                "Triton kernel compilation compatibility.",
                major,
                minor,
                arch,
                system_ptxas,
            )
    except Exception as e:
        # Don't fail if detection doesn't work - user can still set
        # TRITON_PTXAS_PATH manually.
        logger.debug("Failed to auto-configure TRITON_PTXAS_PATH: %s", e)