Configure TRITON_PTXAS_PATH for GPUs that may not be supported by Triton's bundled ptxas (e.g., Jetson Thor sm_110a, DGX Spark sm_121a).
Triton bundles a ptxas binary (currently CUDA 12.8) that may not support the newest GPU architectures. When running on such GPUs, Triton kernel compilation fails with errors like: ptxas fatal: Value 'sm_121a' is not defined for option 'gpu-name'
This function uses Triton's native GPU detection to check the architecture and configures Triton to use the system's CUDA toolkit ptxas instead, which typically has broader architecture support (e.g., CUDA 13.0+).
Source code in vllm/triton_utils/importing.py
def _find_system_ptxas():
    """Locate an executable system `ptxas` binary, or return None.

    Candidates, in priority order: ``$CUDA_HOME/bin``, the default CUDA
    install prefix, and anything on ``$PATH``.
    """
    # Use `or` rather than a .get() default so an *empty* CUDA_HOME still
    # falls back to the default prefix instead of yielding the bogus
    # relative path "bin/ptxas".
    cuda_home = os.environ.get("CUDA_HOME") or "/usr/local/cuda"
    candidates = [
        os.path.join(cuda_home, "bin", "ptxas"),
        "/usr/local/cuda/bin/ptxas",
        shutil.which("ptxas"),  # Check PATH
    ]
    for path in candidates:
        if path and os.path.isfile(path) and os.access(path, os.X_OK):
            return path
    return None


def _detect_triton_gpu_arch():
    """Return the current GPU's Triton arch (e.g. 121 for sm_121a), or None.

    Uses Triton's own NVIDIA backend driver so the detected target matches
    whatever Triton itself would compile for. Returns None when the NVIDIA
    backend is missing or its driver is unavailable/inactive.
    """
    from triton.backends import backends

    nvidia_backend = backends.get("nvidia")
    if nvidia_backend is None or nvidia_backend.driver is None:
        return None
    if not nvidia_backend.driver.is_active():
        return None
    # Instantiate the driver and ask it for the active compilation target.
    driver_instance = nvidia_backend.driver()
    return driver_instance.get_current_target().arch


def _configure_triton_ptxas_for_new_gpus():
    """
    Configure TRITON_PTXAS_PATH for GPUs that may not be supported by
    Triton's bundled ptxas (e.g., Jetson Thor sm_110a, DGX Spark sm_121a).

    Triton bundles a ptxas binary (currently CUDA 12.8) that may not support
    the newest GPU architectures. When running on such GPUs, Triton kernel
    compilation fails with errors like:
        ptxas fatal: Value 'sm_121a' is not defined for option 'gpu-name'

    This function uses Triton's native GPU detection to check the architecture
    and configures Triton to use the system's CUDA toolkit ptxas instead,
    which typically has broader architecture support (e.g., CUDA 13.0+).
    """
    # Don't override if already set by user.
    if os.environ.get("TRITON_PTXAS_PATH"):
        return

    system_ptxas = _find_system_ptxas()
    if system_ptxas is None:
        # No system ptxas found, can't help.
        return

    try:
        arch = _detect_triton_gpu_arch()
        # Only GPUs with arch >= 110 (compute capability >= 11.0) may need
        # the system ptxas:
        #   - arch 110: Jetson Thor (sm_110a, CC 11.0)
        #   - arch 120: Blackwell B100/B200 (sm_120, CC 12.0)
        #   - arch 121: DGX Spark GB10 (sm_121a, CC 12.1)
        if arch is None or arch < 110:
            return

        # Smoke-test the system ptxas before pointing Triton at it.
        try:
            result = subprocess.run(
                [system_ptxas, "--version"],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except (subprocess.TimeoutExpired, FileNotFoundError, OSError) as e:
            logger.debug("Cannot use system ptxas: %s", e)
            return

        if result.returncode == 0:
            # System ptxas is available and functional, use it.
            os.environ["TRITON_PTXAS_PATH"] = system_ptxas
            # arch encodes CC as major*10 + minor (e.g. 121 -> 12.1).
            major, minor = divmod(arch, 10)
            logger.info(
                "Detected GPU with compute capability %d.%d (arch=%d). "
                "Configuring TRITON_PTXAS_PATH=%s to ensure "
                "Triton kernel compilation compatibility.",
                major,
                minor,
                arch,
                system_ptxas,
            )
    except Exception as e:
        # Don't fail if detection doesn't work - user can still set
        # TRITON_PTXAS_PATH manually.
        logger.debug("Failed to auto-configure TRITON_PTXAS_PATH: %s", e)