This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*===- TableGen'erated file -------------------------------------*- C++ -*-===*\ | |
|* *| | |
|* Target Register Enum Values *| | |
|* *| | |
|* Automatically generated file, do not edit! *| | |
|* *| | |
\*===----------------------------------------------------------------------===*/ | |
#ifdef GET_REGINFO_ENUM |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.section .AMDGPU.config,"",@progbits | |
.long 47176 | |
.long 11468865 | |
.long 47180 | |
.long 5008 | |
.long 47200 | |
.long 0 | |
.long 4 | |
.long 0 | |
.long 8 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from enum import Flag | |
from pprint import pprint | |
class SchedGroupMask(Flag): | |
NONE = 0 | |
ALU = 1 << 0 | |
VALU = 1 << 1 | |
SALU = 1 << 2 | |
MFMA = 1 << 3 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) | |
define amdgpu_kernel void @add_kernel(ptr addrspace(1) nocapture readonly %0, ptr addrspace(1) nocapture readonly %1, ptr addrspace(1) nocapture writeonly %2, i32 %3) local_unnamed_addr #0 { | |
%5 = tail call i32 @llvm.amdgcn.workgroup.id.x() | |
%6 = shl i32 %5, 10 | |
%7 = tail call i32 @llvm.amdgcn.workitem.id.x() | |
%8 = shl i32 %7, 2 | |
%9 = and i32 %8, 1020 | |
%10 = or disjoint i32 %9, %6 | |
%11 = icmp slt i32 %10, %3 | |
br i1 %11, label %.critedge, label %.critedge2 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/CMakeLists.txt b/CMakeLists.txt | |
--- a/CMakeLists.txt (revision 7fb35319d38e57f7341e8434a4f5b2049aec8657) | |
+++ b/CMakeLists.txt (date 1744728290525) | |
@@ -318,6 +318,9 @@ | |
endif() | |
target_link_options(triton PRIVATE ${PYTHON_LDFLAGS}) | |
+ set_target_properties(triton PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/triton/_C) | |
+ set_target_properties(triton PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/triton/_C) | |
+ set_target_properties(triton PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/python/triton/_C) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
W20250414 14:53:24.356304 132475353552832 metadata.cpp:186] rocprofiler_iterate_agent_supported_counters returned ROCPROFILER_STATUS_ERROR_AGENT_ARCH_NOT_SUPPORTED for agent 1 (gfx1150) :: Agent HW architecture is not supported, no counter metrics found. | |
W20250414 14:53:24.651769 131105783740352 metadata.cpp:186] rocprofiler_iterate_agent_supported_counters returned ROCPROFILER_STATUS_ERROR_AGENT_ARCH_NOT_SUPPORTED for agent 1 (gfx1150) :: Agent HW architecture is not supported, no counter metrics found. | |
F20250414 14:53:24.970933 131105783740352 agent.cpp:1069] rocprofiler was only able to map 2 rocprofiler agents to HSA agents, expected 3 | |
@ 0x773d72612360 (unknown) | |
@ 0x773d72613297 (unknown) | |
@ 0x773d7205d18a (unknown) | |
@ 0x773d72098ed5 rocprofiler_set_api_table | |
@ 0x773d71ad5afc rocprofiler_register_library_api_table | |
@ 0x773d660a2229 (unknown) | |
@ 0x773d660a3c58 (unknown) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Dispatch_ID | GPU_ID | Queue_ID | PID | TID | Grid_Size | Workgroup_Size | LDS_Per_Workgroup | Scratch_Per_Workitem | Arch_VGPR | Accum_VGPR | SGPR | Wave_Size | Kernel_Name | Start_Timestamp | End_Timestamp | Correlation_ID | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 1 | 31938 | 31938 | 32 | 32 | 0 | 0 | 40 | 0 | 128 | 32 | smol_matmul.kd | 5382059929644 | 5382059931804 | 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# RUN: python3 %s | FileCheck %s | |
import triton | |
import triton.language as tl | |
from triton.backends.compiler import GPUTarget | |
def print_test_name_and_run(f): | |
print(f"Test: {f.__name__}") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 8 : i32, ttg.target = "hip:gfx942", "ttg.threads-per-warp" = 64 : i32} { | |
tt.func public @matmul_kernel_persistent(%arg0: !tt.ptr<f16> {tt.divisibility = 16 : i32, tt.pointer_range = 32 : i32}, %arg1: !tt.ptr<f16> {tt.divisibility = 16 : i32, tt.pointer_range = 32 : i32}, %arg2: !tt.ptr<f16> {tt.divisibility = 16 : i32, tt.pointer_range = 32 : i32}, %arg3: i32 {tt.divisibility = 16 : i32}, %arg4: i32 {tt.divisibility = 16 : i32}, %arg5: i32 {tt.divisibility = 16 : i32}, %arg6: i32 {tt.divisibility = 16 : i32}, %arg7: i32 {tt.divisibility = 16 : i32}, %arg8: i32 {tt.divisibility = 16 : i32}) attributes {noinline = false} { | |
%cst = arith.constant dense<0.000000e+00> : tensor<128x256xf32, #ttg.amd_mfma<{versionMajor = 3, versionMinor = 0, warpsPerCTA = [2, 4], instrShape = [32, 32], isTransposed = true}>> | |
%true = arith.constant true | |
%cst_0 = arith.constant dense<0.000000e+00> : tensor<64x256xf16, #ttg.blocked<{sizePerThread = [8, 1], thre |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import subprocess | |
import sys | |
from black.trans import defaultdict | |
test_file = "/home/mlevental/dev_projects/triton/test/TritonGPU/amd/amd-range-analysis.mlir" | |
cmnd = [ | |
"/home/mlevental/dev_projects/llvm-project/cmake-build-debug/bin/triton-opt", |
NewerOlder