ckpt_id | gemm backend | batch_size | fuse | compile | compile_vae | quantization | sparsify | model_memory | inference_memory | time |
---|---|---|---|---|---|---|---|---|---|---|
black-forest-labs/FLUX.1-dev | N/A | 1 | False | False | False | fp8dqrow | False | 20.367 | 31.817 | 9.578 |
black-forest-labs/FLUX.1-dev | triton,aten | 1 | False | True | False | fp8dqrow | False | 20.367 | 31.817 | 4.165 |
black-forest-labs/FLUX.1-dev | cutlass,aten | 16 | False | True | False | fp8dqrow | False | 20.367 | 50.471 | 60.734 |
black-forest-labs/FLUX.1-dev | cutlass_no_fast_accum,aten | 16 | False | True | False | fp8dq | False | | | |
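The rows above correspond to different quantization/compile settings on FLUX.1-dev. Below is a minimal sketch of one such configuration (fp8dqrow quantization with a compiled transformer), assuming the diffusers `FluxPipeline` and `torchao.quantization` APIs; the prompt, step count, and timing details are illustrative assumptions, not the script that produced these numbers:

```python
# Hedged sketch of the "fp8dqrow + compile" configuration from the table above.
import time
import torch
from diffusers import FluxPipeline
from torchao.quantization import quantize_, float8_dynamic_activation_float8_weight, PerRow

pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
).to("cuda")

# "fp8dqrow": dynamic fp8 activations with row-wise fp8 weight scales.
quantize_(pipe.transformer, float8_dynamic_activation_float8_weight(granularity=PerRow()))

# "compile=True": compile the transformer; autotuning picks the GEMM kernels.
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)

pipe("a photo of a cat", num_inference_steps=28)  # warmup / compilation
torch.cuda.reset_peak_memory_stats()
torch.cuda.synchronize()
start = time.perf_counter()
pipe("a photo of a cat", num_inference_steps=28)
torch.cuda.synchronize()
print(f"time: {time.perf_counter() - start:.3f} s")
print(f"peak inference memory: {torch.cuda.max_memory_allocated() / 1e9:.3f} GB")
```

The `gemm backend` column appears to correspond to `torch._inductor.config.max_autotune_gemm_backends`, which restricts the backends (ATen, Triton, CUTLASS) that max-autotune considers.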
import torch
from torch import nn
from torch.nn import functional as F
from triton.testing import do_bench
import triton
import triton.language as tl
import torch._inductor.config as config
from torch._inductor.utils import fresh_inductor_cache
# torch._logging.set_logs(autotuning=True)
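These imports read like the preamble of a standalone GEMM benchmarking script. A minimal sketch of how they are typically combined, assuming the intent is to compare inductor's GEMM backends (shapes, dtype, and the backend list are assumptions):

```python
# Hypothetical continuation of the preamble above: time the same matmul with
# different inductor GEMM backends.
import torch
import torch._dynamo
import torch._inductor.config as config
from torch._inductor.utils import fresh_inductor_cache
from triton.testing import do_bench

a = torch.randn(4096, 4096, device="cuda", dtype=torch.bfloat16)
b = torch.randn(4096, 4096, device="cuda", dtype=torch.bfloat16)

def mm(x, y):
    return x @ y

for backends in ("ATEN", "TRITON", "CUTLASS"):
    torch._dynamo.reset()                      # drop previously compiled graphs
    with fresh_inductor_cache():               # start from an empty inductor cache
        config.max_autotune_gemm_backends = backends
        compiled = torch.compile(mm, mode="max-autotune")
        compiled(a, b)                         # trigger compilation + autotuning
        ms = do_bench(lambda: compiled(a, b))  # runtime in milliseconds
        print(f"{backends}: {ms:.3f} ms")
```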
from typing import overload
import torch
from torch._inductor.codegen.cuda.cutlass_utils import try_import_cutlass
try_import_cutlass()  # comment this out if you have cutlass installed
import cutlass
import types
import ast
import textwrap
import inspect
from cutlass.epilogue import relu
cutlass_fused_add_mm_relu_1dd8740c = async_compile.cuda(r'''
#include <exception>
#include <iostream>
#include <memory>
#include <random>
#include <vector>
#include "cute/tensor.hpp"
#include "cutlass/cutlass.h"
#include "cutlass/numeric_types.h"
from typing import overload
import torch
from torch._inductor.codegen.cuda.cutlass_utils import try_import_cutlass
try_import_cutlass()  # comment this out if you have cutlass installed
import cutlass
import types
import ast
import textwrap
import inspect
from cutlass.epilogue import relu
#include <exception>
#include <iostream>
#include <memory>
#include <random>
#include <vector>
#include "cute/tensor.hpp"
#include "cutlass/cutlass.h"
#include "cutlass/numeric_types.h"
#include "cutlass/tensor_ref.h"
--> TORCH_LOGS="+inductor" python test/inductor/test_cutlass_backend.py -k test_evt_fusions_basic_mul_shape0
[INFO]:compile_threads set to 32
[INFO]:Creating 'subprocess' pool with 32 workers
[INFO]:compile_threads set to 32
> /data/users/mlazos/pytorch/test/inductor/test_cutlass_backend.py(129)run_evt_test()
-> M, N = shape
(Pdb) c
[DEBUG]:TRACED GRAPH
===== BEFORE PRE GRAD =====
/data/users/mlazos/pytorch/torch/fx/_lazy_graph_module.py class GraphModule(torch.nn.Module):
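For reference, the test being run above exercises inductor's CUTLASS epilogue fusion (EVT): a GEMM followed by a pointwise op is compiled with the CUTLASS backend so the pointwise op can be fused into the GEMM epilogue. A minimal sketch of that pattern, with shapes and dtypes as assumptions (the actual test lives in test/inductor/test_cutlass_backend.py):

```python
# Hedged sketch of the pattern test_evt_fusions_basic_mul exercises: a matmul
# followed by an elementwise multiply, compiled with the CUTLASS GEMM backend
# so the multiply is a candidate for epilogue (EVT) fusion.
import torch
import torch._inductor.config as config

config.max_autotune_gemm_backends = "CUTLASS"
# config.cuda.cutlass_dir = "/path/to/cutlass"  # assumption: only needed if CUTLASS sources aren't found

def mm_mul(a, b, extra):
    return torch.mm(a, b) * extra  # pointwise op after the GEMM

a = torch.randn(1024, 512, device="cuda", dtype=torch.float16)
b = torch.randn(512, 2048, device="cuda", dtype=torch.float16)
extra = torch.randn(1024, 2048, device="cuda", dtype=torch.float16)

compiled = torch.compile(mm_mul, mode="max-autotune")
out = compiled(a, b, extra)
torch.testing.assert_close(out, mm_mul(a, b, extra), rtol=1e-2, atol=1e-2)
```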
from typing import overload
import torch
from torch._inductor.codegen.cuda.cutlass_utils import try_import_cutlass
try_import_cutlass()  # comment this out if you have cutlass installed
import cutlass
import types
import ast
import textwrap
import inspect
from cutlass.epilogue import relu
from typing import overload
import torch
from torch._inductor.codegen.cuda.cutlass_utils import try_import_cutlass
try_import_cutlass()  # comment this out if you have cutlass installed
import cutlass
import types
import ast
import textwrap
import inspect
from cutlass.epilogue import relu
using Accum = cutlass::epilogue::fusion::Sm90AccFetch;
using ElementD = cutlass::half_t;
using StrideD = cute::Stride<int64_t, cute::Int<1>, cute::Int<0>>;
using Bias = cutlass::epilogue::fusion::Sm90ColBroadcast<
    0 /*Stages*/, typename EpilogueDescriptor::TileShape, cutlass::half_t, cutlass::half_t,
    cute::Stride<cute::Int<1>, cute::Int<0>, cute::Int<0>>
>;