Skip to content

Instantly share code, notes, and snippets.

#0 building with \"default\" instance using docker driver
#1 [internal] load build definition from Dockerfile
#1 transferring dockerfile: 7.35kB done
#1 DONE 0.0s
#2 [internal] load metadata for docker.io/rapidsai/miniforge-cuda:cuda13.1.0-base-ubuntu24.04-py3.13
#2 DONE 0.6s
#3 [internal] load .dockerignore
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "polars>=1.0.0",
# "numpy>=1.24",
# "rich>=13",
# "pyarrow>=14",
# ]
# ///
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
# SPDX-License-Identifier: Apache-2.0
"""
Analyze an Nsight Systems report (``.nsys-rep``) exported to SQLite.
Subcommands:
- ``summary`` — per IR node type: summed host NVTX time and attributed GPU
kernel time in the chosen NVTX domain (default ``cudf_polars``).
diff --git a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py
index f40889b68c..375e1dbebb 100644
--- a/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py
+++ b/python/cudf_polars/cudf_polars/experimental/benchmarks/utils.py
@@ -1569,9 +1569,9 @@ def run_polars_query(
for i in range(args.iterations):
if _HAS_STRUCTLOG and run_config.collect_traces:
- setup_logging(q_id, i)
+ setup_logging(q_id, i, run_config.run_id)
This file has been truncated, but you can view the full file.
import rapidsmpf
import pylibcudf as plc
import pyarrow as pa
import numpy as np
import rmm.mr
import rmm.pylibrmm.stream
import rapidsmpf.communicator.single
import rapidsmpf.shuffler
import rapidsmpf.buffer.resource
import rapidsmpf.buffer.buffer
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
--- ok.txt 2025-11-10 09:38:59.410995539 -0800
+++ error.txt 2025-11-10 09:39:15.465981068 -0800
@@ -20,7 +20,7 @@
aws-c-sdkutils 0.2.4 h7e655bb_2 conda-forge
aws-checksums 0.2.7 h7e655bb_3 conda-forge
aws-crt-cpp 0.35.0 h719b17a_2 conda-forge
-aws-sdk-cpp 1.11.606 h522d481_5 conda-forge
+aws-sdk-cpp 1.11.606 h522d481_6 conda-forge
aws-xray-sdk 2.15.0 pyhd8ed1ab_0 conda-forge
azure-core-cpp 1.16.1 h3a458e0_0 conda-forge
(cudf-polars-bench) root@gpu-h100-0161:/app# for query in {1..22}; do
> nsys profile \
> -o "/data/profiles/rapidsmpf.q$query.1k" -f true \
> --trace=nvtx,cuda \
> --nvtx-domain-exclude=CCCL,rapidsmpf,libkvikio \
> --cuda-memory-usage=true \
> python -m cudf_polars.experimental.benchmarks.pdsh \
> --executor="streaming" \
> --runtime="rapidsmpf" \
> --path="/data/tpch-rs/scale-1000" \
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Benchmark Results Explorer</title>
<link href="https://unpkg.com/tabulator-tables@6.2.5/dist/css/tabulator.min.css" rel="stylesheet">
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;