#!/usr/bin/env bash
# Strict mode: abort on unhandled errors (-e), on unset variables (-u), and
# when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Reproduce Kernel GPU h264_nvenc capability failures.
#
# Prereqs:
#   - kernel CLI installed and authenticated
#   - KERNEL_API_KEY exported, or `kernel login` completed
#   - python3 available locally (all JSON handling in this script goes through
#     python3; jq is optional and only handy for inspecting the JSONL output)
#
# Usage:
#   KERNEL_API_KEY=... ./nvenc-smoke-repro.sh
#   SESSION_COUNT=20 CONCURRENCY=1 ./nvenc-smoke-repro.sh
#
# Notes:
#   - This creates GPU browser sessions, uploads a small ffmpeg smoke script,
#     runs it inside the VM, prints JSONL results, and deletes the sessions
#     (unless KEEP_SESSIONS=1).
#   - It does not call the Kernel replay API. It directly exercises:
#       ffmpeg -f x11grab ... -c:v h264_nvenc

# Tunables, each overridable from the environment.
SESSION_COUNT="${SESSION_COUNT:-20}"      # number of GPU sessions to create
CONCURRENCY="${CONCURRENCY:-1}"           # parallel workers (passed to xargs -P)
TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-900}" # passed to `kernel browsers create --timeout`
KEEP_SESSIONS="${KEEP_SESSIONS:-0}"       # set to 1 to skip deleting sessions

# Reject non-numeric knobs up front: a bad SESSION_COUNT would otherwise make
# `seq` fail inside a command substitution and silently run zero sessions.
for _knob in SESSION_COUNT CONCURRENCY TIMEOUT_SECONDS; do
  if [[ ! "${!_knob}" =~ ^[0-9]+$ ]]; then
    printf '%s must be a non-negative integer, got: %s\n' "$_knob" "${!_knob}" >&2
    exit 2
  fi
done
unset _knob

# Scratch directory for the generated inner script, and the results file.
tmpdir="$(mktemp -d)"
smoke_script="$tmpdir/nvenc-smoke-inner.sh"
out_jsonl="${OUT_JSONL:-$PWD/nvenc-smoke-results-$(date -u +%Y%m%dT%H%M%SZ).jsonl}"

# Remove the scratch directory that holds the generated smoke script.
# Installed as the EXIT trap so it runs on every exit path.
cleanup() {
  # Skip when the path is empty/unset so this never expands to `rm -rf ""`.
  if [[ -n "${tmpdir:-}" ]]; then
    rm -rf -- "$tmpdir"
  fi
}
trap cleanup EXIT

# Generate the script that runs inside the VM. The heredoc delimiter is quoted,
# so nothing below is expanded locally; it is written out verbatim.
cat >"$smoke_script" <<'INNER'
#!/bin/bash
set -euo pipefail

# Environment fingerprint, one KEY=value line each. Every probe is best-effort
# (`|| true`) so a missing tool or variable yields an empty value instead of
# aborting the script before the encode tests run.
echo "SMOKE_TS=$(date -Is)"
echo "INSTANCE_NAME=$(printenv INSTANCE_NAME 2>/dev/null || printenv INST_NAME 2>/dev/null || true)"
echo "METRO_NAME=$(printenv METRO_NAME 2>/dev/null || true)"
echo "KERNEL_VERSION=$(printenv KERNEL_VERSION 2>/dev/null || true)"
echo "VIDEO_ENCODER=$(printenv VIDEO_ENCODER 2>/dev/null || true)"
echo "KERNEL_IMAGES_API_VIDEO_ENCODER=$(printenv KERNEL_IMAGES_API_VIDEO_ENCODER 2>/dev/null || true)"
echo "NVIDIA_SMI=$(nvidia-smi --query-gpu=name,memory.used,memory.total,utilization.gpu,utilization.memory --format=csv,noheader,nounits 2>/dev/null | head -1 || true)"
echo "NVIDIA_LICENSE_STATUS=$(nvidia-smi -q 2>/dev/null | awk -F: '/License Status/ {gsub(/^[ \t]+|[ \t]+$/, "", $2); print $2; exit}' || true)"
echo "XRANDR_CURRENT=$(DISPLAY=:1 xrandr 2>/dev/null | awk '/ connected primary/ {print $3; exit}' || true)"
echo "NVIDIA_DEVICES=$(ls /dev/nvidia* 2>/dev/null | tr '\n' ' ' || true)"
echo "NVENC_ENCODERS=$(ffmpeg -hide_banner -encoders 2>/dev/null | grep -E 'h264_nvenc|hevc_nvenc|av1_nvenc' | awk '{print $2}' | tr '\n' ',' || true)"

mkdir -p /tmp/nvenc-smoke

# Run one x11grab -> encoder capture and report the outcome.
# Arguments: $1 - test name (used for the output/log file names)
#            $2.. - encoder options spliced into the ffmpeg command line
# Outputs:   "RESULT name=<name> exit=<code> size=<bytes>" followed by the
#            first 80 lines of the ffmpeg log between LOG_BEGIN/LOG_END markers
run_test() {
  local name="$1"
  shift
  local out="/tmp/nvenc-smoke/${name}.mp4"
  local log="/tmp/nvenc-smoke/${name}.log"
  rm -f "$out" "$log"

  # A failing ffmpeg is the result being measured, so errexit is switched off
  # around it and the exit code is recorded instead.
  set +e
  timeout 20s ffmpeg -hide_banner -loglevel info -y \
    -f x11grab -framerate 20 -i :1 \
    -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" \
    "$@" \
    -use_wallclock_as_timestamps 1 \
    -reset_timestamps 1 \
    -avoid_negative_ts make_zero \
    -movflags +frag_keyframe+empty_moov \
    -frag_duration 2000000 \
    -t 6 \
    "$out" >"$log" 2>&1
  local code=$?
  set -e

  local size=0
  if [ -f "$out" ]; then
    size=$(stat -c%s "$out")
  fi

  echo "RESULT name=$name exit=$code size=$size"
  echo "LOG_BEGIN $name"
  sed -n '1,80p' "$log" || true
  echo "LOG_END $name"
}

run_test baseline \
  -c:v h264_nvenc -profile:v high -pix_fmt yuv420p \
  -rc vbr -cq 20 -preset p4 -spatial-aq 1 -temporal-aq 1 \
  -b:v 10000k -maxrate 16000k

run_test no_aq \
  -c:v h264_nvenc -profile:v high -pix_fmt yuv420p \
  -rc vbr -cq 20 -preset p4 \
  -b:v 10000k -maxrate 16000k

run_test p1_lowrate \
  -c:v h264_nvenc -profile:v high -pix_fmt yuv420p \
  -rc vbr -cq 24 -preset p1 \
  -b:v 4000k -maxrate 6000k

run_test cbr_lowrate \
  -c:v h264_nvenc -profile:v high -pix_fmt yuv420p \
  -rc cbr -preset p1 \
  -b:v 4000k -maxrate 4000k -bufsize 8000k
INNER
chmod +x "$smoke_script"

#######################################
# Pull the JSON object out of CLI output that may be surrounded by noise
# (callers capture stdout and stderr together).
# Arguments: $1 - raw CLI output
# Outputs:   the object re-serialized as single-line JSON on stdout
# Returns:   non-zero (message on stderr) when no JSON object can be decoded
#######################################
extract_json_object() {
  # The text goes through stdin rather than argv: a single exec argument is
  # size-limited, and captured CLI output can be large.
  python3 -c '
import json, sys

text = sys.stdin.read()
decoder = json.JSONDecoder()

# Try every "{" as a possible start and keep the first span that decodes to an
# object. Unlike slicing first-"{" to last-"}", this survives stray braces in
# log noise before or after the document.
pos = text.find("{")
while pos != -1:
    try:
        obj, _ = decoder.raw_decode(text[pos:])
    except ValueError:
        obj = None
    if isinstance(obj, dict):
        print(json.dumps(obj))
        raise SystemExit(0)
    pos = text.find("{", pos + 1)

raise SystemExit("no JSON object found in CLI output")
' <<<"$1"
}

#######################################
# Turn the JSON returned by `kernel browsers process exec` into one result row.
# Arguments: $1 - session id
#            $2 - exec response JSON; its "stdout_b64"/"stderr_b64" fields are
#                 base64-decoded (a missing or null field decodes to "")
# Outputs:   single-line JSON with session_id, ok, the environment values
#            echoed by the inner script, per-test exit/size, a snippet of known
#            failure lines, and stderr (both capped at 2000 characters)
#######################################
parse_stdout_json() {
  # The payload is fed through stdin: it carries base64 logs and can exceed
  # the per-argument exec limit if passed on the command line.
  python3 -c '
import base64, json, re, sys

session_id = sys.argv[1]
payload = json.load(sys.stdin)


def decode(field):
    return base64.b64decode(payload.get(field) or "").decode("utf-8", "replace")


stdout = decode("stdout_b64")
stderr = decode("stderr_b64")
lines = stdout.splitlines()

# One entry per "RESULT name=... exit=... size=..." line from the inner script.
tests = {}
for line in lines:
    m = re.match(r"RESULT name=(\S+) exit=(\d+) size=(\d+)", line)
    if m:
        tests[m.group(1)] = {"exit": int(m.group(2)), "size": int(m.group(3))}


def value(key):
    """Return the text after the first KEY= line, or None if there is none."""
    prefix = key + "="
    for line in lines:
        if line.startswith(prefix):
            return line[len(prefix):]
    return None


FAILURE_MARKERS = (
    "OpenEncodeSessionEx failed",
    "No capable devices found",
    "CUDA_ERROR_NO_DEVICE",
    "Nothing was written",
)

# The session counts as ok when at least one variant produced a non-empty file.
ok = any(t["exit"] == 0 and t["size"] > 0 for t in tests.values())

print(json.dumps({
    "session_id": session_id,
    "ok": ok,
    "instance": value("INSTANCE_NAME"),
    "metro": value("METRO_NAME"),
    "nvidia": value("NVIDIA_SMI"),
    "license": value("NVIDIA_LICENSE_STATUS"),
    "xrandr": value("XRANDR_CURRENT"),
    "tests": tests,
    "failure_snippet": "\n".join(
        line for line in lines
        if any(marker in line for marker in FAILURE_MARKERS)
    )[:2000],
    "stderr": stderr[:2000],
}))
' "$1" <<<"$2"
}

#######################################
# Run the smoke test in one fresh GPU browser session and print one JSONL row.
# Globals:   TIMEOUT_SECONDS, KEEP_SESSIONS, smoke_script (read)
# Arguments: $1 - run index, recorded as "run" in the row
# Outputs:   on success, the parsed result plus "run" and "browser" fields;
#            on failure, {"run", "session_id", "ok": false, "error", "stage",
#            "detail"} where stage is create|upload|exec|parse and detail is
#            the first 2000 characters of the relevant CLI output, if any
# Returns:   always 0, so one bad session never aborts the whole sweep
#######################################
run_one() {
  local idx="$1"
  local session_id="" create_raw="" create_json=""
  local exec_raw="" exec_json="" parsed=""
  local stage="create" detail="" failed=1

  # Single-pass loop used as a "break on first failure" block. Each step is
  # checked explicitly because bash ignores errexit on the left-hand side of
  # `||`; a `{ ...; } || fallback` group would keep running later steps
  # (upload/exec with an empty session id) after an early failure.
  while :; do
    # The CLI exit status is deliberately not the gate for create/exec:
    # whether a JSON object can be extracted from the output is.
    create_raw="$(kernel browsers create --gpu --kiosk --timeout "$TIMEOUT_SECONDS" --no-color -o json 2>&1 || true)"
    detail="$create_raw"
    create_json="$(extract_json_object "$create_raw")" || break
    session_id="$(python3 -c '
import json, sys
sid = json.loads(sys.argv[1]).get("session_id")
if not sid:
    raise SystemExit("create response has no session_id")
print(sid)
' "$create_json")" || break

    stage="upload"
    detail=""
    kernel browsers fs write-file "$session_id" \
      --path /tmp/nvenc-smoke-inner.sh \
      --source "$smoke_script" \
      --mode 0755 >/dev/null || break

    stage="exec"
    exec_raw="$(kernel browsers process exec "$session_id" --timeout 180 --no-color -o json -- /tmp/nvenc-smoke-inner.sh 2>&1 || true)"
    detail="$exec_raw"
    exec_json="$(extract_json_object "$exec_raw")" || break

    stage="parse"
    parsed="$(parse_stdout_json "$session_id" "$exec_json")" || break
    python3 -c '
import json, sys
idx = int(sys.argv[1])
browser = json.loads(sys.argv[2])
parsed = json.loads(sys.argv[3])
parsed["run"] = idx
parsed["browser"] = {
    key: browser.get(key)
    for key in ("session_id", "gpu", "browser_live_view_url", "created_at")
}
print(json.dumps(parsed))
' "$idx" "$create_json" "$parsed" || break

    failed=0
    break
  done

  if [[ "$failed" != "0" ]]; then
    # detail is truncated here, before it becomes an exec argument.
    python3 -c '
import json, sys
run, session_id, stage, detail = sys.argv[1:5]
print(json.dumps({
    "run": int(run),
    "session_id": session_id or None,
    "ok": False,
    "error": "test failed before parse",
    "stage": stage,
    "detail": detail or None,
}))
' "$idx" "$session_id" "$stage" "${detail:0:2000}" || true
  fi

  # Best-effort teardown; a failed delete must not turn into a failed run.
  if [[ -n "$session_id" && "$KEEP_SESSIONS" != "1" ]]; then
    kernel browsers delete "$session_id" >/dev/null 2>&1 || true
  fi
  return 0
}

# The parallel path runs run_one in child bash processes started by xargs, so
# the functions and the variables they read must be exported.
export -f run_one extract_json_object parse_stdout_json
export smoke_script TIMEOUT_SECONDS KEEP_SESSIONS

echo "Writing JSONL results to $out_jsonl" >&2
if [ "$CONCURRENCY" = "1" ]; then
  run_index=1
  while [ "$run_index" -le "$SESSION_COUNT" ]; do
    run_one "$run_index" | tee -a "$out_jsonl"
    run_index=$((run_index + 1))
  done
else
  # Plain `bash -c`, not a login shell: login profiles may print to stdout
  # (which would corrupt the JSONL stream) and the environment is inherited
  # from this process anyway.
  seq 1 "$SESSION_COUNT" |
    xargs -P "$CONCURRENCY" -I{} bash -c 'run_one "$@"' _ {} |
    tee -a "$out_jsonl"
fi

# Print a summary of the results file. "tested" are rows with at least one
# parsed test result; "errored" are rows without any (the run failed before
# producing results); "malformed_lines" are lines that are not JSON objects.
python3 - "$out_jsonl" <<'PY'
import json, sys

path = sys.argv[1]
rows = []
malformed = 0
try:
    with open(path) as fh:
        for line in fh:
            if not line.strip():
                continue
            try:
                row = json.loads(line)
            except ValueError:
                # Keep going: one bad line must not discard the whole summary.
                malformed += 1
                continue
            if isinstance(row, dict):
                rows.append(row)
            else:
                malformed += 1
except FileNotFoundError:
    # Nothing was written (e.g. SESSION_COUNT=0); report an empty summary.
    pass

tested = [r for r in rows if r.get("tests")]
print(json.dumps({
    "results_file": path,
    "rows": len(rows),
    "tested": len(tested),
    "ok": sum(1 for r in tested if r.get("ok")),
    "failed": sum(1 for r in tested if not r.get("ok")),
    "errored": len(rows) - len(tested),
    "malformed_lines": malformed,
}, indent=2))
PY