April 11, 2026 10:30 · April 7, 2026 11:50 · January 27, 2026 10:10 · July 2, 2025 12:56 · February 9, 2025 13:13 · September 13, 2025 06:23
 from __future__ import annotations

 import argparse
 import base64
 import json
 import mimetypes
 import sys
 from pathlib import Path
 from typing import Iterable
 from urllib import error, parse, request
 """
 Gemma 4 31B Dense - Minimal Pure PyTorch Implementation (Text-only, Inference-only)

 Architecture reference: huggingface/transformers modeling_gemma4.py
 Target model: google/gemma-4-31B-it (or google/gemma-4-31B)

 Design goals:
  - PyTorch only, no transformers dependency
  - Gemma 4 31B 固定 (config値をハードコード可)
  - 推論時の実験 (レイヤー繰り返し、logit調査等) がしやすい構造
 diff --git a/csrc/fused/fused.cu b/csrc/fused/fused.cu
 index fb8b9f1..571a9e6 100644
 --- a/csrc/fused/fused.cu
 +++ b/csrc/fused/fused.cu
 @@ -14,6 +14,12 @@
  * limitations under the License.
  */
 
 +#include <cuda/barrier>
 +#include <cuda/pipeline>
 # client_v4.py

 """
 Message Box MCP Client

 インストール方法等はserver_v4.pyを参照。

 ## 使用法

 メッセージ取得:
 # ライセンスは Apache License 2.0 です。
 # お勧めのオプションは：
 # --flash_attn --q8_kv_cache --n_gpu_layers <GPUのVRAMに応じて> --n_ctx <VRAMとモデルのcontext lengthに応じて> 

 import argparse
 import random
 import msvcrt  # Windowsの場合
 import toml
 from typing import List, Optional, Union, Iterator
 # License: Apache 2.0

 from typing import Dict, Optional
 import struct
 import json

 import numpy as np
 import torch
 # Apache License 2.0
 # 使用法は gist のコメントを見てください
 import time
 import argparse
 import os
 import json
 import tomli
 import traceback
 from typing import Any, Dict, List, Optional, Union, Iterator
 # Apache License 2.0
 # 使用法は gist のコメントを見てください
 import argparse
 import importlib
 import json
 import os
 import random
 import time
 import traceback
 import tomli
 # 仮に logs に入れたら以下のように指定
 # --lr_scheduler_type logs.stepwise_linear_decay_lr_scheduler.get_stepwise_linear_decay_lr_scheduler 
 # --lr_scheduler_args "step_a=50" "step_b=80" "factor_1=1.0" "factor_2=0.1"
 #
 # step_a までの学習率： 指定した learning_rate * factor_1
 # step_a から step_b まで： 線形に減少（増加）
 # step_b からの学習率： 指定した learning_rate * factor_2

 from torch.optim.lr_scheduler import LambdaLR
 import argparse
 import csv
 import glob
 import os
 from pathlib import Path

 import cv2
 import numpy as np
 import torch
 from PIL import Image
	from __future__ import annotations

	import argparse
	import base64
	import json
	import mimetypes
	import sys
	from pathlib import Path
	from typing import Iterable
	from urllib import error, parse, request
	"""
	Gemma 4 31B Dense - Minimal Pure PyTorch Implementation (Text-only, Inference-only)

	Architecture reference: huggingface/transformers modeling_gemma4.py
	Target model: google/gemma-4-31B-it (or google/gemma-4-31B)

	Design goals:
	- PyTorch only, no transformers dependency
	- Gemma 4 31B 固定 (config値をハードコード可)
	- 推論時の実験 (レイヤー繰り返し、logit調査等) がしやすい構造
	diff --git a/csrc/fused/fused.cu b/csrc/fused/fused.cu
	index fb8b9f1..571a9e6 100644
	--- a/csrc/fused/fused.cu
	+++ b/csrc/fused/fused.cu
	@@ -14,6 +14,12 @@
	* limitations under the License.
	*/

	+#include <cuda/barrier>
	+#include <cuda/pipeline>
	# client_v4.py

	"""
	Message Box MCP Client

	インストール方法等はserver_v4.pyを参照。

	## 使用法

	メッセージ取得:
	# ライセンスは Apache License 2.0 です。
	# お勧めのオプションは：
	# --flash_attn --q8_kv_cache --n_gpu_layers <GPUのVRAMに応じて> --n_ctx <VRAMとモデルのcontext lengthに応じて>

	import argparse
	import random
	import msvcrt # Windowsの場合
	import toml
	from typing import List, Optional, Union, Iterator
	# License: Apache 2.0

	from typing import Dict, Optional
	import struct
	import json

	import numpy as np
	import torch
	# Apache License 2.0
	# 使用法は gist のコメントを見てください
	import time
	import argparse
	import os
	import json
	import tomli
	import traceback
	from typing import Any, Dict, List, Optional, Union, Iterator
	# 仮に logs に入れたら以下のように指定
	# --lr_scheduler_type logs.stepwise_linear_decay_lr_scheduler.get_stepwise_linear_decay_lr_scheduler
	# --lr_scheduler_args "step_a=50" "step_b=80" "factor_1=1.0" "factor_2=0.1"
	#
	# step_a までの学習率：指定した learning_rate * factor_1
	# step_a から step_b まで：線形に減少（増加）
	# step_b からの学習率：指定した learning_rate * factor_2

	from torch.optim.lr_scheduler import LambdaLR
	import argparse
	import csv
	import glob
	import os
	from pathlib import Path

	import cv2
	import numpy as np
	import torch
	from PIL import Image