Created
October 13, 2025 03:29
-
-
Save jussker/b08d1db7f700caef8467acdf54b5d301 to your computer and use it in GitHub Desktop.
方便命令行调用llama.cpp计算临时文档的PPL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import argparse | |
| import json | |
| import os | |
| import re | |
| import subprocess | |
| import sys | |
| import tempfile | |
# Model path fallback: taken solely from the environment, deliberately with
# no built-in default (the value is None when PPL_MODEL_PATH is unset).
ENV_PPL_MODEL_PATH = os.getenv("PPL_MODEL_PATH")
def open_in_vscode_wait(path: str):
    """Run ``code --wait <path>`` and wait for the command to finish.

    Args:
        path: File to hand to the ``code`` command-line launcher.

    Returns:
        A ``(ok, error)`` tuple: ``(True, None)`` when the command exits
        successfully, otherwise ``(False, message)`` where ``message``
        explains that ``code`` is missing from PATH or that it exited with
        a non-zero status.
    """
    command = ["code", "--wait", path]
    try:
        subprocess.run(command, check=True)
    except FileNotFoundError:
        # The `code` launcher itself could not be located on PATH.
        missing_cli = "未找到 'code' 命令,请在 VS Code 中安装 Shell 命令(Cmd+Shift+P -> Shell Command: Install 'code' command in PATH)。"
        return False, missing_cli
    except subprocess.CalledProcessError as exc:
        # check=True turns a non-zero exit status into this exception.
        return False, f"打开 VS Code 失败: {exc}"
    return True, None
def _model_flag(model_path: str) -> str:
    """Return the llama.cpp option that matches *model_path*."""
    # llama.cpp: `-m/--model` expects a local file name, while
    # `-mu/--model-url` expects a download URL. Anything carrying a URL
    # scheme keeps the original `-mu`; plain paths must use `-m`.
    return "-mu" if "://" in model_path else "-m"


def run_perplexity(model_path: str, file_path: str):
    """Run ``llama-perplexity`` on *file_path* and extract the final PPL.

    Args:
        model_path: Local GGUF file path, or a model download URL
            (recognised by the presence of ``://``).
        file_path: Text file whose perplexity is computed.

    Returns:
        A ``(ppl, output, error)`` tuple:

        * ``(None, None, message)`` when the ``llama-perplexity``
          executable cannot be found;
        * ``(None, output, message)`` when it exits non-zero without
          printing a "Final estimate" line;
        * ``(parse_ppl(output), output, None)`` otherwise, where ``output``
          is stdout and stderr joined by a newline.
    """
    command = [
        "llama-perplexity",
        _model_flag(model_path),
        model_path,
        "-f",
        file_path,
    ]
    try:
        proc = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            # Tool output may contain bytes that are invalid in the locale
            # encoding; replace them instead of raising UnicodeDecodeError.
            errors="replace",
        )
    except FileNotFoundError:
        return (
            None,
            None,
            "未找到命令 'llama-perplexity',请确认llama.cpp已安装并在 PATH 中。",
        )

    # Both streams are searched: the estimate may appear on either one.
    output = (proc.stdout or "") + "\n" + (proc.stderr or "")
    # A non-zero exit is tolerated as long as the final estimate was printed.
    if proc.returncode != 0 and "Final estimate" not in output:
        return None, output, f"llama-perplexity 退出码 {proc.returncode}"
    return parse_ppl(output), output, None
def parse_ppl(output: str):
    """Extract the value from a ``Final estimate: PPL = <number>`` line.

    Args:
        output: Text to search for the final estimate.

    Returns:
        The first matching perplexity as a ``float``, or ``None`` when no
        plain decimal number follows ``Final estimate: PPL =``.
    """
    found = re.search(r"Final estimate:\s*PPL\s*=\s*([0-9]+(?:\.[0-9]+)?)", output)
    # The capture group only admits digits with an optional fraction, so the
    # float() conversion cannot fail once a match exists.
    return float(found.group(1)) if found is not None else None
def judge_ppl(ppl: float):
    """Classify a perplexity value against the fixed 15.0-35.0 band.

    Args:
        ppl: Perplexity to classify.

    Returns:
        A dict with ``is_human`` and ``label`` keys: ``human`` inside the
        inclusive band, ``ai_like`` below it, ``low_quality`` otherwise.
    """
    lower, upper = 15.0, 35.0
    if lower <= ppl <= upper:
        return {"is_human": True, "label": "human"}
    # Everything that is not strictly below the band (including NaN, for
    # which every comparison is false) is reported as low quality.
    label = "ai_like" if ppl < lower else "low_quality"
    return {"is_human": False, "label": label}
def main():
    """Command-line entry point: compute a file's PPL and print a JSON verdict.

    Every outcome is printed to stdout as a single JSON object and the
    process then exits:

    * 64 - no model path from ``--model`` or ``PPL_MODEL_PATH``;
    * 2  - ``--no-editor`` without ``--file``, or the PPL could not be
      obtained from ``llama-perplexity``;
    * 1  - VS Code could not be opened;
    * 0  - success; the object holds ``ppl`` plus the ``judge_ppl`` fields.

    Without ``--file`` a temporary ``ppl_*.txt`` file is created, opened in
    VS Code for editing, and removed again before exit.
    """

    def emit(payload):
        # All results and errors share one JSON-on-stdout channel.
        print(json.dumps(payload, ensure_ascii=False))

    cli = argparse.ArgumentParser(
        description="计算文本 PPL,并根据阈值判断内容类型。"
    )
    cli.add_argument(
        "-f",
        "--file",
        help="输入文件的绝对路径;若未提供则创建临时文件并在 VS Code 中打开。",
    )
    cli.add_argument(
        "--model", help="模型 GGUF 路径,优先于环境变量 PPL_MODEL_PATH。"
    )
    cli.add_argument(
        "--no-editor",
        action="store_true",
        help="不打开 VS Code(建议与 --file 一起使用)。",
    )
    opts = cli.parse_args()

    # The command-line value wins over the environment variable.
    model_path = opts.model or ENV_PPL_MODEL_PATH
    if not model_path:
        emit(
            {
                "error": "未设置模型路径:请设置环境变量 PPL_MODEL_PATH 或通过 --model 指定。",
                "hint": '示例:export PPL_MODEL_PATH="/path/to/model.gguf"',
            }
        )
        sys.exit(64)  # EX_USAGE

    # Without an editor there is no way to fill a temp file, so an explicit
    # input file is mandatory.
    if opts.no_editor and not opts.file:
        emit({"error": "使用 --no-editor 时必须提供 --file 指定输入文件路径。"})
        sys.exit(2)

    created_tmp = not opts.file
    if created_tmp:
        # delete=False: the file must outlive this handle so the editor and
        # llama-perplexity can open it by name.
        with tempfile.NamedTemporaryFile(
            prefix="ppl_", suffix=".txt", delete=False
        ) as handle:
            target_path = handle.name
    else:
        target_path = opts.file

    try:
        if not opts.no_editor:
            opened, problem = open_in_vscode_wait(target_path)
            if not opened:
                emit({"error": problem})
                sys.exit(1)

        ppl, raw_output, failure = run_perplexity(model_path, target_path)
        if ppl is None:
            emit(
                {
                    "error": failure or "未能解析 PPL",
                    "raw": (raw_output or "").strip(),
                }
            )
            sys.exit(2)

        emit({"ppl": ppl, **judge_ppl(ppl)})
        sys.exit(0)
    finally:
        # Runs even when sys.exit() raised above; cleanup is best-effort.
        if created_tmp:
            try:
                os.unlink(target_path)
            except Exception:
                pass
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment