Skip to content

Instantly share code, notes, and snippets.

@taross-f
Created July 26, 2025 16:03
Show Gist options
  • Select an option

  • Save taross-f/1a5333471ef0a64fa92d7d6fd7c5a632 to your computer and use it in GitHub Desktop.

Select an option

Save taross-f/1a5333471ef0a64fa92d7d6fd7c5a632 to your computer and use it in GitHub Desktop.
ai usage
#!/usr/bin/env python3
"""
ローカルgitリポジトリのAI生成コード比率分析ツール
"""
import subprocess
import re
import json
import logging
from datetime import datetime, timezone
from typing import Dict, List, Tuple, Optional
import argparse
class LocalRepoAnalyzer:
    """Estimate how much of a local git repository was committed by AI tools.

    Each commit returned by ``git log --all`` is classified as
    ``claude_code``, ``devin`` or ``human`` from its message and author
    e-mail, and the added/deleted line counts reported by
    ``git show --numstat`` are summed per category.
    """

    # `git log` output is delimited with the ASCII record/unit separator
    # control characters instead of '|' and newlines, so pipes or line breaks
    # inside author names, subjects and bodies cannot be mistaken for field
    # or commit boundaries.
    _RECORD_SEP = '\x1e'
    _FIELD_SEP = '\x1f'
    _LOG_FORMAT = '%x1e%H%x1f%an%x1f%ae%x1f%ad%x1f%s%x1f%b'
    _COMMIT_FIELDS = ('sha', 'author', 'email', 'date', 'subject', 'body')

    # Result buckets, in the order they are reported.
    _CATEGORIES = ('claude_code', 'devin', 'human')

    # Phrases counted by the Devin-style heuristic.
    _DEVIN_PHRASES = (
        'this changes from',
        'makes it easier',
        'maintains backward compatibility',
        'modified',
        'improved',
        'updated',
    )

    # Report layout limits.
    _MAX_DETAILS = 10
    _SUBJECT_WIDTH = 50

    def __init__(self, repo_path: str, since: Optional[str] = None, until: Optional[str] = None):
        """Store the repository location and the optional date window.

        Args:
            repo_path: Path passed to ``git -C``.
            since: Optional value forwarded to ``git log --since``.
            until: Optional value forwarded to ``git log --until``.
        """
        self.repo_path = repo_path
        self.since = since
        self.until = until
        self.logger = logging.getLogger(__name__)
        # Regular expressions that mark a commit message as AI generated.
        self.ai_patterns = {
            'claude_code': [
                r'🤖 Generated with \[Claude Code\]',
                r'Co-Authored-By: Claude <noreply@anthropic\.com>',
                r'Generated with Claude Code',
            ],
            'devin': [
                r'Generated by Devin',
                r'Co-authored-by: Devin',
                r'Devin.*generated',
            ]
        }
        # E-mail addresses used by the Devin AI integration bot.
        self.devin_emails = [
            '158243242+devin-ai-integration[bot]@users.noreply.github.com',
            'devin-ai-integration[bot]@users.noreply.github.com'
        ]

    def get_commits(self) -> List[Dict]:
        """Return every commit of the repository as a list of dicts.

        Each dict has the keys ``sha``, ``author``, ``email``, ``date``,
        ``subject`` and ``body``.

        Raises:
            RuntimeError: If the ``git log`` command exits with a non-zero
                status.
        """
        cmd = [
            'git', '-C', self.repo_path, 'log',
            f'--pretty=format:{self._LOG_FORMAT}', '--date=iso', '--all',
        ]
        # Restrict the date range only when the caller asked for it.
        if self.since:
            cmd.extend(['--since', self.since])
        if self.until:
            cmd.extend(['--until', self.until])

        # Decode as UTF-8 explicitly; the locale default may not be able to
        # decode commit messages and would abort the whole analysis.
        result = subprocess.run(
            cmd, capture_output=True, encoding='utf-8', errors='replace'
        )
        if result.returncode != 0:
            self.logger.error("Git command failed: %s", result.stderr)
            raise RuntimeError(f"Git command failed: {result.stderr}")
        return self._parse_log(result.stdout)

    @classmethod
    def _parse_log(cls, output: str) -> List[Dict]:
        """Split separator-delimited ``git log`` output into commit dicts."""
        commits = []
        for record in output.split(cls._RECORD_SEP):
            if not record.strip():
                # Text before the first record separator, or an empty record.
                continue
            values = record.split(cls._FIELD_SEP, len(cls._COMMIT_FIELDS) - 1)
            # Pad a truncated record so every commit exposes all keys.
            values += [''] * (len(cls._COMMIT_FIELDS) - len(values))
            commit = dict(zip(cls._COMMIT_FIELDS, values))
            # Drop the newlines git emits after the body / between commits.
            commit['body'] = commit['body'].strip()
            commits.append(commit)
        return commits

    def classify_commit(self, commit: Dict) -> str:
        """Classify a commit dict as ``'claude_code'``, ``'devin'`` or ``'human'``.

        Checks run in this order: Devin bot e-mail, Claude Code message
        patterns, Devin message patterns, Devin-style message structure, and
        finally an ``anthropic.com`` author e-mail.
        """
        full_message = f"{commit['subject']}\n{commit.get('body', '')}"
        email = commit.get('email') or ''
        flags = re.IGNORECASE | re.MULTILINE

        if email in self.devin_emails:
            return 'devin'
        if any(re.search(p, full_message, flags) for p in self.ai_patterns['claude_code']):
            return 'claude_code'
        if any(re.search(p, full_message, flags) for p in self.ai_patterns['devin']):
            return 'devin'
        if self._is_devin_style_commit(full_message):
            return 'devin'
        if 'anthropic.com' in email:
            return 'claude_code'
        return 'human'

    def _is_devin_style_commit(self, message: str) -> bool:
        """Return True when the message looks like a Devin-written commit.

        The heuristic requires at least four lines, at least three bullet
        lines starting with ``- ``, and at least two of the phrases in
        ``_DEVIN_PHRASES`` (case-insensitive).
        """
        lines = message.split('\n')
        if len(lines) < 4:
            return False
        bullet_lines = sum(1 for line in lines if line.strip().startswith('- '))
        if bullet_lines < 3:
            return False
        message_lower = message.lower()
        phrase_count = sum(1 for phrase in self._DEVIN_PHRASES if phrase in message_lower)
        return phrase_count >= 2

    def get_commit_stats(self, commit_sha: str) -> Tuple[int, int]:
        """Return ``(additions, deletions)`` for one commit.

        Returns ``(0, 0)`` and logs a warning when ``git show`` fails.
        """
        # `--format=` suppresses the commit header and message so that only
        # numstat rows are parsed (a message line containing tabs could
        # otherwise be counted as a stat row).
        cmd = ['git', '-C', self.repo_path, 'show', '--numstat', '--format=', commit_sha]
        result = subprocess.run(
            cmd, capture_output=True, encoding='utf-8', errors='replace'
        )
        if result.returncode != 0:
            self.logger.warning(
                "Failed to get stats for commit %s: %s", commit_sha, result.stderr
            )
            return 0, 0
        return self._parse_numstat(result.stdout)

    @staticmethod
    def _parse_numstat(output: str) -> Tuple[int, int]:
        """Sum the added/deleted columns of ``--numstat`` output."""
        additions = 0
        deletions = 0
        for line in output.splitlines():
            parts = line.split('\t')
            if len(parts) < 3:
                # Not an "<added>\t<deleted>\t<path>" row.
                continue
            try:
                # '-' is what git prints for binary files; count it as zero.
                added = 0 if parts[0] == '-' else int(parts[0])
                deleted = 0 if parts[1] == '-' else int(parts[1])
            except ValueError:
                # Skip the row entirely so a half-parsed row adds nothing.
                continue
            additions += added
            deletions += deleted
        return additions, deletions

    def analyze(self) -> Dict:
        """Classify every commit and aggregate per-category statistics.

        Returns:
            A JSON-serialisable dict with ``total_commits``, one
            ``{'commits', 'additions', 'deletions'}`` bucket per category,
            ``analysis_date``, ``repository``, ``period`` and
            ``commit_details`` (one entry per AI-classified commit).
        """
        period_parts = []
        if self.since:
            period_parts.append(f"since {self.since}")
        if self.until:
            period_parts.append(f"until {self.until}")
        period_str = f" ({', '.join(period_parts)})" if period_parts else ""
        self.logger.info("Analyzing %s%s...", self.repo_path, period_str)

        commits = self.get_commits()
        self.logger.info("Found %d commits", len(commits))

        results = {
            'total_commits': len(commits),
            'claude_code': {'commits': 0, 'additions': 0, 'deletions': 0},
            'devin': {'commits': 0, 'additions': 0, 'deletions': 0},
            'human': {'commits': 0, 'additions': 0, 'deletions': 0},
            'analysis_date': datetime.now().isoformat(),
            'repository': self.repo_path,
            'period': {
                'since': self.since,
                'until': self.until
            },
            'commit_details': []
        }

        for i, commit in enumerate(commits):
            if i % 50 == 0:
                self.logger.info("Processing commit %d/%d", i + 1, len(commits))

            commit_type = self.classify_commit(commit)
            additions, deletions = self.get_commit_stats(commit['sha'])

            bucket = results[commit_type]
            bucket['commits'] += 1
            bucket['additions'] += additions
            bucket['deletions'] += deletions

            # Keep details of AI-classified commits for debugging/reporting.
            if commit_type != 'human':
                results['commit_details'].append({
                    'sha': commit['sha'][:8],
                    'type': commit_type,
                    'author': commit['author'],
                    'email': commit['email'],
                    'date': commit['date'],
                    'subject': commit['subject'],
                    'additions': additions,
                    'deletions': deletions
                })
        return results

    def print_report(self, results: Dict):
        """Print a human-readable summary of an ``analyze()`` result to stdout."""
        self.logger.info("Generating analysis report")

        period = results.get('period') or {}
        period_parts = []
        if period.get('since'):
            period_parts.append(f"Since: {period['since']}")
        if period.get('until'):
            period_parts.append(f"Until: {period['until']}")
        period_info = f" ({', '.join(period_parts)})" if period_parts else ""

        print("\n" + "=" * 60)
        print(f"AI Code Analysis Report - {results['repository']}{period_info}")
        print("=" * 60)

        total_commits = results['total_commits']
        print("\n📊 Commit Summary:")
        print(f"Total Commits: {total_commits}")
        for ai_type in self._CATEGORIES:
            count = results[ai_type]['commits']
            percentage = (count / total_commits * 100) if total_commits > 0 else 0
            print(f"{ai_type.replace('_', ' ').title()}: {count} ({percentage:.1f}%)")

        print("\n📝 Code Changes:")
        total_additions = sum(results[t]['additions'] for t in self._CATEGORIES)
        total_deletions = sum(results[t]['deletions'] for t in self._CATEGORIES)
        if total_additions > 0:
            for ai_type in self._CATEGORIES:
                additions = results[ai_type]['additions']
                percentage = additions / total_additions * 100
                print(f"{ai_type.replace('_', ' ').title()} additions: {additions} lines ({percentage:.1f}%)")
        print(f"\nTotal additions: {total_additions} lines")
        print(f"Total deletions: {total_deletions} lines")

        # AI vs human ratio by commit count.
        ai_commits = results['claude_code']['commits'] + results['devin']['commits']
        if total_commits > 0:
            ai_ratio = ai_commits / total_commits * 100
            print(f"\n🤖 AI Generated: {ai_ratio:.1f}%")
            print(f"👤 Human Generated: {100 - ai_ratio:.1f}%")

        # Show only the first few AI-classified commits.
        details = results.get('commit_details') or []
        if details:
            print("\n🔍 AI Generated Commits Details:")
            for detail in details[:self._MAX_DETAILS]:
                subject = detail['subject']
                # Add an ellipsis only when the subject was actually cut.
                if len(subject) > self._SUBJECT_WIDTH:
                    subject = subject[:self._SUBJECT_WIDTH] + '...'
                print(f" {detail['sha']} ({detail['type']}) {detail['date'][:10]} {subject}")
def setup_logging(verbose: bool = False):
    """Configure root logging to the console and to ``repo_analysis.log``.

    Args:
        verbose: When true the log level is DEBUG, otherwise INFO.
    """
    log_level = logging.DEBUG if verbose else logging.INFO
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(
        level=log_level,
        format=log_format,
        handlers=[
            logging.StreamHandler(),
            # Pin the encoding: log records may carry non-ASCII text (paths,
            # git error output) that the platform default cannot encode.
            logging.FileHandler('repo_analysis.log', encoding='utf-8'),
        ],
    )
def _build_output_filename(repo_path: str, since: Optional[str] = None,
                           until: Optional[str] = None) -> str:
    """Return the JSON report file name for a repository and date window."""
    import os

    # abspath() drops trailing separators and resolves '.', so analysing the
    # current directory does not yield a hidden "._local_analysis.json".
    repo_name = os.path.basename(os.path.abspath(repo_path)) or 'repo'
    filename_parts = [repo_name, 'local_analysis']
    for label, value in (('since', since), ('until', until)):
        if value:
            # Date expressions may contain spaces, ':' or '/'; keep only
            # characters that are safe inside a file name.
            safe_value = re.sub(r'[^A-Za-z0-9._-]', '_', value)
            filename_parts.append(f'{label}_{safe_value}')
    return '_'.join(filename_parts) + '.json'


def main():
    """Command-line entry point: analyze a local repository and save the result.

    Prints the report to stdout and writes the full result as JSON into the
    current working directory. Any failure is logged and re-raised.
    """
    parser = argparse.ArgumentParser(description='Analyze AI-generated code in local git repository')
    parser.add_argument('repo_path', help='Path to local git repository')
    parser.add_argument('--since', help='Start date for analysis (e.g., "2024-01-01", "1 month ago", "2024-07-01T00:00:00")')
    parser.add_argument('--until', help='End date for analysis (e.g., "2024-12-31", "yesterday", "2024-07-31T23:59:59")')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose logging')
    args = parser.parse_args()

    setup_logging(args.verbose)
    logger = logging.getLogger(__name__)
    try:
        analyzer = LocalRepoAnalyzer(args.repo_path, args.since, args.until)
        results = analyzer.analyze()
        analyzer.print_report(results)

        # Also persist the full result as JSON; the date window is encoded
        # in the file name so different runs do not overwrite each other.
        output_file = _build_output_filename(args.repo_path, args.since, args.until)
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        logger.info("Analysis results saved to: %s", output_file)
        print(f"\n📄 Detailed results saved to: {output_file}")
    except Exception as e:
        # Top-level boundary: record the traceback in the log, then re-raise.
        logger.exception("Analysis failed: %s", e)
        raise


if __name__ == '__main__':
    main()
#!/usr/bin/env python3
"""
GitHub リポジトリの AI 生成コード比率分析ツール
ClaudeCode と Devin で書かれたコードの比率を分析する
"""
import requests
import json
import re
from typing import Dict, List, Tuple
from datetime import datetime
import argparse
class RepoAnalyzer:
    """Estimate how much of a GitHub repository was committed by AI tools.

    Commits are fetched through the GitHub REST API and classified as
    ``claude_code``, ``devin`` or ``human`` from their message and
    author/committer e-mail addresses.
    """

    # Upper bound on fetched commits, to stay within API rate limits.
    MAX_COMMITS = 500
    # Only the first STATS_LIMIT commits get a per-commit stats request.
    STATS_LIMIT = 100
    # Seconds before an HTTP request is abandoned; without a timeout a
    # stalled connection would block the analysis indefinitely.
    REQUEST_TIMEOUT = 30

    # Result buckets, in the order they are reported.
    _CATEGORIES = ('claude_code', 'devin', 'human')

    # Phrases counted by the Devin-style heuristic.
    _DEVIN_PHRASES = (
        'this changes from',
        'makes it easier',
        'maintains backward compatibility',
        'modified',
        'improved',
        'updated',
    )

    def __init__(self, owner: str, repo: str, github_token: str = None):
        """Store the repository coordinates and build the request headers.

        Args:
            owner: Repository owner (user or organisation).
            repo: Repository name.
            github_token: Optional token sent in the ``Authorization`` header.
        """
        self.owner = owner
        self.repo = repo
        self.token = github_token
        self.headers = {}
        if github_token:
            self.headers['Authorization'] = f'token {github_token}'
        # Regular expressions that mark a commit message as AI generated.
        self.ai_patterns = {
            'claude_code': [
                r'🤖 Generated with \[Claude Code\]',
                r'Co-Authored-By: Claude <noreply@anthropic\.com>',
                r'Generated with Claude Code',
            ],
            'devin': [
                r'Generated by Devin',
                r'Co-authored-by: Devin',
                r'Devin.*generated',
                # Detailed Devin commit-message patterns.
                r'- Modified .+\n- This changes from .+ to .+\n- .+\n- Makes it .+\n- Maintains .+',
                r'^[A-Z][a-z]+ .+ by .+\n- Modified',
                r'- This changes from .+ to .+',
                r'- Makes it easier to .+',
                r'- Maintains backward compatibility',
                # Several consecutive bullet lines with detailed explanations.
                r'(?:- [A-Z].+\n){3,}',
            ]
        }
        # E-mail addresses used by the Devin AI integration bot.
        self.devin_emails = [
            '158243242+devin-ai-integration[bot]@users.noreply.github.com',
            'devin-ai-integration[bot]@users.noreply.github.com'
        ]

    def get_commits(self) -> List[Dict]:
        """Fetch up to ``MAX_COMMITS`` commits from the GitHub API.

        Pages are requested until an empty or short page is returned, the
        cap is reached, or a non-200 response is received (in which case
        the commits collected so far are returned).
        """
        url = f'https://api.github.com/repos/{self.owner}/{self.repo}/commits'
        per_page = 100
        commits = []
        page = 1
        while len(commits) < self.MAX_COMMITS:
            params = {'page': page, 'per_page': per_page}
            response = requests.get(
                url, headers=self.headers, params=params,
                timeout=self.REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                print(f"Error fetching commits: {response.status_code}")
                break
            page_commits = response.json()
            if not page_commits:
                break
            commits.extend(page_commits)
            if len(page_commits) < per_page:
                # A short page is the last one; skip the extra empty request.
                break
            page += 1
        return commits[:self.MAX_COMMITS]

    def classify_commit(self, commit: Dict) -> str:
        """Classify an API commit object as ``'claude_code'``, ``'devin'`` or ``'human'``.

        Checks run in this order: Devin bot e-mail, Claude Code message
        patterns, Devin message patterns, Devin-style message structure, and
        finally an ``anthropic.com`` author/committer e-mail. Missing or
        null author/committer/e-mail fields are treated as empty strings.
        """
        commit_info = commit.get('commit') or {}
        message = commit_info.get('message') or ''
        author_email = (commit_info.get('author') or {}).get('email') or ''
        committer_email = (commit_info.get('committer') or {}).get('email') or ''
        flags = re.IGNORECASE | re.MULTILINE

        if author_email in self.devin_emails or committer_email in self.devin_emails:
            return 'devin'
        if any(re.search(p, message, flags) for p in self.ai_patterns['claude_code']):
            return 'claude_code'
        if any(re.search(p, message, flags) for p in self.ai_patterns['devin']):
            return 'devin'
        if self._is_devin_style_commit(message):
            return 'devin'
        if 'anthropic.com' in author_email or 'anthropic.com' in committer_email:
            return 'claude_code'
        return 'human'

    def _is_devin_style_commit(self, message: str) -> bool:
        """Return True when the message looks like a Devin-written commit.

        The heuristic requires a title, a blank second line, at least three
        bullet lines starting with ``- `` from the third line on, and at
        least two of the phrases in ``_DEVIN_PHRASES`` (case-insensitive).
        """
        lines = message.split('\n')
        if len(lines) < 4:
            return False
        if lines[1].strip() != '':
            return False
        bullet_lines = sum(1 for line in lines[2:] if line.strip().startswith('- '))
        if bullet_lines < 3:
            return False
        message_lower = message.lower()
        phrase_count = sum(1 for phrase in self._DEVIN_PHRASES if phrase in message_lower)
        return phrase_count >= 2

    def get_commit_stats(self, commit_sha: str) -> Tuple[int, int]:
        """Return ``(additions, deletions)`` for one commit.

        Returns ``(0, 0)`` and prints a warning on a non-200 response.
        """
        url = f'https://api.github.com/repos/{self.owner}/{self.repo}/commits/{commit_sha}'
        response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        if response.status_code != 200:
            # Surface the failure: silently returning zeros would skew the
            # line statistics without any hint to the user.
            print(f"Warning: could not fetch stats for {commit_sha}: {response.status_code}")
            return 0, 0
        stats = response.json().get('stats') or {}
        return stats.get('additions', 0), stats.get('deletions', 0)

    def analyze(self) -> Dict:
        """Classify the fetched commits and aggregate per-category statistics.

        Line statistics are collected only for the first ``STATS_LIMIT``
        commits to limit the number of API requests.

        Returns:
            A JSON-serialisable dict with ``total_commits``, one
            ``{'commits', 'additions', 'deletions'}`` bucket per category,
            ``analysis_date`` and ``repository``.
        """
        print(f"Analyzing {self.owner}/{self.repo}...")
        commits = self.get_commits()
        print(f"Found {len(commits)} commits")

        results = {
            'total_commits': len(commits),
            'claude_code': {'commits': 0, 'additions': 0, 'deletions': 0},
            'devin': {'commits': 0, 'additions': 0, 'deletions': 0},
            'human': {'commits': 0, 'additions': 0, 'deletions': 0},
            'analysis_date': datetime.now().isoformat(),
            'repository': f'{self.owner}/{self.repo}'
        }

        for i, commit in enumerate(commits):
            if i % 50 == 0:
                print(f"Processing commit {i+1}/{len(commits)}")
            commit_type = self.classify_commit(commit)
            bucket = results[commit_type]
            bucket['commits'] += 1
            if i < self.STATS_LIMIT:
                additions, deletions = self.get_commit_stats(commit['sha'])
                bucket['additions'] += additions
                bucket['deletions'] += deletions
        return results

    def print_report(self, results: Dict):
        """Print a human-readable summary of an ``analyze()`` result to stdout."""
        print("\n" + "=" * 60)
        print(f"AI Code Analysis Report - {results['repository']}")
        print("=" * 60)

        total_commits = results['total_commits']
        print("\n📊 Commit Summary:")
        print(f"Total Commits: {total_commits}")
        for ai_type in self._CATEGORIES:
            count = results[ai_type]['commits']
            percentage = (count / total_commits * 100) if total_commits > 0 else 0
            print(f"{ai_type.replace('_', ' ').title()}: {count} ({percentage:.1f}%)")

        print(f"\n📝 Code Changes (first {self.STATS_LIMIT} commits):")
        total_additions = sum(results[t]['additions'] for t in self._CATEGORIES)
        total_deletions = sum(results[t]['deletions'] for t in self._CATEGORIES)
        if total_additions > 0:
            for ai_type in self._CATEGORIES:
                additions = results[ai_type]['additions']
                percentage = additions / total_additions * 100
                print(f"{ai_type.replace('_', ' ').title()} additions: {additions} lines ({percentage:.1f}%)")
        print(f"\nTotal additions: {total_additions} lines")
        print(f"Total deletions: {total_deletions} lines")

        # AI vs human ratio by commit count.
        ai_commits = results['claude_code']['commits'] + results['devin']['commits']
        if total_commits > 0:
            ai_ratio = ai_commits / total_commits * 100
            print(f"\n🤖 AI Generated: {ai_ratio:.1f}%")
            print(f"👤 Human Generated: {100 - ai_ratio:.1f}%")
def _parse_repo_url(repo_url: str) -> Tuple[str, str]:
    """Extract ``(owner, repo)`` from a GitHub repository URL.

    Accepts trailing slashes, extra path segments (``/tree/main``) and
    clone URLs ending in ``.git``.

    Raises:
        ValueError: If the URL is not a github.com repository URL.
    """
    match = re.match(
        r'https?://(?:www\.)?github\.com/([^/\s?#]+)/([^/\s?#]+)',
        repo_url.strip().rstrip('/'),
    )
    if not match:
        raise ValueError("Invalid GitHub URL format")
    owner, repo = match.groups()
    # Clone URLs end in ".git", which is not part of the repository name
    # the API expects.
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    if not repo:
        raise ValueError("Invalid GitHub URL format")
    return owner, repo


def main():
    """Command-line entry point: analyze a GitHub repository and save the result.

    Prints the report to stdout and writes the full result to
    ``<repo>_analysis.json`` in the current working directory. Exits with an
    argparse usage error when the URL cannot be parsed.
    """
    parser = argparse.ArgumentParser(description='Analyze AI-generated code in GitHub repository')
    parser.add_argument('repo_url', help='GitHub repository URL (e.g., https://github.com/owner/repo)')
    parser.add_argument('--token', help='GitHub personal access token for higher rate limits')
    args = parser.parse_args()

    # Extract owner/repo from the URL.
    try:
        owner, repo = _parse_repo_url(args.repo_url)
    except ValueError as exc:
        # parser.error() prints the message and exits with a non-zero status
        # so scripts calling this tool can detect the failure.
        parser.error(str(exc))

    analyzer = RepoAnalyzer(owner, repo, args.token)
    results = analyzer.analyze()
    analyzer.print_report(results)

    # Also persist the full result as JSON.
    output_file = f'{repo}_analysis.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    print(f"\n📄 Detailed results saved to: {output_file}")


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment