Created
July 26, 2025 16:03
-
-
Save taross-f/1a5333471ef0a64fa92d7d6fd7c5a632 to your computer and use it in GitHub Desktop.
ai usage
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| ローカルgitリポジトリのAI生成コード比率分析ツール | |
| """ | |
| import subprocess | |
| import re | |
| import json | |
| import logging | |
| from datetime import datetime, timezone | |
| from typing import Dict, List, Tuple, Optional | |
| import argparse | |
class LocalRepoAnalyzer:
    """Estimate how much of a local git repository was produced by AI tools.

    Every commit reachable from any ref is classified as ``claude_code``,
    ``devin`` or ``human`` based on the author e-mail and the commit message,
    and the added/deleted line counts are summed per category.
    """

    # git expands %x1f / %x1e to the ASCII unit/record separator bytes.
    # They cannot reasonably occur in names or messages, unlike '|' or a
    # newline, so a record can always be split back into its fields.
    _FIELD_SEP = '\x1f'
    _RECORD_SEP = '\x1e'
    _LOG_FORMAT = '%H%x1f%an%x1f%ae%x1f%ad%x1f%s%x1f%b%x1e'
    _CATEGORIES = ('claude_code', 'devin', 'human')

    def __init__(self, repo_path: str, since: Optional[str] = None, until: Optional[str] = None):
        """Store the repository location and the optional date window.

        Args:
            repo_path: Path to the local git repository.
            since: Passed to ``git log --since`` when given.
            until: Passed to ``git log --until`` when given.
        """
        self.repo_path = repo_path
        self.since = since
        self.until = until
        self.logger = logging.getLogger(__name__)
        # Regex patterns that mark a commit message as AI generated.
        self.ai_patterns = {
            'claude_code': [
                r'🤖 Generated with \[Claude Code\]',
                r'Co-Authored-By: Claude <noreply@anthropic\.com>',
                r'Generated with Claude Code',
            ],
            'devin': [
                r'Generated by Devin',
                r'Co-authored-by: Devin',
                r'Devin.*generated',
            ]
        }
        # Author e-mail addresses used by the Devin AI integration bot.
        self.devin_emails = [
            '158243242+devin-ai-integration[bot]@users.noreply.github.com',
            'devin-ai-integration[bot]@users.noreply.github.com'
        ]

    def get_commits(self) -> List[Dict]:
        """Read commit metadata from ``git log``.

        Returns:
            One dict per commit with the keys ``sha``, ``author``, ``email``,
            ``date``, ``subject`` and ``body``.

        Raises:
            RuntimeError: If the git command exits with a non-zero status.
        """
        cmd = [
            'git', '-C', self.repo_path, 'log',
            f'--pretty=format:{self._LOG_FORMAT}', '--date=iso', '--all',
        ]
        # Restrict the history to the requested period, if any.
        if self.since:
            cmd.extend(['--since', self.since])
        if self.until:
            cmd.extend(['--until', self.until])
        # Decode leniently: commit data is not guaranteed to be valid UTF-8
        # and one bad byte should not abort the whole analysis.
        result = subprocess.run(
            cmd, capture_output=True, encoding='utf-8', errors='replace'
        )
        if result.returncode != 0:
            self.logger.error(f"Git command failed: {result.stderr}")
            raise RuntimeError(f"Git command failed: {result.stderr}")
        return self._parse_log_output(result.stdout)

    def _parse_log_output(self, output: str) -> List[Dict]:
        """Split ``git log`` output written with ``_LOG_FORMAT`` into commits."""
        commits = []
        for record in output.split(self._RECORD_SEP):
            # "format:" puts a newline between commits, so every record
            # after the first starts with one.
            record = record.lstrip('\n')
            if not record:
                continue
            parts = record.split(self._FIELD_SEP, 5)
            if len(parts) < 6:
                self.logger.warning(f"Skipping malformed git log record: {record[:80]!r}")
                continue
            commits.append({
                'sha': parts[0],
                'author': parts[1],
                'email': parts[2],
                'date': parts[3],
                'subject': parts[4],
                'body': parts[5].rstrip('\n'),
            })
        return commits

    def classify_commit(self, commit: Dict) -> str:
        """Classify a commit as ``'claude_code'``, ``'devin'`` or ``'human'``.

        Checks run in this order: Devin bot e-mail, Claude Code message
        patterns, Devin message patterns, Devin-style message structure,
        and finally an ``anthropic.com`` author e-mail.
        """
        full_message = f"{commit['subject']}\n{commit['body']}"
        flags = re.IGNORECASE | re.MULTILINE
        # The Devin integration bot is identified by its e-mail address.
        if commit['email'] in self.devin_emails:
            return 'devin'
        if any(re.search(p, full_message, flags) for p in self.ai_patterns['claude_code']):
            return 'claude_code'
        if any(re.search(p, full_message, flags) for p in self.ai_patterns['devin']):
            return 'devin'
        # Structural heuristic for Devin-looking messages.
        if self._is_devin_style_commit(full_message):
            return 'devin'
        if 'anthropic.com' in commit['email']:
            return 'claude_code'
        return 'human'

    def _is_devin_style_commit(self, message: str) -> bool:
        """Heuristically decide whether ``message`` looks Devin-written.

        True when the message has at least four lines, at least three
        ``- `` bullet lines, and contains at least two of the known phrases.
        """
        lines = message.split('\n')
        # A subject plus a bullet list needs at least four lines.
        if len(lines) < 4:
            return False
        bullet_lines = sum(1 for line in lines if line.strip().startswith('- '))
        if bullet_lines < 3:
            return False
        message_lower = message.lower()
        devin_phrases = [
            'this changes from',
            'makes it easier',
            'maintains backward compatibility',
            'modified',
            'improved',
            'updated'
        ]
        phrase_count = sum(1 for phrase in devin_phrases if phrase in message_lower)
        return phrase_count >= 2

    def get_commit_stats(self, commit_sha: str) -> Tuple[int, int]:
        """Return ``(additions, deletions)`` for one commit.

        Returns ``(0, 0)`` and logs a warning when git fails for the commit.
        """
        # "--format=" suppresses the commit header and message so that only
        # numstat rows are parsed; message text cannot be mistaken for stats.
        cmd = ['git', '-C', self.repo_path, 'show', '--numstat', '--format=', commit_sha]
        result = subprocess.run(
            cmd, capture_output=True, encoding='utf-8', errors='replace'
        )
        if result.returncode != 0:
            self.logger.warning(f"Failed to get stats for commit {commit_sha}: {result.stderr}")
            return 0, 0
        return self._parse_numstat(result.stdout)

    @staticmethod
    def _parse_numstat(output: str) -> Tuple[int, int]:
        """Sum the added/deleted columns of ``git --numstat`` output."""
        additions = 0
        deletions = 0
        for line in output.splitlines():
            # A numstat row is "<added>\t<deleted>\t<path>".
            parts = line.split('\t')
            if len(parts) < 3:
                continue
            try:
                # Binary files report "-" in both count columns.
                added = 0 if parts[0] == '-' else int(parts[0])
                deleted = 0 if parts[1] == '-' else int(parts[1])
            except ValueError:
                # Not a numstat row; ignore it entirely rather than
                # counting only one of its columns.
                continue
            additions += added
            deletions += deleted
        return additions, deletions

    def analyze(self) -> Dict:
        """Classify every commit and aggregate per-category statistics.

        Returns:
            A dict with ``total_commits``, one ``{'commits', 'additions',
            'deletions'}`` dict per category, ``analysis_date``,
            ``repository``, ``period`` and ``commit_details`` (one entry per
            AI-classified commit).
        """
        period_parts = []
        if self.since:
            period_parts.append(f"since {self.since}")
        if self.until:
            period_parts.append(f"until {self.until}")
        period_str = f" ({', '.join(period_parts)})" if period_parts else ""
        self.logger.info(f"Analyzing {self.repo_path}{period_str}...")
        commits = self.get_commits()
        self.logger.info(f"Found {len(commits)} commits")
        results = {
            'total_commits': len(commits),
            'claude_code': {'commits': 0, 'additions': 0, 'deletions': 0},
            'devin': {'commits': 0, 'additions': 0, 'deletions': 0},
            'human': {'commits': 0, 'additions': 0, 'deletions': 0},
            'analysis_date': datetime.now().isoformat(),
            'repository': self.repo_path,
            'period': {
                'since': self.since,
                'until': self.until
            },
            'commit_details': []
        }
        for i, commit in enumerate(commits):
            # Progress message every 50 commits.
            if i % 50 == 0:
                self.logger.info(f"Processing commit {i+1}/{len(commits)}")
            commit_type = self.classify_commit(commit)
            bucket = results[commit_type]
            bucket['commits'] += 1
            additions, deletions = self.get_commit_stats(commit['sha'])
            bucket['additions'] += additions
            bucket['deletions'] += deletions
            # Keep details of AI-classified commits for later inspection.
            if commit_type in ('claude_code', 'devin'):
                results['commit_details'].append({
                    'sha': commit['sha'][:8],
                    'type': commit_type,
                    'author': commit['author'],
                    'email': commit['email'],
                    'date': commit['date'],
                    'subject': commit['subject'],
                    'additions': additions,
                    'deletions': deletions
                })
        return results

    def print_report(self, results: Dict):
        """Print a human-readable summary of ``analyze()`` results to stdout."""
        self.logger.info("Generating analysis report")
        print("\n" + "="*60)
        period_parts = []
        if results['period']['since']:
            period_parts.append(f"Since: {results['period']['since']}")
        if results['period']['until']:
            period_parts.append(f"Until: {results['period']['until']}")
        period_info = f" ({', '.join(period_parts)})" if period_parts else ""
        print(f"AI Code Analysis Report - {results['repository']}{period_info}")
        print("="*60)
        total_commits = results['total_commits']
        print("\n📊 Commit Summary:")
        print(f"Total Commits: {total_commits}")
        for ai_type in self._CATEGORIES:
            count = results[ai_type]['commits']
            percentage = (count / total_commits * 100) if total_commits > 0 else 0
            print(f"{ai_type.replace('_', ' ').title()}: {count} ({percentage:.1f}%)")
        print("\n📝 Code Changes:")
        total_additions = sum(results[t]['additions'] for t in self._CATEGORIES)
        total_deletions = sum(results[t]['deletions'] for t in self._CATEGORIES)
        if total_additions > 0:
            for ai_type in self._CATEGORIES:
                additions = results[ai_type]['additions']
                percentage = additions / total_additions * 100
                print(f"{ai_type.replace('_', ' ').title()} additions: {additions} lines ({percentage:.1f}%)")
        print(f"\nTotal additions: {total_additions} lines")
        print(f"Total deletions: {total_deletions} lines")
        # Overall AI vs. human share, by commit count.
        ai_commits = results['claude_code']['commits'] + results['devin']['commits']
        if total_commits > 0:
            ai_ratio = (ai_commits / total_commits * 100)
            print(f"\n🤖 AI Generated: {ai_ratio:.1f}%")
            print(f"👤 Human Generated: {100 - ai_ratio:.1f}%")
        # List the first ten AI-classified commits.
        if results['commit_details']:
            print("\n🔍 AI Generated Commits Details:")
            for detail in results['commit_details'][:10]:
                subject = detail['subject']
                # Only mark the subject as elided when it was actually cut.
                if len(subject) > 50:
                    subject = subject[:50] + '...'
                print(f" {detail['sha']} ({detail['type']}) {detail['date'][:10]} {subject}")
def setup_logging(verbose: bool = False):
    """Configure root logging to write to both the console and a log file.

    Args:
        verbose: When true the level is DEBUG, otherwise INFO.
    """
    if verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO
    # Records go to the stream handler and are also appended to
    # repo_analysis.log in the current working directory.
    console_handler = logging.StreamHandler()
    file_handler = logging.FileHandler('repo_analysis.log')
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[console_handler, file_handler],
    )
def main():
    """Command-line entry point: analyze a local repository and save JSON.

    Parses the arguments, runs ``LocalRepoAnalyzer``, prints the report and
    writes the results to ``<repo>_local_analysis[...].json`` in the current
    working directory. Any failure is logged and re-raised.
    """
    import os

    parser = argparse.ArgumentParser(description='Analyze AI-generated code in local git repository')
    parser.add_argument('repo_path', help='Path to local git repository')
    parser.add_argument('--since', help='Start date for analysis (e.g., "2024-01-01", "1 month ago", "2024-07-01T00:00:00")')
    parser.add_argument('--until', help='End date for analysis (e.g., "2024-12-31", "yesterday", "2024-07-31T23:59:59")')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose logging')
    args = parser.parse_args()
    setup_logging(args.verbose)
    logger = logging.getLogger(__name__)

    def _filename_safe(value: str) -> str:
        # Dates such as "2024/01/01" or "1 month ago" contain characters
        # that are invalid or awkward in file names; map them to "_".
        return re.sub(r'[^\w.+-]', '_', value)

    try:
        analyzer = LocalRepoAnalyzer(args.repo_path, args.since, args.until)
        results = analyzer.analyze()
        analyzer.print_report(results)
        # Resolve the path first so "." or "repo/" still yields the real
        # directory name instead of "." or an empty string.
        repo_name = os.path.basename(os.path.abspath(args.repo_path)) or 'repo'
        # Include the analysis period in the output file name.
        filename_parts = [repo_name, 'local_analysis']
        if args.since:
            filename_parts.append(f'since_{_filename_safe(args.since)}')
        if args.until:
            filename_parts.append(f'until_{_filename_safe(args.until)}')
        output_file = '_'.join(filename_parts) + '.json'
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        logger.info(f"Analysis results saved to: {output_file}")
        print(f"\n📄 Detailed results saved to: {output_file}")
    except Exception as e:
        logger.error(f"Analysis failed: {e}")
        raise


if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| GitHub リポジトリの AI 生成コード比率分析ツール | |
| ClaudeCode と Devin で書かれたコードの比率を分析する | |
| """ | |
| import requests | |
| import json | |
| import re | |
| from typing import Dict, List, Tuple | |
| from datetime import datetime | |
| import argparse | |
class RepoAnalyzer:
    """Estimate the share of AI-generated commits in a GitHub repository.

    Commits are fetched through the GitHub REST API and classified as
    ``claude_code``, ``devin`` or ``human`` from their message and the
    author/committer e-mail addresses.
    """

    _CATEGORIES = ('claude_code', 'devin', 'human')
    # Limits that keep the number of API requests bounded.
    _MAX_COMMITS = 500
    _MAX_STATS_COMMITS = 100

    def __init__(self, owner: str, repo: str, github_token: str = None, timeout: float = 30.0):
        """Store the repository coordinates and request settings.

        Args:
            owner: Repository owner (user or organisation).
            repo: Repository name.
            github_token: Optional token sent as an ``Authorization`` header.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.owner = owner
        self.repo = repo
        self.token = github_token
        self.timeout = timeout
        self.headers = {}
        if github_token:
            self.headers['Authorization'] = f'token {github_token}'
        # Regex patterns that mark a commit message as AI generated.
        self.ai_patterns = {
            'claude_code': [
                r'🤖 Generated with \[Claude Code\]',
                r'Co-Authored-By: Claude <noreply@anthropic\.com>',
                r'Generated with Claude Code',
            ],
            'devin': [
                r'Generated by Devin',
                r'Co-authored-by: Devin',
                r'Devin.*generated',
                # Detailed Devin-style commit message patterns.
                r'- Modified .+\n- This changes from .+ to .+\n- .+\n- Makes it .+\n- Maintains .+',
                r'^[A-Z][a-z]+ .+ by .+\n- Modified',
                r'- This changes from .+ to .+',
                r'- Makes it easier to .+',
                r'- Maintains backward compatibility',
                # Several consecutive bullet lines with detailed explanations.
                r'(?:- [A-Z].+\n){3,}',
            ]
        }
        # Author e-mail addresses used by the Devin AI integration bot.
        self.devin_emails = [
            '158243242+devin-ai-integration[bot]@users.noreply.github.com',
            'devin-ai-integration[bot]@users.noreply.github.com'
        ]

    def get_commits(self) -> List[Dict]:
        """Fetch commits page by page, stopping at 500 commits.

        Fetching is best-effort: on an HTTP error or a network failure the
        problem is printed and the commits collected so far are returned.
        """
        commits = []
        page = 1
        per_page = 100
        url = f'https://api.github.com/repos/{self.owner}/{self.repo}/commits'
        # Cap the number of commits to stay within API rate limits.
        while len(commits) < self._MAX_COMMITS:
            params = {'page': page, 'per_page': per_page}
            try:
                response = requests.get(
                    url, headers=self.headers, params=params, timeout=self.timeout
                )
            except requests.RequestException as exc:
                print(f"Error fetching commits: {exc}")
                break
            if response.status_code != 200:
                print(f"Error fetching commits: {response.status_code}")
                break
            page_commits = response.json()
            if not page_commits:
                break
            commits.extend(page_commits)
            page += 1
        return commits

    def classify_commit(self, commit: Dict) -> str:
        """Classify a commit as ``'claude_code'``, ``'devin'`` or ``'human'``.

        ``commit`` is expected to have ``commit['commit']['message']`` and
        the author/committer e-mails under ``commit['commit']``.
        """
        details = commit['commit']
        message = details['message']
        # Treat a missing e-mail as empty so the membership tests below
        # cannot fail on None.
        author_email = details['author']['email'] or ''
        committer_email = details['committer']['email'] or ''
        flags = re.IGNORECASE | re.MULTILINE
        # The Devin integration bot is identified by its e-mail address.
        if author_email in self.devin_emails or committer_email in self.devin_emails:
            return 'devin'
        if any(re.search(p, message, flags) for p in self.ai_patterns['claude_code']):
            return 'claude_code'
        if any(re.search(p, message, flags) for p in self.ai_patterns['devin']):
            return 'devin'
        # Structural heuristic for Devin-looking messages.
        if self._is_devin_style_commit(message):
            return 'devin'
        if 'anthropic.com' in author_email or 'anthropic.com' in committer_email:
            return 'claude_code'
        return 'human'

    def _is_devin_style_commit(self, message: str) -> bool:
        """Heuristically decide whether ``message`` looks Devin-written.

        True when the message has a title, a blank second line, at least
        three ``- `` bullet lines after that, and at least two known phrases.
        """
        lines = message.split('\n')
        # Title + blank line + bullet list needs at least four lines.
        if len(lines) < 4:
            return False
        # The second line must be blank.
        if lines[1].strip() != '':
            return False
        bullet_lines = sum(1 for line in lines[2:] if line.strip().startswith('- '))
        if bullet_lines < 3:
            return False
        message_lower = message.lower()
        devin_phrases = [
            'this changes from',
            'makes it easier',
            'maintains backward compatibility',
            'modified',
            'improved',
            'updated'
        ]
        phrase_count = sum(1 for phrase in devin_phrases if phrase in message_lower)
        return phrase_count >= 2

    def get_commit_stats(self, commit_sha: str) -> Tuple[int, int]:
        """Return ``(additions, deletions)`` for one commit.

        Returns ``(0, 0)`` when the request fails or carries no stats.
        """
        url = f'https://api.github.com/repos/{self.owner}/{self.repo}/commits/{commit_sha}'
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
        except requests.RequestException:
            return 0, 0
        if response.status_code != 200:
            return 0, 0
        stats = response.json().get('stats', {})
        return stats.get('additions', 0), stats.get('deletions', 0)

    def analyze(self) -> Dict:
        """Classify fetched commits and aggregate per-category statistics.

        Line statistics are collected only for the first 100 commits to
        limit the number of API requests.
        """
        print(f"Analyzing {self.owner}/{self.repo}...")
        commits = self.get_commits()
        print(f"Found {len(commits)} commits")
        results = {
            'total_commits': len(commits),
            'claude_code': {'commits': 0, 'additions': 0, 'deletions': 0},
            'devin': {'commits': 0, 'additions': 0, 'deletions': 0},
            'human': {'commits': 0, 'additions': 0, 'deletions': 0},
            'analysis_date': datetime.now().isoformat(),
            'repository': f'{self.owner}/{self.repo}'
        }
        for i, commit in enumerate(commits):
            # Progress message every 50 commits.
            if i % 50 == 0:
                print(f"Processing commit {i+1}/{len(commits)}")
            commit_type = self.classify_commit(commit)
            bucket = results[commit_type]
            bucket['commits'] += 1
            # One extra request per commit, so only the first 100 get stats.
            if i < self._MAX_STATS_COMMITS:
                additions, deletions = self.get_commit_stats(commit['sha'])
                bucket['additions'] += additions
                bucket['deletions'] += deletions
        return results

    def print_report(self, results: Dict):
        """Print a human-readable summary of ``analyze()`` results to stdout."""
        print("\n" + "="*60)
        print(f"AI Code Analysis Report - {results['repository']}")
        print("="*60)
        total_commits = results['total_commits']
        print("\n📊 Commit Summary:")
        print(f"Total Commits: {total_commits}")
        for ai_type in self._CATEGORIES:
            count = results[ai_type]['commits']
            percentage = (count / total_commits * 100) if total_commits > 0 else 0
            print(f"{ai_type.replace('_', ' ').title()}: {count} ({percentage:.1f}%)")
        print("\n📝 Code Changes (first 100 commits):")
        total_additions = sum(results[t]['additions'] for t in self._CATEGORIES)
        total_deletions = sum(results[t]['deletions'] for t in self._CATEGORIES)
        if total_additions > 0:
            for ai_type in self._CATEGORIES:
                additions = results[ai_type]['additions']
                percentage = additions / total_additions * 100
                print(f"{ai_type.replace('_', ' ').title()} additions: {additions} lines ({percentage:.1f}%)")
        print(f"\nTotal additions: {total_additions} lines")
        print(f"Total deletions: {total_deletions} lines")
        # Overall AI vs. human share, by commit count.
        ai_commits = results['claude_code']['commits'] + results['devin']['commits']
        if total_commits > 0:
            ai_ratio = (ai_commits / total_commits * 100)
            print(f"\n🤖 AI Generated: {ai_ratio:.1f}%")
            print(f"👤 Human Generated: {100 - ai_ratio:.1f}%")
def main():
    """Command-line entry point: analyze a GitHub repository and save JSON.

    Extracts ``owner`` and ``repo`` from the URL argument, runs
    ``RepoAnalyzer``, prints the report and writes ``<repo>_analysis.json``
    to the current working directory. Prints a message and returns without
    analyzing when the URL is not a ``https://github.com/owner/repo`` URL.
    """
    parser = argparse.ArgumentParser(description='Analyze AI-generated code in GitHub repository')
    parser.add_argument('repo_url', help='GitHub repository URL (e.g., https://github.com/owner/repo)')
    parser.add_argument('--token', help='GitHub personal access token for higher rate limits')
    args = parser.parse_args()
    # Extract owner/repo from the URL; anything after the repo segment
    # (e.g. "/tree/main") is ignored.
    match = re.match(r'https://github\.com/([^/]+)/([^/]+)', args.repo_url.rstrip('/'))
    if not match:
        print("Invalid GitHub URL format")
        return
    owner, repo = match.groups()
    # Clone URLs end in ".git", which is not part of the repository name.
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    analyzer = RepoAnalyzer(owner, repo, args.token)
    results = analyzer.analyze()
    analyzer.print_report(results)
    # Also save the results as JSON.
    output_file = f'{repo}_analysis.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    print(f"\n📄 Detailed results saved to: {output_file}")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment