taross-f · July 26, 2025 16:03
diff --git a/analyze_local_repo.py b/analyze_local_repo.py
 #!/usr/bin/env python3
 """
 ローカルgitリポジトリのAI生成コード比率分析ツール
 """

 import subprocess
 import re
 import json
 import logging
 from datetime import datetime, timezone
 from typing import Dict, List, Tuple, Optional
 import argparse

 class LocalRepoAnalyzer:
    """Estimate the share of AI-generated commits in a local git repository.

    Commits are read via ``git log``, labelled ``claude_code``, ``devin`` or
    ``human`` from their message and author e-mail, and the added/deleted
    line counts reported by ``git show --numstat`` are summed per label.
    """

    # ASCII unit/record separators emitted by git through the %x1f / %x1e
    # placeholders. Unlike '|', they do not occur in ordinary commit text,
    # so subjects and bodies containing pipes (e.g. Markdown tables) cannot
    # be mistaken for field or commit boundaries.
    _FIELD_SEP = '\x1f'
    _RECORD_SEP = '\x1e'

    def __init__(self, repo_path: str, since: Optional[str] = None, until: Optional[str] = None):
        """Store the target repository and the optional date bounds.

        Args:
            repo_path: Path handed to ``git -C``.
            since: Optional value forwarded to ``git log --since``.
            until: Optional value forwarded to ``git log --until``.
        """
        self.repo_path = repo_path
        self.since = since
        self.until = until
        self.logger = logging.getLogger(__name__)

        # Commit-message patterns that mark AI-generated commits
        self.ai_patterns = {
            'claude_code': [
                r'🤖 Generated with \[Claude Code\]',
                r'Co-Authored-By: Claude <noreply@anthropic\.com>',
                r'Generated with Claude Code',
            ],
            'devin': [
                r'Generated by Devin',
                r'Co-authored-by: Devin',
                r'Devin.*generated',
            ]
        }

        # Author e-mail addresses of the Devin AI integration bot
        self.devin_emails = [
            '158243242+devin-ai-integration[bot]@users.noreply.github.com',
            'devin-ai-integration[bot]@users.noreply.github.com'
        ]

    def get_commits(self) -> List[Dict]:
        """Read commit metadata from ``git log --all``.

        Returns:
            One dict per commit with the keys ``sha``, ``author``, ``email``,
            ``date``, ``subject`` and ``body``.

        Raises:
            Exception: If the git command exits with a non-zero status.
        """
        cmd = [
            'git', '-C', self.repo_path, 'log',
            '--pretty=format:%x1e%H%x1f%an%x1f%ae%x1f%ad%x1f%s%x1f%b',
            '--date=iso', '--all'
        ]

        # Restrict the period only when the caller asked for it
        if self.since:
            cmd.extend(['--since', self.since])
        if self.until:
            cmd.extend(['--until', self.until])

        # Decode as UTF-8 explicitly: commit messages routinely contain emoji
        # or non-ASCII text that the locale codec may be unable to decode.
        result = subprocess.run(
            cmd, capture_output=True, text=True, encoding='utf-8', errors='replace'
        )
        if result.returncode != 0:
            self.logger.error(f"Git command failed: {result.stderr}")
            raise Exception(f"Git command failed: {result.stderr}")

        return self._parse_log_output(result.stdout)

    @classmethod
    def _parse_log_output(cls, output: str) -> List[Dict]:
        """Split separator-delimited ``git log`` output into commit dicts."""
        commits = []
        for record in output.split(cls._RECORD_SEP):
            if not record.strip():
                continue
            fields = record.split(cls._FIELD_SEP, 5)
            if len(fields) < 5:
                # Not a record produced by our format string; ignore it
                continue
            commits.append({
                'sha': fields[0],
                'author': fields[1],
                'email': fields[2],
                'date': fields[3],
                'subject': fields[4],
                # Drop the newline(s) git emits between consecutive records
                'body': fields[5].rstrip('\n') if len(fields) > 5 else ''
            })
        return commits

    def classify_commit(self, commit: Dict) -> str:
        """Label a commit as ``'claude_code'``, ``'devin'`` or ``'human'``.

        Args:
            commit: Dict with at least ``subject``, ``body`` and ``email``.
        """
        full_message = f"{commit['subject']}\n{commit['body']}"

        # The Devin bot is identified by its author e-mail
        if commit['email'] in self.devin_emails:
            return 'devin'

        # Explicit markers in the message, Claude Code first, then Devin
        for label in ('claude_code', 'devin'):
            for pattern in self.ai_patterns[label]:
                if re.search(pattern, full_message, re.IGNORECASE | re.MULTILINE):
                    return label

        # Structural heuristic for Devin-style messages
        if self._is_devin_style_commit(full_message):
            return 'devin'

        # Commits authored from an anthropic.com address
        if 'anthropic.com' in commit['email']:
            return 'claude_code'

        return 'human'

    def _is_devin_style_commit(self, message: str) -> bool:
        """Return True when the message has the bullet-list shape used by Devin.

        The message must span at least four lines, contain three or more
        ``- `` bullet lines and mention at least two of the known phrases.
        """
        lines = message.split('\n')

        # Needs at least a title plus a bullet list
        if len(lines) < 4:
            return False

        bullet_lines = sum(1 for line in lines if line.strip().startswith('- '))
        if bullet_lines < 3:
            return False

        message_lower = message.lower()
        devin_phrases = [
            'this changes from',
            'makes it easier',
            'maintains backward compatibility',
            'modified',
            'improved',
            'updated'
        ]
        phrase_count = sum(1 for phrase in devin_phrases if phrase in message_lower)
        return phrase_count >= 2

    def get_commit_stats(self, commit_sha: str) -> Tuple[int, int]:
        """Return ``(additions, deletions)`` for one commit.

        ``(0, 0)`` is returned, and a warning logged, when git fails.
        """
        # --format= suppresses the commit header and message so that only
        # numstat rows are parsed; message lines containing tabs could
        # otherwise be counted as statistics.
        cmd = ['git', '-C', self.repo_path, 'show', '--numstat', '--format=', commit_sha]

        result = subprocess.run(
            cmd, capture_output=True, text=True, encoding='utf-8', errors='replace'
        )
        if result.returncode != 0:
            self.logger.warning(f"Failed to get stats for commit {commit_sha}: {result.stderr}")
            return 0, 0

        return self._parse_numstat(result.stdout)

    @staticmethod
    def _parse_numstat(output: str) -> Tuple[int, int]:
        """Sum the added/deleted columns of ``--numstat`` output."""
        additions = 0
        deletions = 0
        for line in output.splitlines():
            fields = line.split('\t')
            if len(fields) < 3:
                continue
            try:
                # Binary files are reported as '-' in both columns
                added = 0 if fields[0] == '-' else int(fields[0])
                deleted = 0 if fields[1] == '-' else int(fields[1])
            except ValueError:
                continue
            additions += added
            deletions += deleted
        return additions, deletions

    def analyze(self) -> Dict:
        """Classify every commit and aggregate counts and line statistics.

        Returns:
            Dict with ``total_commits``, one ``{'commits', 'additions',
            'deletions'}`` entry per label, ``analysis_date``, ``repository``,
            ``period`` and ``commit_details`` (AI-labelled commits only).
        """
        period_parts = []
        if self.since:
            period_parts.append(f"since {self.since}")
        if self.until:
            period_parts.append(f"until {self.until}")
        period_str = f" ({', '.join(period_parts)})" if period_parts else ""

        self.logger.info(f"Analyzing {self.repo_path}{period_str}...")

        commits = self.get_commits()
        self.logger.info(f"Found {len(commits)} commits")

        results = {
            'total_commits': len(commits),
            'claude_code': {'commits': 0, 'additions': 0, 'deletions': 0},
            'devin': {'commits': 0, 'additions': 0, 'deletions': 0},
            'human': {'commits': 0, 'additions': 0, 'deletions': 0},
            'analysis_date': datetime.now().isoformat(),
            'repository': self.repo_path,
            'period': {
                'since': self.since,
                'until': self.until
            },
            'commit_details': []
        }

        for i, commit in enumerate(commits):
            if i % 50 == 0:
                self.logger.info(f"Processing commit {i+1}/{len(commits)}")

            commit_type = self.classify_commit(commit)
            bucket = results[commit_type]
            bucket['commits'] += 1

            additions, deletions = self.get_commit_stats(commit['sha'])
            bucket['additions'] += additions
            bucket['deletions'] += deletions

            # Keep details of AI-labelled commits for inspection
            if commit_type in ('claude_code', 'devin'):
                results['commit_details'].append({
                    'sha': commit['sha'][:8],
                    'type': commit_type,
                    'author': commit['author'],
                    'email': commit['email'],
                    'date': commit['date'],
                    'subject': commit['subject'],
                    'additions': additions,
                    'deletions': deletions
                })

        return results

    def print_report(self, results: Dict):
        """Print a human-readable summary of the dict returned by ``analyze``."""
        self.logger.info("Generating analysis report")
        categories = ('claude_code', 'devin', 'human')

        print("\n" + "="*60)
        period_parts = []
        if results['period']['since']:
            period_parts.append(f"Since: {results['period']['since']}")
        if results['period']['until']:
            period_parts.append(f"Until: {results['period']['until']}")
        period_info = f" ({', '.join(period_parts)})" if period_parts else ""

        print(f"AI Code Analysis Report - {results['repository']}{period_info}")
        print("="*60)

        total_commits = results['total_commits']

        print("\n📊 Commit Summary:")
        print(f"Total Commits: {total_commits}")

        for ai_type in categories:
            count = results[ai_type]['commits']
            percentage = (count / total_commits * 100) if total_commits > 0 else 0
            print(f"{ai_type.replace('_', ' ').title()}: {count} ({percentage:.1f}%)")

        print("\n📝 Code Changes:")
        total_additions = sum(results[t]['additions'] for t in categories)
        total_deletions = sum(results[t]['deletions'] for t in categories)

        if total_additions > 0:
            for ai_type in categories:
                additions = results[ai_type]['additions']
                percentage = additions / total_additions * 100
                print(f"{ai_type.replace('_', ' ').title()} additions: {additions} lines ({percentage:.1f}%)")

        print(f"\nTotal additions: {total_additions} lines")
        print(f"Total deletions: {total_deletions} lines")

        # AI vs human ratio by commit count
        if total_commits > 0:
            ai_commits = results['claude_code']['commits'] + results['devin']['commits']
            ai_ratio = (ai_commits / total_commits * 100)
            print(f"\n🤖 AI Generated: {ai_ratio:.1f}%")
            print(f"👤 Human Generated: {100 - ai_ratio:.1f}%")

        # Show the first ten AI-labelled commits
        if results['commit_details']:
            print("\n🔍 AI Generated Commits Details:")
            for detail in results['commit_details'][:10]:
                subject = detail['subject']
                # Mark the subject as shortened only when it really was
                if len(subject) > 50:
                    subject = subject[:50] + '...'
                print(f"  {detail['sha']} ({detail['type']}) {detail['date'][:10]} {subject}")

 def setup_logging(verbose: bool = False):
    """Configure root logging for the console and ``repo_analysis.log``.

    Args:
        verbose: When true the level is DEBUG, otherwise INFO.
    """
    if verbose:
        level = logging.DEBUG
    else:
        level = logging.INFO

    # Records go both to the stream handler and to a log file in the cwd
    console_handler = logging.StreamHandler()
    file_handler = logging.FileHandler('repo_analysis.log')

    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[console_handler, file_handler],
        level=level,
    )

 def main():
    """Parse the CLI arguments, run the analysis, print and save the report.

    The JSON result is written to the current directory; its name is built
    from the repository directory name and the optional ``--since`` /
    ``--until`` values.
    """
    import os

    parser = argparse.ArgumentParser(description='Analyze AI-generated code in local git repository')
    parser.add_argument('repo_path', help='Path to local git repository')
    parser.add_argument('--since', help='Start date for analysis (e.g., "2024-01-01", "1 month ago", "2024-07-01T00:00:00")')
    parser.add_argument('--until', help='End date for analysis (e.g., "2024-12-31", "yesterday", "2024-07-31T23:59:59")')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose logging')

    args = parser.parse_args()

    setup_logging(args.verbose)
    logger = logging.getLogger(__name__)

    def _filename_safe(value: str) -> str:
        # Spaces, colons and path separators must not end up in a file name
        return re.sub(r'[ :/\\]', '_', value)

    try:
        analyzer = LocalRepoAnalyzer(args.repo_path, args.since, args.until)
        results = analyzer.analyze()
        analyzer.print_report(results)

        # abspath resolves '.', '..' and trailing separators, which would
        # otherwise give the repository the name '.' or ''.
        repo_name = os.path.basename(os.path.abspath(args.repo_path)) or 'repo'

        # Encode the analysed period in the file name
        filename_parts = [repo_name, 'local_analysis']
        if args.since:
            filename_parts.append(f'since_{_filename_safe(args.since)}')
        if args.until:
            filename_parts.append(f'until_{_filename_safe(args.until)}')

        output_file = '_'.join(filename_parts) + '.json'

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)

        logger.info(f"Analysis results saved to: {output_file}")
        print(f"\n📄 Detailed results saved to: {output_file}")

    except Exception as e:
        # Top-level boundary: log the failure, then let it propagate
        logger.error(f"Analysis failed: {e}")
        raise

 if __name__ == '__main__':
    main()
diff --git a/analyze_repo.py b/analyze_repo.py
 #!/usr/bin/env python3
 """
 GitHub リポジトリの AI 生成コード比率分析ツール
 ClaudeCode と Devin で書かれたコードの比率を分析する
 """

 import requests
 import json
 import re
 from typing import Dict, List, Tuple
 from datetime import datetime
 import argparse

 class RepoAnalyzer:
    """Estimate the share of AI-generated commits in a GitHub repository.

    Commits are fetched through the GitHub REST API and labelled
    ``claude_code``, ``devin`` or ``human`` from their message and e-mail
    addresses. Line statistics are collected for the first 100 commits only.
    """

    # Seconds to wait for a GitHub response; without a timeout a stalled
    # connection would block the analysis indefinitely.
    _REQUEST_TIMEOUT = 30

    def __init__(self, owner: str, repo: str, github_token: str = None):
        """Store the repository coordinates and build the request headers.

        Args:
            owner: Repository owner (user or organisation).
            repo: Repository name.
            github_token: Optional token sent in the ``Authorization`` header.
        """
        self.owner = owner
        self.repo = repo
        self.token = github_token
        self.headers = {}
        if github_token:
            self.headers['Authorization'] = f'token {github_token}'

        # Commit-message patterns that mark AI-generated commits
        self.ai_patterns = {
            'claude_code': [
                r'🤖 Generated with \[Claude Code\]',
                r'Co-Authored-By: Claude <noreply@anthropic\.com>',
                r'Generated with Claude Code',
            ],
            'devin': [
                r'Generated by Devin',
                r'Co-authored-by: Devin',
                r'Devin.*generated',
                # Detailed Devin commit-message shapes
                r'- Modified .+\n- This changes from .+ to .+\n- .+\n- Makes it .+\n- Maintains .+',
                r'^[A-Z][a-z]+ .+ by .+\n- Modified',
                r'- This changes from .+ to .+',
                r'- Makes it easier to .+',
                r'- Maintains backward compatibility',
                # Several consecutive explanatory bullet lines
                r'(?:- [A-Z].+\n){3,}',
            ]
        }

        # Author e-mail addresses of the Devin AI integration bot
        self.devin_emails = [
            '158243242+devin-ai-integration[bot]@users.noreply.github.com',
            'devin-ai-integration[bot]@users.noreply.github.com'
        ]

    def get_commits(self) -> List[Dict]:
        """Fetch commits page by page, stopping after roughly 500 commits.

        On a request error or non-200 response the error is printed and the
        commits collected so far are returned.
        """
        commits = []
        page = 1
        per_page = 100
        url = f'https://api.github.com/repos/{self.owner}/{self.repo}/commits'

        while True:
            params = {'page': page, 'per_page': per_page}
            try:
                response = requests.get(
                    url, headers=self.headers, params=params,
                    timeout=self._REQUEST_TIMEOUT
                )
            except requests.RequestException as exc:
                print(f"Error fetching commits: {exc}")
                break
            if response.status_code != 200:
                print(f"Error fetching commits: {response.status_code}")
                break

            page_commits = response.json()
            if not page_commits:
                break

            commits.extend(page_commits)
            page += 1

            # A short page is the last one, so skip the extra request; also
            # stop at 500 commits to stay within the API rate limit.
            if len(page_commits) < per_page or len(commits) >= 500:
                break

        return commits

    @staticmethod
    def _email_of(identity) -> str:
        """Return the e-mail of an author/committer object, or '' if absent."""
        return (identity or {}).get('email') or ''

    def classify_commit(self, commit: Dict) -> str:
        """Label an API commit object as ``'claude_code'``, ``'devin'`` or ``'human'``.

        Args:
            commit: Item of the commit list; ``commit['commit']`` must hold
                ``message`` and may hold ``author`` / ``committer`` objects.
        """
        details = commit['commit']
        message = details['message']
        # author/committer may be null in the API response
        author_email = self._email_of(details.get('author'))
        committer_email = self._email_of(details.get('committer'))

        # The Devin bot is identified by its author e-mail
        if author_email in self.devin_emails:
            return 'devin'

        # Explicit markers in the message, Claude Code first, then Devin
        for label in ('claude_code', 'devin'):
            for pattern in self.ai_patterns[label]:
                if re.search(pattern, message, re.IGNORECASE | re.MULTILINE):
                    return label

        # Structural heuristic for Devin-style messages
        if self._is_devin_style_commit(message):
            return 'devin'

        if 'anthropic.com' in author_email or 'anthropic.com' in committer_email:
            return 'claude_code'

        return 'human'

    def _is_devin_style_commit(self, message: str) -> bool:
        """Return True when the message has the bullet-list shape used by Devin.

        Requires a title, a blank second line, three or more ``- `` bullet
        lines from the third line on, and at least two known phrases.
        """
        lines = message.split('\n')

        # Needs at least title + blank line + bullet list
        if len(lines) < 4:
            return False

        # The second line must be blank
        if lines[1].strip() != '':
            return False

        bullet_lines = sum(1 for line in lines[2:] if line.strip().startswith('- '))
        if bullet_lines < 3:
            return False

        message_lower = message.lower()
        devin_phrases = [
            'this changes from',
            'makes it easier',
            'maintains backward compatibility',
            'modified',
            'improved',
            'updated'
        ]
        phrase_count = sum(1 for phrase in devin_phrases if phrase in message_lower)
        return phrase_count >= 2

    def get_commit_stats(self, commit_sha: str) -> Tuple[int, int]:
        """Return ``(additions, deletions)`` for one commit, or ``(0, 0)`` on failure."""
        url = f'https://api.github.com/repos/{self.owner}/{self.repo}/commits/{commit_sha}'

        try:
            response = requests.get(url, headers=self.headers, timeout=self._REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Error fetching stats for {commit_sha}: {exc}")
            return 0, 0
        if response.status_code != 200:
            return 0, 0

        stats = response.json().get('stats') or {}
        return stats.get('additions', 0), stats.get('deletions', 0)

    def analyze(self) -> Dict:
        """Classify the fetched commits and aggregate counts and line statistics.

        Returns:
            Dict with ``total_commits``, one ``{'commits', 'additions',
            'deletions'}`` entry per label, ``analysis_date`` and ``repository``.
        """
        print(f"Analyzing {self.owner}/{self.repo}...")

        commits = self.get_commits()
        print(f"Found {len(commits)} commits")

        results = {
            'total_commits': len(commits),
            'claude_code': {'commits': 0, 'additions': 0, 'deletions': 0},
            'devin': {'commits': 0, 'additions': 0, 'deletions': 0},
            'human': {'commits': 0, 'additions': 0, 'deletions': 0},
            'analysis_date': datetime.now().isoformat(),
            'repository': f'{self.owner}/{self.repo}'
        }

        for i, commit in enumerate(commits):
            if i % 50 == 0:
                print(f"Processing commit {i+1}/{len(commits)}")

            commit_type = self.classify_commit(commit)
            results[commit_type]['commits'] += 1

            # Each stats lookup is one API call, so only the first 100
            # commits are analysed in detail.
            if i < 100:
                additions, deletions = self.get_commit_stats(commit['sha'])
                results[commit_type]['additions'] += additions
                results[commit_type]['deletions'] += deletions

        return results

    def print_report(self, results: Dict):
        """Print a human-readable summary of the dict returned by ``analyze``."""
        categories = ('claude_code', 'devin', 'human')

        print("\n" + "="*60)
        print(f"AI Code Analysis Report - {results['repository']}")
        print("="*60)

        total_commits = results['total_commits']

        print("\n📊 Commit Summary:")
        print(f"Total Commits: {total_commits}")

        for ai_type in categories:
            count = results[ai_type]['commits']
            percentage = (count / total_commits * 100) if total_commits > 0 else 0
            print(f"{ai_type.replace('_', ' ').title()}: {count} ({percentage:.1f}%)")

        print("\n📝 Code Changes (first 100 commits):")
        total_additions = sum(results[t]['additions'] for t in categories)
        total_deletions = sum(results[t]['deletions'] for t in categories)

        if total_additions > 0:
            for ai_type in categories:
                additions = results[ai_type]['additions']
                percentage = additions / total_additions * 100
                print(f"{ai_type.replace('_', ' ').title()} additions: {additions} lines ({percentage:.1f}%)")

        print(f"\nTotal additions: {total_additions} lines")
        print(f"Total deletions: {total_deletions} lines")

        # AI vs human ratio by commit count
        if total_commits > 0:
            ai_commits = results['claude_code']['commits'] + results['devin']['commits']
            ai_ratio = (ai_commits / total_commits * 100)
            print(f"\n🤖 AI Generated: {ai_ratio:.1f}%")
            print(f"👤 Human Generated: {100 - ai_ratio:.1f}%")

 def main():
    """Parse the CLI arguments, analyse the GitHub repository and save the result.

    The report is printed and also written to ``<repo>_analysis.json`` in
    the current directory.
    """
    parser = argparse.ArgumentParser(description='Analyze AI-generated code in GitHub repository')
    parser.add_argument('repo_url', help='GitHub repository URL (e.g., https://github.com/owner/repo)')
    parser.add_argument('--token', help='GitHub personal access token for higher rate limits')

    args = parser.parse_args()

    # Extract owner/repo from the URL; anything after the repository
    # segment (path, query, fragment, trailing slash) is ignored.
    match = re.match(
        r'https?://(?:www\.)?github\.com/([^/]+)/([^/?#]+)',
        args.repo_url.strip()
    )
    if not match:
        print("Invalid GitHub URL format")
        return

    owner, repo = match.groups()
    # Clone URLs end in '.git', which is not part of the repository name
    # and would make every API request return 404.
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    if not repo:
        print("Invalid GitHub URL format")
        return

    analyzer = RepoAnalyzer(owner, repo, args.token)
    results = analyzer.analyze()
    analyzer.print_report(results)

    # Save the results as JSON as well
    output_file = f'{repo}_analysis.json'
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"\n📄 Detailed results saved to: {output_file}")

 if __name__ == '__main__':
    main()
	#!/usr/bin/env python3
	"""
	ローカルgitリポジトリのAI生成コード比率分析ツール
	"""

	import subprocess
	import re
	import json
	import logging
	from datetime import datetime, timezone
	from typing import Dict, List, Tuple, Optional
	import argparse

	class LocalRepoAnalyzer:
	    """Estimate the share of AI-generated commits in a local git repository.

	    Commits are read via ``git log``, labelled ``claude_code``, ``devin`` or
	    ``human`` from their message and author e-mail, and the added/deleted
	    line counts reported by ``git show --numstat`` are summed per label.
	    """

	    # ASCII unit/record separators emitted by git through the %x1f / %x1e
	    # placeholders. Unlike '|', they do not occur in ordinary commit text,
	    # so subjects and bodies containing pipes cannot be mistaken for field
	    # or commit boundaries.
	    _FIELD_SEP = '\x1f'
	    _RECORD_SEP = '\x1e'

	    def __init__(self, repo_path: str, since: Optional[str] = None, until: Optional[str] = None):
	        """Store the target repository and the optional date bounds.

	        Args:
	            repo_path: Path handed to ``git -C``.
	            since: Optional value forwarded to ``git log --since``.
	            until: Optional value forwarded to ``git log --until``.
	        """
	        self.repo_path = repo_path
	        self.since = since
	        self.until = until
	        self.logger = logging.getLogger(__name__)

	        # Commit-message patterns that mark AI-generated commits
	        self.ai_patterns = {
	            'claude_code': [
	                r'🤖 Generated with \[Claude Code\]',
	                r'Co-Authored-By: Claude <noreply@anthropic\.com>',
	                r'Generated with Claude Code',
	            ],
	            'devin': [
	                r'Generated by Devin',
	                r'Co-authored-by: Devin',
	                r'Devin.*generated',
	            ]
	        }

	        # Author e-mail addresses of the Devin AI integration bot
	        self.devin_emails = [
	            '158243242+devin-ai-integration[bot]@users.noreply.github.com',
	            'devin-ai-integration[bot]@users.noreply.github.com'
	        ]

	    def get_commits(self) -> List[Dict]:
	        """Read commit metadata from ``git log --all``.

	        Returns:
	            One dict per commit with the keys ``sha``, ``author``, ``email``,
	            ``date``, ``subject`` and ``body``.

	        Raises:
	            Exception: If the git command exits with a non-zero status.
	        """
	        cmd = [
	            'git', '-C', self.repo_path, 'log',
	            '--pretty=format:%x1e%H%x1f%an%x1f%ae%x1f%ad%x1f%s%x1f%b',
	            '--date=iso', '--all'
	        ]

	        # Restrict the period only when the caller asked for it
	        if self.since:
	            cmd.extend(['--since', self.since])
	        if self.until:
	            cmd.extend(['--until', self.until])

	        # Decode as UTF-8 explicitly: commit messages routinely contain emoji
	        # or non-ASCII text that the locale codec may be unable to decode.
	        result = subprocess.run(
	            cmd, capture_output=True, text=True, encoding='utf-8', errors='replace'
	        )
	        if result.returncode != 0:
	            self.logger.error(f"Git command failed: {result.stderr}")
	            raise Exception(f"Git command failed: {result.stderr}")

	        return self._parse_log_output(result.stdout)

	    @classmethod
	    def _parse_log_output(cls, output: str) -> List[Dict]:
	        """Split separator-delimited ``git log`` output into commit dicts."""
	        commits = []
	        for record in output.split(cls._RECORD_SEP):
	            if not record.strip():
	                continue
	            fields = record.split(cls._FIELD_SEP, 5)
	            if len(fields) < 5:
	                # Not a record produced by our format string; ignore it
	                continue
	            commits.append({
	                'sha': fields[0],
	                'author': fields[1],
	                'email': fields[2],
	                'date': fields[3],
	                'subject': fields[4],
	                # Drop the newline(s) git emits between consecutive records
	                'body': fields[5].rstrip('\n') if len(fields) > 5 else ''
	            })
	        return commits

	    def classify_commit(self, commit: Dict) -> str:
	        """Label a commit as ``'claude_code'``, ``'devin'`` or ``'human'``.

	        Args:
	            commit: Dict with at least ``subject``, ``body`` and ``email``.
	        """
	        full_message = f"{commit['subject']}\n{commit['body']}"

	        # The Devin bot is identified by its author e-mail
	        if commit['email'] in self.devin_emails:
	            return 'devin'

	        # Explicit markers in the message, Claude Code first, then Devin
	        for label in ('claude_code', 'devin'):
	            for pattern in self.ai_patterns[label]:
	                if re.search(pattern, full_message, re.IGNORECASE | re.MULTILINE):
	                    return label

	        # Structural heuristic for Devin-style messages
	        if self._is_devin_style_commit(full_message):
	            return 'devin'

	        # Commits authored from an anthropic.com address
	        if 'anthropic.com' in commit['email']:
	            return 'claude_code'

	        return 'human'

	    def _is_devin_style_commit(self, message: str) -> bool:
	        """Return True when the message has the bullet-list shape used by Devin.

	        The message must span at least four lines, contain three or more
	        ``- `` bullet lines and mention at least two of the known phrases.
	        """
	        lines = message.split('\n')

	        # Needs at least a title plus a bullet list
	        if len(lines) < 4:
	            return False

	        bullet_lines = sum(1 for line in lines if line.strip().startswith('- '))
	        if bullet_lines < 3:
	            return False

	        message_lower = message.lower()
	        devin_phrases = [
	            'this changes from',
	            'makes it easier',
	            'maintains backward compatibility',
	            'modified',
	            'improved',
	            'updated'
	        ]
	        phrase_count = sum(1 for phrase in devin_phrases if phrase in message_lower)
	        return phrase_count >= 2

	    def get_commit_stats(self, commit_sha: str) -> Tuple[int, int]:
	        """Return ``(additions, deletions)`` for one commit.

	        ``(0, 0)`` is returned, and a warning logged, when git fails.
	        """
	        # --format= suppresses the commit header and message so that only
	        # numstat rows are parsed.
	        cmd = ['git', '-C', self.repo_path, 'show', '--numstat', '--format=', commit_sha]

	        result = subprocess.run(
	            cmd, capture_output=True, text=True, encoding='utf-8', errors='replace'
	        )
	        if result.returncode != 0:
	            self.logger.warning(f"Failed to get stats for commit {commit_sha}: {result.stderr}")
	            return 0, 0

	        return self._parse_numstat(result.stdout)

	    @staticmethod
	    def _parse_numstat(output: str) -> Tuple[int, int]:
	        """Sum the added/deleted columns of ``--numstat`` output."""
	        additions = 0
	        deletions = 0
	        for line in output.splitlines():
	            fields = line.split('\t')
	            if len(fields) < 3:
	                continue
	            try:
	                # Binary files are reported as '-' in both columns
	                added = 0 if fields[0] == '-' else int(fields[0])
	                deleted = 0 if fields[1] == '-' else int(fields[1])
	            except ValueError:
	                continue
	            additions += added
	            deletions += deleted
	        return additions, deletions

	    def analyze(self) -> Dict:
	        """Classify every commit and aggregate counts and line statistics.

	        Returns:
	            Dict with ``total_commits``, one ``{'commits', 'additions',
	            'deletions'}`` entry per label, ``analysis_date``, ``repository``,
	            ``period`` and ``commit_details`` (AI-labelled commits only).
	        """
	        period_parts = []
	        if self.since:
	            period_parts.append(f"since {self.since}")
	        if self.until:
	            period_parts.append(f"until {self.until}")
	        period_str = f" ({', '.join(period_parts)})" if period_parts else ""

	        self.logger.info(f"Analyzing {self.repo_path}{period_str}...")

	        commits = self.get_commits()
	        self.logger.info(f"Found {len(commits)} commits")

	        results = {
	            'total_commits': len(commits),
	            'claude_code': {'commits': 0, 'additions': 0, 'deletions': 0},
	            'devin': {'commits': 0, 'additions': 0, 'deletions': 0},
	            'human': {'commits': 0, 'additions': 0, 'deletions': 0},
	            'analysis_date': datetime.now().isoformat(),
	            'repository': self.repo_path,
	            'period': {
	                'since': self.since,
	                'until': self.until
	            },
	            'commit_details': []
	        }

	        for i, commit in enumerate(commits):
	            if i % 50 == 0:
	                self.logger.info(f"Processing commit {i+1}/{len(commits)}")

	            commit_type = self.classify_commit(commit)
	            bucket = results[commit_type]
	            bucket['commits'] += 1

	            additions, deletions = self.get_commit_stats(commit['sha'])
	            bucket['additions'] += additions
	            bucket['deletions'] += deletions

	            # Keep details of AI-labelled commits for inspection
	            if commit_type in ('claude_code', 'devin'):
	                results['commit_details'].append({
	                    'sha': commit['sha'][:8],
	                    'type': commit_type,
	                    'author': commit['author'],
	                    'email': commit['email'],
	                    'date': commit['date'],
	                    'subject': commit['subject'],
	                    'additions': additions,
	                    'deletions': deletions
	                })

	        return results

	    def print_report(self, results: Dict):
	        """Print a human-readable summary of the dict returned by ``analyze``."""
	        self.logger.info("Generating analysis report")
	        categories = ('claude_code', 'devin', 'human')

	        print("\n" + "="*60)
	        period_parts = []
	        if results['period']['since']:
	            period_parts.append(f"Since: {results['period']['since']}")
	        if results['period']['until']:
	            period_parts.append(f"Until: {results['period']['until']}")
	        period_info = f" ({', '.join(period_parts)})" if period_parts else ""

	        print(f"AI Code Analysis Report - {results['repository']}{period_info}")
	        print("="*60)

	        total_commits = results['total_commits']

	        print("\n📊 Commit Summary:")
	        print(f"Total Commits: {total_commits}")

	        for ai_type in categories:
	            count = results[ai_type]['commits']
	            percentage = (count / total_commits * 100) if total_commits > 0 else 0
	            print(f"{ai_type.replace('_', ' ').title()}: {count} ({percentage:.1f}%)")

	        print("\n📝 Code Changes:")
	        total_additions = sum(results[t]['additions'] for t in categories)
	        total_deletions = sum(results[t]['deletions'] for t in categories)

	        if total_additions > 0:
	            for ai_type in categories:
	                additions = results[ai_type]['additions']
	                percentage = additions / total_additions * 100
	                print(f"{ai_type.replace('_', ' ').title()} additions: {additions} lines ({percentage:.1f}%)")

	        print(f"\nTotal additions: {total_additions} lines")
	        print(f"Total deletions: {total_deletions} lines")

	        # AI vs human ratio by commit count
	        if total_commits > 0:
	            ai_commits = results['claude_code']['commits'] + results['devin']['commits']
	            ai_ratio = (ai_commits / total_commits * 100)
	            print(f"\n🤖 AI Generated: {ai_ratio:.1f}%")
	            print(f"👤 Human Generated: {100 - ai_ratio:.1f}%")

	        # Show the first ten AI-labelled commits
	        if results['commit_details']:
	            print("\n🔍 AI Generated Commits Details:")
	            for detail in results['commit_details'][:10]:
	                subject = detail['subject']
	                # Mark the subject as shortened only when it really was
	                if len(subject) > 50:
	                    subject = subject[:50] + '...'
	                print(f" {detail['sha']} ({detail['type']}) {detail['date'][:10]} {subject}")

	def setup_logging(verbose: bool = False):
	    """Configure root logging for the console and ``repo_analysis.log``.

	    Args:
	        verbose: When true the level is DEBUG, otherwise INFO.
	    """
	    log_level = logging.DEBUG if verbose else logging.INFO
	    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

	    logging.basicConfig(
	        level=log_level,
	        format=log_format,
	        handlers=[
	            logging.StreamHandler(),
	            logging.FileHandler('repo_analysis.log')
	        ]
	    )

	def main():
	    """Parse the CLI arguments, run the analysis, print and save the report.

	    The JSON result is written to the current directory; its name is built
	    from the repository directory name and the optional ``--since`` /
	    ``--until`` values.
	    """
	    import os

	    parser = argparse.ArgumentParser(description='Analyze AI-generated code in local git repository')
	    parser.add_argument('repo_path', help='Path to local git repository')
	    parser.add_argument('--since', help='Start date for analysis (e.g., "2024-01-01", "1 month ago", "2024-07-01T00:00:00")')
	    parser.add_argument('--until', help='End date for analysis (e.g., "2024-12-31", "yesterday", "2024-07-31T23:59:59")')
	    parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose logging')

	    args = parser.parse_args()

	    setup_logging(args.verbose)
	    logger = logging.getLogger(__name__)

	    def _filename_safe(value: str) -> str:
	        # Spaces, colons and path separators must not end up in a file name
	        return re.sub(r'[ :/\\]', '_', value)

	    try:
	        analyzer = LocalRepoAnalyzer(args.repo_path, args.since, args.until)
	        results = analyzer.analyze()
	        analyzer.print_report(results)

	        # abspath resolves '.', '..' and trailing separators, which would
	        # otherwise give the repository the name '.' or ''.
	        repo_name = os.path.basename(os.path.abspath(args.repo_path)) or 'repo'

	        # Encode the analysed period in the file name
	        filename_parts = [repo_name, 'local_analysis']
	        if args.since:
	            filename_parts.append(f'since_{_filename_safe(args.since)}')
	        if args.until:
	            filename_parts.append(f'until_{_filename_safe(args.until)}')

	        output_file = '_'.join(filename_parts) + '.json'

	        with open(output_file, 'w', encoding='utf-8') as f:
	            json.dump(results, f, indent=2, ensure_ascii=False)

	        logger.info(f"Analysis results saved to: {output_file}")
	        print(f"\n📄 Detailed results saved to: {output_file}")

	    except Exception as e:
	        # Top-level boundary: log the failure, then let it propagate
	        logger.error(f"Analysis failed: {e}")
	        raise

	if __name__ == '__main__':
	    main()
	#!/usr/bin/env python3
	"""
	GitHub リポジトリの AI 生成コード比率分析ツール
	ClaudeCode と Devin で書かれたコードの比率を分析する
	"""

	import requests
	import json
	import re
	from typing import Dict, List, Tuple
	from datetime import datetime
	import argparse

	class RepoAnalyzer:
	def __init__(self, owner: str, repo: str, github_token: str = None):
	self.owner = owner
	self.repo = repo
	self.token = github_token
	self.headers = {}
	if github_token:
	self.headers['Authorization'] = f'token {github_token}'

	# AI生成コミットのパターン
	self.ai_patterns = {
	'claude_code': [
	r'🤖 Generated with \[Claude Code\]',
	r'Co-Authored-By: Claude <noreply@anthropic\.com>',
	r'Generated with Claude Code',
	],
	'devin': [
	r'Generated by Devin',
	r'Co-authored-by: Devin',
	r'Devin.*generated',
	# Devinの詳細なコミットメッセージパターン
	r'- Modified .+\n- This changes from .+ to .+\n- .+\n- Makes it .+\n- Maintains .+',
	r'^[A-Z][a-z]+ .+ by .+\n- Modified',
	r'- This changes from .+ to .+',
	r'- Makes it easier to .+',
	r'- Maintains backward compatibility',
	# 複数の箇条書きで詳細説明があるパターン
	r'(?:- [A-Z].+\n){3,}',
	]
	}

	def get_commits(self) -> List[Dict]:
	    """Fetch commits page by page, stopping after roughly 500 commits.

	    On a non-200 response the status code is printed and the commits
	    collected so far are returned.
	    """
	    commits = []
	    page = 1
	    per_page = 100

	    while True:
	        url = f'https://api.github.com/repos/{self.owner}/{self.repo}/commits'
	        params = {'page': page, 'per_page': per_page}

	        # The timeout keeps a stalled connection from blocking forever
	        response = requests.get(url, headers=self.headers, params=params, timeout=30)
	        if response.status_code != 200:
	            print(f"Error fetching commits: {response.status_code}")
	            break

	        page_commits = response.json()
	        if not page_commits:
	            break

	        commits.extend(page_commits)
	        page += 1

	        # Limit to the first 500 commits to respect the API rate limit
	        if len(commits) >= 500:
	            break

	    return commits

	def classify_commit(self, commit: Dict) -> str:
	"""コミットをAI生成かどうか分類"""
	message = commit['commit']['message']

	# ClaudeCode パターンチェック
	for pattern in self.ai_patterns['claude_code']:
	if re.search(pattern, message, re.IGNORECASE \| re.MULTILINE):
	return 'claude_code'

	# Devin パターンチェック
	for pattern in self.ai_patterns['devin']:
	if re.search(pattern, message, re.IGNORECASE \| re.MULTILINE):
	return 'devin'

	# Devinの構造的特徴をチェック
	if self._is_devin_style_commit(message):
	return 'devin'

	# Author/Committerもチェック
	author_email = commit['commit']['author']['email']
	committer_email = commit['commit']['committer']['email']

	if 'anthropic.com' in author_email or 'anthropic.com' in committer_email:
	return 'claude_code'

	return 'human'

	def _is_devin_style_commit(self, message: str) -> bool:
	"""Devinスタイルのコミットメッセージかどうか判定"""
	lines = message.split('\n')

	# 基本構造チェック: タイトル + 空行 + 箇条書きリスト
	if len(lines) < 4:
	return False

	# 2行目が空行かチェック
	if len(lines) >= 2 and lines[1].strip() != '':
	return False

	# 箇条書き行をカウント
	bullet_lines = 0
	for line in lines[2:]: # 3行目以降
	if line.strip().startswith('- '):
	bullet_lines += 1

	# 3個以上の箇条書きがあり、特定のフレーズを含む場合はDevin
	if bullet_lines >= 3:
	message_lower = message.lower()
	devin_phrases = [
	'this changes from',
	'makes it easier',
	'maintains backward compatibility',
	'modified',
	'improved',
	'updated'
	]

	phrase_count = sum(1 for phrase in devin_phrases if phrase in message_lower)
	return phrase_count >= 2

	return False

	def get_commit_stats(self, commit_sha: str) -> Tuple[int, int]:
	"""特定コミットの変更統計を取得 (追加行数, 削除行数)"""
	url = f'https://api.github.com/repos/{self.owner}/{self.repo}/commits/{commit_sha}'

	response = requests.get(url, headers=self.headers)
	if response.status_code != 200:
	return 0, 0

	commit_data = response.json()
	stats = commit_data.get('stats', {})

	return stats.get('additions', 0), stats.get('deletions', 0)

	def analyze(self) -> Dict:
	"""リポジトリを分析"""
	print(f"Analyzing {self.owner}/{self.repo}...")

	commits = self.get_commits()
	print(f"Found {len(commits)} commits")

	results = {
	'total_commits': len(commits),
	'claude_code': {'commits': 0, 'additions': 0, 'deletions': 0},
	'devin': {'commits': 0, 'additions': 0, 'deletions': 0},
	'human': {'commits': 0, 'additions': 0, 'deletions': 0},
	'analysis_date': datetime.now().isoformat(),
	'repository': f'{self.owner}/{self.repo}'
	}

	for i, commit in enumerate(commits):
	if i % 50 == 0:
	print(f"Processing commit {i+1}/{len(commits)}")

	commit_type = self.classify_commit(commit)
	results[commit_type]['commits'] += 1

	# 変更統計を取得（API制限を考慮して最初の100コミットのみ詳細分析）
	if i < 100:
	additions, deletions = self.get_commit_stats(commit['sha'])
	results[commit_type]['additions'] += additions
	results[commit_type]['deletions'] += deletions

	return results

	def print_report(self, results: Dict):
	"""分析結果をレポート出力"""
	print("\n" + "="*60)
	print(f"AI Code Analysis Report - {results['repository']}")
	print("="*60)

	total_commits = results['total_commits']

	print(f"\n📊 Commit Summary:")
	print(f"Total Commits: {total_commits}")

	for ai_type in ['claude_code', 'devin', 'human']:
	count = results[ai_type]['commits']
	percentage = (count / total_commits * 100) if total_commits > 0 else 0
	print(f"{ai_type.replace('_', ' ').title()}: {count} ({percentage:.1f}%)")

	print(f"\n📝 Code Changes (first 100 commits):")
	total_additions = sum(results[t]['additions'] for t in ['claude_code', 'devin', 'human'])
	total_deletions = sum(results[t]['deletions'] for t in ['claude_code', 'devin', 'human'])

	if total_additions > 0:
	for ai_type in ['claude_code', 'devin', 'human']:
	additions = results[ai_type]['additions']
	percentage = (additions / total_additions * 100) if total_additions > 0 else 0
	print(f"{ai_type.replace('_', ' ').title()} additions: {additions} lines ({percentage:.1f}%)")

	print(f"\nTotal additions: {total_additions} lines")
	print(f"Total deletions: {total_deletions} lines")

	# AI vs Human ratio
	ai_commits = results['claude_code']['commits'] + results['devin']['commits']
	human_commits = results['human']['commits']

	if total_commits > 0:
	ai_ratio = (ai_commits / total_commits * 100)
	print(f"\n🤖 AI Generated: {ai_ratio:.1f}%")
	print(f"👤 Human Generated: {100 - ai_ratio:.1f}%")

	def main():
	    """Command-line entry point: analyse one GitHub repository.

	    Reads ``repo_url`` and the optional ``--token`` from the command line,
	    runs ``RepoAnalyzer`` on the repository, prints the report and writes
	    the raw results to ``<repo>_analysis.json`` in the current directory.
	    A URL that does not yield an owner and a repository name prints
	    "Invalid GitHub URL format" and returns without doing any work.
	    """
	    parser = argparse.ArgumentParser(description='Analyze AI-generated code in GitHub repository')
	    parser.add_argument('repo_url', help='GitHub repository URL (e.g., https://github.com/owner/repo)')
	    parser.add_argument('--token', help='GitHub personal access token for higher rate limits')

	    args = parser.parse_args()

	    # Extract owner/repo from the URL. The repo segment stops at '/', '?'
	    # or '#' so query strings and fragments do not leak into the name.
	    match = re.match(r'https://github\.com/([^/]+)/([^/?#]+)', args.repo_url.rstrip('/'))
	    owner, repo = match.groups() if match else (None, None)

	    # Clone URLs end in ".git", which is not part of the repository name.
	    if repo and repo.endswith('.git'):
	        repo = repo[:-len('.git')]

	    if not owner or not repo:
	        print("Invalid GitHub URL format")
	        return

	    analyzer = RepoAnalyzer(owner, repo, args.token)
	    results = analyzer.analyze()
	    analyzer.print_report(results)

	    # Also persist the results as JSON.
	    output_file = f'{repo}_analysis.json'
	    with open(output_file, 'w', encoding='utf-8') as f:
	        json.dump(results, f, indent=2, ensure_ascii=False)

	    print(f"\n📄 Detailed results saved to: {output_file}")

	# Script entry point: call main() only when this file is executed directly.
	if __name__ == '__main__':
	main()