# File: project-standalo-sonic-cloud/skills/documentation-generator/scripts/analyze_project.py
# (490 lines, 18 KiB, Python — viewer metadata, not part of the script)
#!/usr/bin/env python3
"""
Project Analyzer for Documentation Generation
Analyzes project structure and outputs YAML for documentation generation.
"""
import os
import sys
import json
import re
from pathlib import Path
from typing import Dict, List, Any, Optional
from datetime import datetime
# Try to import yaml, but provide fallback
try:
import yaml
except ImportError:
yaml = None
def detect_project_type(root_path: Path) -> Dict[str, Any]:
    """Detect the project's primary language/ecosystem from config files.

    Args:
        root_path: Project root directory to inspect.

    Returns:
        Dict with 'type' (e.g. 'node', 'python', 'dotnet', 'other') and
        'config_file' (the matched file name, or None when nothing matched).
    """
    # Checked in order; first match wins. Entries containing a glob
    # wildcard (dotnet) are resolved lazily inside the loop.
    # NOTE: the original evaluated the dotnet globs eagerly inside the dict
    # and crashed with IndexError (files[0] on an empty list) for any
    # project that was not node/python/rust/go/java and had no .csproj/.sln.
    indicators = [
        ('node', ['package.json']),
        ('python', ['requirements.txt', 'pyproject.toml', 'setup.py', 'Pipfile']),
        ('rust', ['Cargo.toml']),
        ('go', ['go.mod']),
        ('java', ['pom.xml', 'build.gradle', 'build.gradle.kts']),
        ('dotnet', ['*.csproj', '*.sln']),
        ('ruby', ['Gemfile']),
        ('php', ['composer.json']),
    ]
    for lang, patterns in indicators:
        for pattern in patterns:
            if '*' in pattern:
                # Sorted for deterministic choice when several files match.
                matches = sorted(root_path.glob(pattern))
                if matches:
                    return {'type': lang, 'config_file': matches[0].name}
            elif (root_path / pattern).exists():
                return {'type': lang, 'config_file': pattern}
    return {'type': 'other', 'config_file': None}
def parse_package_json(root_path: Path) -> Dict[str, Any]:
    """Parse package.json and summarize a Node.js project's tech stack.

    Args:
        root_path: Project root expected to contain package.json.

    Returns:
        Dict with name/version/description, detected framework, database,
        UI framework, categorized key dependencies (max 15), and scripts.
        Empty dict when package.json is missing, unreadable, or malformed.
    """
    pkg_path = root_path / 'package.json'
    if not pkg_path.exists():
        return {}
    try:
        with open(pkg_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError):
        # A broken manifest should degrade to "no info", not abort analysis.
        return {}
    deps = data.get('dependencies', {})
    dev_deps = data.get('devDependencies', {})
    # Framework detection: most specific first (Next.js bundles React,
    # so 'next' must be checked before 'react').
    framework = None
    if 'next' in deps:
        framework = 'Next.js'
    elif 'react' in deps:
        framework = 'React'
    elif 'vue' in deps:
        framework = 'Vue.js'
    elif '@angular/core' in deps:
        framework = 'Angular'
    elif 'express' in deps:
        framework = 'Express'
    elif 'fastify' in deps:
        framework = 'Fastify'
    # Database / ORM detection.
    database = None
    if '@prisma/client' in deps:
        database = 'Prisma (PostgreSQL/MySQL/SQLite)'
    elif 'mongoose' in deps:
        database = 'MongoDB (Mongoose)'
    elif 'typeorm' in deps:
        database = 'TypeORM'
    elif 'sequelize' in deps:
        database = 'Sequelize'
    # UI framework detection. NOTE: a component library (MUI/Chakra)
    # deliberately takes precedence over Tailwind when both are present.
    ui_framework = None
    if 'tailwindcss' in dev_deps or 'tailwindcss' in deps:
        ui_framework = 'Tailwind CSS'
    if '@mui/material' in deps:
        ui_framework = 'Material UI'
    elif '@chakra-ui/react' in deps:
        ui_framework = 'Chakra UI'
    # Categorize dependencies; uncategorized ones are kept only if they are
    # on the well-known utility shortlist below.
    dep_categories = {
        'core': ['react', 'next', 'vue', 'angular', 'express', 'fastify'],
        'database': ['@prisma/client', 'mongoose', 'typeorm', 'sequelize', 'pg', 'mysql2'],
        'auth': ['next-auth', 'passport', 'jsonwebtoken', '@auth0/nextjs-auth0'],
        'ui': ['@mui/material', '@chakra-ui/react', 'antd', '@radix-ui'],
        'state': ['zustand', 'redux', '@reduxjs/toolkit', 'recoil', 'jotai'],
        'testing': ['jest', 'vitest', '@testing-library/react', 'cypress'],
    }
    key_deps = []
    for dep, version in {**deps, **dev_deps}.items():
        category = 'utility'
        for cat, patterns in dep_categories.items():
            if any(p in dep for p in patterns):
                category = cat
                break
        if category != 'utility' or dep in ['axios', 'zod', 'date-fns', 'lodash']:
            key_deps.append({
                'name': dep,
                # Strip semver range prefixes; str() guards against the rare
                # non-string value in a hand-edited manifest.
                'version': str(version).replace('^', '').replace('~', ''),
                'category': category,
                'purpose': get_dep_purpose(dep)
            })
    return {
        'name': data.get('name', 'Unknown'),
        'version': data.get('version', '0.0.0'),
        'description': data.get('description', ''),
        'framework': framework,
        'database': database,
        'ui_framework': ui_framework,
        'key_dependencies': key_deps[:15],  # limit to 15 most important
        'scripts': data.get('scripts', {})
    }


def get_dep_purpose(dep_name: str) -> str:
    """Return a plain-English purpose string for a known dependency name.

    Falls back to 'Utility library' for anything not in the table.
    """
    purposes = {
        'react': 'UI component library',
        'next': 'Full-stack React framework',
        'vue': 'Progressive UI framework',
        'express': 'Web server framework',
        'fastify': 'High-performance web framework',
        '@prisma/client': 'Database ORM and query builder',
        'mongoose': 'MongoDB object modeling',
        'typeorm': 'TypeScript ORM',
        'sequelize': 'SQL ORM',
        'next-auth': 'Authentication for Next.js',
        'passport': 'Authentication middleware',
        'jsonwebtoken': 'JWT token handling',
        '@mui/material': 'Material Design components',
        '@chakra-ui/react': 'Accessible component library',
        'tailwindcss': 'Utility-first CSS framework',
        'zustand': 'State management',
        'redux': 'Predictable state container',
        '@reduxjs/toolkit': 'Redux development toolkit',
        'axios': 'HTTP client',
        'zod': 'Schema validation',
        'date-fns': 'Date utility functions',
        'lodash': 'Utility functions',
        'jest': 'Testing framework',
        'vitest': 'Fast unit testing',
        '@testing-library/react': 'React component testing',
        'cypress': 'End-to-end testing',
    }
    return purposes.get(dep_name, 'Utility library')
def scan_directory_structure(root_path: Path) -> Dict[str, Any]:
    """Map the project's top-level directories to their likely purpose.

    Skips dependency/build/VCS folders and hidden directories, guesses the
    main source directory, and reports per-directory file counts plus a
    sample of source files found directly inside each directory.
    """
    ignore_dirs = {
        'node_modules', '.git', '.next', '__pycache__', 'venv',
        '.venv', 'dist', 'build', '.cache', 'coverage', '.turbo'
    }
    common_purposes = {
        'src': 'Main source code directory',
        'app': 'Application code (Next.js App Router)',
        'pages': 'Page components (Next.js Pages Router)',
        'components': 'Reusable UI components',
        'lib': 'Shared utilities and libraries',
        'utils': 'Utility functions',
        'hooks': 'Custom React hooks',
        'context': 'React context providers',
        'store': 'State management',
        'styles': 'CSS and styling',
        'types': 'TypeScript type definitions',
        'api': 'API route handlers',
        'services': 'Business logic services',
        'models': 'Data models/entities',
        'prisma': 'Database schema and migrations',
        'public': 'Static assets',
        'tests': 'Test files',
        '__tests__': 'Jest test files',
        'test': 'Test files',
        'spec': 'Test specifications',
        'docs': 'Documentation',
        'scripts': 'Build and utility scripts',
        'config': 'Configuration files',
    }
    code_suffixes = {'.ts', '.tsx', '.js', '.jsx', '.py', '.rs', '.go'}
    # Best-guess main source directory: first conventional name that exists.
    source_dir = next(
        (name for name in ('src', 'app', 'lib', 'source') if (root_path / name).is_dir()),
        None,
    )
    directories = []
    for entry in sorted(root_path.iterdir()):
        if not entry.is_dir() or entry.name in ignore_dirs or entry.name.startswith('.'):
            continue
        total_files = sum(1 for child in entry.rglob('*') if child.is_file())
        # Sample up to five source files sitting directly in this directory.
        samples = [
            child.name for child in entry.iterdir()
            if child.is_file() and child.suffix in code_suffixes
        ][:5]
        directories.append({
            'path': entry.name,
            'purpose': common_purposes.get(entry.name, 'Project directory'),
            'file_count': total_files,
            'key_files': samples,
        })
    return {
        'source_dir': source_dir or '.',
        'directories': directories,
    }
def detect_features(root_path: Path) -> List[Dict[str, Any]]:
    """Detect main features by matching keywords against source file paths.

    Args:
        root_path: Project root to scan for .ts/.tsx/.js/.jsx files.

    Returns:
        List of feature dicts (name, description, technical_notes, and up
        to 5 matching files) for every feature with at least one match.
    """
    feature_patterns = {
        'authentication': {
            'keywords': ['auth', 'login', 'logout', 'session', 'jwt', 'oauth'],
            'description': 'User authentication and session management',
            'technical_notes': 'Handles user login, logout, and session tokens'
        },
        'user_management': {
            'keywords': ['user', 'profile', 'account', 'register', 'signup'],
            'description': 'User account creation and profile management',
            'technical_notes': 'CRUD operations for user data'
        },
        'api': {
            'keywords': ['api', 'endpoint', 'route'],
            'description': 'REST API endpoints for data operations',
            'technical_notes': 'HTTP handlers for client-server communication'
        },
        'database': {
            'keywords': ['prisma', 'model', 'entity', 'schema', 'migration'],
            'description': 'Database storage and data persistence',
            'technical_notes': 'ORM-based data layer with migrations'
        },
        'file_upload': {
            'keywords': ['upload', 'file', 'storage', 's3', 'blob'],
            'description': 'File upload and storage functionality',
            'technical_notes': 'Handles file uploads and cloud storage'
        },
        'search': {
            'keywords': ['search', 'filter', 'query'],
            'description': 'Search and filtering capabilities',
            'technical_notes': 'Full-text search or database queries'
        },
    }
    # Gather candidate source files once. Match keywords against the path
    # relative to the project root: the original matched against the absolute
    # path, so keywords appearing in parent directories (e.g. a user's home
    # folder named "api-projects") produced false positives. It also built
    # file_names/file_paths lists that were never used — removed.
    all_files = []
    for pattern in ('*.ts', '*.tsx', '*.js', '*.jsx'):
        all_files.extend(root_path.rglob(pattern))
    rel_paths = [(f, str(f.relative_to(root_path)).lower()) for f in all_files]
    features = []
    for feature_name, config in feature_patterns.items():
        hits = [
            str(f.relative_to(root_path))
            for keyword in config['keywords']
            for f, lowered in rel_paths
            if keyword in lowered
        ]
        if hits:
            features.append({
                'name': feature_name.replace('_', ' ').title(),
                'description': config['description'],
                'technical_notes': config['technical_notes'],
                # Deterministic, order-preserving dedupe (was unordered set()).
                'files': list(dict.fromkeys(hits))[:5]
            })
    return features
def find_components(root_path: Path) -> List[Dict[str, Any]]:
    """Locate PascalCase .tsx UI components under conventional directories.

    Scans components/, src/components/ and app/components/ recursively,
    skipping barrel files (index.tsx), private modules (leading underscore)
    and non-PascalCase filenames.
    """
    found: List[Dict[str, Any]] = []
    for rel_dir in ('components', 'src/components', 'app/components'):
        base = root_path / rel_dir
        if not base.exists():
            continue
        for tsx in base.rglob('*.tsx'):
            stem = tsx.stem
            if tsx.name == 'index.tsx' or tsx.name.startswith('_'):
                continue
            if not stem[0].isupper():  # component names are PascalCase
                continue
            found.append({
                'id': f'component_{stem.lower()}',
                'name': stem,
                'path': str(tsx.relative_to(root_path)),
                'description': f'{stem} component',
                'props': 'See source file'
            })
    # Cap output so huge component libraries don't flood the report.
    return found[:20]
def find_api_endpoints(root_path: Path) -> List[Dict[str, Any]]:
    """Discover Next.js API endpoints (App Router and Pages Router).

    Returns:
        List of endpoint dicts: method, URL path (dynamic [seg] rendered
        as :seg), handler file, description, and technical notes.
    """
    endpoints: List[Dict[str, Any]] = []
    # App Router: app/api/**/route.{ts,tsx,js} — one exported handler per verb.
    app_api = root_path / 'app' / 'api'
    if app_api.exists():
        # sorted() makes the output order deterministic across filesystems.
        for route_file in sorted(app_api.rglob('route.*')):
            if route_file.suffix not in ('.ts', '.tsx', '.js'):
                continue
            # Explicit UTF-8; errors='ignore' so one oddly-encoded file
            # cannot abort the whole scan.
            content = route_file.read_text(encoding='utf-8', errors='ignore')
            raw_path = '/api/' + '/'.join(route_file.parent.relative_to(app_api).parts)
            for method in ('GET', 'POST', 'PUT', 'PATCH', 'DELETE'):
                # Recognize both function declarations and const arrow exports
                # (the original missed `export const GET = ...`).
                if (f'export async function {method}' in content
                        or f'export function {method}' in content
                        or f'export const {method}' in content):
                    endpoints.append({
                        'method': method,
                        'path': raw_path.replace('[', ':').replace(']', ''),
                        'handler_file': str(route_file.relative_to(root_path)),
                        'description': f'{method} {raw_path}',
                        'technical_notes': 'Next.js App Router endpoint'
                    })
    # Pages Router: pages/api/**/*.{ts,js} — a single default-export handler
    # that branches on req.method, hence method 'MULTIPLE'.
    pages_api = root_path / 'pages' / 'api'
    if pages_api.exists():
        for api_file in sorted(pages_api.rglob('*')):
            if not api_file.is_file() or api_file.suffix not in ('.ts', '.js'):
                continue
            raw_path = '/api/' + '/'.join(api_file.relative_to(pages_api).with_suffix('').parts)
            endpoints.append({
                'method': 'MULTIPLE',
                'path': raw_path.replace('[', ':').replace(']', ''),
                'handler_file': str(api_file.relative_to(root_path)),
                'description': f'API endpoint at {raw_path}',
                'technical_notes': 'Next.js Pages Router endpoint'
            })
    return endpoints
def find_data_models(root_path: Path) -> List[Dict[str, Any]]:
    """Extract data models (name + fields) from a Prisma schema, if present.

    Args:
        root_path: Project root; looks for prisma/schema.prisma.

    Returns:
        List of model dicts with name, description, and up to 10 fields;
        empty list when no Prisma schema exists.
    """
    models: List[Dict[str, Any]] = []
    prisma_schema = root_path / 'prisma' / 'schema.prisma'
    if not prisma_schema.exists():
        return models
    # Explicit UTF-8: schema files are UTF-8 regardless of platform locale
    # (the original used the platform default encoding).
    content = prisma_schema.read_text(encoding='utf-8')
    # Prisma model bodies contain no nested braces, so [^}]+ is sufficient.
    model_pattern = re.compile(r'model\s+(\w+)\s*\{([^}]+)\}', re.MULTILINE)
    for match in model_pattern.finditer(content):
        model_name, model_body = match.group(1), match.group(2)
        fields = []
        for line in model_body.strip().split('\n'):
            line = line.strip()
            # Skip blanks, block attributes (@@index, @@unique, ...) and comments.
            if not line or line.startswith('@@') or line.startswith('//'):
                continue
            parts = line.split()
            if len(parts) >= 2:  # "<name> <type> [attributes...]"
                fields.append({
                    'name': parts[0],
                    'type': parts[1],
                    'description': f'{parts[0]} field'
                })
        models.append({
            'name': model_name,
            'description': f'{model_name} data model',
            'fields': fields[:10]  # keep the report concise
        })
    return models
def collect_glossary_terms(features: List, components: List, endpoints: List) -> List[Dict[str, str]]:
    """Return glossary entries for common technical terms.

    The features/components/endpoints arguments are accepted for future
    context-aware term selection but are not currently consulted — the
    same fixed glossary is returned for every project.
    """
    definitions = [
        ('API', 'Application Programming Interface - a way for different software to communicate'),
        ('REST', 'Representational State Transfer - a standard way to design web APIs'),
        ('Component', 'A reusable piece of the user interface'),
        ('Endpoint', 'A specific URL that the application responds to'),
        ('ORM', 'Object-Relational Mapping - connects code to database tables'),
        ('JWT', 'JSON Web Token - a secure way to transmit user identity'),
        ('CRUD', 'Create, Read, Update, Delete - basic data operations'),
        ('Props', 'Properties passed to a component to customize it'),
        ('State', 'Data that can change and affects what users see'),
        ('Hook', 'A way to add features to React components'),
        ('Migration', 'A controlled change to database structure'),
        ('Schema', 'The structure/shape of data'),
        ('Route', 'A URL path that maps to specific functionality'),
        ('Handler', 'Code that responds to a specific request'),
    ]
    return [{'term': term, 'definition': meaning} for term, meaning in definitions]
def generate_analysis(root_path: Path) -> Dict[str, Any]:
    """Run every analyzer over the project and assemble the full report.

    Args:
        root_path: Project root directory to analyze.

    Returns:
        Dict with project metadata, tech stack, directory structure,
        detected features, components, API endpoints, data models, and
        glossary terms, plus an ISO-format analysis timestamp.
    """
    project_info = detect_project_type(root_path)
    is_node = project_info['type'] == 'node'
    # package.json details only apply to Node.js projects.
    pkg_info = parse_package_json(root_path) if is_node else {}
    features = detect_features(root_path)
    components = find_components(root_path)
    endpoints = find_api_endpoints(root_path)
    return {
        'analysis_timestamp': datetime.now().isoformat(),
        'project': {
            'name': pkg_info.get('name', root_path.name),
            'version': pkg_info.get('version', '0.0.0'),
            'description': pkg_info.get('description', ''),
            'type': project_info['type'],
        },
        'tech_stack': {
            'language': 'TypeScript' if is_node else project_info['type'],
            'framework': pkg_info.get('framework'),
            'database': pkg_info.get('database'),
            'ui_framework': pkg_info.get('ui_framework'),
            'key_dependencies': pkg_info.get('key_dependencies', []),
        },
        'structure': scan_directory_structure(root_path),
        'features': features,
        'components': components,
        'api_endpoints': endpoints,
        'data_models': find_data_models(root_path),
        'glossary_terms': collect_glossary_terms(features, components, endpoints),
    }
def output_yaml(data: Dict[str, Any], output_path: Optional[Path] = None):
    """Serialize the analysis to YAML (or JSON when PyYAML is unavailable).

    Args:
        data: Analysis dict produced by generate_analysis().
        output_path: File to write to; prints to stdout when None.
    """
    if yaml:
        output = yaml.dump(data, default_flow_style=False, allow_unicode=True, sort_keys=False)
    else:
        # Fallback to JSON if yaml not available — JSON is a YAML subset,
        # so downstream YAML consumers can still parse it.
        output = json.dumps(data, indent=2)
    if output_path:
        # Explicit UTF-8: allow_unicode=True output would fail to encode
        # under a non-UTF-8 platform default (e.g. cp1252 on Windows).
        output_path.write_text(output, encoding='utf-8')
        print(f"Analysis written to: {output_path}")
    else:
        print(output)
def main():
    """CLI entry point.

    Usage: analyze_project.py [ROOT_DIR] [OUTPUT_FILE]

    Analyzes ROOT_DIR (default: current working directory) and writes the
    result to OUTPUT_FILE, or to stdout when no output file is given.
    Exits with status 1 when ROOT_DIR does not exist.
    """
    args = sys.argv[1:]
    root_path = Path(args[0]) if args else Path.cwd()
    if not root_path.exists():
        print(f"Error: Path does not exist: {root_path}", file=sys.stderr)
        sys.exit(1)
    output_path = Path(args[1]) if len(args) > 1 else None
    output_yaml(generate_analysis(root_path), output_path)


if __name__ == '__main__':
    main()