# (source listing: ~490 lines, ~18 KiB, Python)
#!/usr/bin/env python3
|
|
"""
|
|
Project Analyzer for Documentation Generation
|
|
Analyzes project structure and outputs YAML for documentation generation.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List, Any, Optional
|
|
from datetime import datetime
|
|
|
|
# Try to import yaml, but provide fallback
|
|
try:
|
|
import yaml
|
|
except ImportError:
|
|
yaml = None
|
|
|
|
|
|
def detect_project_type(root_path: Path) -> Dict[str, Any]:
|
|
"""Detect project type from config files."""
|
|
indicators = {
|
|
'node': ['package.json'],
|
|
'python': ['requirements.txt', 'pyproject.toml', 'setup.py', 'Pipfile'],
|
|
'rust': ['Cargo.toml'],
|
|
'go': ['go.mod'],
|
|
'java': ['pom.xml', 'build.gradle', 'build.gradle.kts'],
|
|
'dotnet': list(root_path.glob('*.csproj')) + list(root_path.glob('*.sln')),
|
|
'ruby': ['Gemfile'],
|
|
'php': ['composer.json'],
|
|
}
|
|
|
|
for lang, files in indicators.items():
|
|
if isinstance(files, list) and isinstance(files[0], str):
|
|
for f in files:
|
|
if (root_path / f).exists():
|
|
return {'type': lang, 'config_file': f}
|
|
elif files: # Already Path objects from glob
|
|
return {'type': lang, 'config_file': str(files[0].name)}
|
|
|
|
return {'type': 'other', 'config_file': None}
|
|
|
|
|
|
def parse_package_json(root_path: Path) -> Dict[str, Any]:
|
|
"""Parse package.json for Node.js projects."""
|
|
pkg_path = root_path / 'package.json'
|
|
if not pkg_path.exists():
|
|
return {}
|
|
|
|
with open(pkg_path, 'r') as f:
|
|
data = json.load(f)
|
|
|
|
deps = data.get('dependencies', {})
|
|
dev_deps = data.get('devDependencies', {})
|
|
|
|
# Detect framework
|
|
framework = None
|
|
if 'next' in deps:
|
|
framework = 'Next.js'
|
|
elif 'react' in deps:
|
|
framework = 'React'
|
|
elif 'vue' in deps:
|
|
framework = 'Vue.js'
|
|
elif '@angular/core' in deps:
|
|
framework = 'Angular'
|
|
elif 'express' in deps:
|
|
framework = 'Express'
|
|
elif 'fastify' in deps:
|
|
framework = 'Fastify'
|
|
|
|
# Detect database
|
|
database = None
|
|
if '@prisma/client' in deps:
|
|
database = 'Prisma (PostgreSQL/MySQL/SQLite)'
|
|
elif 'mongoose' in deps:
|
|
database = 'MongoDB (Mongoose)'
|
|
elif 'typeorm' in deps:
|
|
database = 'TypeORM'
|
|
elif 'sequelize' in deps:
|
|
database = 'Sequelize'
|
|
|
|
# Detect UI framework
|
|
ui_framework = None
|
|
if 'tailwindcss' in dev_deps or 'tailwindcss' in deps:
|
|
ui_framework = 'Tailwind CSS'
|
|
if '@mui/material' in deps:
|
|
ui_framework = 'Material UI'
|
|
elif '@chakra-ui/react' in deps:
|
|
ui_framework = 'Chakra UI'
|
|
|
|
# Categorize dependencies
|
|
key_deps = []
|
|
dep_categories = {
|
|
'core': ['react', 'next', 'vue', 'angular', 'express', 'fastify'],
|
|
'database': ['@prisma/client', 'mongoose', 'typeorm', 'sequelize', 'pg', 'mysql2'],
|
|
'auth': ['next-auth', 'passport', 'jsonwebtoken', '@auth0/nextjs-auth0'],
|
|
'ui': ['@mui/material', '@chakra-ui/react', 'antd', '@radix-ui'],
|
|
'state': ['zustand', 'redux', '@reduxjs/toolkit', 'recoil', 'jotai'],
|
|
'testing': ['jest', 'vitest', '@testing-library/react', 'cypress'],
|
|
}
|
|
|
|
for dep, version in {**deps, **dev_deps}.items():
|
|
category = 'utility'
|
|
for cat, patterns in dep_categories.items():
|
|
if any(p in dep for p in patterns):
|
|
category = cat
|
|
break
|
|
|
|
if category != 'utility' or dep in ['axios', 'zod', 'date-fns', 'lodash']:
|
|
key_deps.append({
|
|
'name': dep,
|
|
'version': version.replace('^', '').replace('~', ''),
|
|
'category': category,
|
|
'purpose': get_dep_purpose(dep)
|
|
})
|
|
|
|
return {
|
|
'name': data.get('name', 'Unknown'),
|
|
'version': data.get('version', '0.0.0'),
|
|
'description': data.get('description', ''),
|
|
'framework': framework,
|
|
'database': database,
|
|
'ui_framework': ui_framework,
|
|
'key_dependencies': key_deps[:15], # Limit to 15 most important
|
|
'scripts': data.get('scripts', {})
|
|
}
|
|
|
|
|
|
def get_dep_purpose(dep_name: str) -> str:
|
|
"""Get plain English purpose for common dependencies."""
|
|
purposes = {
|
|
'react': 'UI component library',
|
|
'next': 'Full-stack React framework',
|
|
'vue': 'Progressive UI framework',
|
|
'express': 'Web server framework',
|
|
'fastify': 'High-performance web framework',
|
|
'@prisma/client': 'Database ORM and query builder',
|
|
'mongoose': 'MongoDB object modeling',
|
|
'typeorm': 'TypeScript ORM',
|
|
'sequelize': 'SQL ORM',
|
|
'next-auth': 'Authentication for Next.js',
|
|
'passport': 'Authentication middleware',
|
|
'jsonwebtoken': 'JWT token handling',
|
|
'@mui/material': 'Material Design components',
|
|
'@chakra-ui/react': 'Accessible component library',
|
|
'tailwindcss': 'Utility-first CSS framework',
|
|
'zustand': 'State management',
|
|
'redux': 'Predictable state container',
|
|
'@reduxjs/toolkit': 'Redux development toolkit',
|
|
'axios': 'HTTP client',
|
|
'zod': 'Schema validation',
|
|
'date-fns': 'Date utility functions',
|
|
'lodash': 'Utility functions',
|
|
'jest': 'Testing framework',
|
|
'vitest': 'Fast unit testing',
|
|
'@testing-library/react': 'React component testing',
|
|
'cypress': 'End-to-end testing',
|
|
}
|
|
return purposes.get(dep_name, 'Utility library')
|
|
|
|
|
|
def scan_directory_structure(root_path: Path) -> Dict[str, Any]:
|
|
"""Scan and categorize directory structure."""
|
|
ignore_dirs = {
|
|
'node_modules', '.git', '.next', '__pycache__', 'venv',
|
|
'.venv', 'dist', 'build', '.cache', 'coverage', '.turbo'
|
|
}
|
|
|
|
common_purposes = {
|
|
'src': 'Main source code directory',
|
|
'app': 'Application code (Next.js App Router)',
|
|
'pages': 'Page components (Next.js Pages Router)',
|
|
'components': 'Reusable UI components',
|
|
'lib': 'Shared utilities and libraries',
|
|
'utils': 'Utility functions',
|
|
'hooks': 'Custom React hooks',
|
|
'context': 'React context providers',
|
|
'store': 'State management',
|
|
'styles': 'CSS and styling',
|
|
'types': 'TypeScript type definitions',
|
|
'api': 'API route handlers',
|
|
'services': 'Business logic services',
|
|
'models': 'Data models/entities',
|
|
'prisma': 'Database schema and migrations',
|
|
'public': 'Static assets',
|
|
'tests': 'Test files',
|
|
'__tests__': 'Jest test files',
|
|
'test': 'Test files',
|
|
'spec': 'Test specifications',
|
|
'docs': 'Documentation',
|
|
'scripts': 'Build and utility scripts',
|
|
'config': 'Configuration files',
|
|
}
|
|
|
|
directories = []
|
|
source_dir = None
|
|
|
|
# Find main source directory
|
|
for candidate in ['src', 'app', 'lib', 'source']:
|
|
if (root_path / candidate).is_dir():
|
|
source_dir = candidate
|
|
break
|
|
|
|
# Scan directories
|
|
for item in sorted(root_path.iterdir()):
|
|
if item.is_dir() and item.name not in ignore_dirs and not item.name.startswith('.'):
|
|
file_count = sum(1 for _ in item.rglob('*') if _.is_file())
|
|
key_files = [
|
|
f.name for f in item.iterdir()
|
|
if f.is_file() and f.suffix in ['.ts', '.tsx', '.js', '.jsx', '.py', '.rs', '.go']
|
|
][:5]
|
|
|
|
directories.append({
|
|
'path': item.name,
|
|
'purpose': common_purposes.get(item.name, 'Project directory'),
|
|
'file_count': file_count,
|
|
'key_files': key_files
|
|
})
|
|
|
|
return {
|
|
'source_dir': source_dir or '.',
|
|
'directories': directories
|
|
}
|
|
|
|
|
|
def detect_features(root_path: Path) -> List[Dict[str, Any]]:
|
|
"""Detect main features from code patterns."""
|
|
features = []
|
|
|
|
feature_patterns = {
|
|
'authentication': {
|
|
'keywords': ['auth', 'login', 'logout', 'session', 'jwt', 'oauth'],
|
|
'description': 'User authentication and session management',
|
|
'technical_notes': 'Handles user login, logout, and session tokens'
|
|
},
|
|
'user_management': {
|
|
'keywords': ['user', 'profile', 'account', 'register', 'signup'],
|
|
'description': 'User account creation and profile management',
|
|
'technical_notes': 'CRUD operations for user data'
|
|
},
|
|
'api': {
|
|
'keywords': ['api', 'endpoint', 'route'],
|
|
'description': 'REST API endpoints for data operations',
|
|
'technical_notes': 'HTTP handlers for client-server communication'
|
|
},
|
|
'database': {
|
|
'keywords': ['prisma', 'model', 'entity', 'schema', 'migration'],
|
|
'description': 'Database storage and data persistence',
|
|
'technical_notes': 'ORM-based data layer with migrations'
|
|
},
|
|
'file_upload': {
|
|
'keywords': ['upload', 'file', 'storage', 's3', 'blob'],
|
|
'description': 'File upload and storage functionality',
|
|
'technical_notes': 'Handles file uploads and cloud storage'
|
|
},
|
|
'search': {
|
|
'keywords': ['search', 'filter', 'query'],
|
|
'description': 'Search and filtering capabilities',
|
|
'technical_notes': 'Full-text search or database queries'
|
|
},
|
|
}
|
|
|
|
# Scan for features
|
|
all_files = list(root_path.rglob('*.ts')) + list(root_path.rglob('*.tsx')) + \
|
|
list(root_path.rglob('*.js')) + list(root_path.rglob('*.jsx'))
|
|
|
|
file_names = [f.stem.lower() for f in all_files]
|
|
file_paths = [str(f.relative_to(root_path)).lower() for f in all_files]
|
|
|
|
for feature_name, config in feature_patterns.items():
|
|
found_files = []
|
|
for keyword in config['keywords']:
|
|
found_files.extend([
|
|
str(f.relative_to(root_path)) for f in all_files
|
|
if keyword in str(f).lower()
|
|
])
|
|
|
|
if found_files:
|
|
features.append({
|
|
'name': feature_name.replace('_', ' ').title(),
|
|
'description': config['description'],
|
|
'technical_notes': config['technical_notes'],
|
|
'files': list(set(found_files))[:5]
|
|
})
|
|
|
|
return features
|
|
|
|
|
|
def find_components(root_path: Path) -> List[Dict[str, Any]]:
|
|
"""Find UI components in the project."""
|
|
components = []
|
|
component_dirs = ['components', 'src/components', 'app/components']
|
|
|
|
for comp_dir in component_dirs:
|
|
comp_path = root_path / comp_dir
|
|
if comp_path.exists():
|
|
for file in comp_path.rglob('*.tsx'):
|
|
if file.name.startswith('_') or file.name == 'index.tsx':
|
|
continue
|
|
|
|
name = file.stem
|
|
if name[0].isupper(): # Component names are PascalCase
|
|
components.append({
|
|
'id': f'component_{name.lower()}',
|
|
'name': name,
|
|
'path': str(file.relative_to(root_path)),
|
|
'description': f'{name} component',
|
|
'props': 'See source file'
|
|
})
|
|
|
|
return components[:20] # Limit to 20 components
|
|
|
|
|
|
def find_api_endpoints(root_path: Path) -> List[Dict[str, Any]]:
|
|
"""Find API endpoints in the project."""
|
|
endpoints = []
|
|
|
|
# Next.js App Router: app/api/**/route.ts
|
|
api_dir = root_path / 'app' / 'api'
|
|
if api_dir.exists():
|
|
for route_file in api_dir.rglob('route.ts'):
|
|
path_parts = route_file.parent.relative_to(api_dir).parts
|
|
api_path = '/api/' + '/'.join(path_parts)
|
|
|
|
# Read file to detect methods
|
|
content = route_file.read_text()
|
|
methods = []
|
|
for method in ['GET', 'POST', 'PUT', 'PATCH', 'DELETE']:
|
|
if f'export async function {method}' in content or f'export function {method}' in content:
|
|
methods.append(method)
|
|
|
|
for method in methods:
|
|
endpoints.append({
|
|
'method': method,
|
|
'path': api_path.replace('[', ':').replace(']', ''),
|
|
'handler_file': str(route_file.relative_to(root_path)),
|
|
'description': f'{method} {api_path}',
|
|
'technical_notes': 'Next.js App Router endpoint'
|
|
})
|
|
|
|
# Next.js Pages Router: pages/api/**/*.ts
|
|
pages_api = root_path / 'pages' / 'api'
|
|
if pages_api.exists():
|
|
for api_file in pages_api.rglob('*.ts'):
|
|
path_parts = api_file.relative_to(pages_api).with_suffix('').parts
|
|
api_path = '/api/' + '/'.join(path_parts)
|
|
|
|
endpoints.append({
|
|
'method': 'MULTIPLE',
|
|
'path': api_path.replace('[', ':').replace(']', ''),
|
|
'handler_file': str(api_file.relative_to(root_path)),
|
|
'description': f'API endpoint at {api_path}',
|
|
'technical_notes': 'Next.js Pages Router endpoint'
|
|
})
|
|
|
|
return endpoints
|
|
|
|
|
|
def find_data_models(root_path: Path) -> List[Dict[str, Any]]:
|
|
"""Find data models in the project."""
|
|
models = []
|
|
|
|
# Prisma schema
|
|
prisma_schema = root_path / 'prisma' / 'schema.prisma'
|
|
if prisma_schema.exists():
|
|
content = prisma_schema.read_text()
|
|
model_pattern = re.compile(r'model\s+(\w+)\s*\{([^}]+)\}', re.MULTILINE)
|
|
|
|
for match in model_pattern.finditer(content):
|
|
model_name = match.group(1)
|
|
model_body = match.group(2)
|
|
|
|
# Extract fields
|
|
fields = []
|
|
for line in model_body.strip().split('\n'):
|
|
line = line.strip()
|
|
if line and not line.startswith('@@') and not line.startswith('//'):
|
|
parts = line.split()
|
|
if len(parts) >= 2:
|
|
fields.append({
|
|
'name': parts[0],
|
|
'type': parts[1],
|
|
'description': f'{parts[0]} field'
|
|
})
|
|
|
|
models.append({
|
|
'name': model_name,
|
|
'description': f'{model_name} data model',
|
|
'fields': fields[:10] # Limit fields
|
|
})
|
|
|
|
return models
|
|
|
|
|
|
def collect_glossary_terms(features: List, components: List, endpoints: List) -> List[Dict[str, str]]:
|
|
"""Collect technical terms that need definitions."""
|
|
common_terms = {
|
|
'API': 'Application Programming Interface - a way for different software to communicate',
|
|
'REST': 'Representational State Transfer - a standard way to design web APIs',
|
|
'Component': 'A reusable piece of the user interface',
|
|
'Endpoint': 'A specific URL that the application responds to',
|
|
'ORM': 'Object-Relational Mapping - connects code to database tables',
|
|
'JWT': 'JSON Web Token - a secure way to transmit user identity',
|
|
'CRUD': 'Create, Read, Update, Delete - basic data operations',
|
|
'Props': 'Properties passed to a component to customize it',
|
|
'State': 'Data that can change and affects what users see',
|
|
'Hook': 'A way to add features to React components',
|
|
'Migration': 'A controlled change to database structure',
|
|
'Schema': 'The structure/shape of data',
|
|
'Route': 'A URL path that maps to specific functionality',
|
|
'Handler': 'Code that responds to a specific request',
|
|
}
|
|
|
|
return [{'term': k, 'definition': v} for k, v in common_terms.items()]
|
|
|
|
|
|
def generate_analysis(root_path: Path) -> Dict[str, Any]:
|
|
"""Generate complete project analysis."""
|
|
project_info = detect_project_type(root_path)
|
|
pkg_info = parse_package_json(root_path) if project_info['type'] == 'node' else {}
|
|
structure = scan_directory_structure(root_path)
|
|
features = detect_features(root_path)
|
|
components = find_components(root_path)
|
|
endpoints = find_api_endpoints(root_path)
|
|
models = find_data_models(root_path)
|
|
glossary = collect_glossary_terms(features, components, endpoints)
|
|
|
|
return {
|
|
'analysis_timestamp': datetime.now().isoformat(),
|
|
'project': {
|
|
'name': pkg_info.get('name', root_path.name),
|
|
'version': pkg_info.get('version', '0.0.0'),
|
|
'description': pkg_info.get('description', ''),
|
|
'type': project_info['type'],
|
|
},
|
|
'tech_stack': {
|
|
'language': 'TypeScript' if project_info['type'] == 'node' else project_info['type'],
|
|
'framework': pkg_info.get('framework'),
|
|
'database': pkg_info.get('database'),
|
|
'ui_framework': pkg_info.get('ui_framework'),
|
|
'key_dependencies': pkg_info.get('key_dependencies', []),
|
|
},
|
|
'structure': structure,
|
|
'features': features,
|
|
'components': components,
|
|
'api_endpoints': endpoints,
|
|
'data_models': models,
|
|
'glossary_terms': glossary,
|
|
}
|
|
|
|
|
|
def output_yaml(data: Dict[str, Any], output_path: Optional[Path] = None):
|
|
"""Output analysis as YAML."""
|
|
if yaml:
|
|
output = yaml.dump(data, default_flow_style=False, allow_unicode=True, sort_keys=False)
|
|
else:
|
|
# Fallback to JSON if yaml not available
|
|
output = json.dumps(data, indent=2)
|
|
|
|
if output_path:
|
|
output_path.write_text(output)
|
|
print(f"Analysis written to: {output_path}")
|
|
else:
|
|
print(output)
|
|
|
|
|
|
def main():
|
|
"""Main entry point."""
|
|
root_path = Path.cwd()
|
|
|
|
if len(sys.argv) > 1:
|
|
root_path = Path(sys.argv[1])
|
|
|
|
if not root_path.exists():
|
|
print(f"Error: Path does not exist: {root_path}", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
output_path = None
|
|
if len(sys.argv) > 2:
|
|
output_path = Path(sys.argv[2])
|
|
|
|
analysis = generate_analysis(root_path)
|
|
output_yaml(analysis, output_path)
|
|
|
|
|
|
# Script entry-point guard: run the analyzer only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()