project-standalo-sonic-cloud/skills/guardrail-orchestrator/scripts/analyze_codebase.py

487 lines
15 KiB
Python
Executable File

#!/usr/bin/env python3
"""Analyze codebase and generate project manifest from existing code."""
import argparse
import json
import os
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Any, Optional
def find_files(base_path: str, pattern: str) -> list[str]:
    """Find paths under ``base_path`` that match a glob pattern.

    Args:
        base_path: Directory the glob is evaluated against.
        pattern: Glob pattern relative to ``base_path`` (``**`` recursion is
            handled by ``Path.glob``).

    Returns:
        The matching paths, relative to ``base_path``, written with forward
        slashes and sorted alphabetically.
    """
    base = Path(base_path)
    # Path.glob yields entries in an arbitrary, filesystem-dependent order, so
    # sort to keep generated output reproducible between runs.  as_posix()
    # keeps "/" as the separator on every platform; other functions in this
    # file strip literal prefixes such as 'app/' from these paths.
    return sorted(p.relative_to(base).as_posix() for p in base.glob(pattern))
def read_file(filepath: str) -> str:
    """Return the UTF-8 decoded text of ``filepath``.

    This is a best-effort read: any exception raised while opening or
    decoding the file results in an empty string instead of an error.
    """
    text = ""
    try:
        with open(filepath, encoding='utf-8') as handle:
            text = handle.read()
    except Exception:
        # Deliberate: unreadable files are treated as empty.
        pass
    return text
def extract_component_name(filepath: str) -> str:
    """Return the final path component of ``filepath`` without its last suffix."""
    return Path(filepath).stem
def to_snake_case(name: str) -> str:
    """Convert a PascalCase / camelCase identifier to snake_case."""
    # Pass 1: put "_" in front of every capitalised word (an upper-case letter
    # followed by lower-case letters) that has a character before it.
    with_word_breaks = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    # Pass 2: split remaining lower-case/digit -> upper-case transitions.
    with_all_breaks = re.sub('([a-z0-9])([A-Z])', r'\1_\2', with_word_breaks)
    return with_all_breaks.lower()
def extract_props_from_content(content: str) -> dict:
    """Extract the props declared by a component's ``*Props*`` interface/type.

    Only the first ``interface``/``type`` whose name contains ``Props`` is
    considered, and its body is taken up to the first ``}``.

    Returns:
        A mapping ``prop name -> {"type": <text>, "optional": <bool>}``;
        empty when no matching declaration is found.
    """
    declaration = re.search(
        r'(?:interface|type)\s+\w*Props\w*\s*(?:=\s*)?\{([^}]+)\}',
        content,
        re.DOTALL
    )
    if declaration is None:
        return {}

    # Each member looks like `name: type` or `name?: type`; the type text
    # runs up to the next ';', ',' or line break.
    members = re.findall(r'(\w+)(\?)?:\s*([^;,\n]+)', declaration.group(1))
    return {
        prop_name: {
            "type": raw_type.strip(),
            "optional": bool(question_mark)
        }
        for prop_name, question_mark, raw_type in members
    }
def extract_imports(content: str) -> list[str]:
    """Extract the names of components imported from a ``components`` directory.

    Only import paths of the form ``./components/<Name>`` or
    ``../components/<Name>`` are recognised; the returned value is the
    ``<Name>`` path segment, not the imported identifier.

    Returns:
        The distinct component names, sorted alphabetically so the result is
        stable between runs (set iteration order for strings is not).
    """
    patterns = (
        # Named (`import { A } from ...`) or default imports.
        r"import\s+\{?\s*([^}]+)\s*\}?\s+from\s+['\"]\.\.?/components/(\w+)['\"]",
        # Plain default imports.  The first pattern is greedy and can swallow
        # several import statements in one match, so this second pass picks up
        # default imports it skipped over.
        r"import\s+(\w+)\s+from\s+['\"]\.\.?/components/(\w+)['\"]",
    )
    found = set()
    for pattern in patterns:
        for _imported, component in re.findall(pattern, content):
            found.add(component)
    return sorted(found)
def extract_api_methods(content: str) -> list[str]:
    """List the HTTP methods a route file exports as functions.

    A method counts as exported when the content contains
    ``export function <METHOD>(`` or ``export async function <METHOD>(``.
    The result follows the fixed order of the candidate tuple below, not the
    order of appearance in the file.
    """
    candidates = ('GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS')
    return [
        method
        for method in candidates
        if re.search(rf'export\s+(?:async\s+)?function\s+{method}\s*\(', content)
    ]
def extract_fetch_calls(content: str) -> list[str]:
    """Extract the ``/api/...`` paths passed to ``fetch(...)`` calls.

    Only the static leading part of each path is captured: the capture stops
    at the closing quote, at a query string (``?``) or at the start of a
    template interpolation (``$`` / ``{``).

    Returns:
        The distinct paths without the ``/api/`` prefix and without trailing
        slashes, sorted alphabetically so the result is stable between runs.
    """
    # fetch('/api/...'), fetch("/api/...") or fetch(`/api/...`).
    captured = re.findall(
        r"fetch\s*\(\s*['\"`]/api/([^'\"`\?\$\{]+)",
        content
    )
    # Fully static template literals such as fetch(`/api/tasks`).
    captured += re.findall(
        r"fetch\s*\(\s*`/api/(\w+)`",
        content
    )
    # Neither pattern can capture '$', so only empty results (a path that was
    # nothing but slashes) need to be filtered out after normalisation.
    normalized = {path.rstrip('/') for path in captured}
    normalized.discard('')
    return sorted(normalized)
def extract_types_from_db(content: str) -> dict:
    """Collect exported TypeScript interfaces and type aliases from ``content``.

    Returns:
        A dict keyed by type name.  Exported interfaces map to a
        ``{field name: type text}`` dict (the optional marker ``?`` is
        discarded); exported type aliases map to the text of their definition.
        An alias overwrites an interface of the same name.
    """
    field_pattern = r'(\w+)(\?)?:\s*([^;,\n]+)'
    collected = {}

    # `export interface Name { ... }` -- the body ends at the first '}'.
    interfaces = re.findall(
        r'export\s+interface\s+(\w+)\s*\{([^}]+)\}',
        content,
        re.DOTALL
    )
    for type_name, body in interfaces:
        collected[type_name] = {
            field: annotation.strip()
            for field, _optional, annotation in re.findall(field_pattern, body)
        }

    # `export type Name = ...;`
    aliases = re.findall(
        r"export\s+type\s+(\w+)\s*=\s*([^;]+);",
        content
    )
    for alias_name, definition in aliases:
        collected[alias_name] = definition.strip()

    return collected
def path_to_route(filepath: str) -> str:
    """Convert a page file path to its route path.

    Args:
        filepath: Path of a ``page.tsx`` file relative to the project root,
            e.g. ``app/tasks/[id]/page.tsx``.

    Returns:
        The route with a leading ``/`` (``/tasks/[id]`` for the example
        above), or ``/`` for the root page.  Dynamic segments such as
        ``[id]`` are kept verbatim.
    """
    # Strip only the *leading* 'app/' and the *trailing* page file name.
    # A blanket str.replace would also eat matching text inside directory
    # names (e.g. 'app/myapp/page.tsx' must become '/myapp', not '/my').
    route = filepath.removeprefix('app/')
    if route == 'page.tsx':
        route = ''
    else:
        route = route.removesuffix('/page.tsx')

    if route in ('', '/'):
        return '/'
    if not route.startswith('/'):
        route = '/' + route
    return route
def analyze_pages(base_path: str) -> list[dict]:
    """Build one manifest entry for every ``app/**/page.tsx`` file.

    Each entry records the page id, its route, the source file, the ids of
    the components it imports and the ids derived from its ``fetch`` calls.
    """
    results = []
    for rel_path in find_files(base_path, 'app/**/page.tsx'):
        source = read_file(os.path.join(base_path, rel_path))
        route = path_to_route(rel_path)

        # The root page gets a fixed id; every other id is derived from the
        # route with slashes turned into underscores and brackets removed.
        if route == '/' or rel_path == 'app/page.tsx':
            route = '/'
            page_id = 'page_home'
        else:
            slug = route.strip('/').replace('/', '_').replace('[', '').replace(']', '')
            page_id = f'page_{slug}'

        # NOTE(review): these ids have the form api_<path>, while analyze_apis
        # in this file builds api_<action>_<resource>; confirm whether the two
        # are meant to line up.
        results.append({
            "id": page_id,
            "path": route,
            "file_path": rel_path,
            "status": "IMPLEMENTED",
            "description": f"Page at {route}",
            "components": [
                f"comp_{to_snake_case(c)}" for c in extract_imports(source)
            ],
            "data_dependencies": [
                f"api_{a.replace('/', '_')}" for a in extract_fetch_calls(source)
            ]
        })
    return results
def analyze_components(base_path: str) -> list[dict]:
    """Build one manifest entry for every ``app/components/*.tsx`` file.

    Only files directly inside ``app/components`` are considered (the glob is
    not recursive).
    """
    def describe(rel_path: str) -> dict:
        # Turn one component file into its manifest entry.
        source = read_file(os.path.join(base_path, rel_path))
        comp_name = extract_component_name(rel_path)
        return {
            "id": f"comp_{to_snake_case(comp_name)}",
            "name": comp_name,
            "file_path": rel_path,
            "status": "IMPLEMENTED",
            "description": f"{comp_name} component",
            "props": extract_props_from_content(source)
        }

    return [
        describe(rel_path)
        for rel_path in find_files(base_path, 'app/components/*.tsx')
    ]
def analyze_apis(base_path: str) -> list[dict]:
    """Build one manifest entry per HTTP method of every API route file.

    Route files are found with the glob ``app/api/**/route.ts``.  For each
    exported HTTP method an entry with id ``api_<action>_<resource>`` is
    produced, where ``<action>`` is derived from the method and ``<resource>``
    from the route path below ``/api``.

    Args:
        base_path: Project root directory.

    Returns:
        A list of API endpoint dicts (id, path, method, file_path, status,
        description, request, response).
    """
    apis = []
    for filepath in find_files(base_path, 'app/api/**/route.ts'):
        content = read_file(os.path.join(base_path, filepath))

        # Strip only the leading 'app/' and the trailing '/route.ts'; a blanket
        # str.replace would also remove matching text inside directory names
        # (e.g. 'app/api/webapp/route.ts').
        path = '/' + filepath.removeprefix('app/').removesuffix('/route.ts')

        # Resource name: the part of the path below /api, flattened.  The root
        # route file (app/api/route.ts -> '/api') has nothing below /api and
        # is named 'root'.
        below_api = '' if path == '/api' else path.removeprefix('/api/')
        resource = below_api.replace('/', '_').replace('[', '').replace(']', '')
        if not resource:
            resource = 'root'

        # GET on a path with a dynamic segment fetches one item, otherwise it
        # lists a collection.  Methods without an entry (HEAD, OPTIONS) fall
        # back to their lower-cased name.
        action_map = {
            'GET': 'list' if '[' not in path else 'get',
            'POST': 'create',
            'PUT': 'update',
            'DELETE': 'delete',
            'PATCH': 'patch'
        }

        for method in extract_api_methods(content):
            action = action_map.get(method, method.lower())
            apis.append({
                "id": f"api_{action}_{resource}",
                "path": path,
                "method": method,
                "file_path": filepath,
                "status": "IMPLEMENTED",
                "description": f"{method} {path}",
                "request": {},
                "response": {
                    "type": "object",
                    "description": "Response data"
                }
            })
    return apis
def analyze_database(base_path: str) -> tuple[list[dict], dict]:
    """Inspect ``app/lib/db.ts`` for type definitions and a tasks table.

    Returns:
        ``(tables, types)``.  Both are empty when ``app/lib/db.ts`` does not
        exist.  Otherwise ``types`` holds the exported type definitions and
        ``tables`` holds a single ``table_tasks`` entry if the file mentions
        "tasks" (case-insensitively) anywhere in its text.
    """
    db_file = os.path.join(base_path, 'app/lib/db.ts')
    if not os.path.exists(db_file):
        return [], {}

    source = read_file(db_file)
    type_defs = extract_types_from_db(source)

    table_entries = []
    # Heuristic: only a hard-coded "tasks" table is recognised.
    if 'tasks' in source.lower():
        table_entries.append({
            "id": "table_tasks",
            "name": "tasks",
            "file_path": "app/lib/db.ts",
            "status": "IMPLEMENTED",
            "description": "Tasks storage",
            "columns": type_defs.get('Task', {})
        })
    return table_entries, type_defs
def build_dependencies(pages: list, components: list, apis: list) -> dict:
    """Build the dependency mappings section of the manifest.

    Only ``component_to_page`` is populated (component id -> ids of the pages
    that list it); ``api_to_component`` and ``table_to_api`` are always empty.
    ``components`` and ``apis`` are accepted but not used.
    """
    pages_by_component = {}
    for page in pages:
        for comp_id in page.get('components', []):
            pages_by_component.setdefault(comp_id, []).append(page['id'])

    # Mapping APIs to components would require deeper analysis, so those
    # sections are left empty.
    return {
        "component_to_page": pages_by_component,
        "api_to_component": {},
        "table_to_api": {}
    }
def generate_manifest(
    base_path: str,
    project_name: Optional[str] = None
) -> dict:
    """Generate the complete project manifest for the codebase at ``base_path``.

    Args:
        base_path: Project root directory.
        project_name: Name to record in the manifest.  When omitted (or
            empty), the ``name`` field of ``package.json`` is used, falling
            back to the name of the ``base_path`` directory.

    Returns:
        A dict with the sections ``project``, ``state``, ``entities``,
        ``dependencies`` and ``types``.  All timestamps in it share a single
        value taken once per call.
    """
    if not project_name:
        project_name = _detect_project_name(base_path)

    # Analyze codebase
    pages = analyze_pages(base_path)
    components = analyze_components(base_path)
    apis = analyze_apis(base_path)
    tables, types = analyze_database(base_path)
    dependencies = build_dependencies(pages, components, apis)

    now = datetime.now().isoformat()
    return {
        "project": {
            "name": project_name,
            "version": "1.0.0",
            "created_at": now,
            "description": f"Project manifest for {project_name}"
        },
        "state": {
            "current_phase": "IMPLEMENTATION_PHASE",
            "approval_status": {
                "manifest_approved": True,
                "approved_by": "analyzer",
                "approved_at": now
            },
            "revision_history": [
                {
                    "action": "MANIFEST_GENERATED",
                    "timestamp": now,
                    "details": "Generated from existing codebase analysis"
                }
            ]
        },
        "entities": {
            "pages": pages,
            "components": components,
            "api_endpoints": apis,
            "database_tables": tables
        },
        "dependencies": dependencies,
        "types": types
    }


def _detect_project_name(base_path: str) -> str:
    """Return the ``name`` from ``package.json``, else the directory name."""
    fallback = Path(base_path).name
    pkg_path = os.path.join(base_path, 'package.json')
    try:
        # Read as UTF-8 explicitly rather than the platform's locale encoding.
        with open(pkg_path, encoding='utf-8') as f:
            pkg = json.load(f)
    except (OSError, ValueError):
        # Missing/unreadable file, undecodable bytes or invalid JSON.
        return fallback
    name = pkg.get('name') if isinstance(pkg, dict) else None
    # Ignore a missing, empty or non-string name.
    return name if isinstance(name, str) and name else fallback
def main():
"""Command-line entry point: analyze a project and emit its manifest."""
# NOTE(review): indentation was lost in this copy of the file, so the lines
# below are kept exactly as found rather than re-indented by guesswork.
parser = argparse.ArgumentParser(
description='Analyze codebase and generate project manifest'
)
parser.add_argument(
'--path',
default='.',
help='Path to project root'
)
parser.add_argument(
'--name',
help='Project name (defaults to package.json name or directory name)'
)
parser.add_argument(
'--output',
default='project_manifest.json',
help='Output file path'
)
parser.add_argument(
'--dry-run',
action='store_true',
help='Print manifest without writing to file'
)
parser.add_argument(
'--force',
action='store_true',
help='Overwrite existing manifest'
)
args = parser.parse_args()
# The output path is resolved against the absolute project root.
base_path = os.path.abspath(args.path)
output_path = os.path.join(base_path, args.output)
# Check for existing manifest: refuse to overwrite unless --force is given;
# a dry run skips this check.
if os.path.exists(output_path) and not args.force and not args.dry_run:
print(f"Error: {args.output} already exists. Use --force to overwrite.")
sys.exit(1)
print(f"Analyzing codebase at: {base_path}")
print()
# Generate manifest
manifest = generate_manifest(base_path, args.name)
# Count entities for the summary box below.
pages = len(manifest['entities']['pages'])
components = len(manifest['entities']['components'])
apis = len(manifest['entities']['api_endpoints'])
tables = len(manifest['entities']['database_tables'])
# Dry run prints the manifest as JSON; otherwise it is written to output_path.
if args.dry_run:
print(json.dumps(manifest, indent=2))
else:
with open(output_path, 'w') as f:
json.dump(manifest, f, indent=2)
print(f"Manifest written to: {output_path}")
# NOTE(review): it is unclear from this copy whether the summary box below
# belongs to the else branch or is also printed in dry-run mode -- confirm
# against the original file.
print()
print("╔══════════════════════════════════════════════════════════════╗")
print("║ 📊 MANIFEST GENERATED ║")
print("╠══════════════════════════════════════════════════════════════╣")
print(f"║ Project: {manifest['project']['name']:<51}")
print("╠══════════════════════════════════════════════════════════════╣")
print("║ ENTITIES DISCOVERED ║")
print(f"║ 📄 Pages: {pages:<43}")
print(f"║ 🧩 Components: {components:<43}")
print(f"║ 🔌 APIs: {apis:<43}")
print(f"║ 🗄️ Tables: {tables:<43}")
print("╠══════════════════════════════════════════════════════════════╣")
print("║ Status: All entities marked as IMPLEMENTED ║")
print("║ Phase: IMPLEMENTATION_PHASE ║")
print("╚══════════════════════════════════════════════════════════════╝")
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()