#!/usr/bin/env python3 """ Comprehensive Security Scanner for guardrail workflow. Performs static security analysis on codebase: - Hardcoded secrets and credentials - SQL injection vulnerabilities - XSS vulnerabilities - Path traversal risks - Insecure dependencies - Authentication/Authorization issues - OWASP Top 10 patterns Usage: python3 security_scan.py --project-dir . [--severity critical|high|medium|low] """ import argparse import json import os import re import sys from pathlib import Path from typing import NamedTuple from dataclasses import dataclass, field @dataclass class SecurityIssue: """Security vulnerability finding.""" severity: str # CRITICAL, HIGH, MEDIUM, LOW, INFO category: str title: str description: str file_path: str line_number: int | None code_snippet: str recommendation: str cwe_id: str | None = None owasp_category: str | None = None @dataclass class ScanResult: """Complete scan results.""" issues: list[SecurityIssue] = field(default_factory=list) files_scanned: int = 0 scan_duration: float = 0.0 # Security patterns organized by category SECURITY_PATTERNS = { 'hardcoded_secrets': { 'severity': 'CRITICAL', 'cwe': 'CWE-798', 'owasp': 'A07:2021-Identification and Authentication Failures', 'patterns': [ # API Keys (r'''(?:api[_-]?key|apikey)\s*[:=]\s*['"]((?!process\.env)[^'"]{10,})['"']''', 'Hardcoded API key'), (r'''(?:api[_-]?secret|apisecret)\s*[:=]\s*['"]((?!process\.env)[^'"]{10,})['"']''', 'Hardcoded API secret'), # Passwords (r'''(?:password|passwd|pwd)\s*[:=]\s*['"]([^'"]{4,})['"']''', 'Hardcoded password'), # Private keys (r'''-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----''', 'Embedded private key'), # AWS credentials (r'''(?:aws[_-]?access[_-]?key[_-]?id|aws[_-]?secret)\s*[:=]\s*['"]([A-Z0-9]{16,})['"']''', 'AWS credential'), (r'''AKIA[0-9A-Z]{16}''', 'AWS Access Key ID'), # JWT secrets (r'''(?:jwt[_-]?secret|token[_-]?secret)\s*[:=]\s*['"]([^'"]{8,})['"']''', 'Hardcoded JWT secret'), # Database connection strings (r'''(?:mongodb|postgres|mysql|redis):\/\/[^:]+:[^@]+@''', 'Database credentials in connection string'), # Generic secrets (r'''(?:secret|token|auth)[_-]?(?:key)?\s*[:=]\s*['"]([^'"]{8,})['"']''', 'Potential hardcoded secret'), ] }, 'sql_injection': { 'severity': 'CRITICAL', 'cwe': 'CWE-89', 'owasp': 'A03:2021-Injection', 'patterns': [ # String concatenation in queries (r'''(?:query|sql|execute)\s*\(\s*[`'"].*\$\{''', 'SQL injection via template literal'), (r'''(?:query|sql|execute)\s*\(\s*['"].*\+\s*(?:req\.|params\.|body\.|query\.)''', 'SQL injection via concatenation'), (r'''(?:SELECT|INSERT|UPDATE|DELETE|FROM|WHERE).*\$\{''', 'Raw SQL with template interpolation'), # Raw queries (r'''\.raw\s*\(\s*[`'"].*\$\{''', 'Raw query with interpolation'), (r'''prisma\.\$queryRaw\s*`[^`]*\$\{''', 'Prisma raw query with interpolation'), ] }, 'xss': { 'severity': 'HIGH', 'cwe': 'CWE-79', 'owasp': 'A03:2021-Injection', 'patterns': [ # React dangerouslySetInnerHTML (r'''dangerouslySetInnerHTML\s*=\s*\{\s*\{__html:\s*(?!DOMPurify|sanitize)''', 'Unsanitized dangerouslySetInnerHTML'), # innerHTML assignment (r'''\.innerHTML\s*=\s*(?!['"`]<)''', 'Direct innerHTML assignment'), # document.write (r'''document\.write\s*\(''', 'document.write usage'), # eval with user input (r'''eval\s*\(\s*(?:req\.|params\.|body\.|query\.|props\.)''', 'eval with user input'), # jQuery html() with user input (r'''\$\([^)]+\)\.html\s*\(\s*(?!['"`])''', 'jQuery html() with dynamic content'), ] }, 'path_traversal': { 'severity': 'HIGH', 'cwe': 'CWE-22', 'owasp': 'A01:2021-Broken Access Control', 'patterns': [ # File operations with user input (r'''(?:readFile|writeFile|readFileSync|writeFileSync|createReadStream)\s*\(\s*(?:req\.|params\.|body\.|query\.)''', 'File operation with user input'), (r'''(?:readFile|writeFile)\s*\(\s*[`'"].*\$\{(?:req\.|params\.|body\.|query\.)''', 'File path with user input interpolation'), # Path.join with user input (without validation) (r'''path\.(?:join|resolve)\s*\([^)]*(?:req\.|params\.|body\.|query\.)''', 'Path operation with user input'), ] }, 'command_injection': { 'severity': 'CRITICAL', 'cwe': 'CWE-78', 'owasp': 'A03:2021-Injection', 'patterns': [ # exec/spawn with user input (r'''(?:exec|execSync|spawn|spawnSync)\s*\(\s*[`'"].*\$\{''', 'Command injection via template literal'), (r'''(?:exec|execSync|spawn|spawnSync)\s*\(\s*(?:req\.|params\.|body\.|query\.)''', 'Command execution with user input'), # child_process with concatenation (r'''child_process.*\(\s*['"].*\+\s*(?:req\.|params\.|body\.|query\.)''', 'Command injection via concatenation'), ] }, 'insecure_auth': { 'severity': 'HIGH', 'cwe': 'CWE-287', 'owasp': 'A07:2021-Identification and Authentication Failures', 'patterns': [ # Weak JWT algorithms (r'''algorithm\s*[:=]\s*['"](?:none|HS256)['"']''', 'Weak JWT algorithm'), # No password hashing (r'''password\s*===?\s*(?:req\.|body\.|params\.)''', 'Plain text password comparison'), # Disabled security (r'''(?:verify|secure|https|ssl)\s*[:=]\s*false''', 'Security feature disabled'), # Cookie without security flags (r'''cookie\s*\([^)]*\)\s*(?!.*(?:httpOnly|secure|sameSite))''', 'Cookie without security flags'), ] }, 'sensitive_data_exposure': { 'severity': 'MEDIUM', 'cwe': 'CWE-200', 'owasp': 'A02:2021-Cryptographic Failures', 'patterns': [ # Logging sensitive data (r'''console\.(?:log|info|debug)\s*\([^)]*(?:password|secret|token|key|credential)''', 'Logging sensitive data'), # Error messages with sensitive info (r'''(?:throw|Error)\s*\([^)]*(?:password|secret|token|key|sql|query)''', 'Sensitive info in error message'), # HTTP instead of HTTPS (r'''['"]http:\/\/(?!localhost|127\.0\.0\.1)''', 'HTTP URL (should be HTTPS)'), ] }, 'insecure_dependencies': { 'severity': 'MEDIUM', 'cwe': 'CWE-1104', 'owasp': 'A06:2021-Vulnerable and Outdated Components', 'patterns': [ # Known vulnerable patterns (r'''require\s*\(\s*['"](?:serialize-javascript|lodash\.template|node-serialize)['"]\s*\)''', 'Known vulnerable package'), # Outdated crypto (r'''crypto\.createCipher\s*\(''', 'Deprecated crypto.createCipher'), (r'''md5\s*\(|createHash\s*\(\s*['"]md5['"]''', 'MD5 hash usage (weak)'), (r'''sha1\s*\(|createHash\s*\(\s*['"]sha1['"]''', 'SHA1 hash usage (weak)'), ] }, 'cors_misconfiguration': { 'severity': 'MEDIUM', 'cwe': 'CWE-942', 'owasp': 'A01:2021-Broken Access Control', 'patterns': [ # Wildcard CORS (r'''(?:Access-Control-Allow-Origin|origin)\s*[:=]\s*['"]\*['"']''', 'Wildcard CORS origin'), (r'''cors\s*\(\s*\{[^}]*origin\s*:\s*true''', 'CORS allows all origins'), # Credentials with wildcard (r'''credentials\s*:\s*true[^}]*origin\s*:\s*['"]\*['"']''', 'CORS credentials with wildcard origin'), ] }, 'insecure_randomness': { 'severity': 'LOW', 'cwe': 'CWE-330', 'owasp': 'A02:2021-Cryptographic Failures', 'patterns': [ # Math.random for security (r'''Math\.random\s*\(\s*\)[^;]*(?:token|secret|password|key|id|session)''', 'Math.random for security-sensitive value'), (r'''(?:token|secret|key|session)[^=]*=\s*Math\.random''', 'Math.random for security-sensitive value'), ] }, 'debug_code': { 'severity': 'LOW', 'cwe': 'CWE-489', 'owasp': 'A05:2021-Security Misconfiguration', 'patterns': [ # Debug statements (r'''console\.(?:log|debug|info|warn)\s*\(''', 'Console statement (remove in production)'), (r'''debugger\s*;''', 'Debugger statement'), # TODO/FIXME security notes (r'''(?:TODO|FIXME|HACK|XXX).*(?:security|auth|password|secret|vulnerable)''', 'Security-related TODO'), ] }, 'nosql_injection': { 'severity': 'HIGH', 'cwe': 'CWE-943', 'owasp': 'A03:2021-Injection', 'patterns': [ # MongoDB injection (r'''\.find\s*\(\s*\{[^}]*\$(?:where|regex|gt|lt|ne|in|nin|or|and).*(?:req\.|params\.|body\.|query\.)''', 'NoSQL injection risk'), (r'''\.find\s*\(\s*(?:req\.|params\.|body\.|query\.)''', 'Direct user input in query'), ] }, 'prototype_pollution': { 'severity': 'HIGH', 'cwe': 'CWE-1321', 'owasp': 'A03:2021-Injection', 'patterns': [ # Deep merge without protection (r'''(?:merge|extend|assign)\s*\([^)]*(?:req\.|params\.|body\.|query\.)''', 'Potential prototype pollution via merge'), (r'''Object\.assign\s*\(\s*\{\}[^)]*(?:req\.|params\.|body\.|query\.)''', 'Object.assign with user input'), # __proto__ access (r'''__proto__''', 'Direct __proto__ access'), (r'''constructor\s*\[\s*['"]prototype['"]''', 'Prototype access via constructor'), ] }, 'ssrf': { 'severity': 'HIGH', 'cwe': 'CWE-918', 'owasp': 'A10:2021-Server-Side Request Forgery', 'patterns': [ # Fetch/axios with user URL (r'''(?:fetch|axios\.get|axios\.post|http\.get|https\.get)\s*\(\s*(?:req\.|params\.|body\.|query\.)''', 'SSRF via user-controlled URL'), (r'''(?:fetch|axios)\s*\(\s*[`'"].*\$\{(?:req\.|params\.|body\.|query\.)''', 'SSRF via URL interpolation'), ] }, } # File extensions to scan SCAN_EXTENSIONS = {'.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs'} # Directories to skip SKIP_DIRS = {'node_modules', '.next', 'dist', 'build', '.git', 'coverage', '__pycache__'} def find_source_files(project_dir: str) -> list[str]: """Find all source files to scan.""" files = [] for root, dirs, filenames in os.walk(project_dir): # Skip excluded directories dirs[:] = [d for d in dirs if d not in SKIP_DIRS] for filename in filenames: ext = os.path.splitext(filename)[1] if ext in SCAN_EXTENSIONS: files.append(os.path.join(root, filename)) return files def scan_file(file_path: str) -> list[SecurityIssue]: """Scan a single file for security issues.""" issues = [] try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: content = f.read() lines = content.split('\n') except (IOError, OSError): return [] for category, config in SECURITY_PATTERNS.items(): for pattern, title in config['patterns']: try: for match in re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE): # Find line number line_start = content[:match.start()].count('\n') + 1 line_content = lines[line_start - 1] if line_start <= len(lines) else '' # Skip if in comment stripped = line_content.strip() if stripped.startswith('//') or stripped.startswith('*') or stripped.startswith('/*'): continue # Skip if looks like env var reference if 'process.env' in line_content or 'import.meta.env' in line_content: continue issues.append(SecurityIssue( severity=config['severity'], category=category, title=title, description=get_description(category), file_path=file_path, line_number=line_start, code_snippet=line_content.strip()[:100], recommendation=get_recommendation(category), cwe_id=config.get('cwe'), owasp_category=config.get('owasp') )) except re.error: continue return issues def get_description(category: str) -> str: """Get detailed description for category.""" descriptions = { 'hardcoded_secrets': 'Credentials or secrets hardcoded in source code can be extracted by attackers.', 'sql_injection': 'User input directly in SQL queries allows attackers to manipulate database operations.', 'xss': 'Unsanitized user input rendered in HTML allows attackers to inject malicious scripts.', 'path_traversal': 'User input in file paths allows attackers to access arbitrary files.', 'command_injection': 'User input in system commands allows attackers to execute arbitrary commands.', 'insecure_auth': 'Weak authentication mechanisms can be bypassed by attackers.', 'sensitive_data_exposure': 'Sensitive information may be exposed through logs or errors.', 'insecure_dependencies': 'Known vulnerable packages or weak cryptographic functions.', 'cors_misconfiguration': 'Overly permissive CORS allows unauthorized cross-origin requests.', 'insecure_randomness': 'Predictable random values can be guessed by attackers.', 'debug_code': 'Debug code in production may expose sensitive information.', 'nosql_injection': 'User input in NoSQL queries allows attackers to manipulate database operations.', 'prototype_pollution': 'Modifying object prototypes can lead to code execution.', 'ssrf': 'User-controlled URLs allow attackers to make requests to internal services.', } return descriptions.get(category, 'Security vulnerability detected.') def get_recommendation(category: str) -> str: """Get remediation recommendation for category.""" recommendations = { 'hardcoded_secrets': 'Use environment variables (process.env) or a secrets manager.', 'sql_injection': 'Use parameterized queries or ORM methods. Never concatenate user input.', 'xss': 'Sanitize user input with DOMPurify or escape HTML entities.', 'path_traversal': 'Validate and sanitize file paths. Use path.basename() and whitelist allowed paths.', 'command_injection': 'Avoid shell commands with user input. Use execFile with argument arrays.', 'insecure_auth': 'Use strong algorithms (RS256), hash passwords with bcrypt, enable all security flags.', 'sensitive_data_exposure': 'Remove sensitive data from logs. Use generic error messages.', 'insecure_dependencies': 'Update to latest secure versions. Use crypto.createCipheriv and SHA-256+.', 'cors_misconfiguration': 'Specify exact allowed origins. Do not use wildcard with credentials.', 'insecure_randomness': 'Use crypto.randomBytes() or crypto.randomUUID() for security-sensitive values.', 'debug_code': 'Remove console statements and debugger in production builds.', 'nosql_injection': 'Sanitize input and use schema validation. Avoid $where operators.', 'prototype_pollution': 'Use Object.create(null) or validate/sanitize object keys.', 'ssrf': 'Validate URLs against allowlist. Block internal IP ranges.', } return recommendations.get(category, 'Review and remediate the security issue.') def check_package_json(project_dir: str) -> list[SecurityIssue]: """Check package.json for security issues.""" issues = [] pkg_path = os.path.join(project_dir, 'package.json') if not os.path.exists(pkg_path): return [] try: with open(pkg_path, 'r') as f: pkg = json.load(f) except (json.JSONDecodeError, IOError): return [] # Known vulnerable packages (simplified check) vulnerable_packages = { 'lodash': '< 4.17.21', 'axios': '< 0.21.1', 'node-fetch': '< 2.6.1', 'minimist': '< 1.2.6', 'serialize-javascript': '< 3.1.0', } all_deps = {} all_deps.update(pkg.get('dependencies', {})) all_deps.update(pkg.get('devDependencies', {})) for pkg_name in vulnerable_packages: if pkg_name in all_deps: issues.append(SecurityIssue( severity='MEDIUM', category='insecure_dependencies', title=f'Potentially vulnerable package: {pkg_name}', description=f'Package {pkg_name} may have known vulnerabilities. Run npm audit for details.', file_path=pkg_path, line_number=None, code_snippet=f'"{pkg_name}": "{all_deps[pkg_name]}"', recommendation='Run `npm audit` and update to the latest secure version.', cwe_id='CWE-1104', owasp_category='A06:2021-Vulnerable and Outdated Components' )) return issues def check_env_files(project_dir: str) -> list[SecurityIssue]: """Check for exposed environment files.""" issues = [] env_files = ['.env', '.env.local', '.env.production', '.env.development'] for env_file in env_files: env_path = os.path.join(project_dir, env_file) if os.path.exists(env_path): # Check if in .gitignore gitignore_path = os.path.join(project_dir, '.gitignore') in_gitignore = False if os.path.exists(gitignore_path): try: with open(gitignore_path, 'r') as f: gitignore_content = f.read() if env_file in gitignore_content or '.env*' in gitignore_content: in_gitignore = True except IOError: pass if not in_gitignore: issues.append(SecurityIssue( severity='HIGH', category='sensitive_data_exposure', title=f'Environment file not in .gitignore: {env_file}', description='Environment files containing secrets may be committed to version control.', file_path=env_path, line_number=None, code_snippet=env_file, recommendation=f'Add {env_file} to .gitignore immediately.', cwe_id='CWE-200', owasp_category='A02:2021-Cryptographic Failures' )) return issues def run_scan(project_dir: str, min_severity: str = 'LOW') -> ScanResult: """Run full security scan.""" import time start_time = time.time() severity_order = ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW', 'INFO'] min_severity_index = severity_order.index(min_severity.upper()) if min_severity.upper() in severity_order else 3 result = ScanResult() # Find and scan source files files = find_source_files(project_dir) result.files_scanned = len(files) for file_path in files: issues = scan_file(file_path) result.issues.extend(issues) # Additional checks result.issues.extend(check_package_json(project_dir)) result.issues.extend(check_env_files(project_dir)) # Filter by severity result.issues = [ i for i in result.issues if severity_order.index(i.severity) <= min_severity_index ] # Sort by severity result.issues.sort(key=lambda x: severity_order.index(x.severity)) result.scan_duration = time.time() - start_time return result def format_report(result: ScanResult, format_type: str = 'text') -> str: """Format scan results.""" if format_type == 'json': return json.dumps({ 'files_scanned': result.files_scanned, 'scan_duration': result.scan_duration, 'total_issues': len(result.issues), 'by_severity': { 'CRITICAL': len([i for i in result.issues if i.severity == 'CRITICAL']), 'HIGH': len([i for i in result.issues if i.severity == 'HIGH']), 'MEDIUM': len([i for i in result.issues if i.severity == 'MEDIUM']), 'LOW': len([i for i in result.issues if i.severity == 'LOW']), }, 'issues': [ { 'severity': i.severity, 'category': i.category, 'title': i.title, 'description': i.description, 'file_path': i.file_path, 'line_number': i.line_number, 'code_snippet': i.code_snippet, 'recommendation': i.recommendation, 'cwe_id': i.cwe_id, 'owasp_category': i.owasp_category, } for i in result.issues ] }, indent=2) # Text format lines = [] # Header lines.append("") lines.append("=" * 80) lines.append(" SECURITY SCAN REPORT") lines.append("=" * 80) lines.append("") # Summary critical = len([i for i in result.issues if i.severity == 'CRITICAL']) high = len([i for i in result.issues if i.severity == 'HIGH']) medium = len([i for i in result.issues if i.severity == 'MEDIUM']) low = len([i for i in result.issues if i.severity == 'LOW']) lines.append("SUMMARY") lines.append("-" * 80) lines.append(f" Files scanned: {result.files_scanned}") lines.append(f" Scan duration: {result.scan_duration:.2f}s") lines.append(f" Total issues: {len(result.issues)}") lines.append("") lines.append(" By Severity:") lines.append(f" CRITICAL: {critical}") lines.append(f" HIGH: {high}") lines.append(f" MEDIUM: {medium}") lines.append(f" LOW: {low}") lines.append("") # Issues by severity if result.issues: for severity in ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']: severity_issues = [i for i in result.issues if i.severity == severity] if severity_issues: icon = {'CRITICAL': '!!!', 'HIGH': '!!', 'MEDIUM': '!', 'LOW': '.'}[severity] lines.append(f"{icon} {severity} SEVERITY ISSUES ({len(severity_issues)})") lines.append("-" * 80) for idx, issue in enumerate(severity_issues, 1): lines.append(f" [{idx}] {issue.title}") lines.append(f" Category: {issue.category}") if issue.file_path: loc = f"{issue.file_path}:{issue.line_number}" if issue.line_number else issue.file_path lines.append(f" Location: {loc}") if issue.code_snippet: lines.append(f" Code: {issue.code_snippet[:60]}...") if issue.cwe_id: lines.append(f" CWE: {issue.cwe_id}") if issue.owasp_category: lines.append(f" OWASP: {issue.owasp_category}") lines.append(f" Fix: {issue.recommendation}") lines.append("") else: lines.append("No security issues found!") lines.append("") # Result lines.append("=" * 80) if critical > 0: lines.append(f" RESULT: CRITICAL ({critical} critical issues require immediate attention)") elif high > 0: lines.append(f" RESULT: FAIL ({high} high severity issues found)") elif medium > 0: lines.append(f" RESULT: WARNING ({medium} medium severity issues found)") elif low > 0: lines.append(f" RESULT: PASS WITH NOTES ({low} low severity issues)") else: lines.append(" RESULT: PASS (no security issues detected)") lines.append("=" * 80) return "\n".join(lines) def main(): parser = argparse.ArgumentParser(description="Security scanner for codebase") parser.add_argument("--project-dir", default=".", help="Project directory to scan") parser.add_argument("--severity", default="LOW", choices=['CRITICAL', 'HIGH', 'MEDIUM', 'LOW'], help="Minimum severity to report") parser.add_argument("--json", action="store_true", help="Output as JSON") parser.add_argument("--strict", action="store_true", help="Fail on any HIGH or above") args = parser.parse_args() result = run_scan(args.project_dir, args.severity) format_type = 'json' if args.json else 'text' print(format_report(result, format_type)) # Exit code critical = len([i for i in result.issues if i.severity == 'CRITICAL']) high = len([i for i in result.issues if i.severity == 'HIGH']) if critical > 0: return 2 # Critical issues if args.strict and high > 0: return 1 # High issues in strict mode return 0 if __name__ == "__main__": sys.exit(main())