project-standalo-sonic-cloud/skills/guardrail-orchestrator/scripts/security_scan.py

602 lines
25 KiB
Python

#!/usr/bin/env python3
"""
Comprehensive Security Scanner for guardrail workflow.
Performs static security analysis on codebase:
- Hardcoded secrets and credentials
- SQL injection vulnerabilities
- XSS vulnerabilities
- Path traversal risks
- Insecure dependencies
- Authentication/Authorization issues
- OWASP Top 10 patterns
Usage:
python3 security_scan.py --project-dir . [--severity CRITICAL|HIGH|MEDIUM|LOW] [--json] [--strict]
"""
import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import NamedTuple
from dataclasses import dataclass, field
@dataclass
class SecurityIssue:
"""Security vulnerability finding."""
severity: str # CRITICAL, HIGH, MEDIUM, LOW, INFO
category: str
title: str
description: str
file_path: str
line_number: int | None
code_snippet: str
recommendation: str
cwe_id: str | None = None
owasp_category: str | None = None
@dataclass
class ScanResult:
    """Aggregate outcome of one scan run."""

    # Findings collected so far; every instance starts with its own empty list
    issues: list[SecurityIssue] = field(default_factory=list)
    # Number of source files that were examined
    files_scanned: int = field(default=0)
    # Elapsed time of the scan, in seconds
    scan_duration: float = field(default=0.0)
# Security patterns organized by category.
#
# Each key is a category name; its value is a dict with:
#   'severity' - severity label given to every finding in the category
#   'cwe'      - CWE identifier attached to the finding
#   'owasp'    - OWASP Top 10 (2021) label attached to the finding
#   'patterns' - list of (regex, title) pairs
#
# scan_file() runs each regex over the whole file text with
# re.IGNORECASE | re.MULTILINE, so every pattern below is case-insensitive
# and '.' never crosses a line break.
#
# NOTE: several patterns end with the character class ['"'], which lists the
# single quote twice; it matches exactly the same characters as ['"].
SECURITY_PATTERNS = {
    'hardcoded_secrets': {
        'severity': 'CRITICAL',
        'cwe': 'CWE-798',
        'owasp': 'A07:2021-Identification and Authentication Failures',
        'patterns': [
            # API Keys
            (r'''(?:api[_-]?key|apikey)\s*[:=]\s*['"]((?!process\.env)[^'"]{10,})['"']''', 'Hardcoded API key'),
            (r'''(?:api[_-]?secret|apisecret)\s*[:=]\s*['"]((?!process\.env)[^'"]{10,})['"']''', 'Hardcoded API secret'),
            # Passwords
            (r'''(?:password|passwd|pwd)\s*[:=]\s*['"]([^'"]{4,})['"']''', 'Hardcoded password'),
            # Private keys
            (r'''-----BEGIN (?:RSA |EC |DSA )?PRIVATE KEY-----''', 'Embedded private key'),
            # AWS credentials
            (r'''(?:aws[_-]?access[_-]?key[_-]?id|aws[_-]?secret)\s*[:=]\s*['"]([A-Z0-9]{16,})['"']''', 'AWS credential'),
            (r'''AKIA[0-9A-Z]{16}''', 'AWS Access Key ID'),
            # JWT secrets
            (r'''(?:jwt[_-]?secret|token[_-]?secret)\s*[:=]\s*['"]([^'"]{8,})['"']''', 'Hardcoded JWT secret'),
            # Database connection strings
            (r'''(?:mongodb|postgres|mysql|redis):\/\/[^:]+:[^@]+@''', 'Database credentials in connection string'),
            # Generic secrets
            # NOTE(review): this also matches text already caught by the more
            # specific secret patterns above (e.g. jwt_secret = "..."), so one
            # line can produce more than one finding in this category.
            (r'''(?:secret|token|auth)[_-]?(?:key)?\s*[:=]\s*['"]([^'"]{8,})['"']''', 'Potential hardcoded secret'),
        ]
    },
    'sql_injection': {
        'severity': 'CRITICAL',
        'cwe': 'CWE-89',
        'owasp': 'A03:2021-Injection',
        'patterns': [
            # String concatenation in queries
            (r'''(?:query|sql|execute)\s*\(\s*[`'"].*\$\{''', 'SQL injection via template literal'),
            (r'''(?:query|sql|execute)\s*\(\s*['"].*\+\s*(?:req\.|params\.|body\.|query\.)''', 'SQL injection via concatenation'),
            (r'''(?:SELECT|INSERT|UPDATE|DELETE|FROM|WHERE).*\$\{''', 'Raw SQL with template interpolation'),
            # Raw queries
            (r'''\.raw\s*\(\s*[`'"].*\$\{''', 'Raw query with interpolation'),
            (r'''prisma\.\$queryRaw\s*`[^`]*\$\{''', 'Prisma raw query with interpolation'),
        ]
    },
    'xss': {
        'severity': 'HIGH',
        'cwe': 'CWE-79',
        'owasp': 'A03:2021-Injection',
        'patterns': [
            # React dangerouslySetInnerHTML
            (r'''dangerouslySetInnerHTML\s*=\s*\{\s*\{__html:\s*(?!DOMPurify|sanitize)''', 'Unsanitized dangerouslySetInnerHTML'),
            # innerHTML assignment
            (r'''\.innerHTML\s*=\s*(?!['"`]<)''', 'Direct innerHTML assignment'),
            # document.write
            (r'''document\.write\s*\(''', 'document.write usage'),
            # eval with user input
            (r'''eval\s*\(\s*(?:req\.|params\.|body\.|query\.|props\.)''', 'eval with user input'),
            # jQuery html() with user input
            (r'''\$\([^)]+\)\.html\s*\(\s*(?!['"`])''', 'jQuery html() with dynamic content'),
        ]
    },
    'path_traversal': {
        'severity': 'HIGH',
        'cwe': 'CWE-22',
        'owasp': 'A01:2021-Broken Access Control',
        'patterns': [
            # File operations with user input
            (r'''(?:readFile|writeFile|readFileSync|writeFileSync|createReadStream)\s*\(\s*(?:req\.|params\.|body\.|query\.)''', 'File operation with user input'),
            (r'''(?:readFile|writeFile)\s*\(\s*[`'"].*\$\{(?:req\.|params\.|body\.|query\.)''', 'File path with user input interpolation'),
            # Path.join with user input (without validation)
            (r'''path\.(?:join|resolve)\s*\([^)]*(?:req\.|params\.|body\.|query\.)''', 'Path operation with user input'),
        ]
    },
    'command_injection': {
        'severity': 'CRITICAL',
        'cwe': 'CWE-78',
        'owasp': 'A03:2021-Injection',
        'patterns': [
            # exec/spawn with user input
            (r'''(?:exec|execSync|spawn|spawnSync)\s*\(\s*[`'"].*\$\{''', 'Command injection via template literal'),
            (r'''(?:exec|execSync|spawn|spawnSync)\s*\(\s*(?:req\.|params\.|body\.|query\.)''', 'Command execution with user input'),
            # child_process with concatenation
            (r'''child_process.*\(\s*['"].*\+\s*(?:req\.|params\.|body\.|query\.)''', 'Command injection via concatenation'),
        ]
    },
    'insecure_auth': {
        'severity': 'HIGH',
        'cwe': 'CWE-287',
        'owasp': 'A07:2021-Identification and Authentication Failures',
        'patterns': [
            # Weak JWT algorithms
            (r'''algorithm\s*[:=]\s*['"](?:none|HS256)['"']''', 'Weak JWT algorithm'),
            # No password hashing
            (r'''password\s*===?\s*(?:req\.|body\.|params\.)''', 'Plain text password comparison'),
            # Disabled security
            (r'''(?:verify|secure|https|ssl)\s*[:=]\s*false''', 'Security feature disabled'),
            # Cookie without security flags
            # NOTE(review): the negative lookahead only inspects text that
            # follows the closing parenthesis of cookie(...), so flags passed
            # inside the call's arguments are not seen by it.
            (r'''cookie\s*\([^)]*\)\s*(?!.*(?:httpOnly|secure|sameSite))''', 'Cookie without security flags'),
        ]
    },
    'sensitive_data_exposure': {
        'severity': 'MEDIUM',
        'cwe': 'CWE-200',
        'owasp': 'A02:2021-Cryptographic Failures',
        'patterns': [
            # Logging sensitive data
            (r'''console\.(?:log|info|debug)\s*\([^)]*(?:password|secret|token|key|credential)''', 'Logging sensitive data'),
            # Error messages with sensitive info
            (r'''(?:throw|Error)\s*\([^)]*(?:password|secret|token|key|sql|query)''', 'Sensitive info in error message'),
            # HTTP instead of HTTPS
            (r'''['"]http:\/\/(?!localhost|127\.0\.0\.1)''', 'HTTP URL (should be HTTPS)'),
        ]
    },
    'insecure_dependencies': {
        'severity': 'MEDIUM',
        'cwe': 'CWE-1104',
        'owasp': 'A06:2021-Vulnerable and Outdated Components',
        'patterns': [
            # Known vulnerable patterns
            (r'''require\s*\(\s*['"](?:serialize-javascript|lodash\.template|node-serialize)['"]\s*\)''', 'Known vulnerable package'),
            # Outdated crypto
            (r'''crypto\.createCipher\s*\(''', 'Deprecated crypto.createCipher'),
            (r'''md5\s*\(|createHash\s*\(\s*['"]md5['"]''', 'MD5 hash usage (weak)'),
            (r'''sha1\s*\(|createHash\s*\(\s*['"]sha1['"]''', 'SHA1 hash usage (weak)'),
        ]
    },
    'cors_misconfiguration': {
        'severity': 'MEDIUM',
        'cwe': 'CWE-942',
        'owasp': 'A01:2021-Broken Access Control',
        'patterns': [
            # Wildcard CORS
            (r'''(?:Access-Control-Allow-Origin|origin)\s*[:=]\s*['"]\*['"']''', 'Wildcard CORS origin'),
            (r'''cors\s*\(\s*\{[^}]*origin\s*:\s*true''', 'CORS allows all origins'),
            # Credentials with wildcard
            (r'''credentials\s*:\s*true[^}]*origin\s*:\s*['"]\*['"']''', 'CORS credentials with wildcard origin'),
        ]
    },
    'insecure_randomness': {
        'severity': 'LOW',
        'cwe': 'CWE-330',
        'owasp': 'A02:2021-Cryptographic Failures',
        'patterns': [
            # Math.random for security
            (r'''Math\.random\s*\(\s*\)[^;]*(?:token|secret|password|key|id|session)''', 'Math.random for security-sensitive value'),
            (r'''(?:token|secret|key|session)[^=]*=\s*Math\.random''', 'Math.random for security-sensitive value'),
        ]
    },
    'debug_code': {
        'severity': 'LOW',
        'cwe': 'CWE-489',
        'owasp': 'A05:2021-Security Misconfiguration',
        'patterns': [
            # Debug statements
            (r'''console\.(?:log|debug|info|warn)\s*\(''', 'Console statement (remove in production)'),
            (r'''debugger\s*;''', 'Debugger statement'),
            # TODO/FIXME security notes
            (r'''(?:TODO|FIXME|HACK|XXX).*(?:security|auth|password|secret|vulnerable)''', 'Security-related TODO'),
        ]
    },
    'nosql_injection': {
        'severity': 'HIGH',
        'cwe': 'CWE-943',
        'owasp': 'A03:2021-Injection',
        'patterns': [
            # MongoDB injection
            (r'''\.find\s*\(\s*\{[^}]*\$(?:where|regex|gt|lt|ne|in|nin|or|and).*(?:req\.|params\.|body\.|query\.)''', 'NoSQL injection risk'),
            (r'''\.find\s*\(\s*(?:req\.|params\.|body\.|query\.)''', 'Direct user input in query'),
        ]
    },
    'prototype_pollution': {
        'severity': 'HIGH',
        'cwe': 'CWE-1321',
        'owasp': 'A03:2021-Injection',
        'patterns': [
            # Deep merge without protection
            (r'''(?:merge|extend|assign)\s*\([^)]*(?:req\.|params\.|body\.|query\.)''', 'Potential prototype pollution via merge'),
            (r'''Object\.assign\s*\(\s*\{\}[^)]*(?:req\.|params\.|body\.|query\.)''', 'Object.assign with user input'),
            # __proto__ access
            (r'''__proto__''', 'Direct __proto__ access'),
            (r'''constructor\s*\[\s*['"]prototype['"]''', 'Prototype access via constructor'),
        ]
    },
    'ssrf': {
        'severity': 'HIGH',
        'cwe': 'CWE-918',
        'owasp': 'A10:2021-Server-Side Request Forgery',
        'patterns': [
            # Fetch/axios with user URL
            (r'''(?:fetch|axios\.get|axios\.post|http\.get|https\.get)\s*\(\s*(?:req\.|params\.|body\.|query\.)''', 'SSRF via user-controlled URL'),
            (r'''(?:fetch|axios)\s*\(\s*[`'"].*\$\{(?:req\.|params\.|body\.|query\.)''', 'SSRF via URL interpolation'),
        ]
    },
}
# File suffixes that are picked up for scanning (TypeScript / JavaScript sources).
SCAN_EXTENSIONS = {
    '.ts', '.tsx',
    '.js', '.jsx',
    '.mjs', '.cjs',
}
# Directory names that are never descended into while walking the project.
SKIP_DIRS = {
    'node_modules',
    '.next',
    'dist',
    'build',
    '.git',
    'coverage',
    '__pycache__',
}
def find_source_files(project_dir: str) -> list[str]:
    """Collect the paths of all scannable source files below *project_dir*.

    Directories named in SKIP_DIRS are pruned from the walk, and only files
    whose extension is in SCAN_EXTENSIONS are returned. Paths are built by
    joining the walked directory with the file name.
    """
    collected: list[str] = []
    for current_dir, subdirs, names in os.walk(project_dir):
        # Prune in place so os.walk does not descend into excluded directories.
        subdirs[:] = [name for name in subdirs if name not in SKIP_DIRS]
        collected.extend(
            os.path.join(current_dir, name)
            for name in names
            if os.path.splitext(name)[1] in SCAN_EXTENSIONS
        )
    return collected
def scan_file(file_path: str) -> list[SecurityIssue]:
"""Scan a single file for security issues."""
issues = []
try:
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
lines = content.split('\n')
except (IOError, OSError):
return []
for category, config in SECURITY_PATTERNS.items():
for pattern, title in config['patterns']:
try:
for match in re.finditer(pattern, content, re.IGNORECASE | re.MULTILINE):
# Find line number
line_start = content[:match.start()].count('\n') + 1
line_content = lines[line_start - 1] if line_start <= len(lines) else ''
# Skip if in comment
stripped = line_content.strip()
if stripped.startswith('//') or stripped.startswith('*') or stripped.startswith('/*'):
continue
# Skip if looks like env var reference
if 'process.env' in line_content or 'import.meta.env' in line_content:
continue
issues.append(SecurityIssue(
severity=config['severity'],
category=category,
title=title,
description=get_description(category),
file_path=file_path,
line_number=line_start,
code_snippet=line_content.strip()[:100],
recommendation=get_recommendation(category),
cwe_id=config.get('cwe'),
owasp_category=config.get('owasp')
))
except re.error:
continue
return issues
# Explanatory text shown with a finding, keyed by pattern category.
_CATEGORY_DESCRIPTIONS = {
    'hardcoded_secrets': 'Credentials or secrets hardcoded in source code can be extracted by attackers.',
    'sql_injection': 'User input directly in SQL queries allows attackers to manipulate database operations.',
    'xss': 'Unsanitized user input rendered in HTML allows attackers to inject malicious scripts.',
    'path_traversal': 'User input in file paths allows attackers to access arbitrary files.',
    'command_injection': 'User input in system commands allows attackers to execute arbitrary commands.',
    'insecure_auth': 'Weak authentication mechanisms can be bypassed by attackers.',
    'sensitive_data_exposure': 'Sensitive information may be exposed through logs or errors.',
    'insecure_dependencies': 'Known vulnerable packages or weak cryptographic functions.',
    'cors_misconfiguration': 'Overly permissive CORS allows unauthorized cross-origin requests.',
    'insecure_randomness': 'Predictable random values can be guessed by attackers.',
    'debug_code': 'Debug code in production may expose sensitive information.',
    'nosql_injection': 'User input in NoSQL queries allows attackers to manipulate database operations.',
    'prototype_pollution': 'Modifying object prototypes can lead to code execution.',
    'ssrf': 'User-controlled URLs allow attackers to make requests to internal services.',
}


def get_description(category: str) -> str:
    """Return the explanatory text for *category*.

    A category without a dedicated entry gets a generic message.
    """
    try:
        return _CATEGORY_DESCRIPTIONS[category]
    except KeyError:
        return 'Security vulnerability detected.'
# Remediation advice shown with a finding, keyed by pattern category.
_CATEGORY_RECOMMENDATIONS = {
    'hardcoded_secrets': 'Use environment variables (process.env) or a secrets manager.',
    'sql_injection': 'Use parameterized queries or ORM methods. Never concatenate user input.',
    'xss': 'Sanitize user input with DOMPurify or escape HTML entities.',
    'path_traversal': 'Validate and sanitize file paths. Use path.basename() and whitelist allowed paths.',
    'command_injection': 'Avoid shell commands with user input. Use execFile with argument arrays.',
    'insecure_auth': 'Use strong algorithms (RS256), hash passwords with bcrypt, enable all security flags.',
    'sensitive_data_exposure': 'Remove sensitive data from logs. Use generic error messages.',
    'insecure_dependencies': 'Update to latest secure versions. Use crypto.createCipheriv and SHA-256+.',
    'cors_misconfiguration': 'Specify exact allowed origins. Do not use wildcard with credentials.',
    'insecure_randomness': 'Use crypto.randomBytes() or crypto.randomUUID() for security-sensitive values.',
    'debug_code': 'Remove console statements and debugger in production builds.',
    'nosql_injection': 'Sanitize input and use schema validation. Avoid $where operators.',
    'prototype_pollution': 'Use Object.create(null) or validate/sanitize object keys.',
    'ssrf': 'Validate URLs against allowlist. Block internal IP ranges.',
}


def get_recommendation(category: str) -> str:
    """Return the remediation advice for *category*.

    A category without a dedicated entry gets a generic instruction.
    """
    try:
        return _CATEGORY_RECOMMENDATIONS[category]
    except KeyError:
        return 'Review and remediate the security issue.'
def check_package_json(project_dir: str) -> list[SecurityIssue]:
"""Check package.json for security issues."""
issues = []
pkg_path = os.path.join(project_dir, 'package.json')
if not os.path.exists(pkg_path):
return []
try:
with open(pkg_path, 'r') as f:
pkg = json.load(f)
except (json.JSONDecodeError, IOError):
return []
# Known vulnerable packages (simplified check)
vulnerable_packages = {
'lodash': '< 4.17.21',
'axios': '< 0.21.1',
'node-fetch': '< 2.6.1',
'minimist': '< 1.2.6',
'serialize-javascript': '< 3.1.0',
}
all_deps = {}
all_deps.update(pkg.get('dependencies', {}))
all_deps.update(pkg.get('devDependencies', {}))
for pkg_name in vulnerable_packages:
if pkg_name in all_deps:
issues.append(SecurityIssue(
severity='MEDIUM',
category='insecure_dependencies',
title=f'Potentially vulnerable package: {pkg_name}',
description=f'Package {pkg_name} may have known vulnerabilities. Run npm audit for details.',
file_path=pkg_path,
line_number=None,
code_snippet=f'"{pkg_name}": "{all_deps[pkg_name]}"',
recommendation='Run `npm audit` and update to the latest secure version.',
cwe_id='CWE-1104',
owasp_category='A06:2021-Vulnerable and Outdated Components'
))
return issues
def check_env_files(project_dir: str) -> list[SecurityIssue]:
"""Check for exposed environment files."""
issues = []
env_files = ['.env', '.env.local', '.env.production', '.env.development']
for env_file in env_files:
env_path = os.path.join(project_dir, env_file)
if os.path.exists(env_path):
# Check if in .gitignore
gitignore_path = os.path.join(project_dir, '.gitignore')
in_gitignore = False
if os.path.exists(gitignore_path):
try:
with open(gitignore_path, 'r') as f:
gitignore_content = f.read()
if env_file in gitignore_content or '.env*' in gitignore_content:
in_gitignore = True
except IOError:
pass
if not in_gitignore:
issues.append(SecurityIssue(
severity='HIGH',
category='sensitive_data_exposure',
title=f'Environment file not in .gitignore: {env_file}',
description='Environment files containing secrets may be committed to version control.',
file_path=env_path,
line_number=None,
code_snippet=env_file,
recommendation=f'Add {env_file} to .gitignore immediately.',
cwe_id='CWE-200',
owasp_category='A02:2021-Cryptographic Failures'
))
return issues
def run_scan(project_dir: str, min_severity: str = 'LOW') -> ScanResult:
    """Run the full security scan over a project.

    Scans every source file found by find_source_files(), adds the
    package.json and environment-file checks, then filters and orders the
    findings by severity.

    Args:
        project_dir: Root directory of the project to scan.
        min_severity: Least severe level to keep (case-insensitive). An
            unrecognised value falls back to ``LOW``.

    Returns:
        A ScanResult whose issues are sorted from most to least severe, with
        the number of scanned files and the elapsed time in seconds.
    """
    import time

    # perf_counter is monotonic, so the duration cannot come out negative
    # when the wall clock is adjusted during the scan.
    started = time.perf_counter()

    severity_order = ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW', 'INFO']
    rank = {name: position for position, name in enumerate(severity_order)}
    threshold = rank.get(min_severity.upper(), rank['LOW'])

    def severity_rank(issue: SecurityIssue) -> int:
        # An unrecognised severity is ranked as most severe so that such a
        # finding is surfaced instead of being dropped or raising.
        return rank.get(issue.severity, 0)

    result = ScanResult()

    # Find and scan source files
    files = find_source_files(project_dir)
    result.files_scanned = len(files)
    for file_path in files:
        result.issues.extend(scan_file(file_path))

    # Additional checks
    result.issues.extend(check_package_json(project_dir))
    result.issues.extend(check_env_files(project_dir))

    # Filter by severity, then order most severe first (sorted() is stable,
    # so findings of equal severity keep their discovery order).
    result.issues = sorted(
        (issue for issue in result.issues if severity_rank(issue) <= threshold),
        key=severity_rank,
    )

    result.scan_duration = time.perf_counter() - started
    return result
def format_report(result: ScanResult, format_type: str = 'text') -> str:
"""Format scan results."""
if format_type == 'json':
return json.dumps({
'files_scanned': result.files_scanned,
'scan_duration': result.scan_duration,
'total_issues': len(result.issues),
'by_severity': {
'CRITICAL': len([i for i in result.issues if i.severity == 'CRITICAL']),
'HIGH': len([i for i in result.issues if i.severity == 'HIGH']),
'MEDIUM': len([i for i in result.issues if i.severity == 'MEDIUM']),
'LOW': len([i for i in result.issues if i.severity == 'LOW']),
},
'issues': [
{
'severity': i.severity,
'category': i.category,
'title': i.title,
'description': i.description,
'file_path': i.file_path,
'line_number': i.line_number,
'code_snippet': i.code_snippet,
'recommendation': i.recommendation,
'cwe_id': i.cwe_id,
'owasp_category': i.owasp_category,
}
for i in result.issues
]
}, indent=2)
# Text format
lines = []
# Header
lines.append("")
lines.append("=" * 80)
lines.append(" SECURITY SCAN REPORT")
lines.append("=" * 80)
lines.append("")
# Summary
critical = len([i for i in result.issues if i.severity == 'CRITICAL'])
high = len([i for i in result.issues if i.severity == 'HIGH'])
medium = len([i for i in result.issues if i.severity == 'MEDIUM'])
low = len([i for i in result.issues if i.severity == 'LOW'])
lines.append("SUMMARY")
lines.append("-" * 80)
lines.append(f" Files scanned: {result.files_scanned}")
lines.append(f" Scan duration: {result.scan_duration:.2f}s")
lines.append(f" Total issues: {len(result.issues)}")
lines.append("")
lines.append(" By Severity:")
lines.append(f" CRITICAL: {critical}")
lines.append(f" HIGH: {high}")
lines.append(f" MEDIUM: {medium}")
lines.append(f" LOW: {low}")
lines.append("")
# Issues by severity
if result.issues:
for severity in ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW']:
severity_issues = [i for i in result.issues if i.severity == severity]
if severity_issues:
icon = {'CRITICAL': '!!!', 'HIGH': '!!', 'MEDIUM': '!', 'LOW': '.'}[severity]
lines.append(f"{icon} {severity} SEVERITY ISSUES ({len(severity_issues)})")
lines.append("-" * 80)
for idx, issue in enumerate(severity_issues, 1):
lines.append(f" [{idx}] {issue.title}")
lines.append(f" Category: {issue.category}")
if issue.file_path:
loc = f"{issue.file_path}:{issue.line_number}" if issue.line_number else issue.file_path
lines.append(f" Location: {loc}")
if issue.code_snippet:
lines.append(f" Code: {issue.code_snippet[:60]}...")
if issue.cwe_id:
lines.append(f" CWE: {issue.cwe_id}")
if issue.owasp_category:
lines.append(f" OWASP: {issue.owasp_category}")
lines.append(f" Fix: {issue.recommendation}")
lines.append("")
else:
lines.append("No security issues found!")
lines.append("")
# Result
lines.append("=" * 80)
if critical > 0:
lines.append(f" RESULT: CRITICAL ({critical} critical issues require immediate attention)")
elif high > 0:
lines.append(f" RESULT: FAIL ({high} high severity issues found)")
elif medium > 0:
lines.append(f" RESULT: WARNING ({medium} medium severity issues found)")
elif low > 0:
lines.append(f" RESULT: PASS WITH NOTES ({low} low severity issues)")
else:
lines.append(" RESULT: PASS (no security issues detected)")
lines.append("=" * 80)
return "\n".join(lines)
def main():
parser = argparse.ArgumentParser(description="Security scanner for codebase")
parser.add_argument("--project-dir", default=".", help="Project directory to scan")
parser.add_argument("--severity", default="LOW",
choices=['CRITICAL', 'HIGH', 'MEDIUM', 'LOW'],
help="Minimum severity to report")
parser.add_argument("--json", action="store_true", help="Output as JSON")
parser.add_argument("--strict", action="store_true", help="Fail on any HIGH or above")
args = parser.parse_args()
result = run_scan(args.project_dir, args.severity)
format_type = 'json' if args.json else 'text'
print(format_report(result, format_type))
# Exit code
critical = len([i for i in result.issues if i.severity == 'CRITICAL'])
high = len([i for i in result.issues if i.severity == 'HIGH'])
if critical > 0:
return 2 # Critical issues
if args.strict and high > 0:
return 1 # High issues in strict mode
return 0
if __name__ == "__main__":
sys.exit(main())