project-standalo-sonic-cloud/skills/guardrail-orchestrator/scripts/validate_bash.py

272 lines
7.0 KiB
Python

#!/usr/bin/env python3
"""
Bash command validator for guardrail enforcement.
Blocks shell commands that could write files outside the workflow.
Exit codes:
0 = Command allowed
1 = Command blocked (with message)
"""
import argparse
import re
import sys
from pathlib import Path
# Regular expressions that flag a shell command as a (potential) file write.
#
# Patterns with a capture group expose the write target in that group.
# Patterns without a group only flag the command; re.findall() on those
# returns the matched text itself rather than a path.
#
# NOTE(review): the single-target patterns capture only the first operand,
# so `rm /tmp/a src/b` reports just `/tmp/a` — confirm whether multi-operand
# commands need stricter handling.
WRITE_PATTERNS = [
    # Redirections. '>' is excluded from the target character class so the
    # single-'>' pattern cannot match the first half of '>>' and report the
    # second '>' as if it were the file being written.
    r'\s*>\s*["\']?([^"\'&|;>\s]+)',    # > file
    r'\s*>>\s*["\']?([^"\'&|;>\s]+)',   # >> file
    r'\s*2>\s*["\']?([^"\'&|;>\s]+)',   # 2> file
    r'\s*&>\s*["\']?([^"\'&|;>\s]+)',   # &> file
    # tee command (any number of leading option flags is skipped so the
    # captured group is the file operand, not a flag)
    r'\btee\s+(?:-\S+\s+)*["\']?([^"\'&|;\s]+)',
    # Direct file creation
    r'\btouch\s+(?:-\S+\s+)*["\']?([^"\'&|;\s]+)',
    # Copy/Move operations (greedy '.*' makes the group the last operand)
    r'\bcp\s+.*\s+["\']?([^"\'&|;\s]+)',
    r'\bmv\s+.*\s+["\']?([^"\'&|;\s]+)',
    # In-place editing
    r'\bsed\s+-i',
    r'\bawk\s+-i\s+inplace',
    r'\bperl\s+-i',
    # Here documents
    r'<<\s*["\']?EOF',
    r'<<\s*["\']?END',
    r"cat\s*<<",
    # mkdir (could be prep for writing)
    r'\bmkdir\s+(?:-\S+\s+)*["\']?([^"\'&|;\s]+)',
    # rm (destructive); `-f`, `-r -f`, `-fr`, ... are all skipped
    r'\brm\s+(?:-\S+\s+)*["\']?([^"\'&|;\s]+)',
    # chmod/chown
    r'\bchmod\s+',
    r'\bchown\s+',
    # curl/wget writing to file
    r'\bcurl\s+.*-o\s+["\']?([^"\'&|;\s]+)',
    r'\bwget\s+.*-O\s+["\']?([^"\'&|;\s]+)',
    # git operations that modify files
    r'\bgit\s+checkout\s+',
    r'\bgit\s+reset\s+--hard',
    r'\bgit\s+clean\s+',
    r'\bgit\s+stash\s+pop',
    # npm/yarn install (modifies node_modules)
    r'\bnpm\s+install\b',
    r'\byarn\s+add\b',
    r'\bpnpm\s+add\b',
    # dd command
    r'\bdd\s+',
    # patch command
    r'\bpatch\s+',
    # ln (symlinks)
    r'\bln\s+',
]
# Commands that are always allowed.
#
# Each entry is a regex anchored at the start of the command with '^'.
# Entries ending in '$' also constrain the rest of the command; the others
# only constrain its prefix.
ALWAYS_ALLOWED = [
    r'^ls\b',
    r'^cat\s+[^>]+$',  # cat without redirect
    r'^head\b',
    r'^tail\b',
    r'^grep\b',
    r'^find\b',
    r'^wc\b',
    r'^echo\s+[^>]+$',  # echo without redirect
    r'^pwd$',
    r'^cd\b',
    r'^which\b',
    r'^type\b',
    r'^file\b',
    r'^stat\b',
    r'^du\b',
    r'^df\b',
    r'^ps\b',
    r'^env$',
    r'^printenv',
    r'^date$',
    r'^whoami$',
    r'^hostname$',
    r'^uname\b',
    r'^git\s+status',
    r'^git\s+log',
    r'^git\s+diff',
    r'^git\s+branch',
    r'^git\s+show',
    r'^git\s+remote',
    r'^npm\s+run\b',
    r'^npm\s+test\b',
    r'^npm\s+start\b',
    r'^npx\b',
    r'^node\b',
    # python without redirect. The previous form `.*(?!.*>)` let the greedy
    # `.*` swallow the whole line before the lookahead ran, so the lookahead
    # always succeeded; `[^>]*$` rejects any '>' in the command, the same way
    # the cat/echo entries above do.
    r'^python3?\b[^>]*$',
    r'^pip\s+list',
    r'^pip\s+show',
    r'^tree\b',
    r'^jq\b',
    r'^curl\s+(?!.*-o)',  # curl without -o
    r'^wget\s+(?!.*-O)',  # wget without -O
]
# Write targets that are always permitted.
# Entries ending in '/' name directories; the others name individual files.
ALLOWED_PATHS = [
    # Workflow / tooling state kept inside the project
    '.workflow/',
    '.claude/',
    'skills/',
    'project_manifest.json',
    # Scratch locations
    '/tmp/',
    '/var/tmp/',
    # Directories touched by package-manager and VCS commands
    'node_modules/',  # npm install
    '.git/',  # git operations
]
def is_always_allowed(command: str, patterns: "list[str] | None" = None) -> bool:
    """Check if command matches always-allowed patterns.

    The allow-list patterns are anchored only at the start of the command, so
    on their own they would also accept ``ls; rm -rf src`` or
    ``grep x > src/file``. To keep this fast path safe, any command that
    contains a shell control or redirection character (``;``, ``&``, ``|``,
    ``<``, ``>``, a backtick, ``$(`` or a newline) is never treated as
    always-allowed. Returning False does not block such a command.

    The check is purely textual (quotes are not parsed), so a quoted
    metacharacter also disables the fast path.

    Args:
        command: The shell command line to classify.
        patterns: Regexes to match against the start of the command.
            Defaults to the module-level ``ALWAYS_ALLOWED`` list.

    Returns:
        True if the command is a single simple command matching one of the
        patterns (case-insensitively), False otherwise.
    """
    if patterns is None:
        patterns = ALWAYS_ALLOWED
    command = command.strip()
    # Chaining, piping, redirection and command substitution can smuggle a
    # write behind a harmless-looking prefix; never short-circuit on those.
    if re.search(r'[;&|<>`\n]|\$\(', command):
        return False
    return any(re.match(pattern, command, re.IGNORECASE) for pattern in patterns)
def extract_target_paths(command: str, patterns: "list[str] | None" = None) -> list:
    """Extract potential file paths being written to.

    Every pattern is applied with ``re.findall``. For a pattern with a capture
    group the captured text is collected; for a pattern without one the whole
    matched text is collected instead. Matching is case-insensitive so a
    case-insensitive detection pass over the same patterns cannot find a
    write for which this function finds no target.

    Args:
        command: The shell command line to inspect.
        patterns: Write-detection regexes. Defaults to the module-level
            ``WRITE_PATTERNS`` list.

    Returns:
        The collected strings in first-seen order, without duplicates, with
        empty strings and anything starting with ``-`` (option flags captured
        in place of a path) removed.
    """
    if patterns is None:
        patterns = WRITE_PATTERNS
    candidates = []
    for pattern in patterns:
        for match in re.findall(pattern, command, re.IGNORECASE):
            if isinstance(match, tuple):
                # Multi-group patterns come back as tuples of groups.
                candidates.extend(match)
            else:
                candidates.append(match)
    # dict.fromkeys() drops repeats while preserving first-seen order, so the
    # same target is not reported once per pattern that matched it.
    return [p for p in dict.fromkeys(candidates) if p and not p.startswith('-')]
def is_path_allowed(path: str, allowed_paths: "list[str] | None" = None) -> bool:
    """Check if path is in allowed list.

    The path is normalised lexically (``./`` removed, ``a/../b`` collapsed,
    repeated slashes merged) before it is compared. This replaces the earlier
    ``lstrip('./')``, which stripped *characters* rather than a prefix: it
    turned ``.workflow/x`` into ``workflow/x`` and ``/tmp/x`` into ``tmp/x``
    so those allow-list entries could never match, while ``../skills/x`` and
    ``skills/../src/x`` were accepted.

    Matching rules:
      * an entry ending in ``/`` is a directory: the directory itself and
        anything beneath it is allowed;
      * any other entry is a file and must match exactly.

    Normalisation is textual only; symlinks, ``~`` and shell variables are
    not resolved.

    Args:
        path: Candidate write target as it appears in the command.
        allowed_paths: Allow-list entries. Defaults to the module-level
            ``ALLOWED_PATHS`` list.

    Returns:
        True if the normalised path falls under an allowed entry.
    """
    import posixpath  # local import keeps this function self-contained

    if allowed_paths is None:
        allowed_paths = ALLOWED_PATHS
    if not path:
        return False

    normalized = posixpath.normpath(path)
    # POSIX normpath deliberately preserves exactly two leading slashes.
    if normalized.startswith('//'):
        normalized = '/' + normalized.lstrip('/')
    # A relative path that still starts with '..' escapes the project root.
    if normalized == '..' or normalized.startswith('../'):
        return False

    for allowed in allowed_paths:
        root = posixpath.normpath(allowed)
        if normalized == root:
            return True
        # Compare on a path boundary so 'skills/' does not match 'skills2/x'.
        if allowed.endswith('/') and normalized.startswith(root + '/'):
            return True
    return False
def has_write_operation(command: str) -> tuple[bool, list]:
    """
    Report whether the command looks like it writes to disk.

    The command is searched case-insensitively against every entry of
    WRITE_PATTERNS. When at least one entry matches, the targets are
    gathered with extract_target_paths().

    Returns (has_write, target_paths); target_paths is empty when no
    pattern matched.
    """
    write_detected = any(
        re.search(candidate, command, re.IGNORECASE) for candidate in WRITE_PATTERNS
    )
    if not write_detected:
        return False, []
    return True, extract_target_paths(command)
def validate_bash_command(command: str) -> tuple[bool, str]:
    """
    Validate a bash command for guardrail compliance.

    Decision order:
      1. An empty or whitespace-only command is allowed.
      2. A command accepted by is_always_allowed() is allowed.
      3. A command with no detected write operation is allowed.
      4. A detected write is allowed only if every extracted target passes
         is_path_allowed().
      5. A detected write with no extractable target is blocked. It used to
         be reported as "Write to allowed paths" because the list of blocked
         paths was empty, which let commands whose only captured token was
         an option flag through.

    Returns (allowed, message)
    """
    if not command or not command.strip():
        return True, "✓ GUARDRAIL: Empty command"
    command = command.strip()

    # Check if always allowed
    if is_always_allowed(command):
        return True, "✓ GUARDRAIL: Safe command allowed"

    # Check for write operations
    has_write, target_paths = has_write_operation(command)
    if not has_write:
        return True, "✓ GUARDRAIL: No write operations detected"

    if target_paths:
        # Check if all target paths are allowed
        blocked_paths = [path for path in target_paths if not is_path_allowed(path)]
        if not blocked_paths:
            return True, "✓ GUARDRAIL: Write to allowed paths"
    else:
        # Fail closed: a write was detected but its destination is unknown,
        # so it cannot be shown to stay inside the allowed locations.
        blocked_paths = ["(target could not be determined)"]

    # Block the command
    suggested_feature = "modify files via bash"
    error_msg = f"""
⛔ GUARDRAIL VIOLATION: Bash command blocked
Command: {command[:100]}{'...' if len(command) > 100 else ''}
Detected write operation to unauthorized paths:
{chr(10).join(f' - {p}' for p in blocked_paths)}
╔══════════════════════════════════════════════════════════════════╗
║ 👉 REQUIRED ACTION: Use the workflow instead of bash ║
║ ║
║ Run this command: ║
║ /workflow:spawn {suggested_feature}
║ ║
║ Then use Write/Edit tools (not bash) to modify files. ║
║ ║
║ Bash is for reading/running, not writing files. ║
╚══════════════════════════════════════════════════════════════════╝
Allowed bash write targets:
- .workflow/*, .claude/*, skills/*
- project_manifest.json
- /tmp/*, node_modules/
"""
    return False, error_msg
def main():
    """Command-line entry point.

    Takes the command from ``--command``; when that is missing or empty and
    stdin is not a terminal, the command is read from stdin instead. The
    validation message goes to stdout when the command is allowed and to
    stderr when it is blocked.

    Returns:
        0 if the command is allowed, 1 if it is blocked.
    """
    cli = argparse.ArgumentParser(description="Validate bash command for guardrails")
    cli.add_argument("--command", help="Bash command to validate")
    options = cli.parse_args()

    bash_command = options.command if options.command else ""
    # Fall back to piped input when nothing was passed on the command line.
    if bash_command == "" and not sys.stdin.isatty():
        bash_command = sys.stdin.read().strip()

    is_ok, report = validate_bash_command(bash_command)
    if not is_ok:
        print(report, file=sys.stderr)
        return 1
    print(report)
    return 0
# Script entry point: the process exit status is main()'s return value.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)