# project-standalo-sonic-cloud/skills/guardrail-orchestrator/scripts/task_state_manager.py

#!/usr/bin/env python3
"""
Task State Manager for parallel execution and dependency tracking.

Manages task-level states independently from workflow phase, enabling:
- Multiple tasks in_progress simultaneously (if no blocking dependencies)
- Dependency validation before task execution
- Task grouping by agent type for parallel frontend/backend work
"""

import argparse
import json
import os
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple

# Try to import yaml
try:
    import yaml
    HAS_YAML = True
except ImportError:
    HAS_YAML = False


# ============================================================================
# YAML Helpers
# ============================================================================

def load_yaml(filepath: str) -> dict:
    """Load a YAML file and return its parsed content.

    Returns an empty dict when the file does not exist, contains only
    whitespace, or parses to a falsy value.

    When PyYAML is not installed, ``save_yaml`` writes JSON rather than YAML.
    The line-based ``parse_simple_yaml`` fallback cannot read such a file, so
    the fallback path first tries to decode the content as a JSON object and
    only then hands it to ``parse_simple_yaml``.

    Args:
        filepath: Path of the file to read.

    Returns:
        The parsed document, or ``{}`` as described above.
    """
    if not os.path.exists(filepath):
        return {}
    with open(filepath, 'r') as f:
        content = f.read()
    if not content.strip():
        return {}
    if HAS_YAML:
        return yaml.safe_load(content) or {}

    # No PyYAML: the file may have been written by save_yaml's JSON fallback.
    try:
        parsed = json.loads(content)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    return parse_simple_yaml(content)


def parse_simple_yaml(content: str) -> dict:
    """Parse a flat subset of YAML without the PyYAML dependency.

    Every line is stripped before inspection, so indentation carries no
    meaning and nested mappings are not supported. Recognised forms:

    - ``key:`` or ``key: []`` starts a list; following ``- item`` lines are
      appended to it until the next ``key: value`` line.
    - ``key: {}`` -> empty dict, ``null``/``~`` -> None, ``true``/``false`` ->
      bool, a run of decimal digits -> int, anything else -> string with one
      pair of matching surrounding quotes removed.
    - Blank lines, ``#`` comment lines, lines without a colon, and list items
      that do not follow a list key are ignored.

    Args:
        content: Raw text of the YAML document.

    Returns:
        Dict of the top-level keys found in ``content``.
    """
    def unquote(text: str) -> str:
        # Drop one pair of matching single or double quotes, if present.
        if (text.startswith('"') and text.endswith('"')) or \
           (text.startswith("'") and text.endswith("'")):
            return text[1:-1]
        return text

    result = {}
    current_list = None  # list receiving "- item" lines, or None

    for line in content.split('\n'):
        stripped = line.strip()

        if not stripped or stripped.startswith('#'):
            continue

        if stripped.startswith('- '):
            if current_list is not None:
                current_list.append(unquote(stripped[2:].strip()))
            continue

        if ':' not in stripped:
            continue

        key, _, value = stripped.partition(':')
        key = key.strip()
        value = value.strip()

        if value == '' or value == '[]':
            current_list = []
            result[key] = current_list
            continue

        # Any scalar value ends the list that was being collected.
        current_list = None
        if value == '{}':
            result[key] = {}
        elif value == 'null' or value == '~':
            result[key] = None
        elif value == 'true':
            result[key] = True
        elif value == 'false':
            result[key] = False
        elif value.isdecimal():
            # isdecimal(), unlike isdigit(), only accepts characters that
            # int() can convert; isdigit() also matches e.g. superscript
            # digits, for which int() raises ValueError.
            result[key] = int(value)
        else:
            result[key] = unquote(value)

    return result


def save_yaml(filepath: str, data: dict):
    """Write ``data`` to ``filepath``, creating parent directories as needed.

    With PyYAML available the data is dumped as block-style YAML with key
    order preserved. Without PyYAML the data is written as indented JSON.

    Args:
        filepath: Destination file path; may be a bare file name.
        data: Mapping to serialise.
    """
    parent = os.path.dirname(filepath)
    # dirname is '' for a bare file name, and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(filepath, 'w') as f:
        if HAS_YAML:
            yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
        else:
            json.dump(data, f, indent=2)


# ============================================================================
# Path Helpers
# ============================================================================

def get_workflow_dir() -> Path:
    """Return the workflow root directory as a path relative to the CWD."""
    workflow_root = Path('.workflow')
    return workflow_root


def get_current_state_path() -> Path:
    """Return the path of ``current.yml`` inside the workflow directory."""
    return get_workflow_dir().joinpath('current.yml')


def get_active_version() -> Optional[str]:
    """Return the ``active_version`` recorded in the current-state file.

    Returns:
        The value stored under ``active_version``, or None when the state
        file does not exist or has no such key.
    """
    state_file = get_current_state_path()
    if state_file.exists():
        state = load_yaml(str(state_file))
        return state.get('active_version')
    return None


def get_tasks_dir() -> Optional[Path]:
    """Return the tasks directory of the active version, creating it if absent.

    Returns:
        ``<workflow dir>/versions/<active version>/tasks``, or None when no
        active version is set.
    """
    active = get_active_version()
    if active:
        location = get_workflow_dir().joinpath('versions', active, 'tasks')
        # Side effect: the directory tree is created on every call.
        location.mkdir(parents=True, exist_ok=True)
        return location
    return None


# ============================================================================
# Task State Constants
# ============================================================================

# Every status a task can hold.
TASK_STATES = [
    'pending',
    'in_progress',
    'review',
    'approved',
    'completed',
    'blocked',
]

# Allowed status changes: current status -> statuses reachable from it.
VALID_TASK_TRANSITIONS = {
    'pending': ['in_progress', 'blocked'],
    # in_progress may drop back to pending when work is paused.
    'in_progress': ['review', 'blocked', 'pending'],
    # review may return to in_progress when changes are requested.
    'review': ['approved', 'in_progress'],
    'approved': ['completed'],
    # completed is terminal: nothing follows it.
    'completed': [],
    # blocked tasks return to pending once their dependencies resolve.
    'blocked': ['pending'],
}


# ============================================================================
# Task Loading
# ============================================================================

def load_all_tasks() -> Dict[str, dict]:
    """Load every ``*.yml`` task file of the active version.

    Files are read in sorted path order so that the iteration order of the
    returned dict, which the grouping and ordering helpers in this module
    follow, does not depend on the order the filesystem lists the directory.
    Files that are empty or whose top-level document is not a mapping are
    skipped, because every consumer calls ``.get`` on the task.

    Returns:
        Dict mapping task ID (the file name without ``.yml``) to the parsed
        task dict; empty when there is no active version.
    """
    tasks_dir = get_tasks_dir()
    if not tasks_dir or not tasks_dir.exists():
        return {}

    tasks = {}
    for task_file in sorted(tasks_dir.glob('*.yml')):
        task = load_yaml(str(task_file))
        if task and isinstance(task, dict):
            tasks[task_file.stem] = task

    return tasks


def load_task(task_id: str) -> Optional[dict]:
    """Load the task stored as ``<task_id>.yml`` in the active tasks directory.

    Returns:
        The parsed task, or None when there is no active version or the
        task file does not exist.
    """
    directory = get_tasks_dir()
    if directory:
        candidate = directory / f"{task_id}.yml"
        if candidate.exists():
            return load_yaml(str(candidate))
    return None


def save_task(task: dict):
    """Persist ``task`` as ``<id>.yml`` in the active tasks directory.

    The file name comes from the task's ``id`` key, or ``task_id`` when
    ``id`` is absent. ``task`` is mutated: ``updated_at`` is set to the
    current local time in ISO format. Failures are reported by printing an
    error message; nothing is raised or returned.
    """
    target_dir = get_tasks_dir()
    if target_dir:
        # Fall back to 'task_id' only when the 'id' key is missing entirely.
        identifier = task['id'] if 'id' in task else task.get('task_id')
        if identifier:
            task['updated_at'] = datetime.now().isoformat()
            destination = target_dir / f"{identifier}.yml"
            save_yaml(str(destination), task)
        else:
            print("Error: Task has no ID")
    else:
        print("Error: No active workflow")


# ============================================================================
# Dependency Resolution
# ============================================================================

def get_task_dependencies(task: dict) -> List[str]:
    """Return the task IDs that ``task`` depends on.

    A missing, null or empty ``dependencies`` entry yields an empty list. A
    scalar string entry (``dependencies: task_a``) is treated as a single
    dependency, so callers never iterate over the string's characters.

    Args:
        task: Parsed task dict.

    Returns:
        List of dependency task IDs.
    """
    deps = task.get('dependencies') or []
    if isinstance(deps, str):
        return [deps]
    return deps


def check_dependencies_met(task_id: str, all_tasks: Dict[str, dict]) -> Tuple[bool, List[str]]:
    """
    Report whether every dependency of a task is completed or approved.

    Args:
        task_id: ID of the task whose dependencies are checked
        all_tasks: Mapping of task ID to task dict used for lookups

    Returns:
        Tuple of (all_met, unmet_descriptions). Each description names the
        dependency and why it is unmet (not found, or its current status).
        When the task itself is unknown the result is (False, [message]).
    """
    task = all_tasks.get(task_id)
    if not task:
        return False, [f"Task {task_id} not found"]

    satisfied_states = ('completed', 'approved')

    def describe_problem(dep_id):
        # None means the dependency is satisfied.
        dep_task = all_tasks.get(dep_id)
        if not dep_task:
            return f"{dep_id} (not found)"
        if dep_task.get('status') in satisfied_states:
            return None
        return f"{dep_id} (status: {dep_task.get('status', 'unknown')})"

    problems = [
        message
        for message in map(describe_problem, get_task_dependencies(task))
        if message is not None
    ]
    return not problems, problems


def get_dependency_graph(all_tasks: Dict[str, dict]) -> Dict[str, Set[str]]:
    """Map each task ID to the set of task IDs it depends on."""
    return {
        task_id: set(get_task_dependencies(task))
        for task_id, task in all_tasks.items()
    }


def detect_circular_dependencies(all_tasks: Dict[str, dict]) -> List[List[str]]:
    """Detect circular dependencies with a depth-first search.

    Every edge that points back to a task on the current search path is
    reported as one cycle. The search always unwinds its bookkeeping on the
    way out; leaving tasks marked as "on the path" after a cycle was found
    would make a later search root look up a task that is not in its own
    path and fail.

    Args:
        all_tasks: Mapping of task ID to task dict.

    Returns:
        List of cycles; each cycle is the list of task IDs along it, starting
        and ending with the same ID. Empty when there are no cycles.
    """
    graph = get_dependency_graph(all_tasks)
    cycles = []
    visited = set()
    on_path = set()  # tasks on the DFS path currently being explored

    def dfs(node: str, path: List[str]) -> None:
        visited.add(node)
        on_path.add(node)
        path.append(node)

        for neighbor in graph.get(node, set()):
            if neighbor in on_path:
                # Back edge: the cycle is the path from neighbor to node.
                cycle_start = path.index(neighbor)
                cycles.append(path[cycle_start:] + [neighbor])
            elif neighbor not in visited:
                dfs(neighbor, path)

        # Always unwind, whether or not a cycle was found below this node.
        path.pop()
        on_path.remove(node)

    for node in graph:
        if node not in visited:
            dfs(node, [])

    return cycles


def get_execution_order(all_tasks: Dict[str, dict]) -> List[str]:
    """Return task IDs in an order where dependencies precede dependents.

    Uses Kahn's algorithm: a task becomes ready once all of its dependencies
    that exist in ``all_tasks`` have been emitted. Dependencies naming
    unknown tasks are ignored. Ties are broken by the iteration order of
    ``all_tasks``.

    Tasks that are part of (or depend on) a dependency cycle never become
    ready and are omitted from the result; check for cycles first when a
    complete order is required.

    Args:
        all_tasks: Mapping of task ID to task dict.

    Returns:
        List of task IDs, dependencies first.
    """
    from collections import deque

    graph = get_dependency_graph(all_tasks)

    # remaining[t]: how many known dependencies of t are not yet emitted.
    # dependents[d]: tasks that list d as a dependency.
    remaining = {}
    dependents = {task_id: [] for task_id in all_tasks}
    for task_id in all_tasks:
        known_deps = [dep for dep in graph.get(task_id, ()) if dep in dependents]
        remaining[task_id] = len(known_deps)
        for dep in known_deps:
            dependents[dep].append(task_id)

    ready = deque(task_id for task_id in all_tasks if remaining[task_id] == 0)
    order = []

    while ready:
        node = ready.popleft()
        order.append(node)

        for dependent in dependents[node]:
            remaining[dependent] -= 1
            if remaining[dependent] == 0:
                ready.append(dependent)

    return order


# ============================================================================
# Parallel Execution Support
# ============================================================================

def get_parallel_candidates(all_tasks: Dict[str, dict]) -> Dict[str, List[str]]:
    """
    Collect pending tasks whose dependencies are met, grouped by agent.

    Tasks whose agent is neither 'frontend' nor 'backend' are grouped
    under 'other'.

    Returns:
        Dict mapping agent type to list of task IDs ready for parallel execution
    """
    buckets = {'frontend': [], 'backend': [], 'other': []}

    for task_id, task in all_tasks.items():
        # A task without a status counts as pending.
        if task.get('status', 'pending') != 'pending':
            continue

        ready, _ = check_dependencies_met(task_id, all_tasks)
        if ready:
            agent = task.get('agent', 'other')
            group = agent if agent in buckets else 'other'
            buckets[group].append(task_id)

    return buckets


def get_active_tasks() -> Dict[str, List[str]]:
    """
    Group the IDs of all in_progress tasks by agent.

    Tasks whose agent is neither 'frontend' nor 'backend' are grouped
    under 'other'.

    Returns:
        Dict mapping agent type to list of active task IDs
    """
    tasks = load_all_tasks()
    running = {'frontend': [], 'backend': [], 'other': []}

    for task_id, task in tasks.items():
        if task.get('status') != 'in_progress':
            continue
        agent = task.get('agent', 'other')
        bucket = running[agent] if agent in running else running['other']
        bucket.append(task_id)

    return running


def can_start_task(task_id: str, max_per_agent: int = 1) -> Tuple[bool, str]:
    """
    Check if a task can be started given current active tasks.

    A task can start when it exists, is pending, has all dependencies met,
    and fewer than ``max_per_agent`` tasks of its agent group are in
    progress.

    Args:
        task_id: Task to check
        max_per_agent: Maximum concurrent tasks per agent type

    Returns:
        Tuple of (can_start, reason)
    """
    all_tasks = load_all_tasks()
    task = all_tasks.get(task_id)

    if not task:
        return False, f"Task {task_id} not found"

    status = task.get('status', 'pending')
    if status != 'pending':
        return False, f"Task is not pending (status: {status})"

    # Check dependencies
    deps_met, unmet = check_dependencies_met(task_id, all_tasks)
    if not deps_met:
        return False, f"Dependencies not met: {', '.join(unmet)}"

    # Check concurrent task limit per agent
    agent = task.get('agent', 'other')
    active = get_active_tasks()
    # get_active_tasks files every unrecognised agent under 'other', so the
    # lookup must use the same grouping; looking up the raw agent name would
    # always find an empty list and never enforce the limit.
    bucket = agent if agent in active else 'other'
    if len(active.get(bucket, [])) >= max_per_agent:
        return False, f"Max concurrent {bucket} tasks reached ({max_per_agent})"

    return True, "Ready to start"


# ============================================================================
# State Transitions
# ============================================================================

def transition_task(task_id: str, new_status: str) -> Tuple[bool, str]:
    """
    Move a task to ``new_status`` if the transition table allows it.

    A move to in_progress additionally requires all dependencies to be
    met; otherwise the task is saved as blocked (with ``blocked_by`` and
    ``blocked_at`` recorded) and the call reports failure. On success the
    task gets a ``<new_status>_at`` timestamp and is saved.

    Returns:
        Tuple of (success, message)
    """
    task = load_task(task_id)
    if not task:
        return False, f"Task {task_id} not found"

    previous = task.get('status', 'pending')
    allowed = VALID_TASK_TRANSITIONS.get(previous, [])
    if new_status not in allowed:
        return False, f"Invalid transition: {previous} → {new_status}. Valid: {allowed}"

    if new_status == 'in_progress':
        ready, missing = check_dependencies_met(task_id, load_all_tasks())
        if not ready:
            # Record why the task could not start instead of starting it.
            task['status'] = 'blocked'
            task['blocked_by'] = missing
            task['blocked_at'] = datetime.now().isoformat()
            save_task(task)
            return False, f"Dependencies not met, task blocked: {', '.join(missing)}"

    task['status'] = new_status
    task[f'{new_status}_at'] = datetime.now().isoformat()

    # Leaving the blocked state: the blocking details no longer apply.
    if (previous, new_status) == ('blocked', 'pending'):
        for stale_key in ('blocked_by', 'blocked_at'):
            task.pop(stale_key, None)

    save_task(task)
    return True, f"Task {task_id}: {previous} → {new_status}"


def update_blocked_tasks():
    """Move blocked tasks whose dependencies are now met back to pending.

    Returns:
        List of task IDs that were successfully unblocked.
    """
    snapshot = load_all_tasks()
    released = []

    for task_id, task in snapshot.items():
        is_blocked = task.get('status') == 'blocked'
        if is_blocked and check_dependencies_met(task_id, snapshot)[0]:
            moved, _ = transition_task(task_id, 'pending')
            if moved:
                released.append(task_id)

    return released


# ============================================================================
# Status Report
# ============================================================================

def get_status_summary() -> dict:
    """Build a summary of all tasks of the active version.

    Returns:
        Dict with keys ``total`` (task count), ``by_status`` (count per
        status), ``by_agent`` (per agent: ``total`` and ``by_status``
        counts), ``blocked_details`` (one entry per blocked task) and
        ``ready_for_parallel`` (pending tasks ready to run, by agent).
    """
    tasks = load_all_tasks()

    status_counts = {state: 0 for state in TASK_STATES}
    agent_stats = {}
    blocked = []
    summary = {
        'total': len(tasks),
        'by_status': status_counts,
        'by_agent': agent_stats,
        'blocked_details': blocked,
        'ready_for_parallel': get_parallel_candidates(tasks)
    }

    for task_id, task in tasks.items():
        status = task.get('status', 'pending')
        agent = task.get('agent', 'other')

        # Statuses outside TASK_STATES are counted as well.
        status_counts[status] = status_counts.get(status, 0) + 1

        if agent not in agent_stats:
            agent_stats[agent] = {'total': 0, 'by_status': {}}
        per_agent = agent_stats[agent]
        per_agent['total'] += 1
        per_agent['by_status'][status] = per_agent['by_status'].get(status, 0) + 1

        if status == 'blocked':
            blocked.append({
                'task_id': task_id,
                'blocked_by': task.get('blocked_by', []),
                'blocked_at': task.get('blocked_at')
            })

    return summary


def show_status():
    """Print the task status summary as a boxed report on stdout."""
    summary = get_status_summary()
    width = 60

    def rule(left: str, right: str) -> None:
        # Horizontal border with the given corner/junction characters.
        print(left + "═" * width + right)

    def row(text: str) -> None:
        # One left-aligned content line inside the box.
        print("║" + text.ljust(width) + "║")

    print()
    rule("╔", "╗")
    print("║" + "TASK STATE MANAGER STATUS".center(width) + "║")
    rule("╠", "╣")
    row(f"  Total Tasks: {summary['total']}")
    rule("╠", "╣")
    row("  BY STATUS")

    status_icons = {
        'pending': '⏳', 'in_progress': '🔄', 'review': '🔍',
        'approved': '✅', 'completed': '✓', 'blocked': '🚫'
    }
    for status, count in summary['by_status'].items():
        icon = status_icons.get(status, '•')
        row(f"    {icon} {status}: {count}")

    rule("╠", "╣")
    row("  BY AGENT")
    for agent, data in summary['by_agent'].items():
        row(f"    {agent}: {data['total']} tasks")
        for status, count in data['by_status'].items():
            if count > 0:
                row(f"      └─ {status}: {count}")

    # Tasks ready to run in parallel: at most three IDs per agent are listed.
    parallel = summary['ready_for_parallel']
    if any(len(ids) > 0 for ids in parallel.values()):
        rule("╠", "╣")
        row("  🔀 READY FOR PARALLEL EXECUTION")
        for agent, tasks in parallel.items():
            if not tasks:
                continue
            row(f"    {agent}: {', '.join(tasks[:3])}")
            if len(tasks) > 3:
                row(f"      (+{len(tasks) - 3} more)")

    # Blocked tasks: at most five tasks, two blockers each, are listed.
    if summary['blocked_details']:
        rule("╠", "╣")
        row("  🚫 BLOCKED TASKS")
        for blocked in summary['blocked_details'][:5]:
            deps = ', '.join(blocked['blocked_by'][:2])
            if len(blocked['blocked_by']) > 2:
                deps += f" (+{len(blocked['blocked_by']) - 2})"
            row(f"    {blocked['task_id']}")
            row(f"      Blocked by: {deps}")

    rule("╚", "╝")


# ============================================================================
# CLI Interface
# ============================================================================

def main():
    """Parse command-line arguments and run the selected sub-command.

    Sub-commands: status, transition, can-start, parallel, deps, check-deps,
    unblock, order, cycles. Without a sub-command the help text is printed.
    The process exits with status 1 when a transition fails, when a
    can-start or check-deps check is negative, or when ``order`` finds
    circular dependencies.
    """
    parser = argparse.ArgumentParser(description="Task state management for parallel execution")
    subparsers = parser.add_subparsers(dest='command', help='Commands')

    # status command
    subparsers.add_parser('status', help='Show task status summary')

    # transition command
    trans_parser = subparsers.add_parser('transition', help='Transition task status')
    trans_parser.add_argument('task_id', help='Task ID')
    trans_parser.add_argument('status', choices=TASK_STATES, help='New status')

    # can-start command
    can_start_parser = subparsers.add_parser('can-start', help='Check if task can start')
    can_start_parser.add_argument('task_id', help='Task ID')
    can_start_parser.add_argument('--max-per-agent', type=int, default=1,
                                   help='Max concurrent tasks per agent')

    # parallel command
    subparsers.add_parser('parallel', help='Show tasks ready for parallel execution')

    # deps command
    deps_parser = subparsers.add_parser('deps', help='Show task dependencies')
    deps_parser.add_argument('task_id', nargs='?', help='Task ID (optional)')

    # check-deps command
    check_deps_parser = subparsers.add_parser('check-deps', help='Check if dependencies are met')
    check_deps_parser.add_argument('task_id', help='Task ID')

    # unblock command
    subparsers.add_parser('unblock', help='Update blocked tasks whose deps are now met')

    # order command
    subparsers.add_parser('order', help='Show execution order respecting dependencies')

    # cycles command
    subparsers.add_parser('cycles', help='Detect circular dependencies')

    args = parser.parse_args()

    # Dispatch on the chosen sub-command; args.command is None when omitted.
    if args.command == 'status':
        show_status()

    elif args.command == 'transition':
        success, msg = transition_task(args.task_id, args.status)
        print(msg)
        if not success:
            sys.exit(1)

    elif args.command == 'can-start':
        can_start, reason = can_start_task(args.task_id, args.max_per_agent)
        print(f"{'✅ Yes' if can_start else '❌ No'}: {reason}")
        # The exit status mirrors the answer so the command can gate scripts.
        sys.exit(0 if can_start else 1)

    elif args.command == 'parallel':
        all_tasks = load_all_tasks()
        candidates = get_parallel_candidates(all_tasks)

        print("\n🔀 Tasks Ready for Parallel Execution:\n")
        for agent, tasks in candidates.items():
            if tasks:
                print(f"  {agent}:")
                for task_id in tasks:
                    task = all_tasks.get(task_id, {})
                    print(f"    - {task_id}: {task.get('title', 'No title')}")

        if not any(candidates.values()):
            print("  No tasks ready for parallel execution")

    elif args.command == 'deps':
        all_tasks = load_all_tasks()

        if args.task_id:
            # Single task: list each dependency with its current status.
            task = all_tasks.get(args.task_id)
            if task:
                deps = get_task_dependencies(task)
                print(f"\n{args.task_id} depends on:")
                if deps:
                    for dep_id in deps:
                        dep = all_tasks.get(dep_id, {})
                        status = dep.get('status', 'unknown')
                        print(f"  - {dep_id} ({status})")
                else:
                    print("  (no dependencies)")
            else:
                print(f"Task {args.task_id} not found")
        else:
            # Show all dependencies
            graph = get_dependency_graph(all_tasks)
            print("\nDependency Graph:\n")
            for task_id, deps in graph.items():
                # Tasks without dependencies are left out of the listing.
                if deps:
                    print(f"  {task_id} ← {', '.join(deps)}")

    elif args.command == 'check-deps':
        all_tasks = load_all_tasks()
        deps_met, unmet = check_dependencies_met(args.task_id, all_tasks)

        if deps_met:
            print(f"✅ All dependencies met for {args.task_id}")
        else:
            print(f"❌ Unmet dependencies for {args.task_id}:")
            for dep in unmet:
                print(f"  - {dep}")

        sys.exit(0 if deps_met else 1)

    elif args.command == 'unblock':
        unblocked = update_blocked_tasks()
        if unblocked:
            print(f"✅ Unblocked {len(unblocked)} tasks:")
            for task_id in unblocked:
                print(f"  - {task_id}")
        else:
            print("No tasks to unblock")

    elif args.command == 'order':
        all_tasks = load_all_tasks()

        # Check for cycles first
        cycles = detect_circular_dependencies(all_tasks)
        if cycles:
            print("⚠️  Cannot determine order - circular dependencies detected!")
            for cycle in cycles:
                print(f"  Cycle: {' → '.join(cycle)}")
            sys.exit(1)

        order = get_execution_order(all_tasks)
        print("\n📋 Execution Order (respecting dependencies):\n")
        for i, task_id in enumerate(order, 1):
            task = all_tasks.get(task_id, {})
            status = task.get('status', 'pending')
            agent = task.get('agent', '?')
            print(f"  {i}. [{agent}] {task_id} ({status})")

    elif args.command == 'cycles':
        all_tasks = load_all_tasks()
        cycles = detect_circular_dependencies(all_tasks)

        if cycles:
            print("⚠️  Circular dependencies detected:\n")
            for cycle in cycles:
                print(f"  {' → '.join(cycle)}")
        else:
            print("✅ No circular dependencies detected")

    else:
        # No sub-command given.
        parser.print_help()


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()