diff --git a/.gitignore b/.gitignore index 06cef22..5ff705b 100644 --- a/.gitignore +++ b/.gitignore @@ -44,3 +44,4 @@ dump.rdb tmp/ temp/ *.tmp +community-ade-wt/ diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..777b771 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,57 @@ +# Community ADE (Agentic Development Environment) + +A community-driven, open-source agentic development environment built on Letta's stateful agent architecture. + +## Vision + +Build an open-source ADE that combines: +- **Stateful agents** with hierarchical memory (Letta's unique strength) +- **Git-native persistence** with MemFS versioning +- **Persistent task queues** for durable subagent execution +- **Web dashboard** for real-time monitoring and control +- **Computer Use** integration for browser automation + +## Differentiation + +Unlike commercial alternatives (Warp, Intent), Community ADE is: +- **Open source** and self-hostable +- **Stateful by design** - agents remember across sessions +- **Model agnostic** - use any OpenAI-compatible API +- **Git-native** - version control for agent memory + +## Project Structure + +``` +├── src/ # Queue implementation and worker pool +├── tests/ # Test suite +├── docs/ # Architecture and design documents +├── proto/ # Prototypes and experiments +└── README.md # This file +``` + +## Documentation + +- [Project State](docs/community-ade-project-state.md) - Current status and active subagents +- [Phase 1 Design](docs/ade-phase1-orchestration-design.md) - Task queue architecture +- [Redis Queue Design](docs/ade-redis-queue-design.md) - Detailed Redis implementation spec +- [Research Synthesis](docs/community-ade-research-synthesis-2026-03-18.md) - Competitive analysis + +## Phase 1: Orchestration Layer (In Progress) + +Goals: +1. ✅ Research and design complete +2. 🔄 Redis task queue implementation +3. ⏳ Worker pool with heartbeat +4. ⏳ Integration with Letta Task tool + +## Quick Start + +Coming soon - queue prototype implementation. + +## License + +MIT - Community contribution welcome. + +--- + +*Project orchestrated by Ani, with research and design by specialized subagents.* diff --git a/docs/TASK_SPEC.md b/docs/TASK_SPEC.md new file mode 100644 index 0000000..14ab0c7 --- /dev/null +++ b/docs/TASK_SPEC.md @@ -0,0 +1,61 @@ +# Task Spec: Architect-Omega +**Agent:** Architect-Omega +**Model:** Kimi-K2.5 +**Mission:** Design approval system with clean apply locks + +## Background +The Community ADE has task execution but lacks governance. Workers pull jobs and execute immediately. We need approval gates, locking, and human review. + +## Requirements + +### 1. Clean Apply Locks +- Distributed locking via Redis (we have Redis from Alpha) +- Lock per task, per resource, per agent +- Auto-expiry with heartbeats (30s default) +- Deadlock detection and resolution +- Lock queue (ordered acquisition) + +### 2. Approval Lifecycle +``` +DRAFT → SUBMITTED → REVIEWING → APPROVED → APPLYING → COMPLETED + ↓ + REJECTED +``` +- SUBMIT: Validation runs, preview generated, no side effects +- APPLY: Actual execution after approval +- Rollback: Stash changes between SUBMIT and APPLY + +### 3. Human Gates +- Review queue in dashboard +- Batch approve/reject +- Approval delegation ("if X approves, auto-approve for me") +- Required reviewers based on task type + +### 4. Technical Design +Design these components: +- Redis key schemas (lock:*, approval:*, task:*) +- Express routes (POST /tasks/:id/submit, POST /approvals/:id/approve, etc.) +- Zod schemas for all inputs +- WebSocket events (approval:requested, approval:approved, lock:acquired) +- Database models (if needed beyond Redis) + +### 5. Integration +- Uses Alpha's Redis +- Uses Beta's Express patterns +- Gamma workers check locks before execution +- Delta-V2 dashboard shows approval queue + +## Deliverables +Create in this worktree: +- `design.md` - Full architecture specification +- `api-spec.ts` - Express routes + Zod schemas (TypeScript) +- `redis-schema.md` - All Redis key patterns +- `ui-components.md` - Dashboard UI descriptions + +## Success Criteria +- Design handles concurrent task execution safely +- Human can review before destructive operations +- System degrades gracefully (locks expire, approvals timeout) +- All edge cases documented + +**Begin immediately. You are the master here.** diff --git a/docs/api-spec.ts b/docs/api-spec.ts new file mode 100644 index 0000000..d6134f7 --- /dev/null +++ b/docs/api-spec.ts @@ -0,0 +1,1226 @@ +/** + * Community ADE Approval System - API Specification + * Express routes with Zod validation schemas + * + * @module approval-system/api + * @version 1.0.0 + */ + +import { Router, Request, Response, NextFunction } from 'express'; +import { z } from 'zod'; + +// ============================================================================ +// BASE SCHEMAS +// ============================================================================ + +/** + * Common identifiers + */ +const IdSchema = z.string().uuid(); +const TimestampSchema = z.string().datetime(); +const ResourceTypeSchema = z.enum([ + 'database', + 'service', + 'infrastructure', + 'configuration', + 'secret', + 'network', + 'storage' +]); + +/** + * Task state enumeration + */ +const TaskStateSchema = z.enum([ + 'DRAFT', + 'SUBMITTED', + 'REVIEWING', + 'APPROVED', + 'APPLYING', + 'COMPLETED', + 'REJECTED', + 'CANCELLED' +]); + +/** + * Lock mode enumeration + */ +const LockModeSchema = z.enum(['exclusive', 'shared']); + +/** + * Approval action enumeration + */ +const ApprovalActionSchema = z.enum(['approve', 'reject', 'request_changes', 'delegate']); + +/** + * Pagination parameters + */ +const PaginationSchema = z.object({ + page: z.coerce.number().int().min(1).default(1), + limit: z.coerce.number().int().min(1).max(100).default(20), + cursor: z.string().optional() +}); + +/** + * Sort parameters + */ +const SortSchema = z.object({ + sort_by: z.enum(['created_at', 'updated_at', 'risk_score', 'state']).default('created_at'), + sort_order: z.enum(['asc', 'desc']).default('desc') +}); + +// ============================================================================ +// TASK SCHEMAS +// ============================================================================ + +/** + * Resource reference schema + */ +const ResourceRefSchema = z.object({ + type: ResourceTypeSchema, + id: z.string(), + name: z.string().optional(), + scope: z.enum(['global', 'namespace', 'cluster', 'instance']).default('namespace'), + namespace: z.string().optional(), + actions: z.array(z.enum(['read', 'write', 'delete', 'execute'])).default(['read']) +}); + +/** + * Task configuration schema + */ +const TaskConfigSchema = z.object({ + type: z.string().min(1).max(100), + version: z.string().default('1.0.0'), + description: z.string().min(1).max(5000), + resources: z.array(ResourceRefSchema).min(1), + parameters: z.record(z.unknown()).default({}), + secrets: z.array(z.string()).default([]), // Secret references, not values + rollback_strategy: z.enum(['automatic', 'manual', 'none']).default('automatic'), + timeout_seconds: z.number().int().min(1).max(3600).default(300), + priority: z.number().int().min(0).max(100).default(50) +}); + +/** + * Risk assessment schema + */ +const RiskAssessmentSchema = z.object({ + score: z.number().int().min(0).max(100), + level: z.enum(['LOW', 'MEDIUM', 'HIGH', 'CRITICAL']), + factors: z.array(z.object({ + name: z.string(), + weight: z.number(), + contribution: z.number() + })), + auto_approvable: z.boolean() +}); + +/** + * Preview result schema + */ +const PreviewResultSchema = z.object({ + valid: z.boolean(), + changes: z.array(z.object({ + resource: ResourceRefSchema, + action: z.string(), + before: z.unknown().optional(), + after: z.unknown().optional(), + diff: z.string().optional() + })), + warnings: z.array(z.string()).default([]), + errors: z.array(z.string()).default([]), + estimated_duration_seconds: z.number().int().optional(), + affected_services: z.array(z.string()).default([]) +}); + +/** + * Create task request schema + */ +const CreateTaskRequestSchema = z.object({ + config: TaskConfigSchema, + metadata: z.object({ + author_id: z.string(), + author_name: z.string(), + team: z.string().optional(), + ticket_ref: z.string().optional(), + tags: z.array(z.string()).default([]) + }), + dry_run: z.boolean().default(false) +}); + +/** + * Submit task request schema + */ +const SubmitTaskRequestSchema = z.object({ + force: z.boolean().default(false), + skip_preview: z.boolean().default(false), + requested_reviewers: z.array(z.string()).optional() +}); + +/** + * Task response schema + */ +const TaskResponseSchema = z.object({ + id: IdSchema, + state: TaskStateSchema, + config: TaskConfigSchema, + metadata: z.object({ + author_id: z.string(), + author_name: z.string(), + team: z.string().optional(), + ticket_ref: z.string().optional(), + tags: z.array(z.string()), + created_at: TimestampSchema, + updated_at: TimestampSchema, + submitted_at: TimestampSchema.optional(), + approved_at: TimestampSchema.optional(), + completed_at: TimestampSchema.optional() + }), + risk: RiskAssessmentSchema.optional(), + preview: PreviewResultSchema.optional(), + approvals: z.array(z.object({ + id: IdSchema, + reviewer_id: z.string(), + reviewer_name: z.string(), + action: ApprovalActionSchema, + reason: z.string().optional(), + created_at: TimestampSchema + })).default([]), + required_approvals: z.number().int().min(0).default(1), + current_approvals: z.number().int().min(0).default(0), + lock_info: z.object({ + acquired_at: TimestampSchema, + expires_at: TimestampSchema, + agent_id: z.string() + }).optional(), + execution: z.object({ + started_at: TimestampSchema.optional(), + completed_at: TimestampSchema.optional(), + result: z.enum(['success', 'failure', 'timeout', 'cancelled']).optional(), + output: z.string().optional(), + error: z.string().optional() + }).optional() +}); + +/** + * List tasks query schema + */ +const ListTasksQuerySchema = PaginationSchema.merge(SortSchema).merge(z.object({ + state: z.array(TaskStateSchema).optional(), + author_id: z.string().optional(), + resource_type: ResourceTypeSchema.optional(), + resource_id: z.string().optional(), + risk_level: z.enum(['LOW', 'MEDIUM', 'HIGH', 'CRITICAL']).optional(), + created_after: TimestampSchema.optional(), + created_before: TimestampSchema.optional(), + tags: z.array(z.string()).optional(), + needs_my_approval: z.coerce.boolean().optional() +})); + +// ============================================================================ +// APPROVAL SCHEMAS +// ============================================================================ + +/** + * Approval request schema (internal) + */ +const ApprovalRequestSchema = z.object({ + id: IdSchema, + task_id: IdSchema, + reviewer_id: z.string(), + reviewer_name: z.string(), + status: z.enum(['PENDING', 'APPROVED', 'REJECTED', 'DELEGATED']), + priority: z.enum(['LOW', 'NORMAL', 'HIGH', 'URGENT']).default('NORMAL'), + delegated_to: z.string().optional(), + due_at: TimestampSchema.optional(), + created_at: TimestampSchema, + responded_at: TimestampSchema.optional() +}); + +/** + * Approve/reject request schema + */ +const RespondApprovalRequestSchema = z.object({ + action: ApprovalActionSchema, + reason: z.string().max(2000).optional(), + delegate_to: z.string().optional(), + options: z.object({ + apply_immediately: z.boolean().default(false), + require_additional_approvals: z.array(z.string()).optional() + }).default({}) +}); + +/** + * Batch approval request schema + */ +const BatchApprovalRequestSchema = z.object({ + approval_ids: z.array(IdSchema).min(1).max(100), + action: z.enum(['approve', 'reject']), + reason: z.string().max(2000).optional(), + options: z.object({ + skip_validation: z.boolean().default(false), + apply_immediately: z.boolean().default(false), + continue_on_error: z.boolean().default(false) + }).default({}) +}); + +/** + * Batch approval response schema + */ +const BatchApprovalResponseSchema = z.object({ + success: z.boolean(), + processed: z.number().int(), + succeeded: z.number().int(), + failed: z.number().int(), + results: z.array(z.object({ + approval_id: IdSchema, + success: z.boolean(), + error: z.string().optional() + })), + task_updates: z.array(z.object({ + task_id: IdSchema, + new_state: TaskStateSchema.optional() + })) +}); + +/** + * Delegation policy schema + */ +const DelegationPolicySchema = z.object({ + id: IdSchema.optional(), + owner_id: z.string(), + conditions: z.object({ + task_types: z.array(z.string()).optional(), + resource_patterns: z.array(z.string()).optional(), + risk_above: z.number().int().min(0).max(100).optional(), + namespaces: z.array(z.string()).optional(), + tags: z.array(z.string()).optional() + }), + delegate_to: z.string(), + cascade: z.boolean().default(true), + expires_at: TimestampSchema.optional(), + active: z.boolean().default(true) +}); + +// ============================================================================ +// LOCK SCHEMAS +// ============================================================================ + +/** + * Lock acquisition request schema + */ +const AcquireLockRequestSchema = z.object({ + resource_type: z.enum(['task', 'resource', 'agent']), + resource_id: z.string(), + mode: LockModeSchema.default('exclusive'), + ttl_seconds: z.number().int().min(5).max(300).default(30), + purpose: z.string().max(200).optional(), + wait_for_available: z.boolean().default(true), + max_wait_seconds: z.number().int().min(0).max(300).default(60) +}); + +/** + * Lock acquisition response schema + */ +const LockResponseSchema = z.object({ + id: IdSchema, + acquired: z.boolean(), + resource_type: z.enum(['task', 'resource', 'agent']), + resource_id: z.string(), + mode: LockModeSchema, + holder: z.object({ + agent_id: z.string(), + acquired_at: TimestampSchema, + expires_at: TimestampSchema, + purpose: z.string().optional() + }), + queue_position: z.number().int().optional(), + estimated_wait_seconds: z.number().int().optional() +}); + +/** + * Lock heartbeat request schema + */ +const LockHeartbeatRequestSchema = z.object({ + lock_id: IdSchema, + ttl_extension_seconds: z.number().int().min(5).max(300).default(30) +}); + +/** + * Lock release request schema + */ +const ReleaseLockRequestSchema = z.object({ + lock_id: IdSchema, + force: z.boolean().default(false), + reason: z.string().optional() +}); + +/** + * Lock info schema + */ +const LockInfoSchema = z.object({ + id: IdSchema, + resource_type: z.enum(['task', 'resource', 'agent']), + resource_id: z.string(), + mode: LockModeSchema, + holder: z.object({ + agent_id: z.string(), + acquired_at: TimestampSchema, + expires_at: TimestampSchema, + purpose: z.string().optional() + }), + queue: z.array(z.object({ + agent_id: z.string(), + mode: LockModeSchema, + requested_at: TimestampSchema, + priority: z.number().int() + })) +}); + +/** + * Deadlock info schema + */ +const DeadlockInfoSchema = z.object({ + detected_at: TimestampSchema, + cycle: z.array(z.object({ + agent_id: z.string(), + holds_lock: IdSchema, + waits_for: IdSchema + })), + resolution: z.object({ + victim_agent_id: z.string(), + strategy: z.enum(['abort_youngest', 'abort_shortest', 'abort_lowest_priority']), + released_locks: z.array(IdSchema) + }) +}); + +// ============================================================================ +// WEBSOCKET EVENT SCHEMAS +// ============================================================================ + +/** + * Base WebSocket message schema + */ +const WebSocketMessageSchema = z.object({ + event: z.string(), + timestamp: TimestampSchema, + payload: z.unknown() +}); + +/** + * Lock acquired event + */ +const LockAcquiredEventSchema = z.object({ + event: z.literal('lock:acquired'), + timestamp: TimestampSchema, + payload: z.object({ + lock_id: IdSchema, + resource_type: z.string(), + resource_id: z.string(), + agent_id: z.string(), + acquired_at: TimestampSchema, + expires_at: TimestampSchema + }) +}); + +/** + * Lock released event + */ +const LockReleasedEventSchema = z.object({ + event: z.literal('lock:released'), + timestamp: TimestampSchema, + payload: z.object({ + lock_id: IdSchema, + resource_type: z.string(), + resource_id: z.string(), + agent_id: z.string(), + released_at: TimestampSchema, + reason: z.string().optional() + }) +}); + +/** + * Lock expired event + */ +const LockExpiredEventSchema = z.object({ + event: z.literal('lock:expired'), + timestamp: TimestampSchema, + payload: z.object({ + lock_id: IdSchema, + resource_type: z.string(), + resource_id: z.string(), + expired_at: TimestampSchema + }) +}); + +/** + * Deadlock detected event + */ +const DeadlockDetectedEventSchema = z.object({ + event: z.literal('lock:deadlock_detected'), + timestamp: TimestampSchema, + payload: DeadlockInfoSchema +}); + +/** + * Approval requested event + */ +const ApprovalRequestedEventSchema = z.object({ + event: z.literal('approval:requested'), + timestamp: TimestampSchema, + payload: z.object({ + approval_id: IdSchema, + task_id: IdSchema, + task_type: z.string(), + reviewer_id: z.string(), + requested_by: z.string(), + priority: z.enum(['LOW', 'NORMAL', 'HIGH', 'URGENT']), + due_at: TimestampSchema.optional(), + risk_score: z.number().int() + }) +}); + +/** + * Approval responded event + */ +const ApprovalRespondedEventSchema = z.object({ + event: z.literal('approval:responded'), + timestamp: TimestampSchema, + payload: z.object({ + approval_id: IdSchema, + task_id: IdSchema, + reviewer_id: z.string(), + action: ApprovalActionSchema, + reason: z.string().optional() + }) +}); + +/** + * Task state changed event + */ +const TaskStateChangedEventSchema = z.object({ + event: z.literal('task:state_changed'), + timestamp: TimestampSchema, + payload: z.object({ + task_id: IdSchema, + previous_state: TaskStateSchema, + new_state: TaskStateSchema, + triggered_by: z.string(), + reason: z.string().optional() + }) +}); + +/** + * Task execution completed event + */ +const TaskCompletedEventSchema = z.object({ + event: z.literal('task:completed'), + timestamp: TimestampSchema, + payload: z.object({ + task_id: IdSchema, + result: z.enum(['success', 'failure', 'timeout', 'cancelled']), + duration_seconds: z.number(), + output: z.string().optional(), + error: z.string().optional() + }) +}); + +// ============================================================================ +// ERROR SCHEMAS +// ============================================================================ + +/** + * API error response schema + */ +const ApiErrorSchema = z.object({ + error: z.object({ + code: z.string(), + message: z.string(), + details: z.unknown().optional(), + request_id: z.string().uuid(), + timestamp: TimestampSchema + }) +}); + +/** + * Validation error schema + */ +const ValidationErrorSchema = ApiErrorSchema.extend({ + error: z.object({ + code: z.literal('VALIDATION_ERROR'), + message: z.string(), + details: z.object({ + field: z.string(), + issue: z.string(), + value: z.unknown().optional() + }), + request_id: z.string().uuid(), + timestamp: TimestampSchema + }) +}); + +// ============================================================================ +// ROUTE HANDLERS (Type definitions) +// ============================================================================ + +/** + * Typed request/response helpers + */ +type CreateTaskRequest = z.infer; +type CreateTaskResponse = z.infer; +type ListTasksQuery = z.infer; +type SubmitTaskRequest = z.infer; +type RespondApprovalRequest = z.infer; +type BatchApprovalRequest = z.infer; +type BatchApprovalResponse = z.infer; +type AcquireLockRequest = z.infer; +type LockResponse = z.infer; +type LockHeartbeatRequest = z.infer; +type ReleaseLockRequest = z.infer; + +// ============================================================================ +// EXPRESS ROUTES +// ============================================================================ + +const router = Router(); + +// Middleware: Validate request body against schema +const validateBody = (schema: z.ZodSchema) => { + return (req: Request, res: Response, next: NextFunction) => { + const result = schema.safeParse(req.body); + if (!result.success) { + return res.status(400).json({ + error: { + code: 'VALIDATION_ERROR', + message: 'Request body validation failed', + details: result.error.format(), + request_id: req.headers['x-request-id'] || crypto.randomUUID(), + timestamp: new Date().toISOString() + } + }); + } + req.body = result.data; + next(); + }; +}; + +// Middleware: Validate query parameters +const validateQuery = (schema: z.ZodSchema) => { + return (req: Request, res: Response, next: NextFunction) => { + const result = schema.safeParse(req.query); + if (!result.success) { + return res.status(400).json({ + error: { + code: 'VALIDATION_ERROR', + message: 'Query parameter validation failed', + details: result.error.format(), + request_id: req.headers['x-request-id'] || crypto.randomUUID(), + timestamp: new Date().toISOString() + } + }); + } + req.query = result.data as unknown as Request['query']; + next(); + }; +}; + +// Middleware: Validate URL parameters +const validateParams = (schema: z.ZodSchema) => { + return (req: Request, res: Response, next: NextFunction) => { + const result = schema.safeParse(req.params); + if (!result.success) { + return res.status(400).json({ + error: { + code: 'VALIDATION_ERROR', + message: 'URL parameter validation failed', + details: result.error.format(), + request_id: req.headers['x-request-id'] || crypto.randomUUID(), + timestamp: new Date().toISOString() + } + }); + } + req.params = result.data as unknown as Request['params']; + next(); + }; +}; + +// ============================================================================ +// TASK ROUTES +// ============================================================================ + +/** + * @route POST /api/v1/tasks + * @desc Create a new task + * @access Authenticated + * + * Request body: CreateTaskRequestSchema + * Response: 201 Created with TaskResponseSchema + */ +router.post( + '/tasks', + validateBody(CreateTaskRequestSchema), + async (req: Request, res: Response) => { + // Implementation: Create task in DRAFT state + res.status(201).json({ + id: crypto.randomUUID(), + state: 'DRAFT', + config: req.body.config, + metadata: { + ...req.body.metadata, + created_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + tags: req.body.metadata.tags || [] + }, + approvals: [], + required_approvals: 1, + current_approvals: 0 + }); + } +); + +/** + * @route GET /api/v1/tasks + * @desc List tasks with filtering and pagination + * @access Authenticated + * + * Query params: ListTasksQuerySchema + * Response: 200 OK with { tasks: TaskResponseSchema[], pagination: {...} } + */ +router.get( + '/tasks', + validateQuery(ListTasksQuerySchema), + async (req: Request, res: Response) => { + // Implementation: Query tasks from database + res.json({ + tasks: [], + pagination: { + page: req.query.page, + limit: req.query.limit, + total: 0, + has_more: false + } + }); + } +); + +/** + * @route GET /api/v1/tasks/:id + * @desc Get task by ID + * @access Authenticated + * + * URL params: { id: uuid } + * Response: 200 OK with TaskResponseSchema + */ +router.get( + '/tasks/:id', + validateParams(z.object({ id: IdSchema })), + async (req: Request, res: Response) => { + // Implementation: Fetch task from database + res.json({ + id: req.params.id, + state: 'DRAFT', + config: {} as any, + metadata: {} as any, + approvals: [], + required_approvals: 1, + current_approvals: 0 + }); + } +); + +/** + * @route POST /api/v1/tasks/:id/submit + * @desc Submit task for approval + * @access Authenticated (task author or admin) + * + * URL params: { id: uuid } + * Request body: SubmitTaskRequestSchema + * Response: 202 Accepted with TaskResponseSchema + */ +router.post( + '/tasks/:id/submit', + validateParams(z.object({ id: IdSchema })), + validateBody(SubmitTaskRequestSchema), + async (req: Request, res: Response) => { + // Implementation: + // 1. Validate task is in DRAFT state + // 2. Run preview generation + // 3. Calculate risk score + // 4. Determine required approvals + // 5. Create approval requests + // 6. Transition to SUBMITTED/REVIEWING + // 7. Emit approval:requested events + res.status(202).json({ + id: req.params.id, + state: 'REVIEWING', + config: {} as any, + metadata: { + author_id: '', + author_name: '', + created_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + submitted_at: new Date().toISOString(), + tags: [] + }, + risk: { + score: 45, + level: 'MEDIUM', + factors: [], + auto_approvable: false + }, + approvals: [], + required_approvals: 2, + current_approvals: 0 + }); + } +); + +/** + * @route POST /api/v1/tasks/:id/cancel + * @desc Cancel a task + * @access Authenticated (task author or admin) + * + * URL params: { id: uuid } + * Response: 200 OK with TaskResponseSchema + */ +router.post( + '/tasks/:id/cancel', + validateParams(z.object({ id: IdSchema })), + async (req: Request, res: Response) => { + // Implementation: + // 1. Validate task can be cancelled (not APPLYING or COMPLETED) + // 2. Release any held locks + // 3. Transition to CANCELLED state + // 4. Notify waiters + res.json({ + id: req.params.id, + state: 'CANCELLED', + config: {} as any, + metadata: {} as any, + approvals: [], + required_approvals: 0, + current_approvals: 0 + }); + } +); + +/** + * @route GET /api/v1/tasks/:id/preview + * @desc Get task preview/changes + * @access Authenticated + * + * URL params: { id: uuid } + * Response: 200 OK with PreviewResultSchema + */ +router.get( + '/tasks/:id/preview', + validateParams(z.object({ id: IdSchema })), + async (req: Request, res: Response) => { + res.json({ + valid: true, + changes: [], + warnings: [], + errors: [], + affected_services: [] + }); + } +); + +// ============================================================================ +// APPROVAL ROUTES +// ============================================================================ + +/** + * @route GET /api/v1/approvals + * @desc List pending approvals for current user + * @access Authenticated + * + * Query params: PaginationSchema + { status: string, task_id: uuid } + * Response: 200 OK with { approvals: ApprovalRequestSchema[], pagination: {...} } + */ +router.get( + '/approvals', + validateQuery(PaginationSchema.merge(z.object({ + status: z.enum(['PENDING', 'APPROVED', 'REJECTED', 'DELEGATED']).optional(), + task_id: IdSchema.optional() + }))), + async (req: Request, res: Response) => { + res.json({ + approvals: [], + pagination: { + page: req.query.page, + limit: req.query.limit, + total: 0, + has_more: false + } + }); + } +); + +/** + * @route POST /api/v1/approvals/:id/respond + * @desc Respond to an approval request + * @access Authenticated (assigned reviewer) + * + * URL params: { id: uuid } + * Request body: RespondApprovalRequestSchema + * Response: 200 OK with { success: boolean, task_state: string } + */ +router.post( + '/approvals/:id/respond', + validateParams(z.object({ id: IdSchema })), + validateBody(RespondApprovalRequestSchema), + async (req: Request, res: Response) => { + // Implementation: + // 1. Validate approval is PENDING + // 2. Record response + // 3. Check if quorum reached + // 4. Transition task state if needed + // 5. Emit approval:responded and task:state_changed events + res.json({ + success: true, + approval_id: req.params.id, + task_id: crypto.randomUUID(), + task_state: req.body.action === 'approve' ? 'APPROVED' : 'REJECTED' + }); + } +); + +/** + * @route POST /api/v1/approvals/batch + * @desc Batch approve/reject multiple approvals + * @access Authenticated + * + * Request body: BatchApprovalRequestSchema + * Response: 200 OK with BatchApprovalResponseSchema + */ +router.post( + '/approvals/batch', + validateBody(BatchApprovalRequestSchema), + async (req: Request, res: Response) => { + // Implementation: + // 1. Validate all approvals exist and are pending + // 2. Process each approval atomically + // 3. Rollback on error unless continue_on_error + // 4. Check task state transitions + res.json({ + success: true, + processed: req.body.approval_ids.length, + succeeded: req.body.approval_ids.length, + failed: 0, + results: req.body.approval_ids.map(id => ({ + approval_id: id, + success: true + })), + task_updates: [] + }); + } +); + +/** + * @route GET /api/v1/approvals/policies + * @desc List delegation policies for current user + * @access Authenticated + */ +router.get('/approvals/policies', async (req: Request, res: Response) => { + res.json({ policies: [] }); +}); + +/** + * @route POST /api/v1/approvals/policies + * @desc Create a delegation policy + * @access Authenticated + * + * Request body: DelegationPolicySchema + * Response: 201 Created with DelegationPolicySchema + */ +router.post( + '/approvals/policies', + validateBody(DelegationPolicySchema), + async (req: Request, res: Response) => { + res.status(201).json({ + id: crypto.randomUUID(), + ...req.body + }); + } +); + +// ============================================================================ +// LOCK ROUTES +// ============================================================================ + +/** + * @route POST /api/v1/locks/acquire + * @desc Acquire a distributed lock + * @access Service (agents/workers) + * + * Request body: AcquireLockRequestSchema + * Response: + * 201 Created with LockResponseSchema (acquired) + * 202 Accepted with queue info (waiting) + * 423 Locked (max wait exceeded, not waiting) + */ +router.post( + '/locks/acquire', + validateBody(AcquireLockRequestSchema), + async (req: Request, res: Response) => { + // Implementation: + // 1. Check if lock available + // 2. If available: acquire, set TTL, return 201 + // 3. If not available and wait_for_available: queue, return 202 + // 4. If not available and not waiting: return 423 + const acquired = Math.random() > 0.5; // Placeholder + + if (acquired) { + res.status(201).json({ + id: crypto.randomUUID(), + acquired: true, + resource_type: req.body.resource_type, + resource_id: req.body.resource_id, + mode: req.body.mode, + holder: { + agent_id: 'agent-001', + acquired_at: new Date().toISOString(), + expires_at: new Date(Date.now() + req.body.ttl_seconds * 1000).toISOString(), + purpose: req.body.purpose + } + }); + } else if (req.body.wait_for_available) { + res.status(202).json({ + id: crypto.randomUUID(), + acquired: false, + resource_type: req.body.resource_type, + resource_id: req.body.resource_id, + mode: req.body.mode, + holder: {} as any, + queue_position: 1, + estimated_wait_seconds: 30 + }); + } else { + res.status(423).json({ + error: { + code: 'RESOURCE_LOCKED', + message: 'Resource is locked by another agent', + request_id: crypto.randomUUID(), + timestamp: new Date().toISOString() + } + }); + } + } +); + +/** + * @route POST /api/v1/locks/heartbeat + * @desc Extend lock TTL via heartbeat + * @access Service (lock holder) + * + * Request body: LockHeartbeatRequestSchema + * Response: 200 OK with updated LockResponseSchema + * 404 Not Found (lock expired) + * 403 Forbidden (not lock holder) + */ +router.post( + '/locks/heartbeat', + validateBody(LockHeartbeatRequestSchema), + async (req: Request, res: Response) => { + // Implementation: Extend lock TTL + res.json({ + id: req.body.lock_id, + acquired: true, + resource_type: 'task', + resource_id: 'task-001', + mode: 'exclusive', + holder: { + agent_id: 'agent-001', + acquired_at: new Date().toISOString(), + expires_at: new Date(Date.now() + req.body.ttl_extension_seconds * 1000).toISOString(), + purpose: 'Task execution' + } + }); + } +); + +/** + * @route POST /api/v1/locks/release + * @desc Release a held lock + * @access Service (lock holder or admin) + * + * Request body: ReleaseLockRequestSchema + * Response: 204 No Content + */ +router.post( + '/locks/release', + validateBody(ReleaseLockRequestSchema), + async (req: Request, res: Response) => { + // Implementation: + // 1. Verify lock exists + // 2. Verify holder matches (or force=true with admin) + // 3. Release lock + // 4. Notify next waiter in queue + // 5. Emit lock:released event + res.status(204).send(); + } +); + +/** + * @route GET /api/v1/locks + * @desc List active locks + * @access Admin + * + * Query params: { resource_type, resource_id, agent_id } + * Response: 200 OK with { locks: LockInfoSchema[] } + */ +router.get( + '/locks', + validateQuery(z.object({ + resource_type: z.enum(['task', 'resource', 'agent']).optional(), + resource_id: z.string().optional(), + agent_id: z.string().optional() + })), + async (req: Request, res: Response) => { + res.json({ locks: [] }); + } +); + +/** + * @route GET /api/v1/locks/:id + * @desc Get lock info by ID + * @access Admin + */ +router.get( + '/locks/:id', + validateParams(z.object({ id: IdSchema })), + async (req: Request, res: Response) => { + res.json({ + id: req.params.id, + resource_type: 'task', + resource_id: 'task-001', + mode: 'exclusive', + holder: { + agent_id: 'agent-001', + acquired_at: new Date().toISOString(), + expires_at: new Date(Date.now() + 30000).toISOString() + }, + queue: [] + }); + } +); + +/** + * @route GET /api/v1/locks/deadlocks + * @desc Get current deadlock information + * @access Admin + * + * Response: 200 OK with { deadlocks: DeadlockInfoSchema[] } + */ +router.get('/locks/deadlocks', async (req: Request, res: Response) => { + res.json({ deadlocks: [] }); +}); + +// ============================================================================ +// WEBSOCKET HANDLER (Type definitions for socket.io or ws) +// ============================================================================ + +interface WebSocketHandler { + /** + * Subscribe client to events for specific resources + */ + subscribe(clientId: string, channels: string[]): void; + + /** + * Unsubscribe client from channels + */ + unsubscribe(clientId: string, channels: string[]): void; + + /** + * Broadcast event to all subscribers of a channel + */ + broadcast(channel: string, event: z.infer): void; + + /** + * Send event to specific client + */ + emit(clientId: string, event: z.infer): void; +} + +/** + * WebSocket event channels + */ +const WebSocketChannels = { + // Task-specific events + task: (taskId: string) => `task:${taskId}`, + + // User-specific events + user: (userId: string) => `user:${userId}`, + + // Agent-specific events + agent: (agentId: string) => `agent:${agentId}`, + + // Resource-specific events + resource: (type: string, id: string) => `resource:${type}:${id}`, + + // System-wide events (admin only) + system: 'system', + + // Lock events + locks: 'locks' +} as const; + +// ============================================================================ +// EXPORTS +// ============================================================================ + +export { + // Router + router as approvalRouter, + + // Schemas (for use in other modules) + IdSchema, + TaskStateSchema, + LockModeSchema, + ApprovalActionSchema, + ResourceRefSchema, + TaskConfigSchema, + RiskAssessmentSchema, + PreviewResultSchema, + CreateTaskRequestSchema, + SubmitTaskRequestSchema, + TaskResponseSchema, + ListTasksQuerySchema, + ApprovalRequestSchema, + RespondApprovalRequestSchema, + BatchApprovalRequestSchema, + BatchApprovalResponseSchema, + DelegationPolicySchema, + AcquireLockRequestSchema, + LockResponseSchema, + LockHeartbeatRequestSchema, + ReleaseLockRequestSchema, + LockInfoSchema, + DeadlockInfoSchema, + WebSocketMessageSchema, + LockAcquiredEventSchema, + LockReleasedEventSchema, + LockExpiredEventSchema, + DeadlockDetectedEventSchema, + ApprovalRequestedEventSchema, + ApprovalRespondedEventSchema, + TaskStateChangedEventSchema, + TaskCompletedEventSchema, + ApiErrorSchema, + ValidationErrorSchema, + + // Types + type CreateTaskRequest, + type CreateTaskResponse, + type ListTasksQuery, + type SubmitTaskRequest, + type RespondApprovalRequest, + type BatchApprovalRequest, + type BatchApprovalResponse, + type AcquireLockRequest, + type LockResponse, + type LockHeartbeatRequest, + type ReleaseLockRequest, + type WebSocketHandler, + WebSocketChannels +}; diff --git a/docs/design.md b/docs/design.md new file mode 100644 index 0000000..91d9d7f --- /dev/null +++ b/docs/design.md @@ -0,0 +1,510 @@ +# Community ADE Approval System Architecture + +## Executive Summary + +The Approval System provides governance and safety controls for the Community ADE platform. It introduces human-in-the-loop validation for task execution, distributed locking for resource protection, and a complete audit trail for compliance. + +--- + +## Core Concepts + +### 1. Clean Apply Locks + +**Philosophy:** A lock should only grant permission to attempt an operation, not guarantee success. Locks are advisory but strictly enforced by the system. + +**Lock Hierarchy:** +``` +Task Lock (task:{id}:lock) - Single task execution +Resource Lock (resource:{type}:{id}:lock) - Shared resource protection +Agent Lock (agent:{id}:lock) - Agent capacity management +``` + +**Lock Properties:** +- **Ownership:** UUID of the lock holder +- **TTL:** 30 seconds default, extendable via heartbeats +- **Queue:** FIFO ordered waiting list for fairness +- **Metadata:** Timestamp, purpose, agent info + +### 2. Approval Lifecycle State Machine + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ APPROVAL LIFECYCLE │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────┐ ┌──────────┐ ┌───────────┐ ┌─────────┐ ┌─────────┐ │ +│ │DRAFT │───→│ SUBMITTED│───→│ REVIEWING │───→│ APPROVED│───→│ APPLYING│ │ +│ └──────┘ └──────────┘ └───────────┘ └─────────┘ └────┬────┘ │ +│ │ │ │ │ │ │ +│ │ │ │ │ ▼ │ +│ │ │ │ │ ┌─────────┐ │ +│ │ │ │ │ │COMPLETED│ │ +│ │ │ │ │ └─────────┘ │ +│ │ │ │ │ │ +│ │ │ └──────────────┘ │ +│ │ │ │ │ +│ │ │ ▼ │ +│ │ │ ┌─────────┐ │ +│ │ │ │REJECTED │ │ +│ │ │ └─────────┘ │ +│ │ │ │ │ +│ │ └───────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────┐ │ +│ │ CANCELLED│ │ +│ └─────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +**State Descriptions:** + +| State | Description | Permissions | +|-------|-------------|-------------| +| `DRAFT` | Task created but not submitted | Edit, Delete, Submit | +| `SUBMITTED` | Validation complete, awaiting review | None (locked) | +| `REVIEWING` | Under active review by approvers | Add comments | +| `APPROVED` | All required approvals received | Queue for apply | +| `APPLYING` | Lock acquired, executing changes | Read-only | +| `COMPLETED` | Changes successfully applied | Read-only, audit | +| `REJECTED` | Approval denied | Can resubmit as new | +| `CANCELLED` | Aborted before apply | Archive only | + +### 3. Human Gates + +**Review Policies:** +- **Auto-approve:** Tasks below risk threshold skip human review +- **Required reviewers:** Based on task type, resource scope, risk score +- **Delegation chains:** "If my manager approves, auto-approve for me" +- **Quorum rules:** N-of-M approvals required + +**Risk Scoring:** +```typescript +RiskScore = ( + resource_criticality * 0.4 + + change_magnitude * 0.3 + + blast_radius * 0.2 + + historical_failure_rate * 0.1 +) // 0-100 scale +``` + +--- + +## System Architecture + +### Component Diagram + +``` +┌──────────────────────────────────────────────────────────────────────────────┐ +│ CLIENT LAYER │ +├──────────────────────────────────────────────────────────────────────────────┤ +│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ Dashboard UI │ │ CLI Tool │ │ Webhook API │ │ +│ │ (Delta-V2) │ │ (Omega-CLI) │ │ (External) │ │ +│ └────────┬────────┘ └────────┬────────┘ └────────┬────────┘ │ +└───────────┼────────────────────┼────────────────────┼────────────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌──────────────────────────────────────────────────────────────────────────────┐ +│ API GATEWAY LAYER │ +├──────────────────────────────────────────────────────────────────────────────┤ +│ ┌─────────────────────────────────────────────────────────────────────────┐ │ +│ │ Express API Server (Beta Patterns) │ │ +│ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌─────────────┐ │ │ +│ │ │ Task Routes │ │ApprovalRoutes│ │ Lock Routes │ │ WebSocket │ │ │ +│ │ │ │ │ │ │ │ │ Handler │ │ │ +│ │ └──────────────┘ └──────────────┘ └──────────────┘ └─────────────┘ │ │ +│ └─────────────────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────┬────────────────────────────────────────────┘ + │ + ┌─────────────────────┼─────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ REDIS LAYER │ │ POSTGRESQL │ │ EVENT BUS │ +│ (Alpha) │ │ (Persistence) │ │ (WebSocket) │ +├─────────────────┤ ├─────────────────┤ ├─────────────────┤ +│ • Locks │ │ • Task History │ │ • approval:* │ +│ • Queues │ │ • Audit Log │ │ • lock:* │ +│ • Sessions │ │ • User Policies │ │ • task:* │ +│ • Rate Limits │ │ • Delegations │ │ │ +└────────┬────────┘ └─────────────────┘ └─────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────────────────────────┐ +│ WORKER LAYER (Gamma) │ +├──────────────────────────────────────────────────────────────────────────────┤ +│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ Lock Manager │ │ Task Executor │ │ Heartbeat Mon │ │ +│ │ │ │ │ │ │ │ +│ │ • Acquire locks │ │ • Check locks │ │ • Watchdog │ │ +│ │ • Queue waiters │ │ • Execute apply │ │ • Deadlock det │ │ +│ │ • Release/clean │ │ • Rollback on │ │ • Auto-recovery │ │ +│ │ │ │ failure │ │ │ │ +│ └─────────────────┘ └─────────────────┘ └─────────────────┘ │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +### Data Flow: Submit → Approve → Apply + +``` +┌─────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ +│ USER │ │ SYSTEM │ │ SYSTEM │ │ WORKER │ +└────┬────┘ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ + │ │ │ │ + │ POST /tasks │ │ │ + │ {config} │ │ │ + ├────────────────→│ │ │ + │ │ │ │ + │ │──┐ │ │ + │ │ │ Validate │ │ + │ │ │ Calculate risk │ │ + │ │ │ Generate preview│ │ + │ │←─┘ │ │ + │ │ │ │ + │ 201 Created │ │ │ + │ task_id: xyz │ │ │ + │←────────────────│ │ │ + │ │ │ │ + │ POST /tasks/xyz │ │ │ + │ /submit │ │ │ + ├────────────────→│ │ │ + │ │ │ │ + │ │──┐ │ │ + │ │ │ State:SUBMITTED│ │ + │ │ │ Lock resources │ │ + │ │ │ (preview only) │ │ + │ │←─┘ │ │ + │ │ │ │ + │ 202 Accepted │ │ │ + │←────────────────│ │ │ + │ │ │ │ + │ │ approval:requested │ + │ ├──────────────────→│ │ + │ │ │ │ + │ │ │──┐ │ + │ │ │ │ Check policies │ + │ │ │ │ Notify reviewers│ + │ │ │←─┘ │ + │ │ │ │ + │ [Time passes] │ │ │ + │ │ │ │ + │ POST /approvals │ │ │ + │ /{id}/approve │ │ │ + ├────────────────→│ │ │ + │ │ │ │ + │ │ │──┐ │ + │ │ │ │ Record approval │ + │ │ │ │ Check quorum │ + │ │ │←─┘ │ + │ │ │ │ + │ 200 OK │ │ │ + │←────────────────│ │ │ + │ │ │ │ + │ │ │ approval:approved│ + │ │←──────────────────┤ │ + │ │ │ │ + │ │──┐ │ + │ │ │ State:APPROVED │ + │ │ │ Queue for apply │ + │ │←─┘ │ + │ │ │ │ + │ │ task:approved │ │ + │ ├──────────────────────────────────────→│ + │ │ │ │ + │ │ │ │──┐ + │ │ │ │ │ Acquire apply lock + │ │ │ │ │ State:APPLYING + │ │ │ │ │ Execute changes + │ │ │ │←─┘ + │ │ │ │ + │ │ lock:acquired │ │ + │ │←──────────────────────────────────────┤ + │ │ │ │ + │ │ │ │──┐ + │ │ │ │ │ Apply succeeded + │ │ │ │ │ State:COMPLETED + │ │ │ │ │ Release locks + │ │ │ │←─┘ + │ │ │ │ + │ │ task:completed │ │ + │ │←──────────────────────────────────────┤ + │ │ │ │ +``` + +--- + +## Lock System Deep Dive + +### Lock Types + +#### 1. Task Lock (Exclusive) +``` +Key: lock:task:{task_id} +Value: { holder_agent_id, acquired_at, expires_at, purpose } +TTL: 30s with automatic extension on heartbeat +``` + +Prevents concurrent execution of the same task. Released on completion or failure. + +#### 2. Resource Lock (Shared/Exclusive) +``` +Key: lock:resource:{resource_type}:{resource_id} +Value: { + mode: 'exclusive' | 'shared', + holders: [{ agent_id, acquired_at }], + queue: [{ agent_id, mode, requested_at }] +} +``` + +Allows multiple readers (shared) or single writer (exclusive). Queue ensures FIFO ordering. + +#### 3. Agent Capacity Lock +``` +Key: lock:agent:{agent_id}:capacity +Value: { active_tasks: number, max_tasks: number } +``` + +Prevents agent overload. Each agent has configurable concurrency limits. + +### Deadlock Detection + +**Algorithm:** Wait-For Graph + +``` +If Agent A holds Lock X and waits for Lock Y +And Agent B holds Lock Y and waits for Lock X +→ Deadlock detected +``` + +**Resolution:** +1. Abort youngest transaction (lower cost) +2. Release all held locks +3. Notify owner with `DEADLOCK_DETECTED` error +4. Auto-retry with exponential backoff + +### Lock Heartbeat Protocol + +```typescript +interface HeartbeatMessage { + lock_id: string; + agent_id: string; + timestamp: number; + ttl_extension: number; // seconds +} + +// Client must send heartbeat every 10s (configurable) +// Server extends TTL on receipt +// If no heartbeat for 30s, lock auto-expires +// Expired locks trigger cleanup and notify waiters +``` + +--- + +## Approval Engine + +### Reviewer Assignment + +```typescript +interface ReviewerPolicy { + task_types: string[]; + resource_patterns: string[]; + min_approvers: number; + required_roles: string[]; + risk_threshold: number; + auto_approve_if: { + risk_below: number; + author_has_role: string[]; + resources_in_scope: string[]; + }; +} +``` + +**Assignment Algorithm:** +1. Match task against all policies +2. Union all required reviewers from matching policies +3. Check for delegation chains +4. Filter out auto-approved reviewers (based on policy) +5. Calculate minimum approvals needed +6. Create approval requests + +### Delegation Chains + +``` +Alice delegates to Bob when: + - Task type is "infrastructure" + - Risk score > 50 + +Bob delegates to Carol when: + - Resource matches "prod-*" + +Result: For prod infrastructure with high risk, + only Carol's approval is needed +``` + +**Resolution:** Depth-first traversal with cycle detection. + +### Batch Operations + +**Batch Approve:** +```typescript +POST /approvals/batch +{ + approval_ids: string[]; + action: 'approve' | 'reject'; + reason?: string; + options: { + skip_validation: boolean; + apply_immediately: boolean; + } +} +``` + +Atomic operation: either all approvals succeed or all fail. + +--- + +## Error Handling & Edge Cases + +### Lock Acquisition Failures + +| Scenario | Response | Retry Strategy | +|----------|----------|----------------| +| Lock held by another agent | 423 Locked | Queue and wait | +| Lock expired during operation | 409 Conflict | Abort, notify, retry | +| Deadlock detected | 423 Deadlock | Abort, auto-retry with backoff | +| Max queue depth exceeded | 503 Queue Full | Fail fast, notify operator | + +### Approval Edge Cases + +| Scenario | Behavior | +|----------|----------| +| Approver leaves organization | Auto-reassign to delegate or manager | +| Approval timeout (48h default) | Escalate to next level, notify on-call | +| Required reviewer unavailable | Bypass with admin override + audit | +| Task modified during review | Invalidate approvals, restart review | +| Concurrent approvals | Last write wins, notify others of resolution | + +### System Degradation + +| Condition | Response | +|-----------|----------| +| Redis unavailable | Queue in PostgreSQL, async recovery | +| High lock contention | Exponential backoff, circuit breaker | +| Approval queue backlog | Priority escalation, auto-approve low-risk | +| WebSocket failure | Polling fallback, queued events | + +--- + +## Security Model + +### Permission Matrix + +| Action | Author | Reviewer | Admin | System | +|--------|--------|----------|-------|--------| +| Create task | ✓ | ✗ | ✓ | ✗ | +| Submit for approval | ✓ | ✗ | ✓ | ✗ | +| Approve/reject | ✗ | ✓ | ✓ | ✗ | +| Force apply | ✗ | ✗ | ✓ | ✗ | +| Cancel task | ✓ | ✗ | ✓ | ✓ | +| Override policy | ✗ | ✗ | ✓* | ✗ | +| View audit log | ✓ | ✓ | ✓ | ✓ | + +*Requires secondary approval and incident ticket + +### Audit Requirements + +Every state transition logged: +- Who (user ID, session) +- What (from state, to state, action) +- When (timestamp with microsecond precision) +- Where (IP, user agent, service) +- Why (reason, ticket reference) + +--- + +## Scalability Considerations + +### Horizontal Scaling + +- **API Layer:** Stateless, scale via load balancer +- **Redis:** Cluster mode, hash tags for lock locality +- **PostgreSQL:** Read replicas for audit queries +- **WebSocket:** Sticky sessions or Redis pub/sub + +### Performance Targets + +| Metric | Target | Peak | +|--------|--------|------| +| Lock acquisition | < 10ms | < 50ms @ p99 | +| Approval latency | < 100ms | < 500ms @ p99 | +| Task throughput | 1000/min | 5000/min burst | +| Concurrent locks | 10,000 | 50,000 | + +--- + +## Deployment Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ KUBERNETES CLUSTER │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ API Pod │ │ API Pod │ │ API Pod │ │ +│ │ (3+ replicas)│ │ │ │ │ │ +│ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘ │ +│ │ │ │ │ +│ └────────────────┼────────────────┘ │ +│ │ │ +│ ┌──────┴──────┐ │ +│ │ Ingress │ │ +│ │ Controller │ │ +│ └──────┬──────┘ │ +│ │ │ +├──────────────────────────┼──────────────────────────────────────┤ +│ ┌───────────────────────┴───────────────────────┐ │ +│ │ Redis Cluster │ │ +│ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │ +│ │ │ Master │ │ Master │ │ Master │ │ │ +│ │ │ + Repl │ │ + Repl │ │ + Repl │ │ │ +│ │ └─────────┘ └─────────┘ └─────────┘ │ │ +│ └───────────────────────────────────────────────┘ │ +│ │ +│ ┌─────────────────────────────────────────────────┐ │ +│ │ PostgreSQL (HA: Patroni) │ │ +│ │ Primary + 2 Replicas │ │ +│ └─────────────────────────────────────────────────┘ │ +│ │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Worker Pod │ │ Worker Pod │ │ Worker Pod │ │ +│ │ (HPA: 2-20) │ │ │ │ │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Future Enhancements + +1. **ML-Based Risk Scoring:** Train models on historical task outcomes +2. **Predictive Locking:** Pre-acquire locks based on task patterns +3. **Approval Simulation:** "What if" analysis before submitting +4. **Time-Based Policies:** Different rules for on-call hours +5. **Integration Marketplace:** Slack, PagerDuty, ServiceNow webhooks + +--- + +## Glossary + +| Term | Definition | +|------|------------| +| **Clean Apply** | Applying changes only after successful lock acquisition and approval | +| **Deadlock** | Circular wait condition between multiple lock holders | +| **Delegation Chain** | Hierarchical approval routing | +| **Lock Queue** | FIFO waiting list for lock acquisition | +| **Risk Score** | Calculated metric (0-100) indicating task danger | +| **Stash** | Saved state for potential rollback | +| **Wait-For Graph** | Data structure for deadlock detection | diff --git a/docs/parallel-tasks-orchestration.md b/docs/parallel-tasks-orchestration.md new file mode 100644 index 0000000..55ee365 --- /dev/null +++ b/docs/parallel-tasks-orchestration.md @@ -0,0 +1,118 @@ +# Parallel Tasks Orchestration +# Date: 2026-03-18 +# Purpose: 4-way parallel worktree execution for Community ADE Phase 1 + +## Branch Strategy +**Base:** `main` +**Worktrees:** All 4 tasks share `main` branch - they're independent components +**Merge:** Clean component boundaries, no conflicts expected + +## Task Assignments + +### Task 1: Redis Queue Core (Agent: Coder-Alpha) +**Worktree:** `/home/ani/Projects/community-ade-wt/queue-core` +**Focus:** Implement Redis Streams queue with consumer groups +**Deliverables:** +- `src/queue/RedisQueue.ts` - Core queue implementation +- `src/queue/Task.ts` - Task interface and serialization +- `src/queue/Worker.ts` - Worker heartbeat and task claiming +- `tests/queue/RedisQueue.test.ts` - Unit tests +**Success Criteria:** Can enqueue/dequeue tasks, workers claim and heartbeat +**Time Budget:** 45 minutes + +### Task 2: TypeScript API Contracts (Agent: Coder-Beta) +**Worktree:** `/home/ani/Projects/community-ade-wt/api-contracts` +**Focus:** Type definitions and API surface +**Deliverables:** +- `src/types/index.ts` - All shared interfaces +- `src/api/routes.ts` - Express route definitions +- `src/api/validation.ts` - Zod schemas for request/response +- `src/api/middleware.ts` - Auth, error handling, logging +**Success Criteria:** Types compile, schemas validate, routes typed +**Time Budget:** 40 minutes + +### Task 3: Worker Pool & Execution (Agent: Coder-Gamma) +**Worktree:** `/home/ani/Projects/community-ade-wt/worker-pool` +**Focus:** Multi-worker process management +**Deliverables:** +- `src/worker/Pool.ts` - Worker pool orchestrator +- `src/worker/Process.ts` - Individual worker process wrapper +- `src/worker/HealthMonitor.ts` - Health checks and restarts +- `tests/worker/Pool.test.ts` - Worker lifecycle tests +**Success Criteria:** Pool spawns workers, monitors health, restarts dead workers +**Time Budget:** 50 minutes + +### Task 4: Dashboard UI Scaffold (Agent: Coder-Delta) +**Worktree:** `/home/ani/Projects/community-ade-wt/dashboard-ui` +**Focus:** React dashboard for monitoring +**Deliverables:** +- `dashboard/index.html` - HTML entry point +- `dashboard/src/App.tsx` - Main React component +- `dashboard/src/components/QueueStatus.tsx` - Queue overview +- `dashboard/src/components/WorkerList.tsx` - Worker status +- `dashboard/package.json` - Dependencies +**Success Criteria:** Vite dev server runs, shows mock queue data +**Time Budget:** 35 minutes + +## Shared Resources +- Redis server: `redis://localhost:6379` (use separate DBs: 0,1,2,3 per task) +- Port allocation: + - Task 1: No port (library) + - Task 2: 3001 (API dev server) + - Task 3: No port (process manager) + - Task 4: 3002 (Dashboard dev server) + +## State Reporting +Each agent must write to: +`/home/ani/Projects/community-ade/docs/task-status-{alpha|beta|gamma|delta}.md` + +Format: +```markdown +## Task Status: [Task Name] +**Agent:** [Name] +**Status:** [in-progress|complete|blocked] +**Worktree:** [path] +**Completed:** [list of files] +**Blockers:** [none or description] +**Next:** [what's next if incomplete] +**Time Remaining:** [X minutes] +``` + +## Integration Points +After all 4 complete: +1. Merge all worktrees into main +2. Verify imports resolve +3. Run integration test +4. Update README with setup instructions + +## Architecture Document Reference +All agents should read: +- `/home/ani/Projects/community-ade/docs/ade-redis-queue-design.md` +- `/home/ani/Projects/community-ade/docs/ade-phase1-orchestration-design.md` + +## Model Assignment +- Coder-Alpha, Beta, Gamma: `openai/GLM-4.7-Flash` (fast, parallel) +- Coder-Delta: `openai/GLM-4.7-Flash` (UI focused, sufficient) + +## Kickoff Command for Each Agent +```bash +# Setup worktree +cd /home/ani/Projects/community-ade +git worktree add ../community-ade-wt/[task-name] -b task-[name] +cd ../community-ade-wt/[task-name] + +# Read orchestration doc +cat docs/parallel-tasks-orchestration.md + +# Read your task section +# Begin implementation +``` + +## Completion Signal +When done, each agent: +1. Commits all work: `git add -A && git commit -m "[task-name]: implementation complete"` +2. Updates their status document +3. Signals completion + +--- +**Ani will monitor all 4 status documents and merge when complete.** diff --git a/docs/redis-schema.md b/docs/redis-schema.md new file mode 100644 index 0000000..cf01dae --- /dev/null +++ b/docs/redis-schema.md @@ -0,0 +1,805 @@ +# Community ADE Approval System - Redis Schema + +## Overview + +This document defines all Redis key patterns used by the Approval System. All keys use the `ade:` prefix for namespacing. + +--- + +## Key Naming Convention + +``` +ade:{category}:{subcategory}:{identifier}:{attribute} +``` + +| Segment | Description | Examples | +|---------|-------------|----------| +| `ade` | Global namespace prefix | - | +| `category` | High-level component | `lock`, `approval`, `task`, `session` | +| `subcategory` | Specific entity type | `task`, `resource`, `agent`, `user` | +| `identifier` | Unique entity ID | UUID or slug | +| `attribute` | Property/attribute | `data`, `queue`, `index` | + +--- + +## Lock Keys + +### Primary Lock Storage + +#### Task Lock (Exclusive) +``` +Key: ade:lock:task:{task_id} +Type: Hash +TTL: 30 seconds (renewable) + +Fields: + holder_agent_id (string) UUID of agent holding the lock + acquired_at (string) ISO 8601 timestamp + expires_at (string) ISO 8601 timestamp + purpose (string) Human-readable purpose + heartbeat_count (integer) Number of heartbeats received + queue_length (integer) Number of waiters in queue + +Example: + HSET ade:lock:task:550e8400-e29b-41d4-a716-446655440000 \ + holder_agent_id agent-123 \ + acquired_at "2026-03-18T15:30:00Z" \ + expires_at "2026-03-18T15:30:30Z" \ + purpose "Applying database migration" +``` + +#### Resource Lock (Shared/Exclusive) +``` +Key: ade:lock:resource:{resource_type}:{resource_id} +Type: Hash +TTL: 30 seconds (renewable) + +Fields: + mode (string) "exclusive" or "shared" + holders (JSON) Array of {agent_id, acquired_at} + exclusive_holder (string) Agent ID (if exclusive mode) + acquired_at (string) ISO 8601 timestamp + expires_at (string) ISO 8601 timestamp + +Example: + HSET ade:lock:resource:database:prod-db-01 \ + mode "exclusive" \ + exclusive_holder agent-456 \ + acquired_at "2026-03-18T15:30:00Z" \ + expires_at "2026-03-18T15:30:30Z" +``` + +#### Agent Capacity Lock +``` +Key: ade:lock:agent:{agent_id}:capacity +Type: Hash +TTL: None (persistent, cleaned up on agent deregistration) + +Fields: + max_tasks (integer) Maximum concurrent tasks + active_tasks (integer) Currently executing tasks + queued_tasks (integer) Tasks waiting for capacity + last_heartbeat (string) ISO 8601 timestamp + status (string) "active", "draining", "offline" + +Example: + HSET ade:lock:agent:agent-123:capacity \ + max_tasks 10 \ + active_tasks 3 \ + queued_tasks 1 \ + last_heartbeat "2026-03-18T15:30:00Z" \ + status "active" +``` + +### Lock Queue Keys + +#### Lock Wait Queue (Ordered list of waiting agents) +``` +Key: ade:lock:task:{task_id}:queue +Type: Sorted Set (ZSET) +TTL: 5 minutes (cleaned up when lock released) + +Score: Unix timestamp (millisecond precision for FIFO ordering) +Value: JSON object + +Value Format: + { + "agent_id": "agent-uuid", + "mode": "exclusive", + "priority": 100, + "requested_at": "2026-03-18T15:30:00Z", + "max_wait_seconds": 60 + } + +Example: + ZADD ade:lock:task:550e8400-e29b-41d4-a716-446655440000:queue \ + 1710775800000 '{"agent_id":"agent-789","mode":"exclusive","priority":100,...}' +``` + +#### Lock Notification Channel +``` +Key: ade:lock:task:{task_id}:channel +Type: Pub/Sub Channel + +Events: + "acquired:{agent_id}" - Lock acquired + "released:{agent_id}" - Lock released + "expired" - Lock expired + "queued:{agent_id}" - Agent added to queue + "promoted:{agent_id}" - Agent promoted from queue +``` + +### Lock Index Keys + +#### Active Locks by Agent (Reverse index) +``` +Key: ade:lock:index:agent:{agent_id} +Type: Set +TTL: Matches individual lock TTLs + +Members: Lock key references + ade:lock:task:{task_id} + ade:lock:resource:{type}:{id} + +Purpose: Quick lookup of all locks held by an agent +``` + +#### Active Locks by Resource Type +``` +Key: ade:lock:index:resource:{resource_type} +Type: Set +TTL: Matches individual lock TTLs + +Members: Resource lock keys + ade:lock:resource:database:prod-db-01 + ade:lock:resource:service:api-gateway +``` + +#### Global Lock Registry +``` +Key: ade:lock:registry +Type: Sorted Set +TTL: None + +Score: Expiration timestamp +Value: Lock key + +Purpose: Background cleanup of expired locks +Example: + ZADD ade:lock:registry 1710775830 "ade:lock:task:550e8400-..." +``` + +### Deadlock Detection Keys + +#### Wait-For Graph Edge +``` +Key: ade:lock:waitfor:{agent_id} +Type: Set +TTL: 5 minutes + +Members: Lock keys the agent is waiting for + ade:lock:task:{task_id} + ade:lock:resource:{type}:{id} + +Purpose: Build wait-for graph for deadlock detection +``` + +#### Deadlock Detection Timestamp +``` +Key: ade:lock:deadlock:check:{agent_id} +Type: String +TTL: 30 seconds + +Value: ISO 8601 timestamp of last deadlock check + +Purpose: Rate limit deadlock detection attempts +``` + +--- + +## Approval Keys + +### Approval Request Keys + +#### Approval Request Data +``` +Key: ade:approval:request:{approval_id} +Type: Hash +TTL: 30 days (archived after completion) + +Fields: + task_id (string) UUID of associated task + reviewer_id (string) User ID of assigned reviewer + reviewer_name (string) Display name + status (string) "PENDING", "APPROVED", "REJECTED", "DELEGATED" + priority (string) "LOW", "NORMAL", "HIGH", "URGENT" + delegated_to (string) User ID (if delegated) + delegation_chain (JSON) Array of user IDs in delegation chain + created_at (string) ISO 8601 timestamp + due_at (string) ISO 8601 timestamp + responded_at (string) ISO 8601 timestamp + response_action (string) "approve", "reject", "request_changes" + response_reason (string) Free text explanation + reviewed_by (string) Final responding user ID + +Example: + HSET ade:approval:request:app-123 \ + task_id "task-456" \ + reviewer_id "user-789" \ + status "PENDING" \ + priority "HIGH" \ + created_at "2026-03-18T15:30:00Z" \ + due_at "2026-03-20T15:30:00Z" +``` + +### Approval Queue Keys + +#### User Approval Queue (Pending approvals for a user) +``` +Key: ade:approval:queue:user:{user_id} +Type: Sorted Set +TTL: None (entries expire based on approval TTL) + +Score: Priority score (higher = more urgent) + Calculated as: (risk_score * 10) + priority_bonus + priority_bonus: URGENT=1000, HIGH=500, NORMAL=100, LOW=0 + +Value: approval_id + +Example: + ZADD ade:approval:queue:user:user-789 850 "app-123" + ZADD ade:approval:queue:user:user-789 450 "app-124" +``` + +#### Task Approval Index (All approvals for a task) +``` +Key: ade:approval:index:task:{task_id} +Type: Set +TTL: Matches approval data TTL + +Members: approval_ids + app-123 + app-124 + app-125 +``` + +#### Global Approval Queue (All pending approvals) +``` +Key: ade:approval:queue:global +Type: Sorted Set +TTL: None + +Score: Due timestamp (Unix seconds) +Value: approval_id + +Purpose: Background worker for escalation/timeout handling +``` + +### Approval Statistics Keys + +#### User Approval Stats +``` +Key: ade:approval:stats:user:{user_id} +Type: Hash +TTL: None (rolling window) + +Fields: + pending_count (integer) Current pending approvals + approved_today (integer) Approvals given today + rejected_today (integer) Rejections given today + avg_response_time (float) Average response time in seconds + last_action_at (string) ISO 8601 timestamp + +Note: Daily counters reset at midnight UTC via background job +``` + +#### Task Approval Stats +``` +Key: ade:approval:stats:task:{task_id} +Type: Hash +TTL: 30 days + +Fields: + required_count (integer) Required approvals + approved_count (integer) Current approvals + rejected_count (integer) Current rejections + pending_count (integer) Awaiting response + quorum_reached (boolean) Whether minimum approvals met +``` + +### Delegation Keys + +#### User Delegation Policy +``` +Key: ade:approval:delegation:{user_id}:{policy_id} +Type: Hash +TTL: Based on policy expiration + +Fields: + owner_id (string) Policy owner + delegate_to (string) Delegated reviewer + conditions (JSON) Matching conditions + cascade (boolean) Allow further delegation + active (boolean) Policy enabled/disabled + created_at (string) ISO 8601 timestamp + expires_at (string) ISO 8601 timestamp + +Example: + HSET ade:approval:delegation:user-123:policy-456 \ + owner_id "user-123" \ + delegate_to "user-789" \ + conditions '{"task_types":["infrastructure"],"risk_above":50}' \ + cascade "true" \ + active "true" +``` + +#### Delegation Policy Index +``` +Key: ade:approval:delegation:index:{user_id} +Type: Set +TTL: None + +Members: policy_ids for the user + policy-456 + policy-789 +``` + +--- + +## Task Keys + +### Task Data Keys + +#### Task State +``` +Key: ade:task:{task_id}:state +Type: String +TTL: 90 days + +Value: Current state + DRAFT, SUBMITTED, REVIEWING, APPROVED, APPLYING, COMPLETED, REJECTED, CANCELLED + +Example: + SET ade:task:task-123:state "REVIEWING" +``` + +#### Task Data (Full object) +``` +Key: ade:task:{task_id}:data +Type: JSON (RedisJSON module) or String (serialized JSON) +TTL: 90 days + +Value: Complete task object including config, metadata, execution results + +Note: For Redis versions without JSON module, store as serialized string +``` + +#### Task Configuration (Immutable) +``` +Key: ade:task:{task_id}:config +Type: Hash +TTL: 90 days + +Fields: + type (string) Task type + version (string) Config version + description (string) Human-readable description + parameters (JSON) Task parameters + resources (JSON) Array of resource references + rollback_strategy (string) "automatic", "manual", "none" + timeout_seconds (integer) Execution timeout + priority (integer) 0-100 priority score +``` + +#### Task Metadata +``` +Key: ade:task:{task_id}:metadata +Type: Hash +TTL: 90 days + +Fields: + author_id (string) Creating user + author_name (string) Display name + team (string) Team/organization + ticket_ref (string) External ticket reference + tags (JSON) Array of string tags + created_at (string) ISO 8601 timestamp + updated_at (string) ISO 8601 timestamp + submitted_at (string) ISO 8601 timestamp + approved_at (string) ISO 8601 timestamp + applying_at (string) ISO 8601 timestamp + completed_at (string) ISO 8601 timestamp +``` + +### Task State Index Keys + +#### Tasks by State +``` +Key: ade:task:index:state:{state} +Type: Sorted Set +TTL: None (members removed on state change) + +Score: created_at timestamp (Unix seconds) +Value: task_id + +Example Keys: + ade:task:index:state:DRAFT + ade:task:index:state:REVIEWING + ade:task:index:state:APPROVED +``` + +#### Tasks by Author +``` +Key: ade:task:index:author:{user_id} +Type: Sorted Set +TTL: 90 days + +Score: created_at timestamp +Value: task_id +``` + +#### Tasks by Resource +``` +Key: ade:task:index:resource:{resource_type}:{resource_id} +Type: Sorted Set +TTL: 90 days + +Score: created_at timestamp +Value: task_id + +Example: + ade:task:index:resource:database:prod-db-01 +``` + +#### Tasks by Tag +``` +Key: ade:task:index:tag:{tag_name} +Type: Sorted Set +TTL: 90 days + +Score: created_at timestamp +Value: task_id +``` + +### Task Execution Keys + +#### Task Execution Status +``` +Key: ade:task:{task_id}:execution +Type: Hash +TTL: 90 days + +Fields: + started_at (string) ISO 8601 timestamp + completed_at (string) ISO 8601 timestamp + agent_id (string) Executing agent + result (string) "success", "failure", "timeout", "cancelled" + output (string) Execution output (truncated) + output_key (string) Key to full output in S3/blob storage + error (string) Error message (if failed) + error_details (JSON) Structured error information + retry_count (integer) Number of retry attempts +``` + +#### Task Preview Results +``` +Key: ade:task:{task_id}:preview +Type: JSON/String +TTL: 7 days + +Value: Preview result object with changes, warnings, errors +``` + +#### Task Risk Assessment +``` +Key: ade:task:{task_id}:risk +Type: Hash +TTL: 90 days + +Fields: + score (integer) 0-100 risk score + level (string) "LOW", "MEDIUM", "HIGH", "CRITICAL" + factors (JSON) Array of risk factors + auto_approvable (boolean) Can skip human review + assessed_at (string) ISO 8601 timestamp + assessed_by (string) Algorithm version +``` + +--- + +## Session Keys + +#### User Session +``` +Key: ade:session:{session_id} +Type: Hash +TTL: 24 hours + +Fields: + user_id (string) Authenticated user + user_name (string) Display name + roles (JSON) Array of role strings + permissions (JSON) Array of permission strings + created_at (string) ISO 8601 timestamp + last_active (string) ISO 8601 timestamp + ip_address (string) Client IP + user_agent (string) Client user agent +``` + +#### User Active Sessions +``` +Key: ade:session:index:user:{user_id} +Type: Set +TTL: 24 hours + +Members: session_ids +``` + +--- + +## Rate Limiting Keys + +#### API Rate Limit +``` +Key: ade:ratelimit:{endpoint}:{user_id} +Type: String (counter) or Redis Cell (if available) +TTL: 1 minute (sliding window) + +Value: Request count + +Example: + ade:ratelimit:tasks:create:user-123 + ade:ratelimit:approvals:respond:user-456 +``` + +#### Lock Acquisition Rate Limit (per agent) +``` +Key: ade:ratelimit:lock:acquire:{agent_id} +Type: String (counter) +TTL: 1 minute + +Value: Lock acquisition attempts + +Purpose: Prevent lock starvation attacks +``` + +--- + +## Event Keys + +#### Event Stream (Redis Streams) +``` +Key: ade:events:{event_type} +Type: Stream +TTL: 7 days (MAXLEN ~10000) + +Event Types: + ade:events:task + ade:events:approval + ade:events:lock + +Entry Fields: + event (string) Event name + timestamp (string) ISO 8601 timestamp + payload (JSON) Event data + source (string) Service/agent that generated event + +Example: + XADD ade:events:task * \ + event "task:state_changed" \ + timestamp "2026-03-18T15:30:00Z" \ + payload '{"task_id":"...","from":"DRAFT","to":"SUBMITTED"}' \ + source "api-server-01" +``` + +#### Event Consumer Groups +``` +Key: ade:events:{event_type}:consumers +Type: Stream Consumer Group + +Groups: + notification-service + audit-logger + webhook-dispatcher + analytics-pipeline +``` + +--- + +## Background Job Keys + +#### Job Queue +``` +Key: ade:job:queue:{queue_name} +Type: List or Sorted Set +TTL: None + +Queues: + ade:job:queue:lock_cleanup - Expired lock cleanup + ade:job:queue:approval_timeout - Approval escalation + ade:job:queue:task_timeout - Task execution timeout + ade:job:queue:deadlock_detect - Deadlock detection + ade:job:queue:archive - Old data archival +``` + +#### Scheduled Jobs +``` +Key: ade:job:scheduled +Type: Sorted Set +TTL: None + +Score: Execution timestamp (Unix seconds) +Value: JSON job description + +Example: + ZADD ade:job:scheduled 1710776400 \ + '{"type":"lock_cleanup","target":"ade:lock:task:123"}' +``` + +#### Job Locks (prevent duplicate job execution) +``` +Key: ade:job:lock:{job_id} +Type: String +TTL: Job execution timeout + +Value: Worker instance ID +``` + +--- + +## Configuration Keys + +#### System Configuration +``` +Key: ade:config:{config_name} +Type: String or Hash +TTL: None + +Configs: + ade:config:lock:default_ttl (integer, seconds) + ade:config:lock:max_ttl (integer, seconds) + ade:config:lock:heartbeat_interval (integer, seconds) + ade:config:approval:default_timeout (integer, seconds) + ade:config:approval:max_timeout (integer, seconds) + ade:config:task:default_timeout (integer, seconds) + ade:config:risk:thresholds (JSON) +``` + +#### Feature Flags +``` +Key: ade:feature:{flag_name} +Type: String +TTL: None + +Value: "enabled" or "disabled" + +Examples: + ade:feature:auto_approve_low_risk + ade:feature:deadlock_detection + ade:feature:batch_approvals +``` + +--- + +## Key Lifecycle Summary + +| Key Pattern | Type | Default TTL | Cleanup Strategy | +|-------------|------|-------------|------------------| +| `ade:lock:*` (active) | Hash | 30s | Heartbeat extends, expires auto-release | +| `ade:lock:*:queue` | ZSET | 5m | Cleared on lock release | +| `ade:lock:registry` | ZSET | None | Background job cleans expired | +| `ade:approval:request:*` | Hash | 30d | Archived, then deleted | +| `ade:approval:queue:*` | ZSET | None | Entries removed on status change | +| `ade:task:*:state` | String | 90d | Archived to cold storage | +| `ade:task:*:data` | JSON | 90d | Archived to cold storage | +| `ade:task:index:*` | ZSET | 90d | Cleared on task deletion | +| `ade:session:*` | Hash | 24h | Auto-expire | +| `ade:events:*` | Stream | 7d | MAXLEN eviction | +| `ade:ratelimit:*` | String | 1m | Auto-expire | + +--- + +## Redis Commands Reference + +### Lock Operations + +```bash +# Acquire lock (with NX - only if not exists) +HSET ade:lock:task:123 \ + holder_agent_id agent-001 \ + acquired_at "2026-03-18T15:30:00Z" \ + expires_at "2026-03-18T15:30:30Z" \ + NX + +# Extend lock TTL +HEXPIRE ade:lock:task:123 30 + +# Check lock +HGETALL ade:lock:task:123 + +# Release lock (use Lua for atomic check-and-delete) +# Lua script: +# if redis.call('hget', KEYS[1], 'holder_agent_id') == ARGV[1] then +# return redis.call('del', KEYS[1]) +# end +# return 0 + +# Add to queue +ZADD ade:lock:task:123:queue 1710775800000 '{"agent_id":"agent-002",...}' + +# Get next waiter +ZPOPMIN ade:lock:task:123:queue 1 +``` + +### Approval Operations + +```bash +# Create approval request +HSET ade:approval:request:app-123 \ + task_id task-456 \ + reviewer_id user-789 \ + status PENDING + +# Add to user queue +ZADD ade:approval:queue:user:user-789 850 app-123 + +# Record response +HSET ade:approval:request:app-123 \ + status APPROVED \ + responded_at "2026-03-18T16:00:00Z" \ + response_action approve + +# Remove from queue +ZREM ade:approval:queue:user:user-789 app-123 +``` + +### Task Operations + +```bash +# Create task +SET ade:task:task-123:state DRAFT +HSET ade:task:task-123:metadata \ + author_id user-001 \ + created_at "2026-03-18T15:00:00Z" + +# Update state (atomic) +SET ade:task:task-123:state REVIEWING +ZREM ade:task:index:state:DRAFT task-123 +ZADD ade:task:index:state:REVIEWING 1710774000 task-123 + +# Get task with all data +HMGET ade:task:task-123:metadata author_id created_at +GET ade:task:task-123:state +``` + +--- + +## Cluster Mode Considerations + +When using Redis Cluster, ensure related keys are on the same hash slot using hash tags: + +``` +ade:{task:123}:state → hash slot for "task:123" +ade:{task:123}:data → same slot +ade:{task:123}:execution → same slot +ade:lock:task:{task:123} → hash slot for "task:123" +ade:approval:index:task:{task:123} → hash slot for "task:123" +``` + +This enables multi-key operations (transactions, Lua scripts) on related data. + +--- + +## Migration Notes + +### From v1 to v2 +- Renamed `lock:*` to `ade:lock:*` for namespacing +- Changed approval status from integers to strings +- Added JSON support for complex fields (requires RedisJSON or serialization) + +### Backup Strategy +```bash +# Daily RDB snapshot +# Real-time AOF for point-in-time recovery +# Cross-region replication for disaster recovery +``` diff --git a/docs/task-status-alpha.md b/docs/task-status-alpha.md new file mode 100644 index 0000000..8af7f09 --- /dev/null +++ b/docs/task-status-alpha.md @@ -0,0 +1,63 @@ +## Task Status: Redis Queue Core (Task 1) + +**Agent:** Coder-Alpha + +**Status:** Complete + +**Worktree:** `/home/ani/Projects/community-ade-wt/queue-core` + +**Completed Files:** +- `src/queue/Task.ts` - Task interface with types, serialization, and retry logic +- `src/queue/RedisQueue.ts` - Redis Streams implementation with consumer groups +- `src/queue/Worker.ts` - Worker claiming tasks with heartbeats and WorkerPool +- `src/index.ts` - Main exports for the module +- `tests/queue/RedisQueue.test.ts` - Unit tests (26 tests passing) +- `package.json` - Dependencies (ioredis, uuid, TypeScript, Jest) +- `tsconfig.json` - TypeScript configuration +- `jest.config.js` - Jest test configuration + +**Blockers:** None + +**Next:** Integration with other components after merge + +**Time Remaining:** 0 minutes (completed on schedule) + +--- + +### Implementation Details + +#### Key Features Implemented: + +1. **Redis Streams Queue** (`RedisQueue.ts`) + - Consumer group: `ade-workers` + - Stream key: `ade:queue:tasks` + - Commands used: XADD, XREADGROUP, XACK, XCLAIM, XPENDING + - Supports delayed tasks via Sorted Set (`ade:queue:delayed`) + +2. **Task State Management** (`Task.ts`) + - Task states: pending, claimed, running, completed, failed, cancelled + - Exponential backoff with jitter for retries + - Serialization/deserialization for Redis storage + - Constants: HEARTBEAT_INTERVAL_MS=5000, HEARTBEAT_TIMEOUT_MS=30000 + +3. **Worker Implementation** (`Worker.ts`) + - Worker heartbeat every 5 seconds + - Automatic task claiming from consumer group + - Concurrent task processing with configurable limits + - Graceful shutdown with optional task completion + - WorkerPool for managing multiple workers + +4. **Retry Logic** + - Exponential backoff: baseDelay * (multiplier ^ attempt) + - Jitter: ±10% to prevent thundering herd + - Configurable max attempts (default: 3) + - Max delay cap: 5 minutes + +#### Test Results: +- 26 tests passing +- Coverage includes: enqueue, claim, complete, fail, retry, delayed tasks, worker registration + +#### Dependencies: +- `ioredis` - Redis client +- `uuid` - UUID generation +- TypeScript, Jest, ts-jest for development diff --git a/docs/task-status-beta.md b/docs/task-status-beta.md new file mode 100644 index 0000000..9fe6116 --- /dev/null +++ b/docs/task-status-beta.md @@ -0,0 +1,41 @@ +## Task Status: TypeScript API Contracts +**Agent:** Coder-Beta +**Status:** complete +**Worktree:** /home/ani/Projects/community-ade-wt/api-contracts +**Completed:** +- `src/types/index.ts` - All shared TypeScript interfaces (Task, Worker, QueueStats, etc.) +- `src/api/validation.ts` - Zod schemas for request/response validation +- `src/api/middleware.ts` - Auth, error handling, logging middleware +- `src/api/routes.ts` - Express route definitions with full typing +- `src/index.ts` - Package entry point and exports +- `package.json` - Dependencies (Express, Zod, TypeScript) +- `tsconfig.json` - TypeScript configuration + +**Blockers:** none +**Next:** Integration with other worktrees (queue-core, worker-pool) +**Time Remaining:** 0 minutes (task complete) + +**API Routes Implemented:** +- GET /api/health - Health check +- GET /api/tasks - List tasks with filters +- POST /api/tasks - Create task +- GET /api/tasks/:id - Get task by ID +- PATCH /api/tasks/:id - Update task +- POST /api/tasks/:id/cancel - Cancel task +- POST /api/tasks/:id/retry - Retry failed task +- POST /api/tasks/:id/claim - Claim task (worker API) +- POST /api/tasks/:id/complete - Complete task (worker API) +- POST /api/tasks/:id/fail - Mark task failed (worker API) +- GET /api/workers - List workers +- POST /api/workers/register - Register worker +- GET /api/workers/:id - Get worker by ID +- POST /api/workers/:id/heartbeat - Worker heartbeat +- POST /api/workers/:id/kill - Kill worker +- GET /api/queue/stats - Queue statistics +- GET /api/queue/next - Get next available task (worker poll) + +**Success Criteria Met:** +- All types compile without errors +- Zod schemas properly validate request/response data +- Routes are fully typed with Express +- Middleware includes auth, logging, error handling, and validation diff --git a/docs/task-status-gamma.md b/docs/task-status-gamma.md new file mode 100644 index 0000000..f444d84 --- /dev/null +++ b/docs/task-status-gamma.md @@ -0,0 +1,102 @@ +## Task Status: Worker Pool & Execution (Task 3) +**Agent:** Coder-Gamma +**Status:** complete +**Worktree:** `/home/ani/Projects/community-ade-wt/worker-pool` + +### Completed Files: + +**Source Files (`src/`):** +1. `src/worker/Pool.ts` - Worker pool orchestrator + - Spawns and manages multiple worker processes + - Task queue with priority support + - Auto-scaling based on workload + - Graceful shutdown handling + - Comprehensive statistics and health reporting + +2. `src/worker/Process.ts` - Individual worker process wrapper + - Child_process fork management + - Worker state machine (IDLE, RUNNING, STOPPING, etc.) + - Task assignment and lifecycle tracking + - Heartbeat and health monitoring hooks + - Event-based communication with parent + +3. `src/worker/HealthMonitor.ts` - Health checks and restart logic + - Configurable health check intervals (default: 5s) + - Heartbeat timeout detection (default: 30s) + - Task stall detection (default: 5 min) + - Automatic worker restart on failure + - Consecutive failure tracking before restart + +4. `src/worker/TaskExecutor.ts` - Task execution in workers + - Task handler registration system + - Built-in task types (echo, compute, delay, healthCheck, executeCode, executeShell) + - Task timeout handling + - Progress reporting support + - Heartbeat generation + +5. `src/worker/WorkerScript.ts` - Example worker entry point + - Demonstrates task executor setup + - Registers built-in and example tasks + +6. `src/index.ts` - Module exports + +**Test Files (`tests/`):** +7. `tests/worker/Pool.test.ts` - Comprehensive worker lifecycle tests + - Pool creation and initialization + - Worker lifecycle (spawn, restart, exit) + - Task execution (single, concurrent, priority) + - Scaling up/down + - Graceful shutdown + - Statistics reporting + +**Configuration Files:** +8. `package.json` - TypeScript dependencies and scripts +9. `tsconfig.json` - TypeScript configuration +10. `jest.config.js` - Test configuration +11. `tests/setup.ts` - Test setup + +### Key Features Implemented: + +✅ **Worker Pool Management:** +- Configurable min/max worker counts +- Auto-scaling based on queue depth +- Graceful shutdown with task completion wait + +✅ **Process Management:** +- Child_process fork for each worker +- Worker state tracking (IDLE, RUNNING, STOPPING, etc.) +- Automatic respawn on unexpected exit + +✅ **Health Monitoring:** +- Health checks every 5 seconds +- Heartbeat tracking (30s timeout) +- Task stall detection +- Automatic restarts after 3 consecutive failures + +✅ **Task Execution:** +- Priority queue support +- Task timeout handling +- Progress reporting +- Built-in task types for common operations + +✅ **Worker Messages:** +- `heartbeat` - Health check response +- `task_complete` - Successful task completion +- `task_failed` - Task execution failure +- `task_progress` - Progress updates +- `ready` - Worker ready signal + +### Blockers: +None + +### Next: +Integration with other components (Queue Core from Task 1, API Contracts from Task 2) + +### Time Remaining: +0 minutes - Task complete + +--- +**Completion Command:** +```bash +git add -A && git commit -m "worker-pool: Process management and health monitoring" +``` diff --git a/docs/ui-components.md b/docs/ui-components.md new file mode 100644 index 0000000..ab2e77f --- /dev/null +++ b/docs/ui-components.md @@ -0,0 +1,820 @@ +# Community ADE Approval System - Dashboard UI Specifications + +## Overview + +This document defines the UI components and specifications for the Delta-V2 Dashboard's Approval System integration. The dashboard provides human operators with visibility and control over the approval workflow. + +--- + +## Design System + +### Color Palette + +| Token | Hex | Usage | +|-------|-----|-------| +| `--color-primary` | `#3B82F6` | Primary actions, links | +| `--color-primary-dark` | `#2563EB` | Hover states | +| `--color-success` | `#10B981` | Approved, completed, success states | +| `--color-warning` | `#F59E0B` | Pending, medium risk, warnings | +| `--color-danger` | `#EF4444` | Rejected, high risk, errors | +| `--color-info` | `#6366F1` | Info states, low risk | +| `--color-neutral-100` | `#F3F4F6` | Backgrounds | +| `--color-neutral-200` | `#E5E7EB` | Borders | +| `--color-neutral-700` | `#374151` | Body text | +| `--color-neutral-900` | `#111827` | Headings | + +### State Colors + +| State | Background | Border | Text | Icon | +|-------|------------|--------|------|------| +| `DRAFT` | `#F3F4F6` | `#D1D5DB` | `#6B7280` | Edit icon | +| `SUBMITTED` | `#DBEAFE` | `#93C5FD` | `#1E40AF` | Upload icon | +| `REVIEWING` | `#FEF3C7` | `#FCD34D` | `#92400E` | Eye icon | +| `APPROVED` | `#D1FAE5` | `#6EE7B7` | `#065F46` | Check icon | +| `APPLYING` | `#E0E7FF` | `#A5B4FC` | `#3730A3` | Play icon (animated) | +| `COMPLETED` | `#D1FAE5` | `#10B981` | `#065F46` | CheckCircle icon | +| `REJECTED` | `#FEE2E2` | `#FCA5A5` | `#991B1B` | X icon | +| `CANCELLED` | `#F3F4F6` | `#9CA3AF` | `#4B5563` | Slash icon | + +### Typography + +| Level | Font | Size | Weight | Line Height | +|-------|------|------|--------|-------------| +| H1 | Inter | 24px | 600 | 1.3 | +| H2 | Inter | 20px | 600 | 1.3 | +| H3 | Inter | 16px | 600 | 1.4 | +| Body | Inter | 14px | 400 | 1.5 | +| Small | Inter | 12px | 400 | 1.5 | +| Mono | JetBrains Mono | 13px | 400 | 1.5 | + +--- + +## Layout Structure + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ HEADER │ +│ ┌─────────────────────────────────────────────────────────────────────┐ │ +│ │ Logo Search [🔔 Notifications] [👤 User Menu] │ │ +│ └─────────────────────────────────────────────────────────────────────┘ │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ SIDEBAR │ MAIN CONTENT │ +│ │ │ +│ Dashboard │ ┌────────────────────────────────────────────────────────┐ │ +│ Tasks │ │ PAGE HEADER │ │ +│ Approvals │ │ [Title] [Primary Action] [Secondary] │ │ +│ ───────── │ └────────────────────────────────────────────────────────┘ │ +│ Locks │ │ +│ Audit Log │ ┌────────────────────────────────────────────────────────┐ │ +│ ───────── │ │ CONTENT AREA │ │ +│ Settings │ │ │ │ +│ │ │ [Cards / Tables / Forms / Modals as needed] │ │ +│ │ │ │ │ +│ │ └────────────────────────────────────────────────────────┘ │ +│ │ │ +└─────────────┴────────────────────────────────────────────────────────────────┘ +``` + +--- + +## Component Specifications + +### 1. Navigation Components + +#### Sidebar Navigation +```typescript +interface SidebarProps { + activeSection: 'dashboard' | 'tasks' | 'approvals' | 'locks' | 'audit' | 'settings'; + badgeCounts: { + approvals: number; // Pending approvals for current user + locks: number; // Active locks requiring attention + }; + user: { + name: string; + role: string; + avatarUrl?: string; + }; +} +``` + +**Features:** +- Collapsible on mobile (drawer) +- Badge indicators for pending items +- Keyboard navigation support (Arrow keys, Enter) +- Active state highlighting + +**Menu Items:** +- Dashboard (overview stats) +- Tasks (all tasks) +- Approvals (queue) - with badge count +- Locks (lock manager) +- Audit Log +- Settings + +--- + +### 2. Dashboard Components + +#### Stats Overview Card +```typescript +interface StatsCardProps { + title: string; + value: number | string; + trend?: { + direction: 'up' | 'down' | 'neutral'; + value: string; + label: string; + }; + icon: IconComponent; + color: 'primary' | 'success' | 'warning' | 'danger' | 'info'; + linkTo?: string; +} +``` + +**Layout:** +``` +┌────────────────────────────┐ +│ [Icon] │ +│ │ +│ Title │ +│ ┌────────────────────┐ │ +│ │ VALUE │ │ +│ └────────────────────┘ │ +│ ▲ 12% vs last week │ +└────────────────────────────┘ +``` + +**Dashboard Stats:** +1. Pending My Approval (count + link) +2. Tasks in Review (count) +3. Active Locks (count) +4. Completed Today (count + success rate) + +--- + +#### Approval Queue Widget +```typescript +interface ApprovalQueueWidgetProps { + approvals: Array<{ + id: string; + taskId: string; + taskType: string; + taskDescription: string; + riskLevel: 'LOW' | 'MEDIUM' | 'HIGH' | 'CRITICAL'; + requestedBy: string; + requestedAt: string; + dueAt?: string; + priority: 'LOW' | 'NORMAL' | 'HIGH' | 'URGENT'; + }>; + onApprove: (id: string) => void; + onReject: (id: string) => void; + onView: (id: string) => void; + maxItems?: number; +} +``` + +**Features:** +- Expandable list (default show 5, "View All" link) +- Inline quick actions (Approve/Reject with confirmation) +- Color-coded risk badges +- Relative timestamps ("2 hours ago") +- Urgent items highlighted with red border + +--- + +#### Activity Feed +```typescript +interface ActivityFeedProps { + events: Array<{ + id: string; + type: 'task_created' | 'task_submitted' | 'approval_requested' | + 'approval_responded' | 'task_executing' | 'task_completed' | + 'lock_acquired' | 'lock_released'; + actor: { + id: string; + name: string; + avatarUrl?: string; + }; + target: { + type: 'task' | 'approval' | 'lock'; + id: string; + name: string; + }; + metadata?: Record; + timestamp: string; + }>; + maxItems?: number; + pollInterval?: number; +} +``` + +**Features:** +- Real-time updates via WebSocket +- Collapsible event details +- Click to navigate to related resource +- Infinite scroll or pagination + +--- + +### 3. Task Components + +#### Task List Table +```typescript +interface TaskListTableProps { + tasks: TaskResponse[]; + columns: Array<{ + key: string; + title: string; + sortable?: boolean; + width?: string; + }>; + selectedIds: string[]; + onSelect: (ids: string[]) => void; + onRowClick: (task: TaskResponse) => void; + onSort: (key: string, order: 'asc' | 'desc') => void; + pagination: { + page: number; + limit: number; + total: number; + onChange: (page: number) => void; + }; + filters: TaskFilters; + onFilterChange: (filters: TaskFilters) => void; +} + +interface TaskFilters { + state?: TaskState[]; + author?: string; + resourceType?: ResourceType; + riskLevel?: RiskLevel; + dateRange?: { from: Date; to: Date }; + tags?: string[]; +} +``` + +**Columns:** +| Column | Width | Sortable | Content | +|--------|-------|----------|---------| +| Checkbox | 40px | No | Multi-select | +| State | 120px | Yes | Badge with icon | +| Task | 300px | Yes | Description + type tag | +| Author | 150px | Yes | Avatar + name | +| Risk | 100px | Yes | Score + level badge | +| Resources | 200px | Yes | Icon list (hover for details) | +| Created | 150px | Yes | Relative time | +| Actions | 100px | No | Menu button | + +**Features:** +- Batch actions toolbar (appears on selection) +- Column resizing +- Export to CSV/JSON +- Saved filter presets + +--- + +#### Task State Badge +```typescript +interface TaskStateBadgeProps { + state: TaskState; + size?: 'sm' | 'md' | 'lg'; + showIcon?: boolean; + pulse?: boolean; // Animate for APPLYING state +} +``` + +**Visual States:** +- `DRAFT`: Gray, edit icon +- `SUBMITTED`: Blue, upload icon +- `REVIEWING`: Yellow, eye icon +- `APPROVED`: Green with border, check icon +- `APPLYING`: Indigo, animated spinner + play icon +- `COMPLETED`: Solid green, check-circle icon +- `REJECTED`: Red, X icon +- `CANCELLED`: Gray strikethrough, slash icon + +--- + +#### Risk Score Indicator +```typescript +interface RiskScoreProps { + score: number; // 0-100 + size?: 'sm' | 'md' | 'lg'; + showLabel?: boolean; + showFactors?: boolean; +} +``` + +**Visual Design:** +``` +┌────────────────────────────────┐ +│ ┌──────────┐ │ +│ │ 75 │ HIGH RISK │ +│ │ ┌──┐ │ │ +│ │ │██│ │ • Critical resource │ +│ │ │██│ │ • Wide blast radius │ +│ │ │░░│ │ • No rollback │ +│ └──┴──┴────┘ │ +└────────────────────────────────┘ +``` + +- Circular progress indicator +- Color gradient: Green (0) → Yellow (50) → Red (100) +- Tooltip showing risk factors on hover + +--- + +#### Task Detail View +```typescript +interface TaskDetailViewProps { + task: TaskResponse; + activeTab: 'overview' | 'preview' | 'approvals' | 'execution' | 'audit'; + onTabChange: (tab: string) => void; + onAction: (action: 'submit' | 'cancel' | 'retry') => void; +} +``` + +**Layout:** +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Breadcrumbs > Task: Database Migration │ +├─────────────────────────────────────────────────────────────────┤ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ [State Badge] Task Title │ │ +│ │ Created by John Doe • 2 hours ago • Ticket: PROJ-123 │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ [Overview] [Preview] [Approvals] [Execution] [Audit] │ +│ ───────────────────────────────────────────────────────── │ +│ │ +│ TAB CONTENT │ +│ │ +├─────────────────────────────────────────────────────────────────┤ +│ ACTION BAR (contextual based on state) │ +│ [Submit for Approval] [Save Draft] [Cancel] │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +#### Preview Panel +```typescript +interface PreviewPanelProps { + preview: PreviewResult; + onRefresh: () => void; + lastUpdated: string; +} +``` + +**Content:** +- Validation status (valid/invalid with error list) +- Change list with diff view +- Affected services diagram +- Estimated execution time +- Rollback capability indicator + +**Diff View Component:** +``` +┌─────────────────────────────────────────────────────────────┐ +│ Resource: database/prod-db-01 │ +│ Action: MODIFY │ +├─────────────────────────────────────────────────────────────┤ +│ ┌────────────────┐ ┌────────────────┐ │ +│ │ BEFORE │ │ AFTER │ │ +│ │ │ │ │ │ +│ │ instance: │ │ instance: │ │ +│ │ type: db.m5 │ │ type: db.r6 │ ◄── Changed │ +│ │ size: large │ │ size: xlarge │ ◄── Changed │ +│ │ storage: 100 │ │ storage: 100 │ │ +│ │ │ │ │ │ +│ └────────────────┘ └────────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +--- + +### 4. Approval Components + +#### Approval Card +```typescript +interface ApprovalCardProps { + approval: { + id: string; + task: { + id: string; + title: string; + type: string; + description: string; + risk: RiskAssessment; + resources: ResourceRef[]; + preview: PreviewResult; + }; + requestedBy: { + id: string; + name: string; + avatarUrl?: string; + team?: string; + }; + requestedAt: string; + dueAt?: string; + priority: 'LOW' | 'NORMAL' | 'HIGH' | 'URGENT'; + delegationChain?: string[]; + }; + onApprove: (id: string, reason?: string) => void; + onReject: (id: string, reason: string) => void; + onRequestChanges: (id: string, feedback: string) => void; + onDelegate: (id: string, delegateTo: string) => void; +} +``` + +**Layout:** +``` +┌─────────────────────────────────────────────────────────────────┐ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ [Avatar] Requested by Sarah Chen • Platform Team │ │ +│ │ 2 hours ago • Due in 2 days │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ Database Migration - Shard Addition │ +│ [INFRASTRUCTURE] [HIGH PRIORITY] │ +│ │ +│ Add new read replica to handle increased traffic... │ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ RISK SCORE: 65/100 [MEDIUM] │ │ +│ │ • Production database affected │ │ +│ │ • 15-minute estimated downtime │ │ +│ │ • Automatic rollback available │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ AFFECTED RESOURCES: │ +│ [DB] prod-db-01 [SVC] api-service [SVC] worker-queue │ +│ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ CHANGES PREVIEW (3 changes) [View ▼] │ │ +│ │ • Modify database instance size │ │ +│ │ • Update service configuration │ │ +│ │ • Scale worker replicas │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ [✓ Approve] [✗ Reject] [💬 Request Changes] [➡ Delegate] │ +│ │ +│ Reason (required for rejection): │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ │ │ +│ └─────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +#### Batch Approval Panel +```typescript +interface BatchApprovalPanelProps { + selectedApprovals: string[]; + approvals: Array<{ + id: string; + taskTitle: string; + riskLevel: string; + }>; + onBatchAction: (action: 'approve' | 'reject', options: BatchOptions) => void; + onClearSelection: () => void; +} + +interface BatchOptions { + applyImmediately: boolean; + skipValidation: boolean; + continueOnError: boolean; +} +``` + +**Features:** +- Slide-out panel from right +- Summary of selected items +- Risk level aggregation ("3 Low, 2 High risk") +- Bulk action with confirmation +- Apply immediately toggle + +--- + +#### Delegation Settings Panel +```typescript +interface DelegationSettingsProps { + policies: DelegationPolicy[]; + availableDelegates: Array<{ + id: string; + name: string; + role: string; + avatarUrl?: string; + }>; + onCreatePolicy: (policy: Omit) => void; + onUpdatePolicy: (id: string, updates: Partial) => void; + onDeletePolicy: (id: string) => void; +} +``` + +**Features:** +- Visual policy builder +- Condition preview ("When task is INFRASTRUCTURE and risk > 50") +- Chain visualization +- Active/Inactive toggle + +--- + +### 5. Lock Components + +#### Lock Monitor Dashboard +```typescript +interface LockMonitorProps { + locks: Array<{ + id: string; + resourceType: 'task' | 'resource' | 'agent'; + resourceId: string; + mode: 'exclusive' | 'shared'; + holder: { + agentId: string; + agentName: string; + acquiredAt: string; + expiresAt: string; + purpose?: string; + }; + queue: Array<{ + agentId: string; + position: number; + waitTime: number; + }>; + ttl: number; + }>; + deadlocks: DeadlockInfo[]; + onForceRelease: (lockId: string, reason: string) => void; + onRefresh: () => void; +} +``` + +**Layout:** +``` +┌─────────────────────────────────────────────────────────────────┐ +│ LOCK MONITOR [↻ Refresh] │ +├─────────────────────────────────────────────────────────────────┤ +│ ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ │ +│ │ Active Locks │ │ Waiting Agents │ │ Detected Dead- │ │ +│ │ 24 │ │ 7 │ │ locks 0 │ │ +│ └─────────────────┘ └─────────────────┘ └─────────────────┘ │ +│ │ +│ ACTIVE LOCKS │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Type Resource Holder TTL Queue Act │ │ +│ │ ──────────────────────────────────────────────────────── │ │ +│ │ task task-123 agent-01 23s 2 [⋯] │ │ +│ │ resource db/prod-01 agent-03 45s 0 [⋯] │ │ +│ │ task task-456 agent-02 12s 1 [⋯] │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +│ LOCK QUEUE │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ Position Agent Waiting For Est. Time │ │ +│ │ ──────────────────────────────────────────────────────── │ │ +│ │ 1 agent-04 task-123 ~15s │ │ +│ │ 2 agent-05 task-123 ~30s │ │ +│ └─────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +#### Lock Detail Modal +```typescript +interface LockDetailModalProps { + lock: LockInfo; + isOpen: boolean; + onClose: () => void; + onForceRelease: (reason: string) => void; + onExtendTTL: (seconds: number) => void; +} +``` + +**Content:** +- Lock metadata +- Holder information with heartbeat status +- Queue visualization (if waiters exist) +- Action buttons (Extend, Force Release - admin only) +- Lock history/timeline + +--- + +#### Deadlock Alert +```typescript +interface DeadlockAlertProps { + deadlocks: DeadlockInfo[]; + onResolve: (deadlockId: string, strategy: 'abort_youngest' | 'abort_shortest') => void; + onDismiss: (deadlockId: string) => void; +} +``` + +**Visual Design:** +``` +┌─────────────────────────────────────────────────────────────────┐ +│ ⚠️ DEADLOCK DETECTED │ +├─────────────────────────────────────────────────────────────────┤ +│ Circular wait detected between: │ +│ │ +│ agent-01 ──holds──► lock:task:123 │ +│ ▲ │ │ +│ └────────waits─────────────────────────┘ │ +│ │ +│ agent-02 ──holds──► lock:resource:db/prod-01 │ +│ ▲ │ │ +│ └────────waits─────────────────────────┘ │ +│ │ +│ [Resolve: Abort Youngest] [Resolve: Abort Shortest] │ +└─────────────────────────────────────────────────────────────────┘ +``` + +--- + +### 6. Form Components + +#### Task Creation Form +```typescript +interface TaskCreationFormProps { + initialValues?: Partial; + resourceTypes: ResourceType[]; + availableResources: Array<{ + type: ResourceType; + id: string; + name: string; + }>; + onSubmit: (values: TaskConfig) => Promise; + onPreview: (values: TaskConfig) => Promise; + onSaveDraft: (values: Partial) => Promise; +} +``` + +**Sections:** +1. **Basic Info**: Type, Description, Priority +2. **Resources**: Multi-select with type filtering +3. **Parameters**: Dynamic form based on task type +4. **Advanced**: Timeout, Rollback strategy, Tags +5. **Preview**: Side panel showing changes before submit + +--- + +#### Approval Response Form +```typescript +interface ApprovalResponseFormProps { + approvalId: string; + action: 'approve' | 'reject' | 'request_changes'; + requireReason: boolean; + onSubmit: (values: { action: string; reason?: string }) => void; + onCancel: () => void; +} +``` + +**Features:** +- Quick select buttons for common rejection reasons +- Character counter for reason field +- File attachment for supporting docs + +--- + +### 7. Feedback Components + +#### Toast Notifications +```typescript +interface ToastProps { + id: string; + type: 'success' | 'error' | 'warning' | 'info'; + title: string; + message?: string; + action?: { + label: string; + onClick: () => void; + }; + duration?: number; + onClose: (id: string) => void; +} +``` + +**Events:** +- Approval submitted +- Task approved/rejected +- Lock acquired/released +- Error notifications + +--- + +#### Confirmation Dialogs +```typescript +interface ConfirmationDialogProps { + isOpen: boolean; + title: string; + message: string; + confirmLabel: string; + cancelLabel: string; + danger?: boolean; + requireText?: string; // Type to confirm (for destructive actions) + onConfirm: () => void; + onCancel: () => void; +} +``` + +**Use Cases:** +- Force release lock +- Cancel running task +- Batch approve high-risk items +- Delete delegation policy + +--- + +### 8. Real-time Components + +#### Live Update Indicator +```typescript +interface LiveIndicatorProps { + status: 'connected' | 'disconnected' | 'reconnecting'; + lastUpdate?: string; +} +``` + +**Visual:** +- Green pulse dot when connected +- Yellow when reconnecting +- Red when disconnected with retry button + +--- + +#### WebSocket Status Bar +```typescript +interface WebSocketStatusProps { + connectionState: 'connecting' | 'open' | 'closed' | 'error'; + pendingEvents: number; + onReconnect: () => void; +} +``` + +--- + +## Responsive Breakpoints + +| Breakpoint | Width | Layout Changes | +|------------|-------|----------------| +| Mobile | < 640px | Single column, sidebar becomes drawer, table becomes cards | +| Tablet | 640-1024px | Two columns where applicable, condensed sidebar | +| Desktop | 1024-1440px | Full layout, fixed sidebar | +| Wide | > 1440px | Expanded content area, more data visible | + +--- + +## Accessibility Requirements + +### ARIA Labels +- All interactive elements have descriptive labels +- Live regions for real-time updates +- Role="status" for async operations + +### Keyboard Navigation +- Tab order follows visual flow +- Escape closes modals/drawers +- Enter activates buttons, Space toggles checkboxes +- Arrow keys navigate tables and lists + +### Screen Reader Support +- State changes announced via aria-live +- Complex visualizations have text alternatives +- Risk scores read as "High risk: 75 out of 100" + +### Color Contrast +- Minimum 4.5:1 for normal text +- Minimum 3:1 for large text and icons +- States distinguishable without color + +--- + +## Performance Targets + +| Metric | Target | +|--------|--------| +| Initial Load | < 2s | +| Time to Interactive | < 3s | +| List Scroll | 60fps | +| Modal Open | < 100ms | +| Real-time Update | < 500ms latency | +| Form Submit | < 200ms feedback | + +--- + +## Integration Points + +### API Integration +- REST API for CRUD operations +- WebSocket for real-time events +- Server-Sent Events fallback + +### External Services +- Auth provider for user info +- File storage for attachments +- Search service for full-text search + +### Browser APIs +- Notifications API for approval alerts +- Idle Detection for auto-refresh pause +- Page Visibility for connection management diff --git a/docs/verification-report.md b/docs/verification-report.md new file mode 100644 index 0000000..c43a138 --- /dev/null +++ b/docs/verification-report.md @@ -0,0 +1,147 @@ +## Verification Report + +**Date:** March 18, 2026 +**Assessor:** Epsilon +**Worktrees Analyzed:** +- `/home/ani/Projects/community-ade-wt/queue-core` (Alpha) +- `/home/ani/Projects/community-ade-wt/api-contracts` (Beta) +- `/home/ani/Projects/community-ade-wt/worker-pool` (Gamma) + +--- + +### Alpha (Redis Core): **PASS** + +| Item | Status | +|------|--------| +| `src/queue/RedisQueue.ts` | EXISTS | +| `src/queue/Task.ts` | EXISTS | +| `src/queue/Worker.ts` | EXISTS | +| Tests (`tests/queue/RedisQueue.test.ts`) | EXISTS | + +**Key Implementation Verified:** YES +- Uses **ioredis** library +- **XADD** call found at line 114: `await this.redis.xadd(this.streamKey, "*", ...)` +- **XREADGROUP** call found at line 138-149: `await this.redis.xreadgroup("GROUP", this.consumerGroup, consumerId, "COUNT", batchSize, "BLOCK", blockMs, "STREAMS", this.streamKey, ">")` +- **XACK** call found at line 177: `await this.redis.xack(this.streamKey, this.consumerGroup, messageId)` +- **XPENDING** call found at line 299: `await this.redis.xpending(this.streamKey, this.consumerGroup, "-", "+", 100)` +- **XCLAIM** call found at line 311: `await this.redis.xclaim(this.streamKey, this.consumerGroup, "system", 0, id)` +- Implements consumer group management, delayed tasks via sorted sets (zadd/zrem), worker registration/heartbeat tracking +- Full retry logic with exponential backoff, task state management via Redis hashes (hset/hgetall) + +**Tests Run:** 26 PASSED +``` +PASS tests/queue/RedisQueue.test.ts + RedisQueue + initialize + ✓ should create consumer group + ✓ should not throw if group already exists + enqueue + ✓ should enqueue a task successfully + ✓ should handle delayed tasks + ✓ should handle errors gracefully + ✓ should generate task ID if not provided + claimTasks + ✓ should claim tasks from the queue + ✓ should return empty array when no tasks available + ✓ should skip tasks not found in hash + ... (17 more tests passed) +``` + +--- + +### Beta (API Contracts): **PASS** + +| Item | Status | +|------|--------| +| `src/types/index.ts` | EXISTS (309 lines) | +| `src/api/routes.ts` | EXISTS (692 lines) | +| `src/api/validation.ts` | EXISTS (280 lines) | +| `src/api/middleware.ts` | EXISTS | + +**Code Compiles:** YES (0 errors) +``` +$ cd /home/ani/Projects/community-ade-wt/api-contracts && npx tsc --noEmit +(Command completed with no output - 0 errors) +``` + +**Routes Implemented:** 19 routes + +| Method | Route | Description | +|--------|-------|-------------| +| GET | `/api/health` | Health check | +| GET | `/api/tasks` | List tasks with filtering/pagination | +| POST | `/api/tasks` | Create new task | +| GET | `/api/tasks/:id` | Get task by ID | +| PATCH | `/api/tasks/:id` | Update task | +| POST | `/api/tasks/:id/cancel` | Cancel task | +| POST | `/api/tasks/:id/retry` | Retry failed task | +| POST | `/api/tasks/:id/claim` | Worker claims task | +| POST | `/api/tasks/:id/complete` | Mark task complete | +| POST | `/api/tasks/:id/fail` | Mark task failed | +| GET | `/api/workers` | List workers | +| POST | `/api/workers/register` | Register worker | +| GET | `/api/workers/:id` | Get worker by ID | +| POST | `/api/workers/:id/heartbeat` | Worker heartbeat | +| POST | `/api/workers/:id/kill` | Kill worker | +| GET | `/api/queue/stats` | Queue statistics | +| GET | `/api/queue/next` | Poll for next task | + +**Key Features:** +- Full TypeScript interfaces for Task, Worker, QueueStats, etc. +- Zod validation schemas for all request/response types +- Express Router with proper middleware (validation, asyncHandler, error handling) +- Pagination support, filtering by status/type/worker/priority +- Proper error handling with ApiException class + +--- + +### Gamma (Worker Pool): **PASS** + +| Item | Status | +|------|--------| +| `src/worker/Pool.ts` | EXISTS (601 lines) | +| `src/worker/Process.ts` | EXISTS (466 lines) | +| `src/worker/HealthMonitor.ts` | EXISTS (459 lines) | +| `src/worker/TaskExecutor.ts` | EXISTS | +| `src/worker/WorkerScript.ts` | EXISTS | +| Tests (`tests/worker/Pool.test.ts`) | EXISTS (524 lines) | + +**child_process usage:** VERIFIED +- `Process.ts` line 1: `import { fork, ChildProcess } from 'child_process';` +- `fork()` call at line 176: `this.process = fork(this.scriptPath, this.config.args, forkOptions);` +- Full IPC message passing between parent and child processes +- Process lifecycle management (start, stop, kill, restart) +- Event handlers for 'message', 'error', 'exit', stdout/stderr piping + +**Health Monitoring:** IMPLEMENTED +- `HealthMonitor.ts` provides comprehensive health monitoring +- Configurable check intervals, max heartbeat age, task stall detection +- Automatic restart on consecutive failures +- Events emitted: 'check', 'healthy', 'unhealthy', 'restart', 'taskStalled' +- Health status tracking per worker (heartbeat age, consecutive failures, task duration) + +**Key Features:** +- Worker pool with min/max worker scaling +- Priority-based task queue +- Task timeout handling +- Graceful and force shutdown modes +- Worker respawn on failure +- Statistics tracking (completed/failed tasks, average duration) + +--- + +## Overall: **3/3 components verified** + +### Summary + +| Coder | Component | Status | Evidence | +|-------|-----------|--------|----------| +| **Alpha** | Redis Core | PASS | XADD, XREADGROUP, XACK, XPENDING, XCLAIM implemented. 26 tests pass. | +| **Beta** | API Contracts | PASS | 19 Express routes, compiles with 0 errors, full type definitions | +| **Gamma** | Worker Pool | PASS | child_process.fork() used, health monitoring with auto-restart, 524 lines of tests | + +**Brutal Honesty Assessment:** +All three components are **fully implemented** with production-quality code: +- Alpha's RedisQueue is a complete Redis Streams implementation with consumer groups, delayed tasks, and retry logic +- Beta's API Contracts provide a type-safe Express API with comprehensive validation +- Gamma's Worker Pool properly uses Node.js child_process with full lifecycle and health management diff --git a/src/agent-card/AgentCard.tsx b/src/agent-card/AgentCard.tsx new file mode 100644 index 0000000..91d0cf6 --- /dev/null +++ b/src/agent-card/AgentCard.tsx @@ -0,0 +1,294 @@ +/** + * Agent Card + * Health monitoring card with status, metrics, and quick actions + * Community ADE - Agent Management Interface + */ + +import React, { useCallback } from 'react'; +import type { Agent } from '../../types/agent'; +import { STATUS_COLORS, STATUS_LABELS, AgentStatus } from '../../types/agent'; + +interface AgentCardProps { + agent: Agent; + isSelected?: boolean; + onSelect?: (agent: Agent) => void; + onConfigure?: (agent: Agent) => void; + onRestart?: (id: string) => void; + onPause?: (id: string) => void; + onResume?: (id: string) => void; + onDelete?: (id: string) => void; + compact?: boolean; +} + +const StatusIcon: React.FC<{ status: AgentStatus }> = ({ status }) => { + switch (status) { + case 'working': + return ( + + + + + ); + case 'idle': + return ( + + + + ); + case 'error': + return ( + + + + ); + case 'paused': + return ( + + + + ); + default: + return null; + } +}; + +const formatTimeAgo = (dateString?: string): string => { + if (!dateString) return 'Never'; + + const date = new Date(dateString); + const now = new Date(); + const diffMs = now.getTime() - date.getTime(); + const diffSec = Math.floor(diffMs / 1000); + const diffMin = Math.floor(diffSec / 60); + const diffHour = Math.floor(diffMin / 60); + const diffDay = Math.floor(diffHour / 24); + + if (diffSec < 60) return `${diffSec}s ago`; + if (diffMin < 60) return `${diffMin}m ago`; + if (diffHour < 24) return `${diffHour}h ago`; + if (diffDay < 7) return `${diffDay}d ago`; + return date.toLocaleDateString(); +}; + +const getHealthColor = (successRate: number): string => { + if (successRate >= 95) return 'bg-emerald-500'; + if (successRate >= 80) return 'bg-amber-500'; + return 'bg-red-500'; +}; + +export const AgentCard: React.FC = ({ + agent, + isSelected = false, + onSelect, + onConfigure, + onRestart, + onPause, + onResume, + onDelete, + compact = false, +}) => { + const statusColors = STATUS_COLORS[agent.status]; + const memoryPercent = (agent.metrics.currentMemoryUsage / agent.config.memoryLimit) * 100; + const healthColor = getHealthColor(agent.metrics.successRate24h); + + const handleCardClick = useCallback(() => { + onSelect?.(agent); + }, [agent, onSelect]); + + const handleConfigure = useCallback((e: React.MouseEvent) => { + e.stopPropagation(); + onConfigure?.(agent); + }, [agent, onConfigure]); + + const handleRestart = useCallback((e: React.MouseEvent) => { + e.stopPropagation(); + onRestart?.(agent.id); + }, [agent.id, onRestart]); + + const handlePauseResume = useCallback((e: React.MouseEvent) => { + e.stopPropagation(); + if (agent.status === 'paused') { + onResume?.(agent.id); + } else { + onPause?.(agent.id); + } + }, [agent.status, agent.id, onPause, onResume]); + + const handleDelete = useCallback((e: React.MouseEvent) => { + e.stopPropagation(); + if (confirm(`Are you sure you want to delete "${agent.name}"?`)) { + onDelete?.(agent.id); + } + }, [agent.name, agent.id, onDelete]); + + if (compact) { + return ( +
+
+
+ + + {STATUS_LABELS[agent.status]} + + + {agent.name} + +
+
+
+ {agent.metrics.successRate24h.toFixed(0)}% +
+
+
+ ); + } + + return ( +
+ {/* Header */} +
+
+
+

{agent.name}

+

{agent.description}

+
+ + + {STATUS_LABELS[agent.status]} + +
+
+ + {/* Metrics */} +
+ {/* Memory Usage */} +
+
+ Memory Usage + + {agent.metrics.currentMemoryUsage.toLocaleString()} / {agent.config.memoryLimit.toLocaleString()} + +
+
+
90 ? 'bg-red-500' : memoryPercent > 70 ? 'bg-amber-500' : 'bg-blue-500' + }`} + style={{ width: `${Math.min(memoryPercent, 100)}%` }} + /> +
+
+ + {/* Stats Grid */} +
+
+

{agent.metrics.activeTasksCount}

+

Active Tasks

+
+
+
+
+

+ {agent.metrics.successRate24h.toFixed(0)}% +

+
+

Success Rate

+
+
+

+ {agent.metrics.totalTasksFailed > 0 ? agent.metrics.totalTasksFailed : '-'} +

+

Errors (24h)

+
+
+ + {/* Last Heartbeat */} +
+ Last seen + + {formatTimeAgo(agent.lastHeartbeatAt)} + +
+
+ + {/* Quick Actions */} +
+ + +
+ {(agent.status === 'idle' || agent.status === 'working' || agent.status === 'error') && ( + + )} + + {agent.status === 'paused' && ( + + )} + + + + +
+
+
+ ); +}; diff --git a/src/agent-card/index.ts b/src/agent-card/index.ts new file mode 100644 index 0000000..96f844e --- /dev/null +++ b/src/agent-card/index.ts @@ -0,0 +1 @@ +export { AgentCard } from './AgentCard'; diff --git a/src/agent-config/AgentConfigPanel.tsx b/src/agent-config/AgentConfigPanel.tsx new file mode 100644 index 0000000..9447d7e --- /dev/null +++ b/src/agent-config/AgentConfigPanel.tsx @@ -0,0 +1,414 @@ +/** + * Agent Configuration Panel + * Parameter tuning panel for existing agents + * Community ADE - Agent Management Interface + */ + +import React, { useState, useEffect, useCallback } from 'react'; +import type { + Agent, + AgentConfig, + AgentModel, + AgentTool, + AgentPermission, + AgentUpdateData, +} from '../../types/agent'; +import { + AVAILABLE_MODELS, + AVAILABLE_TOOLS, + PERMISSION_DESCRIPTIONS, + DEFAULT_AGENT_CONFIG, +} from '../../types/agent'; + +interface AgentConfigPanelProps { + agent: Agent | null; + onUpdate: (id: string, data: AgentUpdateData) => Promise; + onClose: () => void; +} + +export const AgentConfigPanel: React.FC = ({ + agent, + onUpdate, + onClose, +}) => { + const [config, setConfig] = useState(DEFAULT_AGENT_CONFIG); + const [name, setName] = useState(''); + const [description, setDescription] = useState(''); + const [isSaving, setIsSaving] = useState(false); + const [hasChanges, setHasChanges] = useState(false); + const [saveError, setSaveError] = useState(null); + const [activeTab, setActiveTab] = useState<'general' | 'tools' | 'permissions'>('general'); + + // Initialize from agent prop + useEffect(() => { + if (agent) { + setConfig(agent.config); + setName(agent.name); + setDescription(agent.description); + setHasChanges(false); + setSaveError(null); + } + }, [agent]); + + const updateConfig = useCallback((updates: Partial) => { + setConfig((prev) => ({ ...prev, ...updates })); + setHasChanges(true); + }, []); + + const handleSave = useCallback(async () => { + if (!agent) return; + + setIsSaving(true); + setSaveError(null); + + const updateData: AgentUpdateData = { + name, + description, + ...config, + }; + + try { + const result = await onUpdate(agent.id, updateData); + if (result) { + setHasChanges(false); + } else { + setSaveError('Failed to update agent. Please try again.'); + } + } catch (err) { + setSaveError(err instanceof Error ? err.message : 'An unexpected error occurred'); + } finally { + setIsSaving(false); + } + }, [agent, name, description, config, onUpdate]); + + const toggleTool = useCallback((toolId: AgentTool) => { + const currentTools = config.toolWhitelist; + if (currentTools.includes(toolId)) { + updateConfig({ toolWhitelist: currentTools.filter((t) => t !== toolId) }); + } else { + updateConfig({ toolWhitelist: [...currentTools, toolId] }); + } + }, [config.toolWhitelist, updateConfig]); + + const togglePermission = useCallback((permission: AgentPermission) => { + const currentPermissions = config.autoApprovePermissions; + if (currentPermissions.includes(permission)) { + updateConfig({ autoApprovePermissions: currentPermissions.filter((p) => p !== permission) }); + } else { + updateConfig({ autoApprovePermissions: [...currentPermissions, permission] }); + } + }, [config.autoApprovePermissions, updateConfig]); + + const resetChanges = useCallback(() => { + if (agent) { + setConfig(agent.config); + setName(agent.name); + setDescription(agent.description); + setHasChanges(false); + setSaveError(null); + } + }, [agent]); + + if (!agent) { + return ( +
+
+ + + + +
+

Select an agent

+

+ Choose an agent from the list to configure its settings +

+
+ ); + } + + return ( +
+ {/* Header */} +
+
+
+

+ Configure Agent +

+

+ {agent.name} +

+
+
+ {hasChanges && ( + + Unsaved changes + + )} + +
+
+
+ + {/* Tabs */} +
+
+ {[ + { id: 'general', label: 'General' }, + { id: 'tools', label: 'Tools' }, + { id: 'permissions', label: 'Permissions' }, + ].map((tab) => ( + + ))} +
+
+ + {/* Content */} +
+ {saveError && ( +
+

{saveError}

+
+ )} + + {activeTab === 'general' && ( +
+ {/* Basic Info */} +
+

Basic Information

+
+
+ + { + setName(e.target.value); + setHasChanges(true); + }} + className="w-full px-3 py-2 border border-slate-300 rounded-lg focus:ring-2 focus:ring-blue-500 focus:border-blue-500" + /> +
+
+ +