// taskswitch.c -- Cooperative task switching library for DJGPP // // Uses inline assembly for context switching (i386 and x86_64). The // scheduler uses credit-based weighted round-robin so all tasks run, // but higher-priority tasks run proportionally more often. // // Task storage is a stb_ds dynamic array that grows as needed. // Terminated task slots are recycled by tsCreate(). // // Why inline asm instead of setjmp/longjmp for context switching: // setjmp/longjmp only save callee-saved registers and don't give us // control over the stack pointer in a portable way. We need to set up // a brand-new stack for each task and jump into a trampoline -- setjmp // can't bootstrap a fresh stack. The asm approach also avoids ABI // differences in jmp_buf layout across DJGPP versions. // // Why stb_ds dynamic array instead of a linked list: // Tasks are indexed by integer ID for O(1) lookup (tsGetState, tsKill, // etc.). A linked list would require O(n) traversal for every ID-based // operation. The array also has better cache locality during the // scheduler's linear scan. The downside (holes after termination) is // mitigated by slot recycling in findFreeSlot(). #define STB_DS_IMPLEMENTATION #include "thirdparty/stb_ds.h" #include "taskswitch.h" #include #include // ============================================================================ // Internal types // ============================================================================ // Only callee-saved registers need to be in the context struct because // the C calling convention guarantees the caller has already saved // everything else. The compiler treats contextSwitch() as a normal // function call, so caller-saved regs (eax/ecx/edx on i386, // rax/rcx/rdx/r8-r11 on x86_64) are spilled by the compiler before // the call. This minimizes context size and switch overhead. // // Field order is critical: the asm uses hardcoded byte offsets into // this struct. 
Reordering fields will silently corrupt context switches. #if defined(__x86_64__) // Saved CPU context for x86_64 (field order matches asm byte offsets) typedef struct { uintptr_t rbx; // offset 0 uintptr_t r12; // offset 8 uintptr_t r13; // offset 16 uintptr_t r14; // offset 24 uintptr_t r15; // offset 32 uintptr_t rbp; // offset 40 uintptr_t rsp; // offset 48 uintptr_t rip; // offset 56 } TaskContextT; #else // Saved CPU context for i386 (field order matches asm byte offsets) typedef struct { uintptr_t ebx; // offset 0 uintptr_t esi; // offset 4 uintptr_t edi; // offset 8 uintptr_t ebp; // offset 12 uintptr_t esp; // offset 16 uintptr_t eip; // offset 20 } TaskContextT; #endif // Task control block -- one per task slot. The 'allocated' flag tracks // whether the slot is live or recyclable, separate from the state enum, // because we need to distinguish "never used" from "terminated and reaped". // The 'isMain' flag protects task 0 from kill/pause -- destroying the // main task would orphan all other tasks with no scheduler to resume them. typedef struct { char name[TS_NAME_MAX]; TaskContextT context; uint8_t *stack; uint32_t stackSize; TaskStateE state; int32_t priority; int32_t credits; TaskEntryT entry; void *arg; bool isMain; bool allocated; // true if slot is in use, false if free for reuse } TaskBlockT; // ============================================================================ // Module state // ============================================================================ // stb_ds dynamic array of task control blocks. Slot 0 is always the main // task. Slots 1..N are app tasks. Terminated slots have allocated=false // and are reused by findFreeSlot() to prevent unbounded growth. static TaskBlockT *tasks = NULL; // Index of the currently executing task. Updated only by tsYield, tsExit, // tsPause (self-pause), and tsRecoverToMain. 
static uint32_t currentIdx = 0;

// True once tsInit() has registered the main task; all public entry points
// bail out (or return an error) when this is false.
static bool initialized = false;

// ============================================================================
// Forward declarations
// ============================================================================

// Static helpers
static void contextSwitch(TaskContextT *save, TaskContextT *restore);
static int32_t findFreeSlot(void);
static uint32_t scheduleNext(void);
static void taskTrampoline(void);

// Public API prototypes are provided by taskswitch.h via #include.
// Explicit prototypes repeated here per project convention:
uint32_t tsActiveCount(void);
int32_t tsCreate(const char *name, TaskEntryT entry, void *arg, uint32_t stackSize, int32_t priority);
uint32_t tsCurrentId(void);
void tsExit(void);
const char *tsGetName(uint32_t taskId);
int32_t tsGetPriority(uint32_t taskId);
TaskStateE tsGetState(uint32_t taskId);
int32_t tsInit(void);
int32_t tsKill(uint32_t taskId);
int32_t tsPause(uint32_t taskId);
void tsRecoverToMain(void);
int32_t tsResume(uint32_t taskId);
int32_t tsSetPriority(uint32_t taskId, int32_t priority);
void tsShutdown(void);
void tsYield(void);

// ============================================================================
// Static functions (alphabetical)
// ============================================================================

// Switch execution from the current task to another by saving and restoring
// callee-saved registers and the stack pointer. The return address is
// captured as a local label so that when another task switches back to us,
// execution resumes right after the save point.
//
// The mechanism:
// 1. Save all callee-saved regs + esp/rsp into *save
// 2. Capture the address of local label "1:" as the saved EIP/RIP
// 3. Load all regs + esp/rsp from *restore
// 4. Jump to the restored EIP/RIP (which is label "1:" in the other task)
//
// For a newly created task, the restored EIP points to taskTrampoline
// (set up in tsCreate) rather than label "1:", so the first switch into
// a task bootstraps it into its entry function.
//
// noinline is critical: if the compiler inlines this, the callee-saved
// register assumptions break because the enclosing function may use
// different register allocation. The asm clobber list tells GCC which
// registers we destroy so it spills them before the call.
//
// The "memory" clobber acts as a compiler fence, ensuring all memory
// writes are flushed before the switch and re-read after resumption.
#if defined(__x86_64__)
// x86_64: save rbx, r12-r15, rbp, rsp, rip.
// Inputs via GCC constraints: %rdi = save ptr, %rsi = restore ptr.
//
// NOTE(review): "D"/"S" are plain *input* operands, so GCC is entitled to
// assume %rdi/%rsi still hold save/restore after the asm. When we resume at
// label "1:" (via another task's jmp), those registers actually hold the
// OTHER task's operand values. This only stays safe if the compiler never
// reuses the inputs after the asm -- consider "+D"/"+S" or adding them to
// the clobber-equivalent (input/output) list. TODO confirm against GCC
// extended-asm semantics before changing.
static void __attribute__((noinline)) contextSwitch(TaskContextT *save, TaskContextT *restore)
{
    __asm__ __volatile__(
        // Save current context (offsets match TaskContextT field layout)
        "movq %%rbx, 0(%%rdi)\n\t"
        "movq %%r12, 8(%%rdi)\n\t"
        "movq %%r13, 16(%%rdi)\n\t"
        "movq %%r14, 24(%%rdi)\n\t"
        "movq %%r15, 32(%%rdi)\n\t"
        "movq %%rbp, 40(%%rdi)\n\t"
        "movq %%rsp, 48(%%rdi)\n\t"
        // RIP-relative lea captures the resume point address
        "leaq 1f(%%rip), %%rax\n\t"
        "movq %%rax, 56(%%rdi)\n\t"
        // Restore new context -- once rsp is swapped we're on the other
        // task's stack. The jmp completes the switch.
        "movq 0(%%rsi), %%rbx\n\t"
        "movq 8(%%rsi), %%r12\n\t"
        "movq 16(%%rsi), %%r13\n\t"
        "movq 24(%%rsi), %%r14\n\t"
        "movq 32(%%rsi), %%r15\n\t"
        "movq 40(%%rsi), %%rbp\n\t"
        "movq 48(%%rsi), %%rsp\n\t"
        "movq 56(%%rsi), %%rax\n\t"
        "jmp *%%rax\n\t"
        // Resume point: when someone switches back to us, execution
        // continues here as if contextSwitch() just returned normally.
        "1:\n\t"
        :
        : "D" (save), "S" (restore)
        : "rax", "rcx", "rdx", "r8", "r9", "r10", "r11", "memory", "cc"
    );
}
#else
// i386: save ebx, esi, edi, ebp, esp, eip.
// Inputs via GCC constraints: %eax = save ptr, %edx = restore ptr.
static void __attribute__((noinline)) contextSwitch(TaskContextT *save, TaskContextT *restore) { __asm__ __volatile__( // Save current context "movl %%ebx, 0(%%eax)\n\t" "movl %%esi, 4(%%eax)\n\t" "movl %%edi, 8(%%eax)\n\t" "movl %%ebp, 12(%%eax)\n\t" "movl %%esp, 16(%%eax)\n\t" // i386 can't do RIP-relative lea, so use an absolute label address "movl $1f, 20(%%eax)\n\t" // Restore new context "movl 0(%%edx), %%ebx\n\t" "movl 4(%%edx), %%esi\n\t" "movl 8(%%edx), %%edi\n\t" "movl 12(%%edx), %%ebp\n\t" "movl 16(%%edx), %%esp\n\t" "movl 20(%%edx), %%eax\n\t" "jmp *%%eax\n\t" "1:\n\t" : : "a" (save), "d" (restore) : "ecx", "memory", "cc" ); } #endif // Find a free (terminated or unallocated) slot in the task array. // Returns the index, or -1 if no free slot exists. // Starts at 1 because slot 0 is always the main task and cannot be reused. // Linear scan is fine -- SHELL_MAX_APPS caps the practical limit at ~32 tasks. static int32_t findFreeSlot(void) { ptrdiff_t count = arrlen(tasks); for (ptrdiff_t i = 1; i < count; i++) { if (!tasks[i].allocated) { return (int32_t)i; } } return -1; } // Find the next task to run using credit-based weighted round-robin. // Each ready task holds (priority + 1) credits. One credit is consumed // per scheduling turn. When no ready task has credits left, every // ready task is refilled. This guarantees all tasks run while giving // higher-priority tasks proportionally more turns. // // Algorithm (variant of Linux 2.4's goodness() scheduler): // 1. Scan forward from currentIdx looking for a ready task with credits > 0 // 2. If found, decrement its credits and select it // 3. If no task has credits, refill ALL ready tasks (one "epoch") // 4. Scan again after refill // // The round-robin scan starts at (currentIdx + 1) and wraps, ensuring // fairness among tasks with equal priority -- no task gets picked twice // in a row unless it's the only ready task. 
// // If no ready tasks exist at all (everything paused/terminated), return // currentIdx so the caller stays on the current task (always task 0 in // practice, since task 0 is the shell's main loop and never pauses). static uint32_t scheduleNext(void) { uint32_t count = (uint32_t)arrlen(tasks); // First pass: look for a ready task with remaining credits for (uint32_t i = 1; i <= count; i++) { uint32_t idx = (currentIdx + i) % count; if (tasks[idx].allocated && tasks[idx].state == TaskStateReady && tasks[idx].credits > 0) { tasks[idx].credits--; return idx; } } // All credits exhausted -- start a new epoch by refilling every ready task bool anyReady = false; for (uint32_t i = 0; i < count; i++) { if (tasks[i].allocated && tasks[i].state == TaskStateReady) { tasks[i].credits = tasks[i].priority + 1; anyReady = true; } } if (!anyReady) { return currentIdx; } // Pick the first ready task after refill for (uint32_t i = 1; i <= count; i++) { uint32_t idx = (currentIdx + i) % count; if (tasks[idx].allocated && tasks[idx].state == TaskStateReady && tasks[idx].credits > 0) { tasks[idx].credits--; return idx; } } return currentIdx; } // Entry point for every new task. The first context switch into a new task // jumps here (via the EIP/RIP set up in tsCreate). This is a trampoline // rather than calling entry directly because we need to call tsExit() when // the entry function returns -- if we just set EIP to the entry function, // it would return to a garbage address (the dummy 0 on the stack). // The trampoline ensures clean task termination even if the app forgets // to call tsExit() explicitly. 
static void taskTrampoline(void) { TaskBlockT *task = &tasks[currentIdx]; task->entry(task->arg); tsExit(); } // ============================================================================ // Public API (alphabetical, main-equivalent functions last if applicable) // ============================================================================ uint32_t tsActiveCount(void) { if (!initialized) { return 0; } uint32_t active = 0; ptrdiff_t count = arrlen(tasks); for (ptrdiff_t i = 0; i < count; i++) { if (tasks[i].allocated && tasks[i].state != TaskStateTerminated) { active++; } } return active; } int32_t tsCreate(const char *name, TaskEntryT entry, void *arg, uint32_t stackSize, int32_t priority) { if (!initialized || !entry) { return TS_ERR_PARAM; } if (stackSize == 0) { stackSize = TS_DEFAULT_STACK_SIZE; } // Reuse a terminated/free slot, or append a new one. // Recycling avoids unbounded array growth when apps are repeatedly // launched and terminated over the lifetime of the shell. int32_t id = findFreeSlot(); if (id < 0) { TaskBlockT blank = {0}; arrput(tasks, blank); id = (int32_t)(arrlen(tasks) - 1); } TaskBlockT *task = &tasks[id]; memset(task, 0, sizeof(*task)); task->stack = (uint8_t *)malloc(stackSize); if (!task->stack) { return TS_ERR_NOMEM; } if (name) { strncpy(task->name, name, TS_NAME_MAX - 1); task->name[TS_NAME_MAX - 1] = '\0'; } task->stackSize = stackSize; task->state = TaskStateReady; task->priority = priority; task->credits = priority + 1; task->entry = entry; task->arg = arg; task->isMain = false; task->allocated = true; // Set up initial stack (grows downward, 16-byte aligned). // The ABI requires 16-byte stack alignment at function entry. We align // the top, then push a dummy return address (0) to simulate a CALL // instruction -- this keeps the stack aligned for the trampoline. 
// The dummy address is never used because taskTrampoline calls tsExit() // which switches away without returning, but it satisfies debuggers // and ABI checkers that expect a return address at the bottom of each frame. uintptr_t top = (uintptr_t)(task->stack + stackSize); top &= ~(uintptr_t)0xF; top -= sizeof(uintptr_t); *(uintptr_t *)top = 0; // dummy return address; trampoline never returns #if defined(__x86_64__) task->context.rsp = top; task->context.rbp = 0; task->context.rbx = 0; task->context.r12 = 0; task->context.r13 = 0; task->context.r14 = 0; task->context.r15 = 0; task->context.rip = (uintptr_t)taskTrampoline; #else task->context.esp = top; task->context.ebp = 0; task->context.ebx = 0; task->context.esi = 0; task->context.edi = 0; task->context.eip = (uintptr_t)taskTrampoline; #endif return id; } uint32_t tsCurrentId(void) { return currentIdx; } // Self-termination. Frees resources and switches to the next task. // This function never returns -- the terminated task's context is abandoned. // We save to tasks[prev].context even though we'll never restore it because // contextSwitch always writes to the save pointer; the data is harmless // and will be overwritten when the slot is recycled. void tsExit(void) { if (!initialized || tasks[currentIdx].isMain) { return; } tasks[currentIdx].state = TaskStateTerminated; // Free the stack immediately -- safe because we're about to switch // away and never return. The context switch itself doesn't touch // the old stack after swapping ESP/RSP. 
free(tasks[currentIdx].stack); tasks[currentIdx].stack = NULL; tasks[currentIdx].allocated = false; uint32_t next = scheduleNext(); uint32_t prev = currentIdx; currentIdx = next; tasks[next].state = TaskStateRunning; contextSwitch(&tasks[prev].context, &tasks[next].context); // Terminated task never resumes here } const char *tsGetName(uint32_t taskId) { if (!initialized || taskId >= (uint32_t)arrlen(tasks)) { return NULL; } if (!tasks[taskId].allocated) { return NULL; } return tasks[taskId].name; } int32_t tsGetPriority(uint32_t taskId) { if (!initialized || taskId >= (uint32_t)arrlen(tasks)) { return TS_ERR_PARAM; } if (!tasks[taskId].allocated) { return TS_ERR_PARAM; } return tasks[taskId].priority; } TaskStateE tsGetState(uint32_t taskId) { if (!initialized || taskId >= (uint32_t)arrlen(tasks)) { return TaskStateTerminated; } if (!tasks[taskId].allocated) { return TaskStateTerminated; } return tasks[taskId].state; } // Register the calling context as task 0 (main). No stack is allocated // because the main task uses the process stack. The main task's context // struct is filled in lazily by contextSwitch on the first tsYield() -- // until then, the saved EIP/ESP are zero, which is fine because we // never restore task 0 from a cold start. int32_t tsInit(void) { if (initialized) { return TS_ERR_PARAM; } // Start with the main task at slot 0 TaskBlockT main = {0}; strncpy(main.name, "main", TS_NAME_MAX - 1); main.state = TaskStateRunning; main.priority = TS_PRIORITY_NORMAL; main.credits = TS_PRIORITY_NORMAL + 1; main.isMain = true; main.stack = NULL; main.allocated = true; arrput(tasks, main); currentIdx = 0; initialized = true; return TS_OK; } // Forcibly terminate another task. This is safe in a cooperative system // because the target is guaranteed to be suspended at a yield point -- it // cannot be in the middle of a critical section. The stack is freed and // the slot is recycled immediately. 
// // Cannot kill self (use tsExit instead) -- killing self would free the // stack we're currently executing on. Cannot kill main (task 0) because // the shell's main loop must always be runnable for crash recovery. // // The shell uses this for two purposes: // 1. shellForceKillApp: "End Task" from the task manager // 2. Crash recovery: after a signal handler longjmps to main, the // crashed task's slot is cleaned up via tsKill int32_t tsKill(uint32_t taskId) { if (!initialized || taskId >= (uint32_t)arrlen(tasks)) { return TS_ERR_PARAM; } if (!tasks[taskId].allocated) { return TS_ERR_PARAM; } if (tasks[taskId].isMain) { return TS_ERR_STATE; } if (taskId == currentIdx) { return TS_ERR_STATE; } if (tasks[taskId].state == TaskStateTerminated) { return TS_ERR_STATE; } tasks[taskId].state = TaskStateTerminated; free(tasks[taskId].stack); tasks[taskId].stack = NULL; tasks[taskId].allocated = false; return TS_OK; } int32_t tsPause(uint32_t taskId) { if (!initialized || taskId >= (uint32_t)arrlen(tasks)) { return TS_ERR_PARAM; } if (!tasks[taskId].allocated) { return TS_ERR_PARAM; } if (tasks[taskId].isMain) { return TS_ERR_STATE; } if (tasks[taskId].state != TaskStateRunning && tasks[taskId].state != TaskStateReady) { return TS_ERR_STATE; } tasks[taskId].state = TaskStatePaused; // If we paused ourselves, must yield immediately -- a paused task // won't be selected by scheduleNext, so staying on CPU would deadlock. // If pausing another task, no yield needed; it will simply be skipped // the next time the scheduler scans. if (taskId == currentIdx) { uint32_t next = scheduleNext(); if (next != currentIdx) { uint32_t prev = currentIdx; currentIdx = next; tasks[next].state = TaskStateRunning; contextSwitch(&tasks[prev].context, &tasks[next].context); } } return TS_OK; } // Emergency recovery after a crash in an app task. When a signal handler // fires (e.g., SIGSEGV), DJGPP's signal dispatch saves the exception // state and calls our handler. 
The handler does longjmp back to the // shell's setjmp point in main(), which restores the main task's stack. // However, the task switcher's currentIdx still points to the crashed // app task. This function fixes the bookkeeping so the scheduler treats // task 0 as the running task again. // // The crashed task's slot is NOT freed here -- its stack is corrupt and // the caller (shellMain's crash recovery) must call shellForceKillApp // to clean it up properly (destroying windows, closing DXE, etc.). void tsRecoverToMain(void) { if (!initialized) { return; } currentIdx = 0; tasks[0].state = TaskStateRunning; } int32_t tsResume(uint32_t taskId) { if (!initialized || taskId >= (uint32_t)arrlen(tasks)) { return TS_ERR_PARAM; } if (!tasks[taskId].allocated) { return TS_ERR_PARAM; } if (tasks[taskId].state != TaskStatePaused) { return TS_ERR_STATE; } // Transition from Paused back to Ready and refill credits immediately. // Without the refill, a resumed task might have 0 credits and would have // to wait for the next epoch to run, making resume feel sluggish. tasks[taskId].state = TaskStateReady; tasks[taskId].credits = tasks[taskId].priority + 1; return TS_OK; } int32_t tsSetPriority(uint32_t taskId, int32_t priority) { if (!initialized || taskId >= (uint32_t)arrlen(tasks)) { return TS_ERR_PARAM; } if (!tasks[taskId].allocated) { return TS_ERR_PARAM; } if (tasks[taskId].state == TaskStateTerminated) { return TS_ERR_STATE; } tasks[taskId].priority = priority; tasks[taskId].credits = priority + 1; return TS_OK; } void tsShutdown(void) { if (!initialized) { return; } ptrdiff_t count = arrlen(tasks); for (ptrdiff_t i = 0; i < count; i++) { free(tasks[i].stack); } arrfree(tasks); tasks = NULL; currentIdx = 0; initialized = false; } // The core cooperative yield. Called explicitly by app code (or implicitly // via the shell's idle callback and main loop). If no other task is ready, // returns immediately -- no context switch overhead when running solo. 
// // The state transition: current task moves Running -> Ready (still // schedulable), next task moves Ready -> Running. The previous task will // resume here when someone else yields and the scheduler picks it again. void tsYield(void) { if (!initialized) { return; } uint32_t next = scheduleNext(); if (next == currentIdx) { return; } uint32_t prev = currentIdx; // Only transition to Ready if still Running -- a task that paused itself // will already be in Paused state when tsYield is called from tsPause. if (tasks[prev].state == TaskStateRunning) { tasks[prev].state = TaskStateReady; } currentIdx = next; tasks[next].state = TaskStateRunning; contextSwitch(&tasks[prev].context, &tasks[next].context); }