// DVX_GUI/tasks/taskswitch.c
// taskswitch.c -- Cooperative task switching library for DJGPP
//
// Uses inline assembly for context switching (i386 and x86_64). The
// scheduler uses credit-based weighted round-robin so all tasks run,
// but higher-priority tasks run proportionally more often.
//
// Task storage is a stb_ds dynamic array that grows as needed.
// Terminated task slots are recycled by tsCreate().
//
// Why inline asm instead of setjmp/longjmp for context switching:
// setjmp/longjmp only save callee-saved registers and don't give us
// control over the stack pointer in a portable way. We need to set up
// a brand-new stack for each task and jump into a trampoline -- setjmp
// can't bootstrap a fresh stack. The asm approach also avoids ABI
// differences in jmp_buf layout across DJGPP versions.
//
// Why stb_ds dynamic array instead of a linked list:
// Tasks are indexed by integer ID for O(1) lookup (tsGetState, tsKill,
// etc.). A linked list would require O(n) traversal for every ID-based
// operation. The array also has better cache locality during the
// scheduler's linear scan. The downside (holes after termination) is
// mitigated by slot recycling in findFreeSlot().
#define STB_DS_IMPLEMENTATION
#include "thirdparty/stb_ds.h"
#include "taskswitch.h"
#include <stdlib.h>
#include <string.h>
// ============================================================================
// Internal types
// ============================================================================
// Only callee-saved registers need to be in the context struct because
// the C calling convention guarantees the caller has already saved
// everything else. The compiler treats contextSwitch() as a normal
// function call, so caller-saved regs (eax/ecx/edx on i386,
// rax/rcx/rdx/r8-r11 on x86_64) are spilled by the compiler before
// the call. This minimizes context size and switch overhead.
//
// Field order is critical: the asm uses hardcoded byte offsets into
// this struct. Reordering fields will silently corrupt context switches.
#if defined(__x86_64__)
// Saved CPU context for x86_64 (field order matches asm byte offsets).
// Only the callee-saved registers plus rsp/rip are stored -- caller-saved
// registers are spilled by the compiler around the contextSwitch() call
// (see the rationale comment above). contextSwitch() addresses these
// fields with the hardcoded byte offsets noted on each line: do NOT
// reorder, insert, or resize fields without updating the asm to match.
typedef struct {
uintptr_t rbx; // offset 0
uintptr_t r12; // offset 8
uintptr_t r13; // offset 16
uintptr_t r14; // offset 24
uintptr_t r15; // offset 32
uintptr_t rbp; // offset 40
uintptr_t rsp; // offset 48
uintptr_t rip; // offset 56
} TaskContextT;
#else
// Saved CPU context for i386 (field order matches asm byte offsets).
// Same contract as the x86_64 variant: the offsets below are hardcoded
// in the i386 contextSwitch() asm -- keep struct and asm in lockstep.
typedef struct {
uintptr_t ebx; // offset 0
uintptr_t esi; // offset 4
uintptr_t edi; // offset 8
uintptr_t ebp; // offset 12
uintptr_t esp; // offset 16
uintptr_t eip; // offset 20
} TaskContextT;
#endif
// Task control block -- one per task slot. The 'allocated' flag tracks
// whether the slot is live or recyclable, separate from the state enum,
// because we need to distinguish "never used" from "terminated and reaped".
// The 'isMain' flag protects task 0 from kill/pause -- destroying the
// main task would orphan all other tasks with no scheduler to resume them.
typedef struct {
char name[TS_NAME_MAX]; // display name; tsCreate guarantees NUL termination
TaskContextT context; // saved registers while the task is switched out
uint8_t *stack; // malloc'd stack base (NULL for the main task)
uint32_t stackSize; // size of 'stack' in bytes
TaskStateE state; // Ready / Running / Paused / Terminated
int32_t priority; // scheduling weight; an epoch refill grants priority + 1 credits
int32_t credits; // scheduling turns remaining in the current epoch
TaskEntryT entry; // entry function, invoked once by taskTrampoline
void *arg; // opaque user argument passed to 'entry'
bool isMain; // true only for slot 0; blocks tsKill/tsPause on main
bool allocated; // true if slot is in use, false if free for reuse
} TaskBlockT;
// ============================================================================
// Module state
// ============================================================================
// stb_ds dynamic array of task control blocks. Slot 0 is always the main
// task. Slots 1..N are app tasks. Terminated slots have allocated=false
// and are reused by findFreeSlot() to prevent unbounded growth.
static TaskBlockT *tasks = NULL;
// Index of the currently executing task. Updated only by tsYield, tsExit,
// tsPause (self-pause), and tsRecoverToMain.
static uint32_t currentIdx = 0;
// Guards every public entry point; set by tsInit, cleared by tsShutdown.
static bool initialized = false;
// ============================================================================
// Forward declarations
// ============================================================================
// Static helpers
static void contextSwitch(TaskContextT *save, TaskContextT *restore);
static int32_t findFreeSlot(void);
// scheduleNext may return currentIdx when no other task is runnable;
// callers treat that as "stay on the current task".
static uint32_t scheduleNext(void);
static void taskTrampoline(void);
// Public API prototypes are provided by taskswitch.h via #include.
// Explicit prototypes repeated here per project convention:
uint32_t tsActiveCount(void);
int32_t tsCreate(const char *name, TaskEntryT entry, void *arg, uint32_t stackSize, int32_t priority);
uint32_t tsCurrentId(void);
void tsExit(void);
const char *tsGetName(uint32_t taskId);
int32_t tsGetPriority(uint32_t taskId);
TaskStateE tsGetState(uint32_t taskId);
int32_t tsInit(void);
int32_t tsKill(uint32_t taskId);
int32_t tsPause(uint32_t taskId);
void tsRecoverToMain(void);
int32_t tsResume(uint32_t taskId);
int32_t tsSetPriority(uint32_t taskId, int32_t priority);
void tsShutdown(void);
void tsYield(void);
// ============================================================================
// Static functions (alphabetical)
// ============================================================================
// Switch execution from the current task to another by saving and restoring
// callee-saved registers and the stack pointer. The return address is
// captured as a local label so that when another task switches back to us,
// execution resumes right after the save point.
//
// The mechanism:
// 1. Save all callee-saved regs + esp/rsp into *save
// 2. Capture the address of local label "1:" as the saved EIP/RIP
// 3. Load all regs + esp/rsp from *restore
// 4. Jump to the restored EIP/RIP (which is label "1:" in the other task)
//
// For a newly created task, the restored EIP points to taskTrampoline
// (set up in tsCreate) rather than label "1:", so the first switch into
// a task bootstraps it into its entry function.
//
// noinline is critical: if the compiler inlines this, the callee-saved
// register assumptions break because the enclosing function may use
// different register allocation. The asm clobber list tells GCC which
// registers we destroy so it spills them before the call.
//
// The "memory" clobber acts as a compiler fence, ensuring all memory
// writes are flushed before the switch and re-read after resumption.
#if defined(__x86_64__)
// x86_64: save rbx, r12-r15, rbp, rsp, rip.
// Inputs via GCC constraints: %rdi = save ptr, %rsi = restore ptr.
//
// NOTE(review): when another task later switches back to us, execution
// resumes at label "1:" with %rdi/%rsi holding whatever the *other*
// contextSwitch call left in them -- not 'save'/'restore'. No code after
// the asm uses the input operands, so this works, but it relies on the
// compiler emitting nothing after the asm that reads them. TODO confirm
// this holds at all optimization levels with the shipped GCC.
static void __attribute__((noinline)) contextSwitch(TaskContextT *save, TaskContextT *restore) {
__asm__ __volatile__(
// Save current context
"movq %%rbx, 0(%%rdi)\n\t"
"movq %%r12, 8(%%rdi)\n\t"
"movq %%r13, 16(%%rdi)\n\t"
"movq %%r14, 24(%%rdi)\n\t"
"movq %%r15, 32(%%rdi)\n\t"
"movq %%rbp, 40(%%rdi)\n\t"
"movq %%rsp, 48(%%rdi)\n\t"
// RIP-relative lea captures the resume point address
"leaq 1f(%%rip), %%rax\n\t"
"movq %%rax, 56(%%rdi)\n\t"
// Restore new context -- once rsp is swapped we're on the other
// task's stack. The jmp completes the switch.
"movq 0(%%rsi), %%rbx\n\t"
"movq 8(%%rsi), %%r12\n\t"
"movq 16(%%rsi), %%r13\n\t"
"movq 24(%%rsi), %%r14\n\t"
"movq 32(%%rsi), %%r15\n\t"
"movq 40(%%rsi), %%rbp\n\t"
"movq 48(%%rsi), %%rsp\n\t"
"movq 56(%%rsi), %%rax\n\t"
"jmp *%%rax\n\t"
// Resume point: when someone switches back to us, execution
// continues here as if contextSwitch() just returned normally.
"1:\n\t"
:
: "D" (save), "S" (restore)
: "rax", "rcx", "rdx", "r8", "r9", "r10", "r11", "memory", "cc"
);
}
#else
// i386: save ebx, esi, edi, ebp, esp, eip.
// Inputs via GCC constraints: %eax = save ptr, %edx = restore ptr.
//
// NOTE(review): the restore sequence overwrites %eax ("movl 20(%%edx),
// %%eax") even though %eax is declared as an input-only operand -- GCC's
// extended-asm rules assume inputs are not modified. Harmless today
// because nothing reads %eax after the asm, but a stricter compiler could
// miscompile this; a "+a" read-write constraint would be the clean fix.
// TODO confirm against the DJGPP GCC in use.
//
// NOTE(review): "movl $1f, ..." takes the label's absolute address, which
// is only valid in a non-PIC flat binary -- presumably fine for DJGPP's
// memory model, but verify if the build ever adds -fPIC.
static void __attribute__((noinline)) contextSwitch(TaskContextT *save, TaskContextT *restore) {
__asm__ __volatile__(
// Save current context
"movl %%ebx, 0(%%eax)\n\t"
"movl %%esi, 4(%%eax)\n\t"
"movl %%edi, 8(%%eax)\n\t"
"movl %%ebp, 12(%%eax)\n\t"
"movl %%esp, 16(%%eax)\n\t"
// i386 can't do RIP-relative lea, so use an absolute label address
"movl $1f, 20(%%eax)\n\t"
// Restore new context
"movl 0(%%edx), %%ebx\n\t"
"movl 4(%%edx), %%esi\n\t"
"movl 8(%%edx), %%edi\n\t"
"movl 12(%%edx), %%ebp\n\t"
"movl 16(%%edx), %%esp\n\t"
"movl 20(%%edx), %%eax\n\t"
"jmp *%%eax\n\t"
"1:\n\t"
:
: "a" (save), "d" (restore)
: "ecx", "memory", "cc"
);
}
#endif
// Locate a recyclable slot (allocated == false) in the task array.
// Slot 0 belongs permanently to the main task, so the scan begins at
// index 1. Returns the slot index, or -1 when every slot is live.
// The linear scan is fine at the task counts this shell supports.
static int32_t findFreeSlot(void) {
    ptrdiff_t total = arrlen(tasks);
    ptrdiff_t slot = 1;
    while (slot < total) {
        if (!tasks[slot].allocated) {
            return (int32_t)slot;
        }
        slot++;
    }
    return -1;
}
// Scan forward from currentIdx (wrapping around the array) for a ready
// task that still holds credits. On success, consume one credit and
// return the slot index; otherwise return -1. Starting at
// (currentIdx + 1) keeps round-robin fairness among equal-priority
// tasks: no task is picked twice in a row unless it is the only one.
static int32_t scanReadyWithCredits(void) {
    uint32_t count = (uint32_t)arrlen(tasks);
    for (uint32_t i = 1; i <= count; i++) {
        uint32_t idx = (currentIdx + i) % count;
        if (tasks[idx].allocated && tasks[idx].state == TaskStateReady && tasks[idx].credits > 0) {
            tasks[idx].credits--;
            return (int32_t)idx;
        }
    }
    return -1;
}
// Find the next task to run using credit-based weighted round-robin
// (a variant of Linux 2.4's goodness() scheduler). Each ready task holds
// credits; one credit is consumed per scheduling turn. When no ready
// task has credits left, every ready task is refilled ("epoch"), so all
// tasks run while higher-priority tasks run proportionally more often.
//
// Fix: the epoch refill clamps the grant to at least 1 credit.
// tsSetPriority/tsCreate accept any int32_t priority, and the old
// unclamped "priority + 1" meant a negative priority produced a grant
// of <= 0 -- the task could never earn credits and was starved forever,
// breaking the "all tasks run" guarantee this scheduler promises.
//
// Returns the chosen slot index, or currentIdx when no ready task exists
// (everything paused/terminated) so the caller stays on the current task.
static uint32_t scheduleNext(void) {
    // First pass: someone may still have credits this epoch.
    int32_t pick = scanReadyWithCredits();
    if (pick >= 0) {
        return (uint32_t)pick;
    }
    // All credits exhausted -- start a new epoch by refilling every
    // ready task. The running task is not Ready and is not refilled here;
    // it gets its grant after it yields and a later epoch begins.
    uint32_t count = (uint32_t)arrlen(tasks);
    bool anyReady = false;
    for (uint32_t i = 0; i < count; i++) {
        if (tasks[i].allocated && tasks[i].state == TaskStateReady) {
            int32_t grant = tasks[i].priority + 1;
            // Clamp: every ready task must receive >= 1 credit per epoch.
            tasks[i].credits = (grant > 0) ? grant : 1;
            anyReady = true;
        }
    }
    if (!anyReady) {
        return currentIdx;
    }
    // Second pass: guaranteed to find someone now that credits exist.
    pick = scanReadyWithCredits();
    return (pick >= 0) ? (uint32_t)pick : currentIdx;
}
// Entry point for every new task. The first context switch into a new task
// jumps here (via the EIP/RIP set up in tsCreate). This is a trampoline
// rather than calling entry directly because we need to call tsExit() when
// the entry function returns -- if we just set EIP to the entry function,
// it would return to a garbage address (the dummy 0 on the stack).
// The trampoline ensures clean task termination even if the app forgets
// to call tsExit() explicitly.
static void taskTrampoline(void) {
TaskBlockT *task = &tasks[currentIdx];
task->entry(task->arg);
tsExit();
}
// ============================================================================
// Public API (alphabetical, main-equivalent functions last if applicable)
// ============================================================================
// Count live tasks: allocated slots whose state is not Terminated
// (includes the main task). Returns 0 before tsInit/after tsShutdown.
uint32_t tsActiveCount(void) {
    if (!initialized) {
        return 0;
    }
    uint32_t live = 0;
    ptrdiff_t total = arrlen(tasks);
    ptrdiff_t slot;
    for (slot = 0; slot < total; slot++) {
        bool counts = tasks[slot].allocated && tasks[slot].state != TaskStateTerminated;
        if (counts) {
            live++;
        }
    }
    return live;
}
// Create a new task that will run 'entry(arg)' on its own stack.
// name      - display name (may be NULL); truncated to TS_NAME_MAX - 1
// entry     - task entry function (required)
// stackSize - requested stack bytes; 0 selects TS_DEFAULT_STACK_SIZE,
//             and tiny requests are floored (see below)
// priority  - scheduling weight (higher runs proportionally more often)
// Returns the new task id (slot index), TS_ERR_PARAM on bad arguments,
// or TS_ERR_NOMEM if the stack allocation fails.
int32_t tsCreate(const char *name, TaskEntryT entry, void *arg, uint32_t stackSize, int32_t priority) {
    if (!initialized || !entry) {
        return TS_ERR_PARAM;
    }
    if (stackSize == 0) {
        stackSize = TS_DEFAULT_STACK_SIZE;
    }
    // Floor tiny requests: the alignment math below strips up to 15 bytes
    // and then pushes a dummy return address, so a minuscule stack (e.g.
    // 8 bytes) would underflow the allocation and corrupt the heap; the
    // trampoline also needs room for its first call frames.
    enum { TS_STACK_FLOOR = 4096 };
    if (stackSize < TS_STACK_FLOOR) {
        stackSize = TS_STACK_FLOOR;
    }
    // Reuse a terminated/free slot, or append a new one. Recycling avoids
    // unbounded array growth when apps are repeatedly launched and
    // terminated over the lifetime of the shell.
    int32_t id = findFreeSlot();
    if (id < 0) {
        TaskBlockT blank = {0};
        arrput(tasks, blank);
        id = (int32_t)(arrlen(tasks) - 1);
    }
    TaskBlockT *task = &tasks[id];
    // Defensive: release any stack a recycled slot may still own BEFORE
    // the memset below wipes the pointer -- otherwise that allocation
    // would leak for the life of the process. free(NULL) is a no-op.
    free(task->stack);
    memset(task, 0, sizeof(*task));
    task->stack = (uint8_t *)malloc(stackSize);
    if (!task->stack) {
        // Slot stays zeroed (allocated == false), so it remains recyclable.
        return TS_ERR_NOMEM;
    }
    if (name) {
        strncpy(task->name, name, TS_NAME_MAX - 1);
        task->name[TS_NAME_MAX - 1] = '\0';
    }
    task->stackSize = stackSize;
    task->state = TaskStateReady;
    task->priority = priority;
    task->credits = priority + 1;
    task->entry = entry;
    task->arg = arg;
    task->isMain = false;
    task->allocated = true;
    // Set up the initial stack (grows downward). Align the top to 16
    // bytes per the ABI, then push a dummy return address (0) to mimic
    // a CALL instruction -- this keeps the stack layout the trampoline
    // expects. The dummy address is never used because taskTrampoline
    // calls tsExit() and never returns, but debuggers and ABI checkers
    // expect a return address at the bottom of each frame.
    uintptr_t top = (uintptr_t)(task->stack + stackSize);
    top &= ~(uintptr_t)0xF;
    top -= sizeof(uintptr_t);
    *(uintptr_t *)top = 0; // dummy return address; trampoline never returns
#if defined(__x86_64__)
    task->context.rsp = top;
    task->context.rbp = 0;
    task->context.rbx = 0;
    task->context.r12 = 0;
    task->context.r13 = 0;
    task->context.r14 = 0;
    task->context.r15 = 0;
    task->context.rip = (uintptr_t)taskTrampoline;
#else
    task->context.esp = top;
    task->context.ebp = 0;
    task->context.ebx = 0;
    task->context.esi = 0;
    task->context.edi = 0;
    task->context.eip = (uintptr_t)taskTrampoline;
#endif
    return id;
}
// Report the id (slot index) of the task that is executing right now.
// Before tsInit this is 0, which is also the main task's id.
uint32_t tsCurrentId(void) {
    uint32_t runningTask = currentIdx;
    return runningTask;
}
// Self-termination: mark the slot terminated/recyclable and switch to the
// next task. Never returns. A no-op for the main task (and before init).
//
// Bug fix (use-after-free): the previous version freed the task's stack
// here, but we are still EXECUTING on that stack -- scheduleNext() and
// contextSwitch() push frames onto it after the free, and free() itself
// may write allocator metadata into the block, so a later malloc could
// hand out memory that is still in active use. The stack pointer is
// therefore left in the slot: tsShutdown() frees every slot's stack
// unconditionally, and slot recycling can release it before reuse.
//
// We still save into tasks[prev].context even though it is never restored:
// contextSwitch always writes through its save pointer; the stale data is
// harmless and is overwritten when the slot is recycled.
void tsExit(void) {
    if (!initialized || tasks[currentIdx].isMain) {
        return;
    }
    tasks[currentIdx].state = TaskStateTerminated;
    // NOTE: tasks[currentIdx].stack is intentionally NOT freed here --
    // we are running on it until contextSwitch() lands on another stack.
    tasks[currentIdx].allocated = false;
    // A non-main task always has the main task (Ready) to switch to, so
    // scheduleNext() cannot return currentIdx on this path.
    uint32_t next = scheduleNext();
    uint32_t prev = currentIdx;
    currentIdx = next;
    tasks[next].state = TaskStateRunning;
    contextSwitch(&tasks[prev].context, &tasks[next].context);
    // Not reached: nothing ever switches back to a terminated context.
}
// Return the task's display name, or NULL if the id is out of range,
// the slot is not in use, or the module is not initialized.
const char *tsGetName(uint32_t taskId) {
    bool known = initialized
        && taskId < (uint32_t)arrlen(tasks)
        && tasks[taskId].allocated;
    return known ? tasks[taskId].name : NULL;
}
// Return the task's scheduling priority, or TS_ERR_PARAM if the id is
// invalid, the slot is free, or the module is not initialized.
int32_t tsGetPriority(uint32_t taskId) {
    if (!initialized) {
        return TS_ERR_PARAM;
    }
    if (taskId >= (uint32_t)arrlen(tasks) || !tasks[taskId].allocated) {
        return TS_ERR_PARAM;
    }
    return tasks[taskId].priority;
}
// Return the task's current state. Unknown ids and reaped slots report
// TaskStateTerminated (not an error code), so callers can poll any id
// without special-casing recycled slots.
TaskStateE tsGetState(uint32_t taskId) {
    bool known = initialized
        && taskId < (uint32_t)arrlen(tasks)
        && tasks[taskId].allocated;
    return known ? tasks[taskId].state : TaskStateTerminated;
}
// Register the calling context as task 0 ("main"). No stack is allocated
// for it -- the main task runs on the process stack. Its context struct
// is filled in by contextSwitch on the first tsYield(); until then the
// saved EIP/ESP are zero, which is fine because task 0 is never restored
// from a cold start. Returns TS_OK, or TS_ERR_PARAM on double init.
int32_t tsInit(void) {
    if (initialized) {
        return TS_ERR_PARAM;
    }
    // Slot 0 is reserved for the main task forever.
    TaskBlockT mainTask = {0};
    strncpy(mainTask.name, "main", TS_NAME_MAX - 1);
    mainTask.state = TaskStateRunning;
    mainTask.priority = TS_PRIORITY_NORMAL;
    mainTask.credits = TS_PRIORITY_NORMAL + 1;
    mainTask.isMain = true;
    mainTask.stack = NULL;
    mainTask.allocated = true;
    arrput(tasks, mainTask);
    currentIdx = 0;
    initialized = true;
    return TS_OK;
}
// Forcibly terminate another task. This is safe in a cooperative system
// because the target is guaranteed to be suspended at a yield point -- it
// cannot be in the middle of a critical section. The stack is freed and
// the slot is recycled immediately.
//
// Cannot kill self (use tsExit instead) -- killing self would free the
// stack we're currently executing on. Cannot kill main (task 0) because
// the shell's main loop must always be runnable for crash recovery.
//
// The shell uses this for two purposes:
// 1. shellForceKillApp: "End Task" from the task manager
// 2. Crash recovery: after a signal handler longjmps to main, the
// crashed task's slot is cleaned up via tsKill
int32_t tsKill(uint32_t taskId) {
if (!initialized || taskId >= (uint32_t)arrlen(tasks)) {
return TS_ERR_PARAM;
}
if (!tasks[taskId].allocated) {
return TS_ERR_PARAM;
}
if (tasks[taskId].isMain) {
return TS_ERR_STATE;
}
if (taskId == currentIdx) {
return TS_ERR_STATE;
}
if (tasks[taskId].state == TaskStateTerminated) {
return TS_ERR_STATE;
}
tasks[taskId].state = TaskStateTerminated;
free(tasks[taskId].stack);
tasks[taskId].stack = NULL;
tasks[taskId].allocated = false;
return TS_OK;
}
int32_t tsPause(uint32_t taskId) {
if (!initialized || taskId >= (uint32_t)arrlen(tasks)) {
return TS_ERR_PARAM;
}
if (!tasks[taskId].allocated) {
return TS_ERR_PARAM;
}
if (tasks[taskId].isMain) {
return TS_ERR_STATE;
}
if (tasks[taskId].state != TaskStateRunning && tasks[taskId].state != TaskStateReady) {
return TS_ERR_STATE;
}
tasks[taskId].state = TaskStatePaused;
// If we paused ourselves, must yield immediately -- a paused task
// won't be selected by scheduleNext, so staying on CPU would deadlock.
// If pausing another task, no yield needed; it will simply be skipped
// the next time the scheduler scans.
if (taskId == currentIdx) {
uint32_t next = scheduleNext();
if (next != currentIdx) {
uint32_t prev = currentIdx;
currentIdx = next;
tasks[next].state = TaskStateRunning;
contextSwitch(&tasks[prev].context, &tasks[next].context);
}
}
return TS_OK;
}
// Emergency recovery after a crash in an app task. When a signal handler
// fires (e.g., SIGSEGV), DJGPP's signal dispatch saves the exception
// state and calls our handler. The handler does longjmp back to the
// shell's setjmp point in main(), which restores the main task's stack.
// However, the task switcher's currentIdx still points to the crashed
// app task. This function fixes the bookkeeping so the scheduler treats
// task 0 as the running task again.
//
// The crashed task's slot is NOT freed here -- its stack is corrupt and
// the caller (shellMain's crash recovery) must call shellForceKillApp
// to clean it up properly (destroying windows, closing DXE, etc.).
void tsRecoverToMain(void) {
    // Repoint the scheduler's bookkeeping at the main task (slot 0).
    // The crashed task's slot is deliberately untouched; the caller's
    // crash-recovery path is responsible for reaping it.
    if (initialized) {
        currentIdx = 0;
        tasks[0].state = TaskStateRunning;
    }
}
int32_t tsResume(uint32_t taskId) {
if (!initialized || taskId >= (uint32_t)arrlen(tasks)) {
return TS_ERR_PARAM;
}
if (!tasks[taskId].allocated) {
return TS_ERR_PARAM;
}
if (tasks[taskId].state != TaskStatePaused) {
return TS_ERR_STATE;
}
// Transition from Paused back to Ready and refill credits immediately.
// Without the refill, a resumed task might have 0 credits and would have
// to wait for the next epoch to run, making resume feel sluggish.
tasks[taskId].state = TaskStateReady;
tasks[taskId].credits = tasks[taskId].priority + 1;
return TS_OK;
}
int32_t tsSetPriority(uint32_t taskId, int32_t priority) {
if (!initialized || taskId >= (uint32_t)arrlen(tasks)) {
return TS_ERR_PARAM;
}
if (!tasks[taskId].allocated) {
return TS_ERR_PARAM;
}
if (tasks[taskId].state == TaskStateTerminated) {
return TS_ERR_STATE;
}
tasks[taskId].priority = priority;
tasks[taskId].credits = priority + 1;
return TS_OK;
}
void tsShutdown(void) {
if (!initialized) {
return;
}
ptrdiff_t count = arrlen(tasks);
for (ptrdiff_t i = 0; i < count; i++) {
free(tasks[i].stack);
}
arrfree(tasks);
tasks = NULL;
currentIdx = 0;
initialized = false;
}
// The core cooperative yield. Called explicitly by app code (or implicitly
// via the shell's idle callback and main loop). If no other task is ready,
// returns immediately -- no context switch overhead when running solo.
//
// The state transition: current task moves Running -> Ready (still
// schedulable), next task moves Ready -> Running. The previous task will
// resume here when someone else yields and the scheduler picks it again.
//
// Statement order is load-bearing: scheduleNext() runs while the current
// task is still in the Running state, so the scheduler can never select
// it -- next == currentIdx therefore means "nothing else is runnable"
// and we return without touching any state.
void tsYield(void) {
if (!initialized) {
return;
}
uint32_t next = scheduleNext();
if (next == currentIdx) {
return;
}
uint32_t prev = currentIdx;
// Only transition to Ready if still Running -- a task that paused itself
// will already be in Paused state when tsYield is called from tsPause.
if (tasks[prev].state == TaskStateRunning) {
tasks[prev].state = TaskStateReady;
}
// currentIdx must be updated BEFORE the switch: the next task (or its
// trampoline) reads currentIdx as soon as it resumes.
currentIdx = next;
tasks[next].state = TaskStateRunning;
contextSwitch(&tasks[prev].context, &tasks[next].context);
// Execution resumes here (much later) when this task is rescheduled.
}