// Source: DVX_GUI/dvx/platform/dvxPlatformDos.c (1666 lines, 54 KiB, C)

// dvxPlatformDos.c — DOS/DJGPP platform implementation for DVX GUI
//
// All BIOS calls, DPMI functions, port I/O, inline assembly, and
// DOS-specific file handling are isolated in this single file.
//
// This file is the ONLY place where DJGPP headers (dpmi.h, go32.h,
// sys/nearptr.h, etc.) appear. Every other DVX module calls through
// the dvxPlatform.h interface, so porting to a new OS (Linux/SDL,
// Win32, or bare-metal ARM) requires replacing only this file and
// nothing else. The abstraction covers five domains:
// 1. VESA VBE video init / mode set / LFB mapping
// 2. Backbuffer-to-LFB flush using rep movsl
// 3. Span fill/copy primitives using inline asm (rep stosl / rep movsl)
// 4. Mouse input via INT 33h driver
// 5. Keyboard input via BIOS INT 16h
//
// Why BIOS INT 16h for keyboard instead of direct port I/O (scancode
// reading from port 0x60): BIOS handles typematic repeat, keyboard
// translation tables, and extended key decoding. Direct port I/O would
// require reimplementing all of that, and the DPMI host already hooks
// IRQ1 to feed the BIOS buffer. The BIOS approach is simpler and more
// portable across emulators (DOSBox, 86Box, PCem all handle it correctly).
//
// Why INT 33h for mouse: same rationale — the mouse driver handles
// PS/2 and serial mice transparently, and every DOS emulator provides
// a compatible driver. Polling via function 03h avoids the complexity
// of installing a real-mode callback for mouse events.
#include "dvxPlatform.h"
#include "../dvxPalette.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <signal.h>
// DJGPP-specific headers — this is the ONLY file that includes these
#include <dpmi.h>
#include <go32.h>
#include <pc.h>
#include <sys/nearptr.h>
#include <sys/farptr.h>
// ============================================================
// Prototypes
// ============================================================
// Forward declarations so the definitions below can appear in any order.
// CPU clock estimate in MHz, calibrated from RDTSC against the BIOS timer.
static uint32_t estimateClockMhz(void);
// Scans the VBE mode list; returns 0 and fills *outMode / *d, or -1 on failure.
static int32_t findBestMode(int32_t requestedW, int32_t requestedH, int32_t preferredBpp, uint16_t *outMode, DisplayT *d);
// Queries one VBE mode; *score is -1 when the mode is rejected.
static void getModeInfo(uint16_t mode, DisplayT *d, int32_t *score, int32_t requestedW, int32_t requestedH, int32_t preferredBpp);
// True when the EFLAGS ID bit can be toggled (CPUID is available).
static bool hasCpuid(void);
// Maps the physical LFB into near-pointer space; 0 on success, -1 on failure.
static int32_t mapLfb(DisplayT *d, uint32_t physAddr);
// Not static: this one has external linkage, unlike its neighbours.
void platformVideoEnumModes(void (*cb)(int32_t w, int32_t h, int32_t bpp, void *userData), void *userData);
// NOTE(review): defined outside the portion of the file reviewed here.
static int32_t setVesaMode(uint16_t mode);
// printf-style, newline-terminated append to the system info buffer.
static void sysInfoAppend(const char *fmt, ...);
// ============================================================
// Module state
// ============================================================
// Wheel state: set by platformMouseWheelInit, read by platformMousePoll
// and platformGetSystemInfo. Declared here (above all functions) so
// every function in the file can see them.
// Reported on the "Wheel:" line of platformGetSystemInfo().
static bool sHasMouseWheel = false;
// NOTE(review): presumably the most recent wheel delta; its readers and
// writers are outside the portion of the file reviewed here.
static int32_t sLastWheelDelta = 0;
// Alt+key scan code to ASCII lookup table (indexed by BIOS scan code).
// INT 16h returns these scan codes with ascii=0 for Alt+key combos.
// Using a 256-byte lookup table instead of a switch or if-chain because
// this is called on every keypress and the table fits in a single cache
// line cluster. The designated initializer syntax leaves all other
// entries as zero, which is the "no mapping" sentinel.
// Looked up by platformAltScanToChar(). Any index not listed below is
// implicitly zero, which callers treat as "no mapping".
static const char sAltScanToAscii[256] = {
// Alt+letters
[0x10] = 'q', [0x11] = 'w', [0x12] = 'e', [0x13] = 'r',
[0x14] = 't', [0x15] = 'y', [0x16] = 'u', [0x17] = 'i',
[0x18] = 'o', [0x19] = 'p', [0x1E] = 'a', [0x1F] = 's',
[0x20] = 'd', [0x21] = 'f', [0x22] = 'g', [0x23] = 'h',
[0x24] = 'j', [0x25] = 'k', [0x26] = 'l', [0x2C] = 'z',
[0x2D] = 'x', [0x2E] = 'c', [0x2F] = 'v', [0x30] = 'b',
[0x31] = 'n', [0x32] = 'm',
// Alt+digits
[0x78] = '1', [0x79] = '2', [0x7A] = '3', [0x7B] = '4',
[0x7C] = '5', [0x7D] = '6', [0x7E] = '7', [0x7F] = '8',
[0x80] = '9', [0x81] = '0',
};
// ============================================================
// findBestMode
// ============================================================
//
// Enumerates all VESA VBE modes and selects the best match for the
// requested resolution and color depth using a scoring algorithm.
//
// The approach is: call VBE function 0x4F00 to get the controller
// info block (which contains a pointer to the mode list), then call
// VBE function 0x4F01 for each mode to get its attributes. Each mode
// is scored by getModeInfo() and the highest-scoring mode wins.
//
// Why scoring instead of exact-match: real VESA BIOSes vary wildly in
// what modes they expose. Some have 640x480x16 but not x32; some only
// have 800x600. The scoring heuristic picks the closest usable mode
// rather than failing outright if the exact requested mode is absent.
//
// All VBE info block reads use DJGPP's far pointer API (_farpeekb/w/l)
// to access the DPMI transfer buffer (__tb), which lives in the first
// 1MB of address space (conventional memory). VBE BIOS calls use
// real-mode interrupts via __dpmi_int(), so all data must pass through
// the transfer buffer.
// Picks the highest-scoring VBE mode for the requested resolution.
//
//   requestedW/H - minimum acceptable resolution in pixels
//   preferredBpp - colour depth that earns a scoring bonus in getModeInfo()
//   outMode      - receives the winning VBE mode number
//   d            - receives the winning mode's geometry and pixel format;
//                  d->lfb holds the PHYSICAL framebuffer address as stored
//                  by getModeInfo()
//
// Returns 0 on success. Returns -1 (after printing a diagnostic to stderr)
// if the VBE query fails, the signature is wrong, the VBE version is below
// 2.0, or no mode scores >= 0. *outMode and *d are written only on success.
static int32_t findBestMode(int32_t requestedW, int32_t requestedH, int32_t preferredBpp, uint16_t *outMode, DisplayT *d) {
    __dpmi_regs r;
    uint16_t    modeList[256];
    int32_t     modeCount = 0;
    uint16_t    bestMode  = 0;
    int32_t     bestScore = -1;
    DisplayT    bestDisplay;

    memset(&bestDisplay, 0, sizeof(bestDisplay));

    // Writing "VBE2" into the transfer buffer asks the BIOS for the
    // VBE 2.0+ flavour of the controller info block.
    _farpokeb(_dos_ds, __tb + 0, 'V');
    _farpokeb(_dos_ds, __tb + 1, 'B');
    _farpokeb(_dos_ds, __tb + 2, 'E');
    _farpokeb(_dos_ds, __tb + 3, '2');

    // The transfer buffer lives in conventional memory; hand it to the
    // real-mode BIOS as a seg:off pair in ES:DI.
    memset(&r, 0, sizeof(r));
    r.x.ax = 0x4F00;
    r.x.es = __tb >> 4;
    r.x.di = __tb & 0x0F;
    __dpmi_int(0x10, &r);

    // VBE functions return 0x004F in AX on success.
    if (r.x.ax != 0x004F) {
        fprintf(stderr, "VBE: Function 0x4F00 failed (AX=0x%04X)\n", r.x.ax);
        return -1;
    }

    // On success the BIOS replaces "VBE2" with "VESA".
    char sig[5];
    for (int32_t i = 0; i < 4; i++) {
        sig[i] = _farpeekb(_dos_ds, __tb + i);
    }
    sig[4] = '\0';
    if (strcmp(sig, "VESA") != 0) {
        fprintf(stderr, "VBE: Bad signature '%s'\n", sig);
        return -1;
    }

    // Linear framebuffers need VBE 2.0+; bank switching is not implemented.
    uint16_t vbeVersion = _farpeekw(_dos_ds, __tb + 4);
    if (vbeVersion < 0x0200) {
        fprintf(stderr, "VBE: Version %d.%d too old (need 2.0+)\n",
                vbeVersion >> 8, vbeVersion & 0xFF);
        return -1;
    }

    // Offset 14 holds a real-mode far pointer (off, seg) to an array of
    // mode numbers terminated by 0xFFFF.
    uint16_t modeListOff  = _farpeekw(_dos_ds, __tb + 14);
    uint16_t modeListSeg  = _farpeekw(_dos_ds, __tb + 16);
    uint32_t modeListAddr = ((uint32_t)modeListSeg << 4) + modeListOff;

    // Snapshot the whole list BEFORE querying any mode. The BIOS is allowed
    // to place the list inside the info block we just received, and every
    // getModeInfo() call overwrites that same transfer buffer with mode
    // data. The array bound doubles as the runaway guard for a missing
    // terminator.
    while (modeCount < (int32_t)(sizeof(modeList) / sizeof(modeList[0]))) {
        uint16_t mode = _farpeekw(_dos_ds, modeListAddr + modeCount * 2);
        if (mode == 0xFFFF) {
            break;
        }
        modeList[modeCount++] = mode;
    }

    // Score every mode; the first mode with the highest score wins.
    for (int32_t i = 0; i < modeCount; i++) {
        DisplayT candidate;
        int32_t  score = 0;

        memset(&candidate, 0, sizeof(candidate));
        getModeInfo(modeList[i], &candidate, &score, requestedW, requestedH, preferredBpp);

        if (score > bestScore) {
            bestScore   = score;
            bestMode    = modeList[i];
            bestDisplay = candidate;
        }
    }

    if (bestScore < 0) {
        fprintf(stderr, "VBE: No suitable mode found for %ldx%ld\n", (long)requestedW, (long)requestedH);
        return -1;
    }

    *outMode = bestMode;
    *d       = bestDisplay;
    return 0;
}
// ============================================================
// platformVideoEnumModes
// ============================================================
// Reports every hardware-supported, LFB-capable VBE graphics mode to `cb`.
//
//   cb       - invoked once per mode with width, height and bits per pixel;
//              passing NULL makes the call a no-op
//   userData - handed to cb unchanged
//
// Returns without calling cb if the VBE controller info query fails or the
// returned block does not carry the "VESA" signature. Modes whose info
// query fails are skipped.
void platformVideoEnumModes(void (*cb)(int32_t w, int32_t h, int32_t bpp, void *userData), void *userData) {
    __dpmi_regs r;
    uint16_t    modeList[256];
    int32_t     modeCount = 0;

    if (!cb) {
        return;
    }

    // Write "VBE2" signature to request VBE 2.0+ info
    _farpokeb(_dos_ds, __tb + 0, 'V');
    _farpokeb(_dos_ds, __tb + 1, 'B');
    _farpokeb(_dos_ds, __tb + 2, 'E');
    _farpokeb(_dos_ds, __tb + 3, '2');

    memset(&r, 0, sizeof(r));
    r.x.ax = 0x4F00;
    r.x.es = __tb >> 4;
    r.x.di = __tb & 0x0F;
    __dpmi_int(0x10, &r);
    if (r.x.ax != 0x004F) {
        return;
    }

    // Same sanity check findBestMode() applies: the BIOS must have replaced
    // "VBE2" with "VESA", otherwise the block contents are not trustworthy.
    char sig[4];
    for (int32_t i = 0; i < 4; i++) {
        sig[i] = _farpeekb(_dos_ds, __tb + i);
    }
    if (memcmp(sig, "VESA", 4) != 0) {
        return;
    }

    uint16_t modeListOff  = _farpeekw(_dos_ds, __tb + 14);
    uint16_t modeListSeg  = _farpeekw(_dos_ds, __tb + 16);
    uint32_t modeListAddr = ((uint32_t)modeListSeg << 4) + modeListOff;

    // Copy the 0xFFFF-terminated mode list out first: it may live inside
    // the info block in the transfer buffer, which each 0x4F01 call below
    // overwrites. The array bound also caps a list with no terminator.
    while (modeCount < (int32_t)(sizeof(modeList) / sizeof(modeList[0]))) {
        uint16_t mode = _farpeekw(_dos_ds, modeListAddr + modeCount * 2);
        if (mode == 0xFFFF) {
            break;
        }
        modeList[modeCount++] = mode;
    }

    for (int32_t i = 0; i < modeCount; i++) {
        memset(&r, 0, sizeof(r));
        r.x.ax = 0x4F01;
        r.x.cx = modeList[i];
        r.x.es = __tb >> 4;
        r.x.di = __tb & 0x0F;
        __dpmi_int(0x10, &r);
        if (r.x.ax != 0x004F) {
            continue;
        }

        // Mode attributes: bit 0 = supported by the current hardware
        // configuration, bit 4 = graphics mode, bit 7 = LFB available.
        // All three are required for a mode to be worth reporting.
        uint16_t attr = _farpeekw(_dos_ds, __tb + 0);
        if ((attr & 0x0091) != 0x0091) {
            continue;
        }

        int32_t w   = _farpeekw(_dos_ds, __tb + 18);
        int32_t h   = _farpeekw(_dos_ds, __tb + 20);
        int32_t bpp = _farpeekb(_dos_ds, __tb + 25);
        cb(w, h, bpp, userData);
    }
}
// ============================================================
// getModeInfo
// ============================================================
//
// Queries VBE mode info (function 0x4F01) for a single mode and
// scores it against the requested parameters. The scoring algorithm:
//
// Base score by bpp: 16-bit=100, 15-bit=90, 32-bit=85, 8-bit=70
// +20 if bpp matches preferredBpp
// +10 if exact resolution match, -10 if oversize
// -1 (rejected) if mode lacks LFB, is text-mode, is below requested
// resolution, or uses an unsupported bpp (e.g. 24-bit)
//
// 16-bit is preferred over 32-bit because it's twice as fast for
// span fill/copy on a 486/Pentium bus (half the bytes). 15-bit scores
// slightly below 16-bit because some VESA BIOSes report 15bpp modes
// as 16bpp with a dead high bit, causing confusion. 8-bit scores
// lowest because palette management adds complexity.
//
// 24-bit is explicitly rejected (not 8/15/16/32) because its 3-byte
// pixels can't use dword-aligned rep stosl fills without masking.
//
// The physical LFB address is temporarily stored in d->lfb as a raw
// integer cast — it will be properly mapped via DPMI in mapLfb() later.
// Builds the pixel mask for one colour channel from the size/position bytes
// of the VBE mode info block. Out-of-range values from a broken BIOS yield a
// defined result instead of an undefined shift.
static uint32_t vbeChannelMask(int32_t size, int32_t pos) {
    if (size <= 0 || pos >= 32) {
        return 0;
    }
    uint32_t bits = (size >= 32) ? 0xFFFFFFFFU : ((1U << size) - 1U);
    return bits << pos;
}

// Queries VBE function 0x4F01 for `mode` and scores it against the request.
//
//   mode          - VBE mode number to query
//   d             - filled with width/height/pitch/format when accepted;
//                   d->lfb receives the PHYSICAL LFB address as a raw cast
//   score         - set to -1 when the mode is rejected, otherwise to the
//                   computed score (higher is better)
//   requestedW/H  - minimum resolution; smaller modes are rejected
//   preferredBpp  - depth that earns a +20 bonus
//
// *d is left untouched when the mode is rejected. For 8-bit modes the
// channel fields of d->format are not written.
static void getModeInfo(uint16_t mode, DisplayT *d, int32_t *score, int32_t requestedW, int32_t requestedH, int32_t preferredBpp) {
    __dpmi_regs r;

    *score = -1;

    memset(&r, 0, sizeof(r));
    r.x.ax = 0x4F01;
    r.x.cx = mode;
    r.x.es = __tb >> 4;
    r.x.di = __tb & 0x0F;
    __dpmi_int(0x10, &r);
    if (r.x.ax != 0x004F) {
        return;
    }

    // VBE mode attribute word at offset 0:
    //   bit 0 = mode supported by the current hardware configuration
    //   bit 4 = graphics mode (not text)
    //   bit 7 = linear framebuffer available
    // A mode the BIOS lists but flags as unsupported cannot be set, and we
    // never bank-switch or use text modes, so all three bits are required.
    uint16_t attr = _farpeekw(_dos_ds, __tb + 0);
    if ((attr & 0x0091) != 0x0091) {
        return;
    }

    int32_t  w        = _farpeekw(_dos_ds, __tb + 18);
    int32_t  h        = _farpeekw(_dos_ds, __tb + 20);
    int32_t  bpp      = _farpeekb(_dos_ds, __tb + 25);
    int32_t  pitch    = _farpeekw(_dos_ds, __tb + 16);
    uint32_t physAddr = _farpeekl(_dos_ds, __tb + 40);

    // Must match or exceed requested resolution
    if (w < requestedW || h < requestedH) {
        return;
    }

    // A mode that claims an LFB but reports address 0 has nothing to map.
    if (physAddr == 0) {
        return;
    }

    // Base score by depth; any other depth (e.g. 24-bit) is unsupported.
    int32_t s;
    switch (bpp) {
        case 16: s = 100; break;
        case 15: s = 90;  break;
        case 32: s = 85;  break;
        case 8:  s = 70;  break;
        default: return;
    }

    // Prefer the user's preferred bpp
    if (bpp == preferredBpp) {
        s += 20;
    }

    // Exact resolution match is preferred; oversize modes are penalised.
    s += (w == requestedW && h == requestedH) ? 10 : -10;

    *score = s;

    // Fill in display info
    d->width                = w;
    d->height               = h;
    d->pitch                = pitch;
    d->format.bitsPerPixel  = bpp;
    d->format.bytesPerPixel = (bpp + 7) / 8;

    // Direct-colour modes: read each channel's size and bit position from
    // offsets 31-36 of the mode info block. The layout is card-specific
    // (RGB565, BGR565, ...), so it must not be assumed.
    if (bpp >= 15) {
        int32_t redSize   = _farpeekb(_dos_ds, __tb + 31);
        int32_t redPos    = _farpeekb(_dos_ds, __tb + 32);
        int32_t greenSize = _farpeekb(_dos_ds, __tb + 33);
        int32_t greenPos  = _farpeekb(_dos_ds, __tb + 34);
        int32_t blueSize  = _farpeekb(_dos_ds, __tb + 35);
        int32_t bluePos   = _farpeekb(_dos_ds, __tb + 36);

        d->format.redBits    = redSize;
        d->format.redShift   = redPos;
        d->format.redMask    = vbeChannelMask(redSize, redPos);
        d->format.greenBits  = greenSize;
        d->format.greenShift = greenPos;
        d->format.greenMask  = vbeChannelMask(greenSize, greenPos);
        d->format.blueBits   = blueSize;
        d->format.blueShift  = bluePos;
        d->format.blueMask   = vbeChannelMask(blueSize, bluePos);
    }

    // Store physical address in lfb field temporarily (will be remapped)
    d->lfb = (uint8_t *)(uintptr_t)physAddr;
}
// ============================================================
// mapLfb
// ============================================================
//
// Maps the video card's physical LFB address into the DPMI linear
// address space, then converts it to a near pointer for direct C
// access.
//
// The mapping process has three steps:
// 1. __dpmi_physical_address_mapping() — asks the DPMI host to
// create a linear address mapping for the physical framebuffer.
// This is necessary because DPMI runs in protected mode with
// paging; physical addresses aren't directly accessible.
// 2. __dpmi_lock_linear_region() — pins the mapped pages so they
// can't be swapped out. The LFB is memory-mapped I/O to the
// video card; paging it would be catastrophic.
// 3. __djgpp_nearptr_enable() — disables DJGPP's default segment
// limit checking so we can use plain C pointers to access the
// LFB address. Without this, all LFB access would require far
// pointer calls (_farpokeb etc.), which are much slower because
// each one involves a segment register load.
//
// Why near pointers: the performance difference is dramatic.
// platformFlushRect() copies thousands of dwords per frame using
// rep movsl — this only works with near pointers. Far pointer access
// would add ~10 cycles per byte and make 60fps impossible on a 486.
//
// The final pointer calculation adds __djgpp_conventional_base, which
// is the offset DJGPP applies to convert linear addresses to near
// pointer addresses (compensating for the DS segment base).
// Maps pitch * height bytes of video memory starting at physAddr and stores
// the resulting near pointer in d->lfb.
//
// Returns 0 on success. Returns -1 (with a message on stderr) if the DPMI
// physical mapping fails or near pointers cannot be enabled; d->lfb is not
// written in that case. The result of the page-lock request is ignored.
static int32_t mapLfb(DisplayT *d, uint32_t physAddr) {
    const uint32_t frameBytes = (uint32_t)d->pitch * (uint32_t)d->height;

    // Step 1: obtain a linear address for the physical framebuffer.
    __dpmi_meminfo mapping;
    mapping.address = physAddr;
    mapping.size    = frameBytes;
    if (__dpmi_physical_address_mapping(&mapping) != 0) {
        fprintf(stderr, "VBE: Failed to map LFB at 0x%08lX\n", (unsigned long)physAddr);
        return -1;
    }

    // Step 2: ask the host to lock the mapped range (best effort).
    __dpmi_meminfo pinned;
    pinned.address = mapping.address;
    pinned.size    = frameBytes;
    __dpmi_lock_linear_region(&pinned);

    // Step 3: near pointers let plain C pointers reach the mapping.
    // A zero return means the request was refused.
    if (!__djgpp_nearptr_enable()) {
        fprintf(stderr, "VBE: Failed to enable near pointers\n");
        return -1;
    }

    // Linear address + DS base offset = address usable as a C pointer.
    d->lfb = (uint8_t *)(mapping.address + __djgpp_conventional_base);
    return 0;
}
// ============================================================
// platformAltScanToChar
// ============================================================
// Translates a BIOS Alt+key scan code to its character via sAltScanToAscii.
// Returns 0 for scan codes outside 0..255 and for codes with no mapping.
char platformAltScanToChar(int32_t scancode) {
    const bool inTable = (scancode >= 0) && (scancode <= 255);

    return inTable ? sAltScanToAscii[scancode] : 0;
}
// ============================================================
// platformFlushRect
// ============================================================
//
// Copies a dirty rectangle from the system RAM backbuffer to the LFB.
// This is the critical path for display updates — the compositor calls
// it once per dirty rect per frame.
//
// Two code paths:
// 1. Full-width: if the rect spans the entire scanline (rowBytes ==
// pitch), collapse all rows into a single large rep movsl. This
// avoids per-row loop overhead and is the common case for full-
// screen redraws.
// 2. Partial-width: copy each scanline individually with rep movsl,
// advancing src/dst by pitch (not rowBytes) between rows.
//
// rep movsl is used instead of memcpy() because on 486/Pentium, GCC's
// memcpy may not generate the optimal dword-aligned string move, and
// the DJGPP C library's memcpy isn't always tuned for large copies.
// The explicit asm guarantees exactly the instruction sequence we want.
//
// __builtin_expect hints tell GCC which branches are unlikely (empty
// rects, trailing bytes) so the common cases fall straight through.
// Copies byteCount bytes from `from` to `to`: whole dwords with rep movsl,
// then any 1-3 leftover bytes one at a time.
static inline void flushRectCopyBytes(uint8_t *to, const uint8_t *from, int32_t byteCount) {
    int32_t dwordCount = byteCount >> 2;
    int32_t tailBytes  = byteCount & 3;

    // EDI/ESI/ECX are updated by the instruction, hence read-write operands.
    __asm__ __volatile__ (
        "rep movsl"
        : "+D"(to), "+S"(from), "+c"(dwordCount)
        :
        : "memory"
    );

    if (__builtin_expect(tailBytes > 0, 0)) {
        do {
            *to++ = *from++;
        } while (--tailBytes > 0);
    }
}

// Copies rectangle `r` from d->backBuf to d->lfb. Both buffers are addressed
// with d->pitch bytes per row. A rectangle with non-positive width or height
// is ignored. No clipping is performed here.
void platformFlushRect(const DisplayT *d, const RectT *r) {
    const int32_t rectW = r->w;
    const int32_t rectH = r->h;

    if (__builtin_expect(rectW <= 0 || rectH <= 0, 0)) {
        return;
    }

    const int32_t bytesPerPixel = d->format.bytesPerPixel;
    const int32_t stride        = d->pitch;
    const int32_t spanBytes     = rectW * bytesPerPixel;
    const int32_t startOffset   = r->y * stride + r->x * bytesPerPixel;

    uint8_t *from = d->backBuf + startOffset;
    uint8_t *to   = d->lfb + startOffset;

    if (spanBytes == stride) {
        // Rows are contiguous: move the whole rectangle in one go.
        flushRectCopyBytes(to, from, stride * rectH);
        return;
    }

    // Partial-width rect: one copy per scanline, stepping by the pitch.
    for (int32_t row = 0; row < rectH; row++) {
        flushRectCopyBytes(to, from, spanBytes);
        from += stride;
        to   += stride;
    }
}
// ============================================================
// System information — static buffer and helpers
// ============================================================
// Text buffer returned by platformGetSystemInfo() and the write position
// within it. sSysInfoPos always indexes the terminating NUL.
static char    sSysInfoBuf[PLATFORM_SYSINFO_MAX];
static int32_t sSysInfoPos = 0;

// Appends one printf-formatted line (newline-terminated) to sSysInfoBuf.
//
//   fmt, ... - printf-style format string and arguments
//
// Text that does not fit is truncated; once the buffer is full, further
// calls do nothing. The buffer always stays NUL-terminated.
static void sysInfoAppend(const char *fmt, ...) {
    // Bytes left, including the slot for the terminating NUL.
    int32_t room = PLATFORM_SYSINFO_MAX - sSysInfoPos;

    if (room <= 1) {
        return;
    }

    va_list ap;
    va_start(ap, fmt);
    int32_t written = vsnprintf(sSysInfoBuf + sSysInfoPos, room, fmt, ap);
    va_end(ap);

    // vsnprintf reports the length the text WOULD have had. On truncation
    // only room - 1 characters were stored, so clamp before advancing;
    // otherwise the position would run past the end of the buffer.
    if (written >= room) {
        written = room - 1;
    }
    if (written > 0) {
        sSysInfoPos += written;
    }

    // Terminate the line if there is still space for '\n' plus the NUL.
    if (sSysInfoPos < PLATFORM_SYSINFO_MAX - 1) {
        sSysInfoBuf[sSysInfoPos] = '\n';
        sSysInfoPos++;
        sSysInfoBuf[sSysInfoPos] = '\0';
    }
}
// ============================================================
// estimateClockMhz — RDTSC calibration via BIOS timer
// ============================================================
//
// Measures TSC ticks over 3 BIOS timer ticks (~165 ms). The BIOS timer
// at 0040:006C increments at 18.2065 Hz (1193182 / 65536 Hz per tick).
// Using 3 ticks instead of 1 reduces jitter from interrupt latency and
// gives a more stable reading.
#define CLOCK_MEAS_TICKS 3

// Returns the estimated CPU clock in MHz: the TSC delta across
// CLOCK_MEAS_TICKS BIOS timer ticks divided by the elapsed microseconds.
// Busy-waits for the whole measurement. The caller is expected to have
// confirmed that RDTSC exists.
static uint32_t estimateClockMhz(void) {
    const uint32_t tickCounter = 0x46C; // linear address of 0040:006C

    // Align to a tick edge so the measurement starts on a boundary.
    const uint32_t initial = _farpeekl(_dos_ds, tickCounter);
    while (_farpeekl(_dos_ds, tickCounter) == initial) {
        // spin
    }

    // Starting TSC sample, taken right at the tick edge.
    uint32_t startLo;
    uint32_t startHi;
    __asm__ __volatile__ ("rdtsc" : "=a"(startLo), "=d"(startHi));

    // Spin until the counter has advanced by the measurement window.
    const uint32_t firstTick = _farpeekl(_dos_ds, tickCounter);
    for (;;) {
        if ((_farpeekl(_dos_ds, tickCounter) - firstTick) >= CLOCK_MEAS_TICKS) {
            break;
        }
    }

    // Ending TSC sample.
    uint32_t endLo;
    uint32_t endHi;
    __asm__ __volatile__ ("rdtsc" : "=a"(endLo), "=d"(endHi));

    const uint64_t startTsc = ((uint64_t)startHi << 32) | startLo;
    const uint64_t endTsc   = ((uint64_t)endHi << 32) | endLo;
    const uint64_t elapsed  = endTsc - startTsc;

    // One BIOS tick is 65536 / 1193182 s, about 54925 microseconds, and
    // cycles per microsecond is the clock in MHz.
    return (uint32_t)(elapsed / (CLOCK_MEAS_TICKS * 54925ULL));
}
// ============================================================
// hasCpuid — check if CPUID instruction is available
// ============================================================
//
// The CPUID instruction exists if bit 21 (ID flag) of EFLAGS can be
// toggled. On a 386 this bit is hardwired to 0. On a 486 without
// CPUID it's also hardwired. Only if we can flip it does CPUID exist.
// Returns true when bit 21 (mask 0x200000) of EFLAGS changed after an
// attempt to flip it, i.e. when the CPUID instruction is available.
// EFLAGS is restored to its original value before returning.
static bool hasCpuid(void) {
// EFLAGS as found on entry.
uint32_t before;
// EFLAGS as read back after the toggle attempt.
uint32_t after;
__asm__ __volatile__ (
// Read EFLAGS through the stack and keep a copy in 'before'.
"pushfl\n\t"
"popl %%eax\n\t"
"movl %%eax, %0\n\t"
// Flip bit 21 and try to load the modified value into EFLAGS.
"xorl $0x200000, %%eax\n\t"
"pushl %%eax\n\t"
"popfl\n\t"
// Read EFLAGS back to see whether the flip was accepted.
"pushfl\n\t"
"popl %%eax\n\t"
"movl %%eax, %1\n\t"
: "=r"(before), "=r"(after)
:
: "eax"
);
// Put the original EFLAGS value back.
__asm__ __volatile__ (
"pushl %0\n\t"
"popfl"
:
: "r"(before)
);
// Non-zero only if bit 21 differs between the two reads.
return (before ^ after) & 0x200000;
}
// ============================================================
// platformGetSystemInfo
// ============================================================
//
// Gathers all available hardware information and formats it as a
// human-readable text string. Each section is separated by a blank
// line and headed with === Section ===.
const char *platformGetSystemInfo(const DisplayT *display) {
__dpmi_regs r;
sSysInfoPos = 0;
sSysInfoBuf[0] = '\0';
sysInfoAppend("DVX System Information");
sysInfoAppend("");
// ---- CPU ----
sysInfoAppend("=== CPU ===");
if (!hasCpuid()) {
sysInfoAppend("Processor: 386/486 (no CPUID support)");
} else {
// Vendor string (CPUID leaf 0)
uint32_t maxFunc;
uint32_t ebx;
uint32_t ecx;
uint32_t edx;
char vendor[13];
__asm__ __volatile__ (
"cpuid"
: "=a"(maxFunc), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(0)
);
memcpy(vendor + 0, &ebx, 4);
memcpy(vendor + 4, &edx, 4);
memcpy(vendor + 8, &ecx, 4);
vendor[12] = '\0';
sysInfoAppend("CPU Vendor: %s", vendor);
// Family/model/stepping/features (CPUID leaf 1)
if (maxFunc >= 1) {
uint32_t eax;
uint32_t features;
__asm__ __volatile__ (
"cpuid"
: "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(features)
: "a"(1)
);
int32_t stepping = eax & 0x0F;
int32_t model = (eax >> 4) & 0x0F;
int32_t family = (eax >> 8) & 0x0F;
if (family == 0x0F) {
family += (eax >> 20) & 0xFF;
model += ((eax >> 16) & 0x0F) << 4;
} else if (family == 6) {
model += ((eax >> 16) & 0x0F) << 4;
}
sysInfoAppend("Family: %ld Model: %ld Stepping: %ld", (long)family, (long)model, (long)stepping);
// Feature flags
bool hasFpu = (features & (1U << 0)) != 0;
bool hasTsc = (features & (1U << 4)) != 0;
bool hasMmx = (features & (1U << 23)) != 0;
bool hasSse = (features & (1U << 25)) != 0;
bool hasSse2 = (features & (1U << 26)) != 0;
char featureStr[128];
int32_t fpos = 0;
featureStr[0] = '\0';
if (hasFpu) {
fpos += snprintf(featureStr + fpos, sizeof(featureStr) - fpos, "FPU ");
}
if (hasTsc) {
fpos += snprintf(featureStr + fpos, sizeof(featureStr) - fpos, "TSC ");
}
if (hasMmx) {
fpos += snprintf(featureStr + fpos, sizeof(featureStr) - fpos, "MMX ");
}
if (hasSse) {
fpos += snprintf(featureStr + fpos, sizeof(featureStr) - fpos, "SSE ");
}
if (hasSse2) {
fpos += snprintf(featureStr + fpos, sizeof(featureStr) - fpos, "SSE2 ");
}
if (fpos > 0) {
featureStr[fpos - 1] = '\0';
}
sysInfoAppend("Features: %s", featureStr[0] ? featureStr : "(none)");
// Clock speed via RDTSC (Pentium+ only)
if (hasTsc) {
uint32_t mhz = estimateClockMhz();
if (mhz > 0) {
sysInfoAppend("Clock: ~%lu MHz", (unsigned long)mhz);
}
}
}
// Brand string (CPUID extended leaves 0x80000002-0x80000004)
uint32_t maxExtFunc;
__asm__ __volatile__ (
"cpuid"
: "=a"(maxExtFunc), "=b"(ebx), "=c"(ecx), "=d"(edx)
: "a"(0x80000000U)
);
if (maxExtFunc >= 0x80000004U) {
char brand[49];
uint32_t *b = (uint32_t *)brand;
for (uint32_t func = 0x80000002U; func <= 0x80000004U; func++) {
__asm__ __volatile__ (
"cpuid"
: "=a"(b[0]), "=b"(b[1]), "=c"(b[2]), "=d"(b[3])
: "a"(func)
);
b += 4;
}
brand[48] = '\0';
const char *p = brand;
while (*p == ' ') {
p++;
}
if (*p) {
sysInfoAppend("Brand: %s", p);
}
}
}
// ---- Memory ----
sysInfoAppend("");
sysInfoAppend("=== Memory ===");
__dpmi_free_mem_info memInfo;
if (__dpmi_get_free_memory_information(&memInfo) == 0) {
if (memInfo.largest_available_free_block_in_bytes != 0xFFFFFFFFUL) {
uint32_t largestKb = memInfo.largest_available_free_block_in_bytes / 1024;
sysInfoAppend("Largest free block: %lu KB (%lu MB)", (unsigned long)largestKb, (unsigned long)(largestKb / 1024));
}
if (memInfo.total_number_of_physical_pages != 0xFFFFFFFFUL) {
uint32_t totalKb = memInfo.total_number_of_physical_pages * 4;
sysInfoAppend("Total physical: %lu KB (%lu MB)", (unsigned long)totalKb, (unsigned long)(totalKb / 1024));
}
if (memInfo.total_number_of_free_pages != 0xFFFFFFFFUL) {
uint32_t freeKb = memInfo.total_number_of_free_pages * 4;
sysInfoAppend("Free physical: %lu KB (%lu MB)", (unsigned long)freeKb, (unsigned long)(freeKb / 1024));
}
if (memInfo.linear_address_space_size_in_pages != 0xFFFFFFFFUL) {
uint32_t linearKb = memInfo.linear_address_space_size_in_pages * 4;
sysInfoAppend("Linear address space: %lu KB (%lu MB)", (unsigned long)linearKb, (unsigned long)(linearKb / 1024));
}
if (memInfo.size_of_paging_file_partition_in_pages != 0xFFFFFFFFUL) {
uint32_t pagingKb = memInfo.size_of_paging_file_partition_in_pages * 4;
sysInfoAppend("Paging file: %lu KB (%lu MB)", (unsigned long)pagingKb, (unsigned long)(pagingKb / 1024));
}
if (memInfo.free_linear_address_space_in_pages != 0xFFFFFFFFUL) {
uint32_t freeLinearKb = memInfo.free_linear_address_space_in_pages * 4;
sysInfoAppend("Free linear space: %lu KB", (unsigned long)freeLinearKb);
}
} else {
sysInfoAppend("DPMI memory info unavailable");
}
// ---- DOS / DPMI ----
sysInfoAppend("");
sysInfoAppend("=== DOS ===");
memset(&r, 0, sizeof(r));
r.x.ax = 0x3000;
__dpmi_int(0x21, &r);
sysInfoAppend("DOS Version: %ld.%02ld", (long)(r.x.ax & 0xFF), (long)((r.x.ax >> 8) & 0xFF));
__dpmi_version_ret ver;
if (__dpmi_get_version(&ver) == 0) {
sysInfoAppend("DPMI Version: %d.%02d", ver.major, ver.minor);
sysInfoAppend("DPMI Flags: %s%s%s",
(ver.flags & 0x01) ? "32-bit " : "16-bit ",
(ver.flags & 0x02) ? "V86 " : "",
(ver.flags & 0x04) ? "VirtMem " : "");
sysInfoAppend("CPU Type: %d86", ver.cpu);
}
// ---- Video ----
sysInfoAppend("");
sysInfoAppend("=== Video ===");
// VBE controller info for version, video RAM, OEM string
_farpokeb(_dos_ds, __tb + 0, 'V');
_farpokeb(_dos_ds, __tb + 1, 'B');
_farpokeb(_dos_ds, __tb + 2, 'E');
_farpokeb(_dos_ds, __tb + 3, '2');
memset(&r, 0, sizeof(r));
r.x.ax = 0x4F00;
r.x.es = __tb >> 4;
r.x.di = __tb & 0x0F;
__dpmi_int(0x10, &r);
if (r.x.ax == 0x004F) {
uint16_t vbeVersion = _farpeekw(_dos_ds, __tb + 4);
uint16_t totalMem64k = _farpeekw(_dos_ds, __tb + 18);
sysInfoAppend("VBE Version: %d.%d", vbeVersion >> 8, vbeVersion & 0xFF);
sysInfoAppend("Video memory: %lu KB", (unsigned long)(totalMem64k * 64));
// OEM string (real-mode far pointer at offset 6)
uint16_t oemOff = _farpeekw(_dos_ds, __tb + 6);
uint16_t oemSeg = _farpeekw(_dos_ds, __tb + 8);
uint32_t oemAddr = ((uint32_t)oemSeg << 4) + oemOff;
char oemStr[80];
int32_t oemLen = 0;
while (oemLen < 79) {
char c = _farpeekb(_dos_ds, oemAddr + oemLen);
if (c == 0) {
break;
}
oemStr[oemLen++] = c;
}
oemStr[oemLen] = '\0';
if (oemLen > 0) {
sysInfoAppend("VBE OEM: %s", oemStr);
}
}
if (display) {
sysInfoAppend("Resolution: %ldx%ld", (long)display->width, (long)display->height);
sysInfoAppend("Color depth: %ld bpp", (long)display->format.bitsPerPixel);
sysInfoAppend("Pitch: %ld bytes", (long)display->pitch);
uint32_t fbSize = (uint32_t)display->pitch * (uint32_t)display->height;
sysInfoAppend("Framebuffer: %lu KB", (unsigned long)(fbSize / 1024));
if (display->format.bitsPerPixel >= 15) {
sysInfoAppend("Red: %ld bits @ bit %ld", (long)display->format.redBits, (long)display->format.redShift);
sysInfoAppend("Green: %ld bits @ bit %ld", (long)display->format.greenBits, (long)display->format.greenShift);
sysInfoAppend("Blue: %ld bits @ bit %ld", (long)display->format.blueBits, (long)display->format.blueShift);
}
}
// ---- Mouse ----
sysInfoAppend("");
sysInfoAppend("=== Mouse ===");
memset(&r, 0, sizeof(r));
r.x.ax = 0x0000;
__dpmi_int(0x33, &r);
if (r.x.ax == 0xFFFF) {
sysInfoAppend("Mouse: Detected (%ld buttons)", (long)r.x.bx);
} else {
sysInfoAppend("Mouse: Not detected");
}
sysInfoAppend("Wheel: %s", sHasMouseWheel ? "Yes (CuteMouse Wheel API)" : "No");
// ---- Disk Drives ----
sysInfoAppend("");
sysInfoAppend("=== Disk Drives ===");
for (int32_t drv = 3; drv <= 26; drv++) {
// INT 21h AH=36h: Get disk free space
memset(&r, 0, sizeof(r));
r.x.ax = 0x3600;
r.x.dx = drv;
__dpmi_int(0x21, &r);
if (r.x.ax == 0xFFFF) {
continue;
}
uint32_t sectPerClust = r.x.ax;
uint32_t freeClusters = r.x.bx;
uint32_t bytesPerSect = r.x.cx;
uint32_t totalClusters = r.x.dx;
uint32_t clusterSize = sectPerClust * bytesPerSect;
uint32_t totalMb = (uint32_t)((uint64_t)totalClusters * clusterSize / (1024 * 1024));
uint32_t freeMb = (uint32_t)((uint64_t)freeClusters * clusterSize / (1024 * 1024));
// INT 21h AX=4408h: Check if drive is removable
memset(&r, 0, sizeof(r));
r.x.ax = 0x4408;
r.x.bx = drv;
__dpmi_int(0x21, &r);
const char *driveType = "Unknown";
if (!(r.x.flags & 0x01)) {
driveType = (r.x.ax == 0) ? "Removable" : "Fixed";
}
char letter = 'A' + (drv - 1);
sysInfoAppend("%c: %s %lu MB total %lu MB free", letter, driveType, (unsigned long)totalMb, (unsigned long)freeMb);
}
return sSysInfoBuf;
}
// ============================================================
// platformInit
// ============================================================
// One-time platform setup: makes the process ignore SIGINT.
void platformInit(void) {
    // Ctrl+C/Break must not terminate the program while a graphics mode
    // is active, because text mode would never be restored. The previous
    // handler is not needed, so the return value is discarded.
    (void)signal(SIGINT, SIG_IGN);
}
// ============================================================
// platformKeyboardGetModifiers
// ============================================================
//
// Returns the current modifier key state via INT 16h function 12h
// (enhanced get extended shift flags). The low byte contains:
// bit 0 = right shift, bit 1 = left shift
// bit 2 = ctrl, bit 3 = alt
// The widget system uses these bits for keyboard accelerators
// (Alt+key) and text editing shortcuts (Ctrl+C/V/X).
// Issues INT 16h with AH=12h and returns the low byte of AX (AL), which
// carries the shift/ctrl/alt bits described in the banner comment.
int32_t platformKeyboardGetModifiers(void) {
    __dpmi_regs regs;

    memset(&regs, 0, sizeof(regs));
    regs.x.ax = 0x1200;
    __dpmi_int(0x16, &regs);

    const int32_t shiftFlags = regs.x.ax & 0xFF;
    return shiftFlags;
}
// ============================================================
// platformKeyboardRead
// ============================================================
//
// Non-blocking keyboard read using enhanced INT 16h functions.
//
// Uses the "enhanced" functions (10h/11h) rather than the original
// (00h/01h) because the originals can't distinguish between grey
// and numpad arrow keys, and they don't report F11/F12. The enhanced
// functions have been standard since AT-class machines (1984).
//
// The two-step peek-then-read is necessary because function 10h
// (read key) blocks until a key is available — there's no non-blocking
// read in the BIOS API. Function 11h (check key) peeks without
// consuming, letting us poll without blocking the event loop.
// Non-blocking read of one key from the BIOS keyboard buffer.
//
//   evt - receives the scan code and ASCII value of the key
//
// Returns false (leaving *evt untouched) when no key is waiting, true when
// a key was removed from the buffer and stored in *evt. Extended keys are
// reported with ascii == 0 and a non-zero scancode.
bool platformKeyboardRead(PlatformKeyEventT *evt) {
    __dpmi_regs r;

    // Peek: function 11h sets ZF if the buffer is empty. The register block
    // is cleared first, as at every other call site in this file, so no
    // indeterminate register values reach the real-mode handler.
    memset(&r, 0, sizeof(r));
    r.x.ax = 0x1100;
    __dpmi_int(0x16, &r);

    // Test the Zero Flag (bit 6 of the flags register)
    if (r.x.flags & 0x40) {
        return false;
    }

    // Consume: function 10h removes the key from the BIOS buffer.
    // AH = scan code, AL = ASCII character (0 for extended keys).
    memset(&r, 0, sizeof(r));
    r.x.ax = 0x1000;
    __dpmi_int(0x16, &r);

    uint8_t scancode = (r.x.ax >> 8) & 0xFF;
    uint8_t ascii    = r.x.ax & 0xFF;

    // Enhanced INT 16h marks grey/extended keys (arrows, Home, End, ...)
    // with AL = 0xE0 and the real scan code in AH. Normalise those to
    // ascii == 0 so callers only need the one "ascii == 0 means extended"
    // convention. AL = 0xE0 with AH = 0 is a genuine character (e.g. typed
    // via Alt+numpad) and must be passed through. Comparing the unsigned
    // local also keeps the test correct whatever type evt->ascii has.
    if (ascii == 0xE0 && scancode != 0) {
        ascii = 0;
    }

    evt->scancode = scancode;
    evt->ascii    = ascii;
    return true;
}
// ============================================================
// platformMouseInit
// ============================================================
//
// Initializes the INT 33h mouse driver. The mouse driver is a TSR
// (or emulated by the DOS environment) that tracks position and
// buttons independently of the application.
//
// We must set the movement range to match our VESA resolution,
// because the default range may be 640x200 (CGA text mode).
// Without this, mouse coordinates would be wrong or clipped.
//
// The hardware cursor is never shown — DVX composites its own
// software cursor on top of the backbuffer. We only use INT 33h
// for position/button state via polling (function 03h).
void platformMouseInit(int32_t screenW, int32_t screenH) {
    // INT 33h requests, issued top to bottom. Columns: AX function, CX, DX.
    const uint16_t requests[][3] = {
        // 00h: reset driver, detect mouse hardware
        { 0x0000, 0, 0 },
        // 07h: horizontal range, min in CX, max in DX
        { 0x0007, 0, (uint16_t)(screenW - 1) },
        // 08h: vertical range, min in CX, max in DX
        { 0x0008, 0, (uint16_t)(screenH - 1) },
        // 04h: warp cursor to the center of the screen
        { 0x0004, (uint16_t)(screenW / 2), (uint16_t)(screenH / 2) },
    };
    const size_t requestCount = sizeof(requests) / sizeof(requests[0]);

    for (size_t i = 0; i < requestCount; i++) {
        __dpmi_regs regs;

        // Every call starts from an all-zero register block.
        memset(&regs, 0, sizeof(regs));
        regs.x.ax = requests[i][0];
        regs.x.cx = requests[i][1];
        regs.x.dx = requests[i][2];
        __dpmi_int(0x33, &regs);
    }
}
// ============================================================
// platformMousePoll
// ============================================================
//
// Reads current mouse state via INT 33h function 03h.
// Returns: CX=X position, DX=Y position, BX=button state
// (bit 0 = left, bit 1 = right, bit 2 = middle).
//
// Polling is used instead of a callback/event model because the
// DVX event loop already runs at frame rate. Installing a real-mode
// callback for mouse events would add DPMI mode-switch overhead
// on every mouse movement (every mickey), which is wasteful when we only sample
// once per frame anyway.
void platformMousePoll(int32_t *mx, int32_t *my, int32_t *buttons) {
    __dpmi_regs regs;

    // Function 03h: query position and button state.
    memset(&regs, 0, sizeof(regs));
    regs.x.ax = 0x0003;
    __dpmi_int(0x33, &regs);

    const uint16_t bx = regs.x.bx;

    *mx      = regs.x.cx;
    *my      = regs.x.dx;
    // Keep only bits 0-2 of BL: left / right / middle.
    *buttons = bx & 0x07;

    // BH is reinterpreted as a signed 8-bit wheel counter, but only when
    // the wheel API was switched on (sHasMouseWheel). Otherwise the
    // previously stored delta is left as it was.
    if (sHasMouseWheel) {
        const int8_t wheel = (int8_t)(bx >> 8);
        sLastWheelDelta = (int32_t)wheel;
    }
}
// ============================================================
// platformMouseWheelInit
// ============================================================
//
// Detects and activates the CuteMouse Wheel API 1.0 (INT 33h AX=0011h).
// The driver returns AX=574Dh ('WM') if supported, with CX bit 0 set if
// a wheel is physically present. Calling this function also switches the
// driver from "wheelkey" mode (faking keypresses) to real wheel reporting
// via function 03h BH. Must be called after platformMouseInit.
bool platformMouseWheelInit(void) {
    __dpmi_regs regs;

    // Function 11h: wheel API installation check.
    memset(&regs, 0, sizeof(regs));
    regs.x.ax = 0x0011;
    __dpmi_int(0x33, &regs);

    // AX must come back as 0x574D ('WM'), and CX bit 0 must be set,
    // before wheel reporting is considered available.
    const bool apiAnswered  = (regs.x.ax == 0x574D);
    const bool wheelPresent = (regs.x.cx & 0x0001) != 0;

    sHasMouseWheel = apiAnswered && wheelPresent;
    return sHasMouseWheel;
}
// ============================================================
// platformMouseWheelPoll
// ============================================================
//
// Returns the wheel delta captured by the last platformMousePoll call.
// Positive = scroll down, negative = scroll up. Returns 0 if no wheel
// movement or if the wheel is not supported.
int32_t platformMouseWheelPoll(void) {
// Pure accessor: hands back the cached sLastWheelDelta unchanged. The
// value is not reset here, so repeated calls return the same delta
// until it is next written.
return sLastWheelDelta;
}
// ============================================================
// platformSpanCopy8
// ============================================================
//
// Copies 'count' 8-bit pixels from src to dst using dword-aligned
// rep movsl for the bulk transfer.
//
// All span operations (Copy8/16/32, Fill8/16/32) follow the same
// pattern: align to a dword boundary, do the bulk as rep movsl or
// rep stosl, then handle the remainder. This pattern exists because
// on 486/Pentium, misaligned dword moves incur a 3-cycle penalty per
// access. Aligning first ensures the critical rep loop runs at full
// bus speed.
//
// rep movsl moves 4 bytes per iteration with hardware loop decrement,
// which is faster than a C for-loop — the CPU string move pipeline
// optimizes sequential memory access patterns.
void platformSpanCopy8(uint8_t *dst, const uint8_t *src, int32_t count) {
    // dst   - destination of the span (any alignment)
    // src   - source of the span; must not overlap dst in a way that a
    //         forward copy would corrupt
    // count - number of 8-bit pixels (bytes) to copy
    //
    // Empty and negative spans are a no-op. Without this guard a negative
    // count skips the bulk copy but its low two bits would still be read
    // as a trailing byte count, writing up to 3 stray bytes.
    if (count <= 0) {
        return;
    }

    // Align dst to a dword boundary with byte copies
    while (((uintptr_t)dst & 3) && count > 0) {
        *dst++ = *src++;
        count--;
    }

    // Bulk transfer, 4 bytes per iteration. The counter is register-width
    // so REP always sees a fully defined count register.
    size_t dwordCount = (size_t)count >> 2;
    if (dwordCount > 0) {
        // "+D"/"+S"/"+c" are read-write operands: after the instruction,
        // dst and src already point past the copied dwords and the counter
        // is 0, so no manual pointer adjustment is required afterwards.
        __asm__ __volatile__ (
            "rep movsl"
            : "+D"(dst), "+S"(src), "+c"(dwordCount)
            :
            : "memory"
        );
    }

    // Trailing 0-3 bytes that did not fill a whole dword
    for (int32_t rem = count & 3; rem > 0; rem--) {
        *dst++ = *src++;
    }
}
// ============================================================
// platformSpanCopy16
// ============================================================
//
// Copies 'count' 16-bit pixels. Since each pixel is 2 bytes, we
// only need to check bit 1 of the address for dword alignment
// (bit 0 is always clear for 16-bit aligned data). A single
// leading pixel copy brings us to a dword boundary, then rep movsl
// copies pixel pairs as dwords.
void platformSpanCopy16(uint8_t *dst, const uint8_t *src, int32_t count) {
    // dst   - destination of the span, addressed as bytes
    // src   - source of the span, addressed as bytes
    // count - number of 16-bit pixels to copy
    //
    // Empty and negative spans are a no-op. Without this guard a negative
    // odd count would skip the bulk copy yet still satisfy the trailing
    // "count & 1" test and write one stray pixel.
    if (count <= 0) {
        return;
    }

    // Copy one pixel to reach dword alignment if needed
    if ((uintptr_t)dst & 2) {
        *(uint16_t *)dst = *(const uint16_t *)src;
        dst += 2;
        src += 2;
        count--;
    }

    // Bulk transfer: two pixels per dword. The counter is register-width
    // so REP always sees a fully defined count register.
    size_t dwordCount = (size_t)count >> 1;
    if (dwordCount > 0) {
        // Read-write operands: dst and src come back already advanced and
        // the counter comes back as 0, so nothing is adjusted afterwards.
        __asm__ __volatile__ (
            "rep movsl"
            : "+D"(dst), "+S"(src), "+c"(dwordCount)
            :
            : "memory"
        );
    }

    // Trailing odd pixel
    if (count & 1) {
        *(uint16_t *)dst = *(const uint16_t *)src;
    }
}
// ============================================================
// platformSpanCopy32
// ============================================================
//
// 32-bit pixels are inherently dword-aligned, so no alignment
// preamble is needed — straight to rep movsl.
void platformSpanCopy32(uint8_t *dst, const uint8_t *src, int32_t count) {
    // dst   - destination of the span, addressed as bytes
    // src   - source of the span, addressed as bytes
    // count - number of 32-bit pixels (dwords) to copy
    //
    // Empty and negative spans are a no-op. REP treats its counter as
    // unsigned, so a negative count would otherwise turn into a huge
    // transfer that runs far past both buffers.
    if (count <= 0) {
        return;
    }

    // Register-width counter so REP always sees a fully defined count.
    size_t dwordCount = (size_t)count;

    __asm__ __volatile__ (
        "rep movsl"
        : "+D"(dst), "+S"(src), "+c"(dwordCount)
        :
        : "memory"
    );
}
// ============================================================
// platformSpanFill8
// ============================================================
//
// Fills 'count' 8-bit pixels with a single color value.
// The 8-bit value is replicated into all four bytes of a dword so
// that rep stosl writes 4 identical pixels per iteration. This is
// 4x faster than byte-at-a-time for large fills (window backgrounds,
// screen clears).
void platformSpanFill8(uint8_t *dst, uint32_t color, int32_t count) {
    // dst   - start of the span (any alignment)
    // color - fill value; only the low 8 bits are used
    // count - number of 8-bit pixels (bytes) to fill
    //
    // Empty and negative spans are a no-op. Without this guard a negative
    // count skips the bulk fill but its low two bits would still be read
    // as a trailing byte count, writing up to 3 stray bytes.
    if (count <= 0) {
        return;
    }

    const uint8_t c = (uint8_t)color;

    // Align to 4 bytes — skip if already aligned
    if (__builtin_expect(((uintptr_t)dst & 3) != 0, 0)) {
        while (((uintptr_t)dst & 3) && count > 0) {
            *dst++ = c;
            count--;
        }
    }

    // Bulk fill, 4 pixels per dword. The counter is register-width so REP
    // always sees a fully defined count register.
    size_t dwordCount = (size_t)count >> 2;
    if (dwordCount > 0) {
        // Replicate the byte into all four lanes of the dword.
        const uint32_t dword = (uint32_t)c * 0x01010101u;

        // Read-write operands: dst comes back already advanced past the
        // filled dwords, so no manual pointer adjustment follows.
        __asm__ __volatile__ (
            "rep stosl"
            : "+D"(dst), "+c"(dwordCount)
            : "a"(dword)
            : "memory"
        );
    }

    // Trailing 0-3 bytes that did not fill a whole dword
    for (int32_t rem = count & 3; rem > 0; rem--) {
        *dst++ = c;
    }
}
// ============================================================
// platformSpanFill16
// ============================================================
//
// Fills 'count' 16-bit pixels. Two pixels are packed into a dword
// (low half = first pixel, high half = second pixel) so rep stosl
// writes 2 pixels per iteration.
void platformSpanFill16(uint8_t *dst, uint32_t color, int32_t count) {
    // dst   - start of the span, addressed as bytes
    // color - fill value; only the low 16 bits are used
    // count - number of 16-bit pixels to fill
    //
    // Empty and negative spans are a no-op. Without this guard a negative
    // odd count would skip the bulk fill yet still satisfy the trailing
    // "count & 1" test and write one stray pixel.
    if (count <= 0) {
        return;
    }

    const uint16_t c = (uint16_t)color;

    // Handle odd leading pixel for dword alignment
    if ((uintptr_t)dst & 2) {
        *(uint16_t *)dst = c;
        dst += 2;
        count--;
    }

    // Fill pairs of pixels as 32-bit dwords. The counter is register-width
    // so REP always sees a fully defined count register.
    size_t dwordCount = (size_t)count >> 1;
    if (dwordCount > 0) {
        // Same pixel in both halves of the dword.
        const uint32_t dword = ((uint32_t)c << 16) | c;

        // Read-write operands: dst comes back already advanced past the
        // filled dwords, so no manual pointer adjustment follows.
        __asm__ __volatile__ (
            "rep stosl"
            : "+D"(dst), "+c"(dwordCount)
            : "a"(dword)
            : "memory"
        );
    }

    // Handle trailing odd pixel
    if (count & 1) {
        *(uint16_t *)dst = c;
    }
}
// ============================================================
// platformSpanFill32
// ============================================================
//
// 32-bit fill is the simplest case — each pixel is already a dword,
// so rep stosl writes exactly one pixel per iteration with no
// alignment or packing concerns.
void platformSpanFill32(uint8_t *dst, uint32_t color, int32_t count) {
    // dst   - start of the span, addressed as bytes
    // color - 32-bit fill value, written as-is
    // count - number of 32-bit pixels (dwords) to fill
    //
    // Empty and negative spans are a no-op. REP treats its counter as
    // unsigned, so a negative count would otherwise turn into a huge
    // fill that runs far past the buffer.
    if (count <= 0) {
        return;
    }

    // Register-width counter so REP always sees a fully defined count.
    size_t dwordCount = (size_t)count;

    __asm__ __volatile__ (
        "rep stosl"
        : "+D"(dst), "+c"(dwordCount)
        : "a"(color)
        : "memory"
    );
}
// ============================================================
// platformValidateFilename — DOS 8.3 filename validation
// ============================================================
//
// Validates that a filename conforms to DOS 8.3 conventions:
// - Base name: 1-8 chars, extension: 0-3 chars, one dot max
// - No spaces or special characters that DOS can't handle
// - Not a reserved device name (CON, PRN, AUX, NUL, COMn, LPTn)
//
// The reserved name check compares the base name only (before the
// dot), case-insensitive, because DOS treats "CON.TXT" the same
// as the CON device — the extension is ignored for device names.
//
// Returns NULL on success, or a human-readable error string on failure.
// On non-DOS platforms, this function would be replaced with one that
// validates for that platform's filesystem rules.
const char *platformValidateFilename(const char *name) {
    // name - candidate filename (no path component); may be NULL.
    // Returns NULL when the name is acceptable, otherwise a static,
    // human-readable error string that the caller must not free.

    // Device names DOS resolves regardless of extension. CLOCK$ is
    // included because '$' is otherwise a legal filename character, so
    // nothing else in this function would reject it.
    static const char *const reserved[] = {
        "CON", "PRN", "AUX", "NUL", "CLOCK$",
        "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
        "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
        NULL
    };

    if (!name || name[0] == '\0') {
        return "Filename must not be empty.";
    }

    // Reject multiple dots before measuring anything. The base/extension
    // split below is only meaningful with at most one dot; checking later
    // would report e.g. "archive.tar.gz" as having an over-long base name
    // instead of naming the real problem.
    const char *dot = strrchr(name, '.');
    if (dot && strchr(name, '.') != dot) {
        return "Filename may contain only one dot.";
    }

    // Split into base and extension
    const size_t baseLen = dot ? (size_t)(dot - name) : strlen(name);
    const size_t extLen  = dot ? strlen(dot + 1) : 0;

    if (baseLen < 1 || baseLen > 8) {
        return "Filename must be 1-8 characters before the extension.";
    }
    if (extLen > 3) {
        return "Extension must be 3 characters or fewer.";
    }

    // Check for invalid characters (the single dot is a separator, not
    // part of either component, so it is skipped).
    for (const char *p = name; *p; p++) {
        if (*p == '.') {
            continue;
        }
        // Only printable, non-space ASCII is allowed.
        if (*p < '!' || *p > '~') {
            return "Filename contains invalid characters.";
        }
        if (strchr(" \"*+,/:;<=>?[\\]|", *p)) {
            return "Filename contains invalid characters.";
        }
    }

    // Check reserved device names (compare base only, case-insensitive).
    // baseLen is already known to be 1-8, so the copy fits in base[].
    char base[9];
    for (size_t i = 0; i < baseLen; i++) {
        base[i] = (char)toupper((unsigned char)name[i]);
    }
    base[baseLen] = '\0';

    for (const char *const *r = reserved; *r; r++) {
        if (strcmp(base, *r) == 0) {
            return "That name is a reserved device name.";
        }
    }

    return NULL;
}
// ============================================================
// platformVideoInit
// ============================================================
//
// Complete video initialization sequence:
// 1. findBestMode() — enumerate VESA modes and pick the best match
// 2. setVesaMode() — actually switch to the chosen mode with LFB
// 3. mapLfb() — DPMI-map the physical framebuffer into linear memory
// 4. Allocate system RAM backbuffer (same size as LFB)
// 5. Set up 8-bit palette if needed
// 6. Initialize clip rect to full display
//
// The backbuffer is allocated in system RAM rather than drawing
// directly to the LFB because: (a) reads from the LFB are extremely
// slow on ISA/VLB/PCI (uncached MMIO), so any compositing that reads
// pixels would crawl; (b) the dirty rect system only flushes changed
// regions, so most of the LFB is never touched per frame.
int32_t platformVideoInit(DisplayT *d, int32_t requestedW, int32_t requestedH, int32_t preferredBpp) {
    // d            - display descriptor; zeroed here, then filled in
    // requestedW/H - desired resolution, passed to findBestMode()
    // preferredBpp - desired colour depth, passed to findBestMode()
    // Returns 0 on success, -1 on any failure.
    uint16_t bestMode;

    memset(d, 0, sizeof(*d));

    // Find the best VESA mode
    if (findBestMode(requestedW, requestedH, preferredBpp, &bestMode, d) != 0) {
        return -1;
    }

    // Save the physical address before we overwrite it
    const uint32_t physAddr = (uint32_t)(uintptr_t)d->lfb;

    // Set the mode
    if (setVesaMode(bestMode) != 0) {
        return -1;
    }

    // Map the LFB
    if (mapLfb(d, physAddr) != 0) {
        return -1;
    }

    // Allocate a zeroed backbuffer the same size as the framebuffer
    const uint32_t fbSize = (uint32_t)d->pitch * (uint32_t)d->height;
    d->backBuf = malloc(fbSize);
    if (!d->backBuf) {
        fprintf(stderr, "VBE: Failed to allocate %lu byte backbuffer\n", (unsigned long)fbSize);
        goto fail;
    }
    memset(d->backBuf, 0, fbSize);

    // Set up palette for 8-bit mode (256 entries x 3 bytes)
    if (d->format.bitsPerPixel == 8) {
        d->palette = malloc(768);
        if (!d->palette) {
            fprintf(stderr, "VBE: Failed to allocate palette\n");
            goto fail;
        }
        dvxGeneratePalette(d->palette);
        platformVideoSetPalette(d->palette, 0, 256);
    }

    // Initialize clip rect to full display
    d->clipX = 0;
    d->clipY = 0;
    d->clipW = d->width;
    d->clipH = d->height;

    fprintf(stderr, "VBE: Mode 0x%04X set: %ldx%ldx%ld, pitch=%ld\n",
            bestMode, (long)d->width, (long)d->height, (long)d->format.bitsPerPixel, (long)d->pitch);
    return 0;

fail:
    // Shared cleanup for failures after the LFB has been mapped. Release
    // whatever was allocated, and clear d->lfb as well: once near pointers
    // are disabled that pointer must not be dereferenced, and
    // platformVideoShutdown() clears it at the same point for that reason.
    free(d->backBuf);
    d->backBuf = NULL;
    d->lfb     = NULL;
    __djgpp_nearptr_disable();
    return -1;
}
// ============================================================
// platformVideoSetPalette
// ============================================================
//
// Programs the VGA DAC palette registers via direct port I/O.
// Port 0x3C8 = write index, port 0x3C9 = data (auto-increments).
//
// The VGA DAC expects 6-bit values (0-63) but our palette stores
// 8-bit values (0-255), hence the >> 2 shift. This is standard
// VGA behavior dating back to the original IBM VGA in 1987.
//
// Direct port I/O is used instead of VBE function 09h (set palette)
// because the VGA DAC ports are faster (no BIOS call overhead) and
// universally compatible — even VBE 3.0 cards still have the standard
// VGA DAC at ports 0x3C8/0x3C9.
void platformVideoSetPalette(const uint8_t *pal, int32_t firstEntry, int32_t count) {
    // Select the first DAC entry; the data port then takes R, G, B for
    // each consecutive entry.
    outportb(0x3C8, (uint8_t)firstEntry);

    const int32_t endEntry = firstEntry + count;
    for (int32_t entry = firstEntry; entry < endEntry; entry++) {
        // pal is indexed by absolute entry number, 3 bytes per entry.
        const uint8_t *rgb = &pal[entry * 3];

        // Drop the two low bits: 8-bit components become 6-bit values.
        outportb(0x3C9, rgb[0] >> 2);
        outportb(0x3C9, rgb[1] >> 2);
        outportb(0x3C9, rgb[2] >> 2);
    }
}
// ============================================================
// platformVideoShutdown
// ============================================================
//
// Tears down the graphics mode and restores standard 80x25 text
// mode (BIOS mode 3) so the user gets their DOS prompt back.
// Also frees the backbuffer, palette, and disables near pointers
// (re-enables DJGPP's segment limit checking for safety).
void platformVideoShutdown(DisplayT *d) {
    __dpmi_regs regs;

    // INT 10h function 00h, mode 03h = 80x25 color text
    memset(&regs, 0, sizeof(regs));
    regs.x.ax = 0x0003;
    __dpmi_int(0x10, &regs);

    // free(NULL) is a no-op, so unallocated buffers need no special case.
    free(d->backBuf);
    d->backBuf = NULL;

    free(d->palette);
    d->palette = NULL;

    // Clear the framebuffer pointer before near pointers are disabled.
    d->lfb = NULL;
    __djgpp_nearptr_disable();
}
// ============================================================
// platformYield
// ============================================================
//
// Cooperative yield to the DPMI host. In a multitasking DOS
// environment (Windows 3.x DOS box, OS/2 VDM, or DESQview),
// this gives other tasks a chance to run. Under a single-tasking
// DPMI server (CWSDPMI) it's essentially a no-op, but it doesn't
// hurt. Called once per event loop iteration when idle.
void platformYield(void) {
// Thin wrapper: forwards straight to __dpmi_yield() with no arguments
// and reports nothing back to the caller.
__dpmi_yield();
}
// ============================================================
// setVesaMode
// ============================================================
//
// Sets a VBE video mode via function 0x4F02. Bit 14 of the mode
// number tells the BIOS to enable the Linear Frame Buffer instead
// of the default banked mode. This is the only mode we support.
static int32_t setVesaMode(uint16_t mode) {
    __dpmi_regs regs;

    // VBE function 4F02h with the requested mode in BX.
    memset(&regs, 0, sizeof(regs));
    regs.x.ax = 0x4F02;
    regs.x.bx = mode | 0x4000;  // bit 14 = use LFB
    __dpmi_int(0x10, &regs);

    // AX == 004Fh is the only result treated as success.
    if (regs.x.ax == 0x004F) {
        return 0;
    }

    fprintf(stderr, "VBE: Failed to set mode 0x%04X (AX=0x%04X)\n", mode, regs.x.ax);
    return -1;
}