// dvxPlatformDos.c — DOS/DJGPP platform implementation for DVX GUI // // All BIOS calls, DPMI functions, port I/O, inline assembly, and // DOS-specific file handling are isolated in this single file. // // This file is the ONLY place where DJGPP headers (dpmi.h, go32.h, // sys/nearptr.h, etc.) appear. Every other DVX module calls through // the dvxPlatform.h interface, so porting to a new OS (Linux/SDL, // Win32, or bare-metal ARM) requires replacing only this file and // nothing else. The abstraction covers five domains: // 1. VESA VBE video init / mode set / LFB mapping // 2. Backbuffer-to-LFB flush using rep movsl // 3. Span fill/copy primitives using inline asm (rep stosl / rep movsl) // 4. Mouse input via INT 33h driver // 5. Keyboard input via BIOS INT 16h // // Why BIOS INT 16h for keyboard instead of direct port I/O (scancode // reading from port 0x60): BIOS handles typematic repeat, keyboard // translation tables, and extended key decoding. Direct port I/O would // require reimplementing all of that, and the DPMI host already hooks // IRQ1 to feed the BIOS buffer. The BIOS approach is simpler and more // portable across emulators (DOSBox, 86Box, PCem all handle it correctly). // // Why INT 33h for mouse: same rationale — the mouse driver handles // PS/2 and serial mice transparently, and every DOS emulator provides // a compatible driver. Polling via function 03h avoids the complexity // of installing a real-mode callback for mouse events. 
#include "dvxPlatform.h" #include "../dvxPalette.h" #include #include #include #include #include // DJGPP-specific headers — this is the ONLY file that includes these #include #include #include #include #include // ============================================================ // Prototypes // ============================================================ static int32_t findBestMode(int32_t requestedW, int32_t requestedH, int32_t preferredBpp, uint16_t *outMode, DisplayT *d); static void getModeInfo(uint16_t mode, DisplayT *d, int32_t *score, int32_t requestedW, int32_t requestedH, int32_t preferredBpp); static int32_t mapLfb(DisplayT *d, uint32_t physAddr); void platformVideoEnumModes(void (*cb)(int32_t w, int32_t h, int32_t bpp, void *userData), void *userData); static int32_t setVesaMode(uint16_t mode); // Alt+key scan code to ASCII lookup table (indexed by BIOS scan code). // INT 16h returns these scan codes with ascii=0 for Alt+key combos. // Using a 256-byte lookup table instead of a switch or if-chain because // this is called on every keypress and the table fits in a single cache // line cluster. The designated initializer syntax leaves all other // entries as zero, which is the "no mapping" sentinel. 
static const char sAltScanToAscii[256] = {
    // Alt+letters, top row: Q W E R T Y U I O P
    [0x10] = 'q', [0x11] = 'w', [0x12] = 'e', [0x13] = 'r', [0x14] = 't',
    [0x15] = 'y', [0x16] = 'u', [0x17] = 'i', [0x18] = 'o', [0x19] = 'p',

    // Alt+letters, home row: A S D F G H J K L
    [0x1E] = 'a', [0x1F] = 's', [0x20] = 'd', [0x21] = 'f', [0x22] = 'g',
    [0x23] = 'h', [0x24] = 'j', [0x25] = 'k', [0x26] = 'l',

    // Alt+letters, bottom row: Z X C V B N M
    [0x2C] = 'z', [0x2D] = 'x', [0x2E] = 'c', [0x2F] = 'v', [0x30] = 'b',
    [0x31] = 'n', [0x32] = 'm',

    // Alt+digits: 1..9 followed by 0
    [0x78] = '1', [0x79] = '2', [0x7A] = '3', [0x7B] = '4', [0x7C] = '5',
    [0x7D] = '6', [0x7E] = '7', [0x7F] = '8', [0x80] = '9', [0x81] = '0',
};


// ============================================================
// findBestMode
// ============================================================
//
// Enumerates all VESA VBE modes and selects the best match for the
// requested resolution and color depth using a scoring algorithm.
//
// The approach is: call VBE function 0x4F00 to get the controller
// info block (which contains a pointer to the mode list), then call
// VBE function 0x4F01 for each mode to get its attributes. Each mode
// is scored by getModeInfo() and the highest-scoring mode wins.
//
// Why scoring instead of exact-match: real VESA BIOSes vary wildly in
// what modes they expose. Some have 640x480x16 but not x32; some only
// have 800x600. The scoring heuristic picks the closest usable mode
// rather than failing outright if the exact requested mode is absent.
//
// All VBE info block reads use DJGPP's far pointer API (_farpeekb/w/l)
// to access the DPMI transfer buffer (__tb), which lives in the first
// 1MB of address space (conventional memory). VBE BIOS calls use
// real-mode interrupts via __dpmi_int(), so all data must pass through
// the transfer buffer.
static int32_t findBestMode(int32_t requestedW, int32_t requestedH, int32_t preferredBpp, uint16_t *outMode, DisplayT *d) { __dpmi_regs r; uint16_t bestMode = 0; int32_t bestScore = -1; DisplayT bestDisplay; memset(&bestDisplay, 0, sizeof(bestDisplay)); // Get VBE controller info — the transfer buffer (__tb) is the DJGPP- // provided region in conventional memory that real-mode BIOS calls // can read/write. We split it into seg:off for the INT 10h call. uint32_t infoSeg = __tb >> 4; uint32_t infoOff = __tb & 0x0F; // Writing "VBE2" tells the BIOS we want VBE 2.0+ extended info. // Without this, we'd get VBE 1.x info which lacks LFB addresses. _farpokeb(_dos_ds, __tb + 0, 'V'); _farpokeb(_dos_ds, __tb + 1, 'B'); _farpokeb(_dos_ds, __tb + 2, 'E'); _farpokeb(_dos_ds, __tb + 3, '2'); memset(&r, 0, sizeof(r)); r.x.ax = 0x4F00; r.x.es = infoSeg; r.x.di = infoOff; __dpmi_int(0x10, &r); // VBE functions return 0x004F in AX on success. Any other value // means the function failed or isn't supported. if (r.x.ax != 0x004F) { fprintf(stderr, "VBE: Function 0x4F00 failed (AX=0x%04X)\n", r.x.ax); return -1; } // On success the BIOS overwrites "VBE2" with "VESA" in the buffer char sig[5]; for (int32_t i = 0; i < 4; i++) { sig[i] = _farpeekb(_dos_ds, __tb + i); } sig[4] = '\0'; if (strcmp(sig, "VESA") != 0) { fprintf(stderr, "VBE: Bad signature '%s'\n", sig); return -1; } // VBE 2.0+ is required for LFB (Linear Frame Buffer) support. // VBE 1.x only supports bank switching, which we explicitly don't // implement — the complexity isn't worth it for 486+ targets. uint16_t vbeVersion = _farpeekw(_dos_ds, __tb + 4); if (vbeVersion < 0x0200) { fprintf(stderr, "VBE: Version %d.%d too old (need 2.0+)\n", vbeVersion >> 8, vbeVersion & 0xFF); return -1; } // The mode list is a far pointer (seg:off) at offset 14 in the info // block. It points to a null-terminated (0xFFFF) array of mode numbers // in conventional memory. 
uint16_t modeListOff = _farpeekw(_dos_ds, __tb + 14); uint16_t modeListSeg = _farpeekw(_dos_ds, __tb + 16); uint32_t modeListAddr = ((uint32_t)modeListSeg << 4) + modeListOff; // Walk the mode list. Cap at 256 to prevent runaway on corrupt BIOS // data (real hardware rarely has more than ~50 modes). for (int32_t i = 0; i < 256; i++) { uint16_t mode = _farpeekw(_dos_ds, modeListAddr + i * 2); if (mode == 0xFFFF) { break; } DisplayT candidate; int32_t score = 0; memset(&candidate, 0, sizeof(candidate)); getModeInfo(mode, &candidate, &score, requestedW, requestedH, preferredBpp); if (score > bestScore) { bestScore = score; bestMode = mode; bestDisplay = candidate; } } if (bestScore < 0) { fprintf(stderr, "VBE: No suitable mode found for %ldx%ld\n", (long)requestedW, (long)requestedH); return -1; } *outMode = bestMode; *d = bestDisplay; return 0; } // ============================================================ // platformVideoEnumModes // ============================================================ void platformVideoEnumModes(void (*cb)(int32_t w, int32_t h, int32_t bpp, void *userData), void *userData) { __dpmi_regs r; memset(&r, 0, sizeof(r)); r.x.ax = 0x4F00; r.x.es = __tb >> 4; r.x.di = __tb & 0x0F; // Write "VBE2" signature to request VBE 2.0+ info _farpokeb(_dos_ds, __tb + 0, 'V'); _farpokeb(_dos_ds, __tb + 1, 'B'); _farpokeb(_dos_ds, __tb + 2, 'E'); _farpokeb(_dos_ds, __tb + 3, '2'); __dpmi_int(0x10, &r); if (r.x.ax != 0x004F) { return; } uint16_t modeListOff = _farpeekw(_dos_ds, __tb + 14); uint16_t modeListSeg = _farpeekw(_dos_ds, __tb + 16); uint32_t modeListAddr = ((uint32_t)modeListSeg << 4) + modeListOff; for (int32_t i = 0; i < 256; i++) { uint16_t mode = _farpeekw(_dos_ds, modeListAddr + i * 2); if (mode == 0xFFFF) { break; } memset(&r, 0, sizeof(r)); r.x.ax = 0x4F01; r.x.cx = mode; r.x.es = __tb >> 4; r.x.di = __tb & 0x0F; __dpmi_int(0x10, &r); if (r.x.ax != 0x004F) { continue; } uint16_t attr = _farpeekw(_dos_ds, __tb + 0); // Only report LFB-capable 
graphics modes if (!(attr & 0x0080) || !(attr & 0x0010)) { continue; } int32_t w = _farpeekw(_dos_ds, __tb + 18); int32_t h = _farpeekw(_dos_ds, __tb + 20); int32_t bpp = _farpeekb(_dos_ds, __tb + 25); cb(w, h, bpp, userData); } } // ============================================================ // getModeInfo // ============================================================ // // Queries VBE mode info (function 0x4F01) for a single mode and // scores it against the requested parameters. The scoring algorithm: // // Base score by bpp: 16-bit=100, 15-bit=90, 32-bit=85, 8-bit=70 // +20 if bpp matches preferredBpp // +10 if exact resolution match, -10 if oversize // -1 (rejected) if mode lacks LFB, is text-mode, is below requested // resolution, or uses an unsupported bpp (e.g. 24-bit) // // 16-bit is preferred over 32-bit because it's twice as fast for // span fill/copy on a 486/Pentium bus (half the bytes). 15-bit scores // slightly below 16-bit because some VESA BIOSes report 15bpp modes // as 16bpp with a dead high bit, causing confusion. 8-bit scores // lowest because palette management adds complexity. // // 24-bit is explicitly rejected (not 8/15/16/32) because its 3-byte // pixels can't use dword-aligned rep stosl fills without masking. // // The physical LFB address is temporarily stored in d->lfb as a raw // integer cast — it will be properly mapped via DPMI in mapLfb() later. static void getModeInfo(uint16_t mode, DisplayT *d, int32_t *score, int32_t requestedW, int32_t requestedH, int32_t preferredBpp) { __dpmi_regs r; *score = -1; memset(&r, 0, sizeof(r)); r.x.ax = 0x4F01; r.x.cx = mode; r.x.es = __tb >> 4; r.x.di = __tb & 0x0F; __dpmi_int(0x10, &r); if (r.x.ax != 0x004F) { return; } // VBE mode attribute word at offset 0: // bit 7 = LFB available, bit 4 = graphics mode (not text) // Both are required — we never bank-switch and never want text modes. 
uint16_t attr = _farpeekw(_dos_ds, __tb + 0); if (!(attr & 0x0080)) { return; } if (!(attr & 0x0010)) { return; } int32_t w = _farpeekw(_dos_ds, __tb + 18); int32_t h = _farpeekw(_dos_ds, __tb + 20); int32_t bpp = _farpeekb(_dos_ds, __tb + 25); int32_t pitch = _farpeekw(_dos_ds, __tb + 16); uint32_t physAddr = _farpeekl(_dos_ds, __tb + 40); // Must match or exceed requested resolution if (w < requestedW || h < requestedH) { return; } // Must be a supported bpp if (bpp != 8 && bpp != 15 && bpp != 16 && bpp != 32) { return; } // Score this mode int32_t s = 0; if (bpp == 16) { s = 100; } else if (bpp == 15) { s = 90; } else if (bpp == 32) { s = 85; } else if (bpp == 8) { s = 70; } // Prefer the user's preferred bpp if (bpp == preferredBpp) { s += 20; } // Exact resolution match is preferred if (w == requestedW && h == requestedH) { s += 10; } else { s -= 10; } *score = s; // Fill in display info d->width = w; d->height = h; d->pitch = pitch; d->format.bitsPerPixel = bpp; d->format.bytesPerPixel = (bpp + 7) / 8; // Read the channel mask layout from the VBE mode info block. // These offsets (31-36) define the bit position and size of each // color channel. This is essential because the channel layout // varies: some cards use RGB565, others BGR565, etc. 
if (bpp >= 15) { int32_t redSize = _farpeekb(_dos_ds, __tb + 31); int32_t redPos = _farpeekb(_dos_ds, __tb + 32); int32_t greenSize = _farpeekb(_dos_ds, __tb + 33); int32_t greenPos = _farpeekb(_dos_ds, __tb + 34); int32_t blueSize = _farpeekb(_dos_ds, __tb + 35); int32_t bluePos = _farpeekb(_dos_ds, __tb + 36); d->format.redBits = redSize; d->format.redShift = redPos; d->format.redMask = ((1U << redSize) - 1) << redPos; d->format.greenBits = greenSize; d->format.greenShift = greenPos; d->format.greenMask = ((1U << greenSize) - 1) << greenPos; d->format.blueBits = blueSize; d->format.blueShift = bluePos; d->format.blueMask = ((1U << blueSize) - 1) << bluePos; } // Store physical address in lfb field temporarily (will be remapped) d->lfb = (uint8_t *)(uintptr_t)physAddr; } // ============================================================ // mapLfb // ============================================================ // // Maps the video card's physical LFB address into the DPMI linear // address space, then converts it to a near pointer for direct C // access. // // The mapping process has three steps: // 1. __dpmi_physical_address_mapping() — asks the DPMI host to // create a linear address mapping for the physical framebuffer. // This is necessary because DPMI runs in protected mode with // paging; physical addresses aren't directly accessible. // 2. __dpmi_lock_linear_region() — pins the mapped pages so they // can't be swapped out. The LFB is memory-mapped I/O to the // video card; paging it would be catastrophic. // 3. __djgpp_nearptr_enable() — disables DJGPP's default segment // limit checking so we can use plain C pointers to access the // LFB address. Without this, all LFB access would require far // pointer calls (_farpokeb etc.), which are much slower because // each one involves a segment register load. // // Why near pointers: the performance difference is dramatic. 
// platformFlushRect() copies thousands of dwords per frame using // rep movsl — this only works with near pointers. Far pointer access // would add ~10 cycles per byte and make 60fps impossible on a 486. // // The final pointer calculation adds __djgpp_conventional_base, which // is the offset DJGPP applies to convert linear addresses to near // pointer addresses (compensating for the DS segment base). static int32_t mapLfb(DisplayT *d, uint32_t physAddr) { __dpmi_meminfo info; uint32_t fbSize = (uint32_t)d->pitch * (uint32_t)d->height; info.address = physAddr; info.size = fbSize; if (__dpmi_physical_address_mapping(&info) != 0) { fprintf(stderr, "VBE: Failed to map LFB at 0x%08lX\n", (unsigned long)physAddr); return -1; } __dpmi_meminfo lockInfo; lockInfo.address = info.address; lockInfo.size = fbSize; __dpmi_lock_linear_region(&lockInfo); if (__djgpp_nearptr_enable() == 0) { fprintf(stderr, "VBE: Failed to enable near pointers\n"); return -1; } // Convert linear address to near pointer by adding the DS base offset d->lfb = (uint8_t *)(info.address + __djgpp_conventional_base); return 0; } // ============================================================ // platformAltScanToChar // ============================================================ char platformAltScanToChar(int32_t scancode) { if (scancode < 0 || scancode > 255) { return 0; } return sAltScanToAscii[scancode]; } // ============================================================ // platformFlushRect // ============================================================ // // Copies a dirty rectangle from the system RAM backbuffer to the LFB. // This is the critical path for display updates — the compositor calls // it once per dirty rect per frame. // // Two code paths: // 1. Full-width: if the rect spans the entire scanline (rowBytes == // pitch), collapse all rows into a single large rep movsl. This // avoids per-row loop overhead and is the common case for full- // screen redraws. // 2. 
Partial-width: copy each scanline individually with rep movsl, // advancing src/dst by pitch (not rowBytes) between rows. // // rep movsl is used instead of memcpy() because on 486/Pentium, GCC's // memcpy may not generate the optimal dword-aligned string move, and // the DJGPP C library's memcpy isn't always tuned for large copies. // The explicit asm guarantees exactly the instruction sequence we want. // // __builtin_expect hints tell GCC to generate branch-free fast paths // for the common cases (non-zero w/h, no trailing bytes). void platformFlushRect(const DisplayT *d, const RectT *r) { int32_t bpp = d->format.bytesPerPixel; int32_t x = r->x; int32_t y = r->y; int32_t w = r->w; int32_t h = r->h; if (__builtin_expect(w <= 0 || h <= 0, 0)) { return; } int32_t rowBytes = w * bpp; int32_t pitch = d->pitch; uint8_t *src = d->backBuf + y * pitch + x * bpp; uint8_t *dst = d->lfb + y * pitch + x * bpp; // Full-width flush: single large copy if (rowBytes == pitch) { int32_t totalBytes = pitch * h; int32_t dwords = totalBytes >> 2; int32_t remainder = totalBytes & 3; __asm__ __volatile__ ( "rep movsl" : "+D"(dst), "+S"(src), "+c"(dwords) : : "memory" ); while (remainder-- > 0) { *dst++ = *src++; } } else { // Partial scanlines — copy row by row with rep movsd int32_t dwords = rowBytes >> 2; int32_t remainder = rowBytes & 3; for (int32_t i = 0; i < h; i++) { int32_t dc = dwords; uint8_t *s = src; uint8_t *dd = dst; __asm__ __volatile__ ( "rep movsl" : "+D"(dd), "+S"(s), "+c"(dc) : : "memory" ); if (__builtin_expect(remainder > 0, 0)) { int32_t rem = remainder; while (rem-- > 0) { *dd++ = *s++; } } src += pitch; dst += pitch; } } } // ============================================================ // platformInit // ============================================================ void platformInit(void) { // Disable Ctrl+C/Break so the user can't accidentally kill the // GUI while in graphics mode (which would leave the display in // an unusable state without restoring text 
mode first). signal(SIGINT, SIG_IGN); } // ============================================================ // platformKeyboardGetModifiers // ============================================================ // // Returns the current modifier key state via INT 16h function 12h // (enhanced get extended shift flags). The low byte contains: // bit 0 = right shift, bit 1 = left shift // bit 2 = ctrl, bit 3 = alt // The widget system uses these bits for keyboard accelerators // (Alt+key) and text editing shortcuts (Ctrl+C/V/X). int32_t platformKeyboardGetModifiers(void) { __dpmi_regs r; memset(&r, 0, sizeof(r)); r.x.ax = 0x1200; __dpmi_int(0x16, &r); return r.x.ax & 0xFF; } // ============================================================ // platformKeyboardRead // ============================================================ // // Non-blocking keyboard read using enhanced INT 16h functions. // // Uses the "enhanced" functions (10h/11h) rather than the original // (00h/01h) because the originals can't distinguish between grey // and numpad arrow keys, and they don't report F11/F12. The enhanced // functions have been standard since AT-class machines (1984). // // The two-step peek-then-read is necessary because function 10h // (read key) blocks until a key is available — there's no non-blocking // read in the BIOS API. Function 11h (check key) peeks without // consuming, letting us poll without blocking the event loop. bool platformKeyboardRead(PlatformKeyEventT *evt) { __dpmi_regs r; // Peek: function 11h sets ZF if buffer is empty r.x.ax = 0x1100; __dpmi_int(0x16, &r); // Test the Zero Flag (bit 6 of the flags register) if (r.x.flags & 0x40) { return false; } // Consume: function 10h removes the key from the BIOS buffer. // AH = scan code, AL = ASCII character (0 for extended keys). 
r.x.ax = 0x1000; __dpmi_int(0x16, &r); evt->scancode = (r.x.ax >> 8) & 0xFF; evt->ascii = r.x.ax & 0xFF; // Enhanced INT 16h uses 0xE0 as the ASCII byte for grey/extended // keys (arrows, Home, End, Insert, Delete on 101-key keyboards). // Normalize to 0 so the rest of the codebase can use a single // "ascii == 0 means extended key, check scancode" convention. if (evt->ascii == 0xE0) { evt->ascii = 0; } return true; } // ============================================================ // platformMouseInit // ============================================================ // // Initializes the INT 33h mouse driver. The mouse driver is a TSR // (or emulated by the DOS environment) that tracks position and // buttons independently of the application. // // We must set the movement range to match our VESA resolution, // because the default range may be 640x200 (CGA text mode). // Without this, mouse coordinates would be wrong or clipped. // // The hardware cursor is never shown — DVX composites its own // software cursor on top of the backbuffer. We only use INT 33h // for position/button state via polling (function 03h). void platformMouseInit(int32_t screenW, int32_t screenH) { __dpmi_regs r; // Function 00h: reset driver, detect mouse hardware memset(&r, 0, sizeof(r)); r.x.ax = 0x0000; __dpmi_int(0x33, &r); // Function 07h: set horizontal min/max range memset(&r, 0, sizeof(r)); r.x.ax = 0x0007; r.x.cx = 0; r.x.dx = screenW - 1; __dpmi_int(0x33, &r); // Function 08h: set vertical min/max range memset(&r, 0, sizeof(r)); r.x.ax = 0x0008; r.x.cx = 0; r.x.dx = screenH - 1; __dpmi_int(0x33, &r); // Function 04h: warp cursor to center of screen memset(&r, 0, sizeof(r)); r.x.ax = 0x0004; r.x.cx = screenW / 2; r.x.dx = screenH / 2; __dpmi_int(0x33, &r); } // ============================================================ // platformMousePoll // ============================================================ // // Reads current mouse state via INT 33h function 03h. 
// Returns: CX=X position, DX=Y position, BX=button state // (bit 0 = left, bit 1 = right, bit 2 = middle). // // Polling is used instead of a callback/event model because the // DVX event loop already runs at frame rate. Installing a real-mode // callback for mouse events would add DPMI mode-switch overhead // on every mickeyed movement, which is wasteful when we only sample // once per frame anyway. void platformMousePoll(int32_t *mx, int32_t *my, int32_t *buttons) { __dpmi_regs r; memset(&r, 0, sizeof(r)); r.x.ax = 0x0003; __dpmi_int(0x33, &r); *mx = r.x.cx; *my = r.x.dx; *buttons = r.x.bx; } // ============================================================ // platformSpanCopy8 // ============================================================ // // Copies 'count' 8-bit pixels from src to dst using dword-aligned // rep movsl for the bulk transfer. // // All span operations (Copy8/16/32, Fill8/16/32) follow the same // pattern: align to a dword boundary, do the bulk as rep movsl or // rep stosl, then handle the remainder. This pattern exists because // on 486/Pentium, misaligned dword moves incur a 3-cycle penalty per // access. Aligning first ensures the critical rep loop runs at full // bus speed. // // rep movsl moves 4 bytes per iteration with hardware loop decrement, // which is faster than a C for-loop — the CPU string move pipeline // optimizes sequential memory access patterns. 
// A count of zero or less copies nothing.
void platformSpanCopy8(uint8_t *dst, const uint8_t *src, int32_t count) {
    // Reject empty/negative spans up front: "count & 3" is non-zero for
    // negative counts and would otherwise copy stray trailing bytes.
    if (count <= 0) {
        return;
    }

    // Align dst to a dword boundary with byte copies
    while (((uintptr_t)dst & 3) && count > 0) {
        *dst++ = *src++;
        count--;
    }

    // Bulk copy. The "+D"/"+S"/"+c" operands are read-write, so after
    // the asm dst and src already point past the copied dwords.
    int32_t dwordCount = count >> 2;

    if (dwordCount > 0) {
        __asm__ __volatile__ (
            "rep movsl"
            : "+D"(dst), "+S"(src), "+c"(dwordCount)
            :
            : "memory"
        );
    }

    // Up to three trailing bytes
    for (int32_t rem = count & 3; rem > 0; rem--) {
        *dst++ = *src++;
    }
}


// ============================================================
// platformSpanCopy16
// ============================================================
//
// Copies 'count' 16-bit pixels. Since each pixel is 2 bytes, we
// only need to check bit 1 of the address for dword alignment
// (bit 0 is always clear for 16-bit aligned data). A single
// leading pixel copy brings us to a dword boundary, then rep movsl
// copies pixel pairs as dwords. A count of zero or less copies nothing.

void platformSpanCopy16(uint8_t *dst, const uint8_t *src, int32_t count) {
    // "count & 1" is non-zero for odd negative counts, so bail out early
    if (count <= 0) {
        return;
    }

    // Copy one pixel to reach dword alignment if needed
    if ((uintptr_t)dst & 2) {
        *(uint16_t *)dst = *(const uint16_t *)src;
        dst += 2;
        src += 2;
        count--;
    }

    // Pixel pairs as dwords; the asm leaves dst/src past the copied data
    int32_t dwordCount = count >> 1;

    if (dwordCount > 0) {
        __asm__ __volatile__ (
            "rep movsl"
            : "+D"(dst), "+S"(src), "+c"(dwordCount)
            :
            : "memory"
        );
    }

    // Trailing odd pixel
    if (count & 1) {
        *(uint16_t *)dst = *(const uint16_t *)src;
    }
}


// ============================================================
// platformSpanCopy32
// ============================================================
//
// 32-bit pixels are inherently dword-aligned, so no alignment
// preamble is needed — straight to rep movsl.
// 'count' is the number of 32-bit pixels; zero or less copies nothing.
void platformSpanCopy32(uint8_t *dst, const uint8_t *src, int32_t count) {
    // A negative count would be loaded into the rep counter as a huge
    // unsigned value and overrun both buffers.
    if (count <= 0) {
        return;
    }

    __asm__ __volatile__ (
        "rep movsl"
        : "+D"(dst), "+S"(src), "+c"(count)
        :
        : "memory"
    );
}


// ============================================================
// platformSpanFill8
// ============================================================
//
// Fills 'count' 8-bit pixels with a single color value.
// The 8-bit value is replicated into all four bytes of a dword so
// that rep stosl writes 4 identical pixels per iteration. This is
// 4x faster than byte-at-a-time for large fills (window backgrounds,
// screen clears). Only the low byte of 'color' is used; a count of
// zero or less writes nothing.

void platformSpanFill8(uint8_t *dst, uint32_t color, int32_t count) {
    // "count & 3" is non-zero for negative counts, so reject them here
    if (count <= 0) {
        return;
    }

    uint8_t  c     = (uint8_t)color;
    uint32_t dword = (uint32_t)c * 0x01010101u;   // c in all four bytes

    // Align to 4 bytes — hinted as the unlikely case
    if (__builtin_expect((uintptr_t)dst & 3, 0)) {
        while (((uintptr_t)dst & 3) && count > 0) {
            *dst++ = c;
            count--;
        }
    }

    // Bulk fill; the asm leaves dst pointing past the filled dwords
    int32_t dwordCount = count >> 2;

    if (dwordCount > 0) {
        __asm__ __volatile__ (
            "rep stosl"
            : "+D"(dst), "+c"(dwordCount)
            : "a"(dword)
            : "memory"
        );
    }

    // Up to three trailing bytes
    for (int32_t rem = count & 3; rem > 0; rem--) {
        *dst++ = c;
    }
}


// ============================================================
// platformSpanFill16
// ============================================================
//
// Fills 'count' 16-bit pixels. Two pixels are packed into a dword
// (low half = first pixel, high half = second pixel) so rep stosl
// writes 2 pixels per iteration.
// Only the low 16 bits of 'color' are used; a count of zero or less
// writes nothing.
void platformSpanFill16(uint8_t *dst, uint32_t color, int32_t count) {
    // "count & 1" is non-zero for odd negative counts, so reject them here
    if (count <= 0) {
        return;
    }

    uint16_t c = (uint16_t)color;

    // Handle odd leading pixel for dword alignment
    if ((uintptr_t)dst & 2) {
        *(uint16_t *)dst = c;
        dst += 2;
        count--;
    }

    // Fill pairs of pixels as 32-bit dwords; the asm leaves dst
    // pointing past the filled data
    int32_t dwordCount = count >> 1;

    if (dwordCount > 0) {
        uint32_t dword = ((uint32_t)c << 16) | c;

        __asm__ __volatile__ (
            "rep stosl"
            : "+D"(dst), "+c"(dwordCount)
            : "a"(dword)
            : "memory"
        );
    }

    // Handle trailing odd pixel
    if (count & 1) {
        *(uint16_t *)dst = c;
    }
}


// ============================================================
// platformSpanFill32
// ============================================================
//
// 32-bit fill is the simplest case — each pixel is already a dword,
// so rep stosl writes exactly one pixel per iteration with no
// alignment or packing concerns. A count of zero or less writes
// nothing.

void platformSpanFill32(uint8_t *dst, uint32_t color, int32_t count) {
    // A negative count would be loaded into the rep counter as a huge
    // unsigned value and overrun the destination.
    if (count <= 0) {
        return;
    }

    __asm__ __volatile__ (
        "rep stosl"
        : "+D"(dst), "+c"(count)
        : "a"(color)
        : "memory"
    );
}


// ============================================================
// platformValidateFilename — DOS 8.3 filename validation
// ============================================================
//
// Validates that a filename conforms to DOS 8.3 conventions:
//   - Base name: 1-8 chars, extension: 0-3 chars, one dot max
//   - No spaces or special characters that DOS can't handle
//   - Not a reserved device name (CON, PRN, AUX, NUL, COMn, LPTn)
//
// The reserved name check compares the base name only (before the
// dot), case-insensitive, because DOS treats "CON.TXT" the same
// as the CON device — the extension is ignored for device names.
//
// Returns NULL on success, or a human-readable error string on failure.
// On non-DOS platforms, this function would be replaced with one that
// validates for that platform's filesystem rules.
// Returns non-zero when the first baseLen characters of name spell a
// DOS device name, ignoring case. Callers guarantee 1 <= baseLen <= 8.
static int isReservedDosDevice(const char *name, int32_t baseLen) {
    static const char *const devices[] = {
        "CON", "PRN", "AUX", "NUL",
        "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
        "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9"
    };

    for (size_t d = 0; d < sizeof(devices) / sizeof(devices[0]); d++) {
        const char *dev = devices[d];

        if ((int32_t)strlen(dev) != baseLen) {
            continue;
        }

        int32_t k = 0;

        while (k < baseLen && (char)toupper((unsigned char)name[k]) == dev[k]) {
            k++;
        }

        if (k == baseLen) {
            return 1;
        }
    }

    return 0;
}


const char *platformValidateFilename(const char *name) {
    // Printable characters that DOS still refuses in a file name
    static const char forbidden[] = " \"*+,/:;<=>?[\\]|";

    if (name == NULL || *name == '\0') {
        return "Filename must not be empty.";
    }

    // The LAST dot separates base from extension; any earlier dot is
    // reported further down, after the length checks.
    const char *lastDot = strrchr(name, '.');
    int32_t     baseLen = lastDot ? (int32_t)(lastDot - name) : (int32_t)strlen(name);
    int32_t     extLen  = lastDot ? (int32_t)strlen(lastDot + 1) : 0;

    if (baseLen < 1 || baseLen > 8) {
        return "Filename must be 1-8 characters before the extension.";
    }

    if (extLen > 3) {
        return "Extension must be 3 characters or fewer.";
    }

    // Every character other than a dot must be printable ASCII
    // ('!'..'~') and must not appear in the forbidden set.
    for (int32_t i = 0; name[i] != '\0'; i++) {
        char ch = name[i];

        if (ch == '.') {
            continue;
        }

        int printable = (ch >= '!' && ch <= '~');

        if (!printable || strchr(forbidden, ch) != NULL) {
            return "Filename contains invalid characters.";
        }
    }

    // A dot inside the base part means the name has more than one dot
    if (lastDot != NULL && memchr(name, '.', (size_t)baseLen) != NULL) {
        return "Filename may contain only one dot.";
    }

    // Device names are matched on the base only, case-insensitively
    if (isReservedDosDevice(name, baseLen)) {
        return "That name is a reserved device name.";
    }

    return NULL;
}


// ============================================================
// platformVideoInit
// ============================================================
//
// Complete video initialization sequence:
//   1. findBestMode() — enumerate VESA modes and pick the best match
//   2. setVesaMode() — actually switch to the chosen mode with LFB
//   3. mapLfb() — DPMI-map the physical framebuffer into linear memory
//   4. Allocate system RAM backbuffer (same size as LFB)
//   5.
Set up 8-bit palette if needed // 6. Initialize clip rect to full display // // The backbuffer is allocated in system RAM rather than drawing // directly to the LFB because: (a) reads from the LFB are extremely // slow on ISA/VLB/PCI (uncached MMIO), so any compositing that reads // pixels would crawl; (b) the dirty rect system only flushes changed // regions, so most of the LFB is never touched per frame. int32_t platformVideoInit(DisplayT *d, int32_t requestedW, int32_t requestedH, int32_t preferredBpp) { uint16_t bestMode; uint32_t physAddr; memset(d, 0, sizeof(*d)); // Find the best VESA mode if (findBestMode(requestedW, requestedH, preferredBpp, &bestMode, d) != 0) { return -1; } // Save the physical address before we overwrite it physAddr = (uint32_t)(uintptr_t)d->lfb; // Set the mode if (setVesaMode(bestMode) != 0) { return -1; } // Map the LFB if (mapLfb(d, physAddr) != 0) { return -1; } // Allocate backbuffer uint32_t fbSize = (uint32_t)d->pitch * (uint32_t)d->height; d->backBuf = (uint8_t *)malloc(fbSize); if (!d->backBuf) { fprintf(stderr, "VBE: Failed to allocate %lu byte backbuffer\n", (unsigned long)fbSize); __djgpp_nearptr_disable(); return -1; } memset(d->backBuf, 0, fbSize); // Set up palette for 8-bit mode if (d->format.bitsPerPixel == 8) { d->palette = (uint8_t *)malloc(768); if (!d->palette) { fprintf(stderr, "VBE: Failed to allocate palette\n"); free(d->backBuf); d->backBuf = NULL; __djgpp_nearptr_disable(); return -1; } dvxGeneratePalette(d->palette); platformVideoSetPalette(d->palette, 0, 256); } // Initialize clip rect to full display d->clipX = 0; d->clipY = 0; d->clipW = d->width; d->clipH = d->height; fprintf(stderr, "VBE: Mode 0x%04X set: %ldx%ldx%ld, pitch=%ld\n", bestMode, (long)d->width, (long)d->height, (long)d->format.bitsPerPixel, (long)d->pitch); return 0; } // ============================================================ // platformVideoSetPalette // ============================================================ // // Programs the 
VGA DAC palette registers via direct port I/O. // Port 0x3C8 = write index, port 0x3C9 = data (auto-increments). // // The VGA DAC expects 6-bit values (0-63) but our palette stores // 8-bit values (0-255), hence the >> 2 shift. This is standard // VGA behavior dating back to the original IBM VGA in 1987. // // Direct port I/O is used instead of VBE function 09h (set palette) // because the VGA DAC ports are faster (no BIOS call overhead) and // universally compatible — even VBE 3.0 cards still have the standard // VGA DAC at ports 0x3C8/0x3C9. void platformVideoSetPalette(const uint8_t *pal, int32_t firstEntry, int32_t count) { outportb(0x3C8, (uint8_t)firstEntry); for (int32_t i = 0; i < count; i++) { int32_t idx = (firstEntry + i) * 3; outportb(0x3C9, pal[idx + 0] >> 2); outportb(0x3C9, pal[idx + 1] >> 2); outportb(0x3C9, pal[idx + 2] >> 2); } } // ============================================================ // platformVideoShutdown // ============================================================ // // Tears down the graphics mode and restores standard 80x25 text // mode (BIOS mode 3) so the user gets their DOS prompt back. // Also frees the backbuffer, palette, and disables near pointers // (re-enables DJGPP's segment limit checking for safety). void platformVideoShutdown(DisplayT *d) { // INT 10h function 00h, mode 03h = 80x25 color text __dpmi_regs r; memset(&r, 0, sizeof(r)); r.x.ax = 0x0003; __dpmi_int(0x10, &r); if (d->backBuf) { free(d->backBuf); d->backBuf = NULL; } if (d->palette) { free(d->palette); d->palette = NULL; } d->lfb = NULL; __djgpp_nearptr_disable(); } // ============================================================ // platformYield // ============================================================ // // Cooperative yield to the DPMI host. In a multitasking DOS // environment (Windows 3.x DOS box, OS/2 VDM, or DESQview), // this gives other tasks a chance to run. 
Under a single-tasking // DPMI server (CWSDPMI) it's essentially a no-op, but it doesn't // hurt. Called once per event loop iteration when idle. void platformYield(void) { __dpmi_yield(); } // ============================================================ // setVesaMode // ============================================================ // // Sets a VBE video mode via function 0x4F02. Bit 14 of the mode // number tells the BIOS to enable the Linear Frame Buffer instead // of the default banked mode. This is the only mode we support. static int32_t setVesaMode(uint16_t mode) { __dpmi_regs r; memset(&r, 0, sizeof(r)); r.x.ax = 0x4F02; r.x.bx = mode | 0x4000; // bit 14 = use LFB __dpmi_int(0x10, &r); if (r.x.ax != 0x004F) { fprintf(stderr, "VBE: Failed to set mode 0x%04X (AX=0x%04X)\n", mode, r.x.ax); return -1; } return 0; }