// ============================================================================
// windrv.c - Main driver interface
//
// Implements the public windrv.h API by coordinating the NE loader,
// thunking layer, and Windows API stubs to load and use Windows 3.x
// display drivers from DOS programs compiled with DJGPP.
// ============================================================================

// NOTE(review): the header names of the system includes below are missing
// in this copy of the file (the <...> targets were lost) — restore them
// from the original source before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "windrv.h"
#include "wintypes.h"
#include "winddi.h"
#include "neformat.h"
#include "neload.h"
#include "thunk.h"
#include "winstub.h"
#include "log.h"

// ============================================================================
// Driver instance structure (opaque handle)
// ============================================================================

// One instance is calloc()'d per loaded driver by wdrvLoadDriver, so every
// field starts out zero / false / NULL.
struct WdrvDriverS {
    NeModuleT neMod;
    char      filePath[256];

    // DDI entry point addresses (16-bit selector:offset)
    struct {
        uint16_t sel;
        uint16_t off;
        bool     present;
    } ddiEntry[DDI_MAX_ORDINAL];

    // Device info from Enable (style=0) call
    GdiInfo16T gdiInfo;
    bool       gdiInfoValid;

    // GDI objects embedded within DGROUP.
    // Windows 3.x drivers expect all GDI objects (PDEVICE, brush,
    // drawMode) to share the same segment, because in Win3.1 they
    // are all in the global GDI heap. When the driver does e.g.
    // "lds si, lpBrush" it expects DS to still cover DGROUP.
    // We achieve this by allocating objects at offsets within the
    // DGROUP segment, so every far pointer uses autoDataSel.
    uint32_t dgroupObjBase;      // Start offset of object area in DGROUP

    // Physical device structure (within DGROUP)
    uint16_t pdevOff;            // Offset within DGROUP
    uint32_t pdevLinear;         // Linear address for C access
    uint32_t pdevSize;           // Allocated size

    // Logical brush (within DGROUP, input to RealizeObject)
    uint16_t logBrushOff;
    uint32_t logBrushLinear;

    // Physical brush (within DGROUP, output of RealizeObject)
    uint16_t brushOff;
    uint32_t brushLinear;
    uint32_t brushRealizedColor; // Color of last realized brush
    bool     brushRealized;

    // Logical pen (within DGROUP, input to RealizeObject)
    uint16_t logPenOff;
    uint32_t logPenLinear;

    // Physical pen (within DGROUP, output of RealizeObject)
    uint16_t penOff;
    uint32_t penLinear;
    uint32_t penRealizedColor;
    bool     penRealized;

    // Physical color (within DGROUP, output of ColorInfo)
    uint16_t physColorOff;
    uint32_t physColorLinear;

    // Draw mode (within DGROUP)
    uint16_t drawModeOff;
    uint32_t drawModeLinear;

    // Current state
    bool     enabled;
    uint32_t currentColor;

    // Video RAM mapping
    void    *vramPtr;
    uint32_t vramPhysAddr;
    uint32_t vramSize;
    uint32_t vramLinear;
    int32_t  pitch;

    // Display Y offset: the S3 driver writes an 8x8 color brush pattern
    // to a fixed VRAM location (~(144,1)-(151,8)) during dithered fills.
    // We shift the CRTC display start down by this many scanlines so the
    // scratch area is off-screen, and add the offset to all Y coordinates.
    int16_t dispYOffset;
    bool    isS3;
};

// ============================================================================
// Global state
// ============================================================================

static ThunkContextT gThunkCtx;
static StubContextT  gStubCtx;
static bool          gInitialized = false;   // Set by wdrvInit, cleared by wdrvShutdown
static int32_t       gLastError   = WDRV_OK; // Returned by wdrvInit on failure after setError()
static bool          gDebug       = false;
static bool          gIsS3        = false;

// Forward declarations
static FarPtr16T importResolver(const char *moduleName, uint16_t ordinal, const char *funcName);
static bool resolveDriverEntries(struct WdrvDriverS *drv);
static bool extendDgroupForObjects(struct WdrvDriverS *drv);
static bool allocPDevice(struct WdrvDriverS *drv);
static bool allocDrawMode(struct WdrvDriverS *drv);
static bool allocBrushBuffers(struct WdrvDriverS *drv);
static bool allocPenBuffers(struct WdrvDriverS *drv);
static bool realizeBrush(struct WdrvDriverS *drv, uint32_t color);
static bool realizePen(struct WdrvDriverS *drv, uint32_t color);
static uint32_t colorToPhys(struct WdrvDriverS *drv, uint32_t colorRef);
static void setDisplayStart(struct WdrvDriverS *drv, uint32_t byteOffset);
static void freeDrawObjects(struct WdrvDriverS *drv);
static uint16_t alloc16BitBlock(uint32_t size, uint32_t *linearOut);
static void free16BitBlock(uint16_t sel, uint32_t linear);
static void setError(int32_t err);
static void waitForEngine(void);
static void dbg(const char *fmt, ...);
static void patchPrologs(NeModuleT *mod);
static void patchVflatdStackBug(NeModuleT *mod);
static void patchVflatdBypassCall(NeModuleT *mod);
static bool installInt10hReflector(void);
static void removeInt10hReflector(void);
static bool installDpmi300Proxy(void);
static void removeDpmi300Proxy(void);
static bool patchDoInt10h(struct WdrvDriverS *drv);
static bool patchBiosDataAccess(struct WdrvDriverS *drv);
static void patchWinFlags(struct WdrvDriverS *drv, uint16_t oldFlags, uint16_t newFlags);
static bool installInt2FhHandler(void);
static void removeInt2FhHandler(void);
static bool installExceptionCapture(void);
static void removeExceptionCapture(void);

// ============================================================================
// INT 10h (Video BIOS) reflector
//
// Win 3.x display drivers call INT 10h for video mode setting and BIOS
// queries. In protected mode, these calls won't reach the real-mode BIOS
// unless we intercept them and use DPMI to simulate a real-mode interrupt.
// ============================================================================

static __dpmi_paddr gOldInt10hVec;
static bool         gInt10hInstalled = false;

// Globals for the raw INT 10h handler assembly stub.
// Non-static so the asm symbols (prefixed with _) are accessible.
// gInt10hDsSel is also read by the raw exception handlers in this file,
// which load it into FS/DS/ES/SS before calling faultWorker.
uint16_t gInt10hDsSel;                                     // DJGPP DS selector
uint32_t gInt10hSavedSS;                                   // Interrupted SS
uint32_t gInt10hSavedESP;                                  // Interrupted ESP
uint32_t gInt10hSavedFS;                                   // Interrupted FS
uint8_t  gInt10hStack[4096] __attribute__((aligned(16)));  // Handler stack
uint32_t gInt10hStackTop;                                  // Top of handler stack

// ============================================================================
// Exception capture - captures primary fault CS:EIP before DJGPP's handler
// (which may itself crash handling exceptions from 16-bit code).
//
// DPMI 0.9 exception frame on the locked exception stack:
//   ESP+0x00: Return EIP (to DPMI host, for RETF)
//   ESP+0x04: Return CS
//   ESP+0x08: Error code
//   ESP+0x0C: Faulting EIP
//   ESP+0x10: Faulting CS
//   ESP+0x14: Faulting EFLAGS
//   ESP+0x18: Faulting ESP
//   ESP+0x1C: Faulting SS
// ============================================================================

// Written by the raw exception handlers, read by faultWorker.
// gFaultCaptured is 0 until the first fault; the handlers set it to 1 and
// chain any later fault to the previously installed handler.
volatile uint32_t gFaultCaptured = 0;
volatile uint32_t gFaultNum = 0;
volatile uint32_t gFaultErr = 0;
volatile uint32_t gFaultEIP = 0;
volatile uint32_t gFaultCS  = 0;
volatile uint32_t gFaultESP = 0;
volatile uint32_t gFaultSS  = 0;
volatile uint32_t gFaultEAX = 0;
volatile uint32_t gFaultEBX = 0;
volatile uint32_t gFaultECX = 0;
volatile uint32_t gFaultEDX = 0;
volatile uint32_t gFaultESI = 0;
volatile uint32_t gFaultEDI = 0;
volatile uint32_t gFaultEBP = 0;
volatile uint32_t gFaultDS  = 0;
volatile uint32_t gFaultES  = 0;

// Private stack the exception handlers switch to before calling faultWorker.
uint8_t  gFaultStack[4096] __attribute__((aligned(16)));
uint32_t gFaultStackTop;

// Packed 48-bit far pointers for chaining to old exception handlers.
// ljmp indirect reads offset32 + selector16 (6 bytes), so no padding allowed.
typedef struct __attribute__((packed)) {
    uint32_t offset;
    uint16_t selector;
} FarPtr48T;

// Targets of the "ljmp *%cs:..." chain jumps in the raw handlers.
FarPtr48T gOldExc0dFar;
FarPtr48T gOldExc0eFar;

static __dpmi_paddr gOldExc0D;
static __dpmi_paddr gOldExc0E;
static bool         gExcCaptureInstalled = false;

static __dpmi_paddr gOldInt2FhVec;
FarPtr48T           gOldInt2FhFar;   // Chain target used by the raw INT 2Fh handler
static bool         gInt2FhInstalled = false;

// Saved register state for the raw INT 10h handler.
// Layout matches the save/restore sequence in the assembly stub.
typedef struct __attribute__((packed)) { uint32_t edi; // +0 uint32_t esi; // +4 uint32_t ebp; // +8 uint32_t _reserved; // +12 (alignment padding) uint32_t ebx; // +16 uint32_t edx; // +20 uint32_t ecx; // +24 uint32_t eax; // +28 uint32_t es; // +32 (zero-extended from 16-bit) uint32_t ds; // +36 (zero-extended from 16-bit) uint32_t eip; // +40 (from IRET frame) uint32_t cs; // +44 (from IRET frame) uint32_t eflags; // +48 (from IRET frame) } Int10FrameT; // Non-static so the asm symbol _gInt10Frame is accessible. Int10FrameT gInt10Frame; // Worker function called from the assembly stub. // Non-static so the asm symbol _int10hWorker is accessible. void int10hWorker(Int10FrameT *frame) { __dpmi_regs rRegs; memset(&rRegs, 0, sizeof(rRegs)); uint16_t func = (uint16_t)frame->eax; // NOTE: No dbg()/logErr() here — file I/O from the INT 10h handler corrupts // callback state (observed: GlobalDOSAlloc params garbled after 4F15h stub). rRegs.x.ax = (uint16_t)frame->eax; rRegs.x.bx = (uint16_t)frame->ebx; rRegs.x.cx = (uint16_t)frame->ecx; rRegs.x.dx = (uint16_t)frame->edx; rRegs.x.si = (uint16_t)frame->esi; rRegs.x.di = (uint16_t)frame->edi; rRegs.x.bp = (uint16_t)frame->ebp; // VBE Set Mode: translate S3 OEM modes to VESA standard modes. if (func == 0x4F02) { uint16_t origBX = rRegs.x.bx; uint16_t modeNum = origBX & 0x3FFF; uint16_t flags = origBX & 0xC000; uint16_t vesaMode = modeNum; switch (modeNum) { case 0x0201: vesaMode = 0x0101; break; // 640x480x256 case 0x0202: vesaMode = 0x0103; break; // 800x600x256 case 0x0203: vesaMode = 0x0103; break; // 800x600x256 case 0x0204: vesaMode = 0x0105; break; // 1024x768x256 case 0x0205: vesaMode = 0x0105; break; // 1024x768x256 } if (vesaMode != modeNum) { rRegs.x.bx = flags | vesaMode; logErr("INT10: VBE mode 0x%04X -> 0x%04X (S3 OEM -> VESA)\n", origBX, rRegs.x.bx); } } // Stub out VBE functions we cannot support. 
if (func == 0x4F0A || func == 0x4F15) { frame->eax = (frame->eax & 0xFFFF0000) | 0x0100; return; } // ================================================================ // Translate ES for real-mode reflection. // // The driver's ES is a PM selector. Real-mode INT 10h expects a // real-mode paragraph segment. If ES points to conventional memory // (<1MB), compute the real-mode segment directly. If ES points to // extended memory (>=1MB), bounce through the DOS transfer buffer. // // Only specific sub-functions use ES as a buffer pointer. Each // function family uses a different offset register: // VBE 4Fxx: ES:DI // AH=10h: ES:DX (palette) // AH=11h: ES:BP (font data) // AH=1Bh: ES:DI (state info) // ================================================================ uint16_t pmES = (uint16_t)frame->es; bool useTB = false; uint32_t tb = 0; uint32_t copySize = 0; bool copyIn = false; // PM -> transfer buffer before INT bool copyOut = false; // transfer buffer -> PM after INT // Identify which offset register this function uses, and determine // the exact copy size and direction. pmOff holds the PM-side // offset from the appropriate register; offReg identifies which // real-mode register to update after translation. 
// 0 = DI, 1 = DX, 2 = BP uint16_t pmOff = 0; int offReg = 0; bool needsES = false; uint8_t ah = (uint8_t)(func >> 8); uint8_t al = (uint8_t)(func & 0xFF); if ((func & 0xFF00) == 0x4F00) { // VBE functions — ES:DI offReg = 0; pmOff = rRegs.x.di; if (al == 0x00) { needsES = true; copyIn = true; copyOut = true; copySize = 512; } else if (al == 0x01) { needsES = true; copyOut = true; copySize = 256; } else if (al == 0x04) { needsES = true; copyIn = true; copyOut = true; copySize = 1024; } else if (al == 0x09) { needsES = true; copyIn = true; copySize = rRegs.x.cx * 4; if (copySize > 4096) { copySize = 4096; } } } else if (ah == 0x10) { // Palette functions — ES:DX offReg = 1; pmOff = rRegs.x.dx; if (al == 0x02) { // Set All Palette Registers: 17 bytes (16 regs + overscan) needsES = true; copyIn = true; copySize = 17; } else if (al == 0x09) { // Read All Palette Registers: 17 bytes needsES = true; copyOut = true; copySize = 17; } else if (al == 0x12) { // Set Block of DAC Color Registers: CX * 3 bytes needsES = true; copyIn = true; copySize = rRegs.x.cx * 3; if (copySize > 4096) { copySize = 4096; } } else if (al == 0x17) { // Read Block of DAC Color Registers: CX * 3 bytes needsES = true; copyOut = true; copySize = rRegs.x.cx * 3; if (copySize > 4096) { copySize = 4096; } } } else if (ah == 0x11) { // Character generator — ES:BP offReg = 2; pmOff = rRegs.x.bp; if (al == 0x00 || al == 0x10) { // Load User Font: CX chars * BH bytes/char needsES = true; copyIn = true; copySize = rRegs.x.cx * (rRegs.x.bx >> 8); if (copySize > 8192) { copySize = 8192; } } // AL=20/21 set interrupt vectors to ES:BP — the address must // point at resident data, not a temporary buffer, so skip. 
} else if (ah == 0x1B) { // Functionality/State Info — ES:DI, 64-byte buffer offReg = 0; pmOff = rRegs.x.di; needsES = true; copyOut = true; copySize = 64; } if (pmES != 0 && needsES && copySize > 0) { unsigned long esBase; __dpmi_get_segment_base_address(pmES, &esBase); if (esBase < 0x100000) { // Conventional memory: compute real-mode ES + offset directly. uint32_t linear = esBase + pmOff; rRegs.x.es = (uint16_t)(linear >> 4); uint16_t rmOff = (uint16_t)(linear & 0x0F); if (offReg == 0) { rRegs.x.di = rmOff; } else if (offReg == 1) { rRegs.x.dx = rmOff; } else { rRegs.x.bp = rmOff; } } else { // Extended memory: bounce through the DOS transfer buffer. tb = _go32_info_block.linear_address_of_transfer_buffer; if (copyIn) { movedata(pmES, pmOff, _dos_ds, tb, copySize); } rRegs.x.es = (uint16_t)(tb >> 4); uint16_t rmOff = (uint16_t)(tb & 0x0F); if (offReg == 0) { rRegs.x.di = rmOff; } else if (offReg == 1) { rRegs.x.dx = rmOff; } else { rRegs.x.bp = rmOff; } useTB = true; } } __dpmi_simulate_real_mode_interrupt(0x10, &rRegs); if (useTB && copyOut) { movedata(_dos_ds, tb, pmES, pmOff, copySize); } // Update return registers. frame->eax = (frame->eax & 0xFFFF0000) | rRegs.x.ax; frame->ebx = (frame->ebx & 0xFFFF0000) | rRegs.x.bx; frame->ecx = (frame->ecx & 0xFFFF0000) | rRegs.x.cx; frame->edx = (frame->edx & 0xFFFF0000) | rRegs.x.dx; frame->esi = (frame->esi & 0xFFFF0000) | rRegs.x.si; frame->ebp = (frame->ebp & 0xFFFF0000) | rRegs.x.bp; frame->eflags = (frame->eflags & 0xFFFF0000) | rRegs.x.flags; if (!needsES) { // No ES translation was done — pass through real-mode DI frame->edi = (frame->edi & 0xFFFF0000) | rRegs.x.di; } // Log VBE failures if ((func & 0xFF00) == 0x4F00) { uint16_t retAX = (uint16_t)frame->eax; if (retAX != 0x004F) { logErr("INT10: VBE func %04X returned AX=%04X (FAILED)\n", func, retAX); } } } // Raw INT 10h handler stub in assembly. 
//
// The _go32_dpmi_allocate_iret_wrapper mechanism fails when an interrupt
// fires during 16-bit code execution — software interrupts are dispatched
// on the CURRENT stack (DPMI spec), so the wrapper tries to build its
// _go32_dpmi_registers structure on the 16-bit stack with a different
// SS base, producing an invalid pointer (observed: regs=0x7a2, page fault).
//
// This handler avoids the problem by:
//   1. Saving ALL registers to a global structure using CS-relative
//      addressing (CS base == DS base in DJGPP)
//   2. Switching SS:ESP to a dedicated 32-bit handler stack in DJGPP's
//      data segment (so SS base == DS base, safe for C library calls)
//   3. Calling the C worker function
//   4. Restoring SS:ESP and all registers from the global structure
//   5. Returning via IRET
//
// NOT re-entrant — uses global state. Acceptable because the handler
// doesn't enable interrupts, and INT 10h is a software interrupt that
// cannot nest (our worker uses DPMI INT 31h, not INT 10h).
//
// Uses FS for writes (code segments are read-only in protected mode).
// CS-relative reads are fine (readable code segment).
//
// Notes on what the stub below actually does:
//   - _gInt10Frame+12 (_reserved) is neither written nor read.
//   - The IRET frame's EIP and CS are copied into the frame (+40/+44) but
//     only EFLAGS (+48) is written back to the stack before IRET.
//   - The interrupted FS is stored in _gInt10hSavedFS but is not reloaded
//     on exit; FS and GS are loaded from _gCbDgroupSel instead (a symbol
//     defined outside this section).
__asm__(
    " .text\n"
    " .p2align 4\n"
    " .globl _int10hRawHandler\n"
    "_int10hRawHandler:\n"

    // ---- Save original FS, then load FS with our writable DS selector ----
    " pushl %eax\n"
    " pushl %ecx\n"
    " xorl %eax, %eax\n"
    " movw %fs, %ax\n"
    " movw %cs:_gInt10hDsSel, %cx\n"
    " movw %cx, %fs\n"
    " movl %eax, %fs:_gInt10hSavedFS\n"
    " popl %ecx\n"
    " popl %eax\n"

    // ---- Save all GP registers to global frame via FS (writable) ----
    " movl %eax, %fs:_gInt10Frame+28\n"
    " movl %ecx, %fs:_gInt10Frame+24\n"
    " movl %edx, %fs:_gInt10Frame+20\n"
    " movl %ebx, %fs:_gInt10Frame+16\n"
    " movl %ebp, %fs:_gInt10Frame+8\n"
    " movl %esi, %fs:_gInt10Frame+4\n"
    " movl %edi, %fs:_gInt10Frame+0\n"

    // ---- Save segment registers (zero-extended to 32 bits) ----
    " xorl %eax, %eax\n"
    " movw %es, %ax\n"
    " movl %eax, %fs:_gInt10Frame+32\n"
    " movw %ds, %ax\n"
    " movl %eax, %fs:_gInt10Frame+36\n"

    // ---- Save IRET frame from interrupted stack (SS:ESP) ----
    " movl (%esp), %eax\n"
    " movl %eax, %fs:_gInt10Frame+40\n"
    " movl 4(%esp), %eax\n"
    " movl %eax, %fs:_gInt10Frame+44\n"
    " movl 8(%esp), %eax\n"
    " movl %eax, %fs:_gInt10Frame+48\n"

    // ---- Save SS:ESP and switch to DJGPP handler stack ----
    " movl %esp, %fs:_gInt10hSavedESP\n"
    " xorl %eax, %eax\n"
    " movw %ss, %ax\n"
    " movl %eax, %fs:_gInt10hSavedSS\n"
    " movw %cs:_gInt10hDsSel, %ax\n"
    " movw %ax, %ds\n"
    " movw %ax, %es\n"
    " movw %ax, %ss\n"
    " movl _gInt10hStackTop, %esp\n"

    // ---- Call C worker: int10hWorker(&gInt10Frame) ----
    " leal _gInt10Frame, %eax\n"
    " pushl %eax\n"
    " call _int10hWorker\n"
    " addl $4, %esp\n"

    // ---- Restore SS:ESP (back to interrupted code's stack) ----
    " movl %cs:_gInt10hSavedESP, %eax\n"
    " movl %cs:_gInt10hSavedSS, %ecx\n"
    " movw %cx, %ss\n"
    " movl %eax, %esp\n"

    // ---- Write modified EFLAGS back to IRET frame on stack ----
    // The C worker updates frame->eflags with real-mode return flags
    // (e.g. CF for VBE success/failure). Write it back so IRET uses it.
    " movl %cs:_gInt10Frame+48, %eax\n"
    " movl %eax, 8(%esp)\n"

    // ---- Restore GP registers from global frame (CS reads OK) ----
    " movl %cs:_gInt10Frame+0, %edi\n"
    " movl %cs:_gInt10Frame+4, %esi\n"
    " movl %cs:_gInt10Frame+8, %ebp\n"
    " movl %cs:_gInt10Frame+16, %ebx\n"
    " movl %cs:_gInt10Frame+20, %edx\n"
    " movl %cs:_gInt10Frame+24, %ecx\n"

    // ---- Restore segment registers (FS/GS always set to DGROUP) ----
    " movl %cs:_gCbDgroupSel, %eax\n"
    " movw %ax, %fs\n"
    " movw %ax, %gs\n"
    " movl %cs:_gInt10Frame+32, %eax\n"
    " movw %ax, %es\n"
    " movl %cs:_gInt10Frame+36, %eax\n"
    " movw %ax, %ds\n"

    // ---- Restore EAX last (was used as scratch) ----
    " movl %cs:_gInt10Frame+28, %eax\n"
    " iret\n"
);

// ============================================================================
// DPMI 0x300h (Simulate Real Mode Interrupt) proxy
//
// The VBESVGA driver's DoInt10h calls DPMI INT 31h AX=0300h from 16-bit
// code to perform real-mode INT 10h for VBE BIOS calls. CWSDPMI does
// not correctly service this DPMI function when the INT 31h originates
// from a 16-bit code segment inside a 32-bit DPMI client.
//
// Fix: after the driver's entry point has been called (which patches
// DoInt10h for 386 via SetupInt10h), we change the single "CD 31"
// (INT 31h) instruction in DoInt10h to "CD 64" (INT 64h). Our INT 64h
// handler reads the Real Mode Call Structure (RMCS) that DoInt10h built
// on the 16-bit stack, calls __dpmi_simulate_real_mode_interrupt from
// 32-bit code (which CWSDPMI handles correctly), and writes the results
// back to the RMCS so DoInt10h can unpack them normally.
// ============================================================================ #define DPMI300_INT_NUM 0x64 static __dpmi_paddr gOldDpmi300Vec; static bool gDpmi300Installed = false; // Globals for the raw handler assembly stub uint16_t gDpmi300DsSel; uint32_t gDpmi300SavedSS; uint32_t gDpmi300SavedESP; uint32_t gDpmi300SavedFS; uint32_t gDpmi300SavedDS; uint32_t gDpmi300SavedES; uint32_t gDpmi300SavedGS; uint32_t gDpmi300RmcsSel; // ES at time of interrupt (RMCS segment) uint32_t gDpmi300RmcsEdi; // EDI at time of interrupt (RMCS offset) uint32_t gDpmi300IntNum; // EBX at time of interrupt (BL=int number) uint8_t gDpmi300Stack[4096] __attribute__((aligned(16))); uint32_t gDpmi300StackTop; // Worker: reads RMCS, performs real-mode interrupt, writes results back. // The DPMI RMCS layout is byte-compatible with DJGPP's __dpmi_regs (50 bytes). void dpmi300Worker(void) { uint16_t rmcsSel = (uint16_t)gDpmi300RmcsSel; uint32_t rmcsOff = gDpmi300RmcsEdi; uint8_t intNum = (uint8_t)gDpmi300IntNum; __dpmi_regs regs; memset(®s, 0, sizeof(regs)); movedata(rmcsSel, rmcsOff, _my_ds(), (unsigned)®s, 50); dbg("DPMI300: INT %02Xh AX=%04X BX=%04X ES=%04X DI=%04X SS:SP=%04X:%04X\n", intNum, regs.x.ax, regs.x.bx, regs.x.es, regs.x.di, regs.x.ss, regs.x.sp); __dpmi_simulate_real_mode_interrupt(intNum, ®s); dbg("DPMI300: result AX=%04X\n", regs.x.ax); // Dump VBE info buffer contents for VBE 4F00h if (intNum == 0x10 && regs.x.ax == 0x004F) { uint32_t bufLin = (uint32_t)regs.x.es * 16 + regs.x.di; uint8_t hdr[32]; dosmemget(bufLin, 32, hdr); dbg("DPMI300: VBE buf[0..3]=%c%c%c%c ver=%02X%02X modes=%02X%02X:%02X%02X\n", hdr[0], hdr[1], hdr[2], hdr[3], hdr[5], hdr[4], hdr[0x0F], hdr[0x0E], hdr[0x11], hdr[0x10]); // Mode list pointer at offset 0x0E: offset(word) + segment(word) uint16_t modesOff = hdr[0x0E] | ((uint16_t)hdr[0x0F] << 8); uint16_t modesSeg = hdr[0x10] | ((uint16_t)hdr[0x11] << 8); dbg("DPMI300: VBE modes ptr %04X:%04X (buf at %04X:%04X)\n", modesSeg, modesOff, regs.x.es, 
regs.x.di); // Read first 16 mode numbers uint32_t modesLin = (uint32_t)modesSeg * 16 + modesOff; uint16_t modes[16]; dosmemget(modesLin, 32, modes); dbg("DPMI300: VBE modes:"); for (int i = 0; i < 16 && modes[i] != 0xFFFF; i++) { dbg(" %03X", modes[i]); } dbg("\n"); } movedata(_my_ds(), (unsigned)®s, rmcsSel, rmcsOff, 50); } extern void dpmi300RawHandler(void); // Raw INT 64h handler. Same save/restore pattern as the INT 10h reflector // but simpler: we only need the RMCS pointer (ES:EDI) and interrupt number // (BL) from the interrupted context. All GP and segment registers are // preserved across the call — the only visible side effect is that the // RMCS on the driver's stack is updated and the carry flag is cleared. __asm__( " .text\n" " .p2align 4\n" " .globl _dpmi300RawHandler\n" "_dpmi300RawHandler:\n" // ---- Save FS, load FS with our DS selector ---- " pushl %eax\n" " pushl %ecx\n" " xorl %eax, %eax\n" " movw %fs, %ax\n" " movw %cs:_gDpmi300DsSel, %cx\n" " movw %cx, %fs\n" " movl %eax, %fs:_gDpmi300SavedFS\n" // ---- Save communication values: ES (RMCS sel), EDI, EBX ---- " xorl %eax, %eax\n" " movw %es, %ax\n" " movl %eax, %fs:_gDpmi300SavedES\n" " movl %eax, %fs:_gDpmi300RmcsSel\n" " movl %edi, %fs:_gDpmi300RmcsEdi\n" " movl %ebx, %fs:_gDpmi300IntNum\n" // ---- Save remaining segment registers ---- " xorl %eax, %eax\n" " movw %ds, %ax\n" " movl %eax, %fs:_gDpmi300SavedDS\n" " movw %gs, %ax\n" " movl %eax, %fs:_gDpmi300SavedGS\n" // ---- Restore scratch, then PUSHAL to save all GP regs ---- " popl %ecx\n" " popl %eax\n" " pushal\n" // ---- Save interrupted SS:ESP and switch to handler stack ---- " movw %cs:_gDpmi300DsSel, %ax\n" " movw %ax, %fs\n" " movl %esp, %fs:_gDpmi300SavedESP\n" " xorl %eax, %eax\n" " movw %ss, %ax\n" " movl %eax, %fs:_gDpmi300SavedSS\n" " movw %fs:_gDpmi300DsSel, %ax\n" " movw %ax, %ds\n" " movw %ax, %es\n" " movw %ax, %ss\n" " movl _gDpmi300StackTop, %esp\n" // ---- Call C worker ---- " call _dpmi300Worker\n" // ---- Restore 
interrupted SS:ESP ---- " movl %cs:_gDpmi300SavedSS, %ecx\n" " movl %cs:_gDpmi300SavedESP, %eax\n" " movw %cx, %ss\n" " movl %eax, %esp\n" // ---- POPAL to restore all GP registers ---- " popal\n" // ---- Restore segment registers ---- " pushl %eax\n" " movl %cs:_gDpmi300SavedFS, %eax\n" " movw %ax, %fs\n" " movl %cs:_gDpmi300SavedGS, %eax\n" " movw %ax, %gs\n" " movl %cs:_gDpmi300SavedES, %eax\n" " movw %ax, %es\n" " movl %cs:_gDpmi300SavedDS, %eax\n" " movw %ax, %ds\n" " popl %eax\n" // ---- Clear carry flag in IRET frame EFLAGS (success) ---- " andl $0xFFFFFFFE, 8(%esp)\n" " iret\n" ); // Worker function for exception handler — logs full diagnostics and exits. // Non-static so the asm symbol _faultWorker is accessible. void faultWorker(void) { logErr("\n=== EXCEPTION #%" PRIu32 " ===\n", gFaultNum); logErr(" CS:EIP = %04" PRIX32 ":%08" PRIX32 " error=%04" PRIX32 "\n", gFaultCS, gFaultEIP, gFaultErr); logErr(" SS:ESP = %04" PRIX32 ":%08" PRIX32 "\n", gFaultSS, gFaultESP); logErr(" eax=%08" PRIX32 " ebx=%08" PRIX32 " ecx=%08" PRIX32 " edx=%08" PRIX32 "\n", gFaultEAX, gFaultEBX, gFaultECX, gFaultEDX); logErr(" esi=%08" PRIX32 " edi=%08" PRIX32 " ebp=%08" PRIX32 "\n", gFaultESI, gFaultEDI, gFaultEBP); logErr(" ds=%04" PRIX32 " es=%04" PRIX32 "\n", gFaultDS, gFaultES); // Dump instruction bytes at CS:EIP using _farpeekb // (movedata fails on 16-bit code segments in fault context) uint16_t faultSel = (uint16_t)gFaultCS; uint32_t faultOff = gFaultEIP; unsigned long csBase; if (__dpmi_get_segment_base_address(faultSel, &csBase) == 0) { unsigned csLimit = __dpmi_get_segment_limit(faultSel); logErr(" cs: base=%08lX limit=%04X\n", csBase, csLimit); logErr(" code:"); for (int i = 0; i < 16 && (faultOff + i) <= csLimit; i++) { logErr(" %02X", _farpeekb(faultSel, faultOff + i)); } logErr("\n"); } // Dump segment info for DS and ES unsigned long dsBase; unsigned long esBase; if ((uint16_t)gFaultDS != 0 && __dpmi_get_segment_base_address((uint16_t)gFaultDS, &dsBase) == 0) { 
logErr(" ds: base=%08lX\n", dsBase); } if ((uint16_t)gFaultES != 0 && __dpmi_get_segment_base_address((uint16_t)gFaultES, &esBase) == 0) { logErr(" es: base=%08lX\n", esBase); } // Dump 32 words from the faulting stack using _farpeekw if ((uint16_t)gFaultSS != 0) { unsigned ssLimit = __dpmi_get_segment_limit((uint16_t)gFaultSS); logErr(" ss: limit=%04X\n", ssLimit); logErr(" stack:"); for (int i = 0; i < 32 && (gFaultESP + i * 2 + 1) <= ssLimit; i++) { if (i == 16) { logErr("\n "); } logErr(" %04X", _farpeekw((uint16_t)gFaultSS, gFaultESP + i * 2)); } logErr("\n"); } // Exit cleanly via DOS __asm__ volatile ("movl $0x4CFF, %%eax; int $0x21" ::: "eax"); __builtin_unreachable(); } // Raw exception handlers for GPF (#13) and PF (#14). // // These capture fault state (GP registers, segment registers, instruction // bytes) then switch to a private stack and call faultWorker() to log // full diagnostics and exit cleanly (avoiding secondary crashes from // DJGPP's handler trying to process faults from 16-bit code). // // DPMI exception frame on stack: // ESP+0x00: Return EIP (to DPMI host, for RETF) // ESP+0x04: Return CS // ESP+0x08: Error code // ESP+0x0C: Faulting EIP // ESP+0x10: Faulting CS // ESP+0x14: Faulting EFLAGS // ESP+0x18: Faulting ESP // ESP+0x1C: Faulting SS // // After pushing EAX, offsets shift by +4. 
// General protection fault (#13) capture stub.
// On the first fault (gFaultCaptured == 0) it records the registers and the
// exception-frame fields into the gFault* globals, switches to gFaultStack
// and calls faultWorker. On any later fault it restores EAX and jumps to the
// previous handler through gOldExc0dFar. FS is loaded with gInt10hDsSel on
// both paths and is not restored before chaining. The faulting EFLAGS
// (frame offset 0x14) is not captured.
__asm__(
    " .text\n"
    " .p2align 4\n"
    " .globl _exc0dRawHandler\n"
    "_exc0dRawHandler:\n"
    " pushl %eax\n"
    " movw %cs:_gInt10hDsSel, %ax\n"
    " movw %ax, %fs\n"
    " cmpl $0, %fs:_gFaultCaptured\n"
    " jne 1f\n"

    // First fault — capture everything
    " movl $1, %fs:_gFaultCaptured\n"
    " movl $13, %fs:_gFaultNum\n"

    // Save GP registers via FS
    " popl %eax\n"
    " movl %eax, %fs:_gFaultEAX\n"
    " movl %ebx, %fs:_gFaultEBX\n"
    " movl %ecx, %fs:_gFaultECX\n"
    " movl %edx, %fs:_gFaultEDX\n"
    " movl %esi, %fs:_gFaultESI\n"
    " movl %edi, %fs:_gFaultEDI\n"
    " movl %ebp, %fs:_gFaultEBP\n"
    " xorl %eax, %eax\n"
    " movw %ds, %ax\n"
    " movl %eax, %fs:_gFaultDS\n"
    " movw %es, %ax\n"
    " movl %eax, %fs:_gFaultES\n"

    // Save exception frame fields (no pushed EAX shift now)
    " movl 0x08(%esp), %eax\n"
    " movl %eax, %fs:_gFaultErr\n"
    " movl 0x0C(%esp), %eax\n"
    " movl %eax, %fs:_gFaultEIP\n"
    " movl 0x10(%esp), %eax\n"
    " movl %eax, %fs:_gFaultCS\n"
    " movl 0x18(%esp), %eax\n"
    " movl %eax, %fs:_gFaultESP\n"
    " movl 0x1C(%esp), %eax\n"
    " movl %eax, %fs:_gFaultSS\n"

    // Switch to our private stack and call faultWorker
    " movw %fs:_gInt10hDsSel, %ax\n"
    " movw %ax, %ds\n"
    " movw %ax, %es\n"
    " movw %ax, %ss\n"
    " movl _gFaultStackTop, %esp\n"
    " call _faultWorker\n"

    // faultWorker doesn't return, but just in case:
    " hlt\n"

    "1:\n"
    // Secondary fault — chain to old handler
    " popl %eax\n"
    " ljmp *%cs:_gOldExc0dFar\n"
);

// Page fault (#14) capture stub. Identical to the #13 stub above except for
// the recorded exception number and the chain target (gOldExc0eFar).
__asm__(
    " .text\n"
    " .p2align 4\n"
    " .globl _exc0eRawHandler\n"
    "_exc0eRawHandler:\n"
    " pushl %eax\n"
    " movw %cs:_gInt10hDsSel, %ax\n"
    " movw %ax, %fs\n"
    " cmpl $0, %fs:_gFaultCaptured\n"
    " jne 1f\n"

    // First fault — capture everything
    " movl $1, %fs:_gFaultCaptured\n"
    " movl $14, %fs:_gFaultNum\n"
    " popl %eax\n"
    " movl %eax, %fs:_gFaultEAX\n"
    " movl %ebx, %fs:_gFaultEBX\n"
    " movl %ecx, %fs:_gFaultECX\n"
    " movl %edx, %fs:_gFaultEDX\n"
    " movl %esi, %fs:_gFaultESI\n"
    " movl %edi, %fs:_gFaultEDI\n"
    " movl %ebp, %fs:_gFaultEBP\n"
    " xorl %eax, %eax\n"
    " movw %ds, %ax\n"
    " movl %eax, %fs:_gFaultDS\n"
    " movw %es, %ax\n"
    " movl %eax, %fs:_gFaultES\n"
    " movl 0x08(%esp), %eax\n"
    " movl %eax, %fs:_gFaultErr\n"
    " movl 0x0C(%esp), %eax\n"
    " movl %eax, %fs:_gFaultEIP\n"
    " movl 0x10(%esp), %eax\n"
    " movl %eax, %fs:_gFaultCS\n"
    " movl 0x18(%esp), %eax\n"
    " movl %eax, %fs:_gFaultESP\n"
    " movl 0x1C(%esp), %eax\n"
    " movl %eax, %fs:_gFaultSS\n"
    " movw %fs:_gInt10hDsSel, %ax\n"
    " movw %ax, %ds\n"
    " movw %ax, %es\n"
    " movw %ax, %ss\n"
    " movl _gFaultStackTop, %esp\n"
    " call _faultWorker\n"
    " hlt\n"

    "1:\n"
    " popl %eax\n"
    " ljmp *%cs:_gOldExc0eFar\n"
);

// Raw INT 2Fh handler for Windows API emulation.
//
// Windows 3.x display drivers call INT 2Fh to check for the Windows
// Enhanced Mode environment. Without this handler, the calls are
// reflected to real mode where DOS returns "not installed", causing
// the driver's initialization to fail.
//
// Handled functions:
//   AX=1600h: Windows Enhanced Mode installation check
//             Returns AL=03h, AH=0Ah (Windows 3.10 Enhanced Mode)
//   AX=4000h-400Ah: Virtual DMA Services (VDS)
//             Returns carry clear (success, no-op)
//   AX=4010h+: Windows/386 VMM API calls
//             Returns AX=0 (not present, proceed normally)
//
// All other INT 2Fh calls are chained to the previous handler.
// This handler modifies only AX and returns via IRET, so no stack
// switching is needed (unlike the INT 10h handler).
extern void int2FhRawHandler(void); __asm__( " .text\n" " .p2align 4\n" " .globl _int2FhRawHandler\n" "_int2FhRawHandler:\n" " cmpw $0x1600, %ax\n" " je 1f\n" " cmpb $0x40, %ah\n" " je 3f\n" " ljmp *%cs:_gOldInt2FhFar\n" "1:\n" // Windows 3.10 Enhanced Mode is "running" " movw $0x0A03, %ax\n" " iret\n" "3:\n" // AH=40h: VDS and Windows/386 API calls // VDS calls (AL=00h-0Ah): return carry clear (success, no-op) // VMM calls (AL=10h+): return AX=0 (not present) " cmpb $0x0A, %al\n" " jbe 4f\n" // VMM/Win386 API: not present " xorw %ax, %ax\n" " iret\n" "4:\n" // VDS: success (carry clear) " clc\n" " iret\n" ); // ============================================================================ // Library initialization // ============================================================================ int32_t wdrvInit(void) { if (gInitialized) { return WDRV_OK; } // Initialize the thunking layer if (!thunkInit(&gThunkCtx)) { setError(WDRV_ERR_THUNK_FAILED); return gLastError; } // Initialize the Windows API stubs if (!stubInit(&gStubCtx, &gThunkCtx)) { thunkShutdown(&gThunkCtx); setError(WDRV_ERR_INIT); return gLastError; } // Install PM interrupt reflector for INT 10h. // CWSDPMI's default reflection doesn't work correctly when the // interrupt fires from 16-bit code segments (stack frame mismatch). if (!installInt10hReflector()) { logErr("windrv: warning: could not install INT 10h reflector\n"); } // Install DPMI 0x300h proxy on INT 64h. // CWSDPMI doesn't correctly handle INT 31h AX=0300h (simulate real-mode // interrupt) when called from 16-bit code segments within a 32-bit DPMI // client. DoInt10h in the VBESVGA driver calls INT 31h from 16-bit code // to perform VBE calls. We redirect those to our proxy which performs the // same operation from 32-bit code via __dpmi_simulate_real_mode_interrupt. if (!installDpmi300Proxy()) { logErr("windrv: warning: could not install DPMI 300h proxy\n"); } // Install PM handler for INT 2Fh (Windows API emulation). 
// The driver calls INT 2Fh AX=1600h to check for Windows Enhanced // Mode. Without this, the check fails and Enable() returns 0. // This raw handler only intercepts specific AX values and chains // to the old handler for everything else, so it's safe for // DJGPP/CWSDPMI internal INT 2Fh usage. if (!installInt2FhHandler()) { logErr("windrv: warning: could not install INT 2Fh handler\n"); } // Install exception capture to diagnose primary fault CS:EIP // (must be after installInt10hReflector which sets gInt10hDsSel) if (!installExceptionCapture()) { logErr("windrv: warning: could not install exception capture\n"); } // Enable near pointer access for direct memory operations if (__djgpp_nearptr_enable() == 0) { logErr("windrv: warning: near pointer access not available\n"); } gInitialized = true; setError(WDRV_OK); return WDRV_OK; } void wdrvShutdown(void) { if (!gInitialized) { return; } removeExceptionCapture(); removeInt2FhHandler(); removeDpmi300Proxy(); removeInt10hReflector(); stubShutdown(&gStubCtx); thunkShutdown(&gThunkCtx); __djgpp_nearptr_disable(); gInitialized = false; } // ============================================================================ // Driver loading // ============================================================================ WdrvHandleT wdrvLoadDriver(const char *driverPath) { if (!gInitialized) { setError(WDRV_ERR_INIT); return NULL; } struct WdrvDriverS *drv = (struct WdrvDriverS *)calloc(1, sizeof(struct WdrvDriverS)); if (!drv) { setError(WDRV_ERR_NO_MEMORY); return NULL; } strncpy(drv->filePath, driverPath, sizeof(drv->filePath) - 1); // Load the NE module if (gDebug) { neSetDebug(true); } if (!neLoadModule(&drv->neMod, driverPath, importResolver)) { setError(WDRV_ERR_LOAD_FAILED); free(drv); return NULL; } stubSetModule(&gStubCtx, &drv->neMod); if (gDebug) { neDumpModule(&drv->neMod); } // Extend DGROUP to include space for GDI objects (PDEVICE, brush, etc.) 
if (!extendDgroupForObjects(drv)) { setError(WDRV_ERR_NO_MEMORY); neUnloadModule(&drv->neMod); free(drv); return NULL; } // Set the driver's DGROUP selector so the thunk loads DS correctly gThunkCtx.dgroupSel = drv->neMod.autoDataSel; dbg("windrv: DGROUP selector = 0x%04X\n", gThunkCtx.dgroupSel); // Patch Windows PROLOG_0 sequences in all code segments. // In real Windows, the module loader converts the 3-byte prolog // "mov ax, ds; nop" (8C D8 90) to "mov ax, " (B8 xx xx) // so that AX always gets the correct DGROUP selector regardless of // the current DS value at function entry. Without this, internal // near/far calls within the driver (where AX has been clobbered) // will fault when the prolog tries to load DS from AX. patchPrologs(&drv->neMod); // Patch the VFLATD initialization routine's stack imbalance bug. // The function at seg5:0x2368 pushes 20 bytes of intermediate values // during API calls but never cleans them before ret. In Windows 3.x // the caller restores SP from BP so this is harmless, but our thunk // relies on a clean ret. patchVflatdStackBug(&drv->neMod); // Bypass the VFLATD API call for framebuffer mapping. // The driver checks [8889] to choose between VFLATD (VxD call through // a far pointer at [0D76]) and DPMI (INT 31h to map physical memory). // Since VFLATD isn't available, force the DPMI path which uses standard // DPMI functions (0800h, 0007h, 0008h) that CWSDPMI supports. 
patchVflatdBypassCall(&drv->neMod); // Resolve DDI entry points if (!resolveDriverEntries(drv)) { setError(WDRV_ERR_NO_ENTRY); neUnloadModule(&drv->neMod); free(drv); return NULL; } // Verify that at least Enable and Disable are present if (!drv->ddiEntry[DDI_ORD_ENABLE].present || !drv->ddiEntry[DDI_ORD_DISABLE].present) { logErr("windrv: driver missing Enable (%d) or Disable (%d)\n", drv->ddiEntry[DDI_ORD_ENABLE].present, drv->ddiEntry[DDI_ORD_DISABLE].present); setError(WDRV_ERR_NO_ENTRY); neUnloadModule(&drv->neMod); free(drv); return NULL; } // Patch DoInt10h's INT 31h -> INT 64h BEFORE calling the entry point. // The entry point calls SetupInt10h which self-modifies the Code segment // (patches PUSHAD/POPAD on 386). We patch first so that when the entry // point later calls DoInt10h for VBE queries, it uses our proxy. patchDoInt10h(drv); patchBiosDataAccess(drv); // Call the NE module entry point (driver_initialization). // This runs the driver's one-time init code: // - SetupInt10h: allocates a real-mode stack for VBE INT 10h calls // - dev_initialization: sets ScreenSelector, checks CPU type, VDD query // Without this, DoInt10h uses an uninitialized stack and all VBE calls // fail, causing the driver's Enable to hit its fatal error path. 
if (drv->neMod.neHeader.entryPointCS != 0) { uint16_t epSegIdx = drv->neMod.neHeader.entryPointCS - 1; if (epSegIdx < drv->neMod.segmentCount) { uint16_t epSel = drv->neMod.segments[epSegIdx].selector; uint16_t epOff = drv->neMod.neHeader.entryPointIP; dbg("windrv: calling entry point at %04X:%04X\n", epSel, epOff); uint32_t epResult = thunkCall16(&gThunkCtx, epSel, epOff, NULL, 0); dbg("windrv: entry point returned %u\n", (uint16_t)epResult); } } setError(WDRV_OK); return drv; } void wdrvUnloadDriver(WdrvHandleT handle) { if (!handle) { return; } freeDrawObjects(handle); // PDEVICE and other objects are in DGROUP - freed by neUnloadModule neUnloadModule(&handle->neMod); free(handle); } int32_t wdrvGetInfo(WdrvHandleT handle, WdrvInfoT *info) { if (!handle) { return WDRV_ERR_NOT_LOADED; } memset(info, 0, sizeof(WdrvInfoT)); memcpy(info->driverName, handle->neMod.moduleName, sizeof(info->driverName) - 1); info->driverName[sizeof(info->driverName) - 1] = '\0'; // If we've queried GDIINFO, fill in from that if (handle->gdiInfoValid) { info->driverVersion = handle->gdiInfo.dpVersion; info->maxWidth = handle->gdiInfo.dpHorzRes; info->maxHeight = handle->gdiInfo.dpVertRes; info->maxBpp = handle->gdiInfo.dpBitsPixel * handle->gdiInfo.dpPlanes; info->numColors = handle->gdiInfo.dpNumColors; info->rasterCaps = handle->gdiInfo.dpRaster; } info->hasBitBlt = handle->ddiEntry[DDI_ORD_BITBLT].present; info->hasOutput = handle->ddiEntry[DDI_ORD_OUTPUT].present; info->hasPixel = handle->ddiEntry[DDI_ORD_PIXEL].present; info->hasStretchBlt = handle->ddiEntry[DDI_ORD_STRETCHBLT].present; info->hasExtTextOut = handle->ddiEntry[DDI_ORD_EXTTEXTOUT].present; info->hasSetPalette = handle->ddiEntry[DDI_ORD_SETPALETTE].present; info->hasSetCursor = handle->ddiEntry[DDI_ORD_SETCURSOR].present; return WDRV_OK; } // ============================================================================ // Mode setting // ============================================================================ 
int32_t wdrvEnable(WdrvHandleT handle, int32_t width, int32_t height, int32_t bpp) { if (!handle) { return WDRV_ERR_NOT_LOADED; } (void)width; (void)height; (void)bpp; // Allocate the PDEVICE structure if (!allocPDevice(handle)) { setError(WDRV_ERR_NO_MEMORY); return gLastError; } // Allocate draw mode and physical objects if (!allocDrawMode(handle)) { setError(WDRV_ERR_NO_MEMORY); return gLastError; } if (!allocBrushBuffers(handle)) { setError(WDRV_ERR_NO_MEMORY); return gLastError; } if (!allocPenBuffers(handle)) { setError(WDRV_ERR_NO_MEMORY); return gLastError; } // ================================================================ // Enable the display driver (DDK standard order). // // WORD PASCAL Enable(LPDEVICE lpDevice, WORD style, // LPSTR lpDeviceType, LPSTR lpOutputFile, // LPGDIINFO lpData) // // Per the DDK and VBESVGA source, the correct call order is: // // Step 1: Enable(gdiInfoBuf, style=1/InquireInfo) — returns GDIINFO // lpDevice is a GDIINFO-sized buffer (NOT the PDEVICE). // The driver reads SYSTEM.INI settings and returns mode info. // // Step 2: Enable(pdevBuf, style=0/EnableDevice) — initializes device // lpDevice is the PDEVICE buffer. The driver copies its // physical device template there and sets the video mode. // ================================================================ // Allocate a 16-bit "DISPLAY" string for lpDeviceType uint32_t devTypeLin; uint16_t devTypeSel = alloc16BitBlock(16, &devTypeLin); if (devTypeSel) { memcpy((void *)devTypeLin, "DISPLAY", 8); } uint16_t dgSel = handle->neMod.autoDataSel; uint16_t params[9]; // ================================================================ // Step 1: Enable(style=1/InquireInfo) — get GDIINFO // // lpDevice = separate GDIINFO buffer (driver writes GDIINFO here). // [0x8894] starts at 0x00, so S3 driver runs full mode selection // (reads SCREEN-SIZE, COLOR-FORMAT, etc. from SYSTEM.INI). 
// ================================================================ // Allocate 256 bytes — some drivers (e.g. S3) write extended // GDIINFO fields beyond the standard 108-byte structure. uint32_t gdiInfoLinear; uint16_t gdiInfoSel = alloc16BitBlock(256, &gdiInfoLinear); if (gdiInfoSel == 0) { if (devTypeSel) { free16BitBlock(devTypeSel, devTypeLin); } setError(WDRV_ERR_NO_MEMORY); return gLastError; } params[0] = gdiInfoSel; // lpDevice = GDIINFO buffer (NOT PDEVICE!) params[1] = 0; params[2] = ENABLE_ENABLE; // style = 1 (InquireInfo) params[3] = devTypeSel; // lpDeviceType = "DISPLAY" params[4] = 0; params[5] = 0; // lpOutputFile = NULL params[6] = 0; params[7] = 0; // lpData = NULL params[8] = 0; dbg("windrv: calling Enable(style=1, InquireInfo)\n"); uint32_t result = thunkCall16(&gThunkCtx, handle->ddiEntry[DDI_ORD_ENABLE].sel, handle->ddiEntry[DDI_ORD_ENABLE].off, params, 9); logErr("windrv: Enable(style=1) returned %u\n", (uint16_t)result); // Read GDIINFO from the buffer memcpy(&handle->gdiInfo, (void *)gdiInfoLinear, sizeof(GdiInfo16T)); handle->gdiInfoValid = true; free16BitBlock(gdiInfoSel, gdiInfoLinear); logErr("windrv: GDIINFO: %dx%d %dbpp %dplanes, PDEVICE size=%d\n", handle->gdiInfo.dpHorzRes, handle->gdiInfo.dpVertRes, handle->gdiInfo.dpBitsPixel, handle->gdiInfo.dpPlanes, handle->gdiInfo.dpDEVICEsize); // ================================================================ // For VGA-class drivers (1bpp, 4 planes), repatch __WINFLAGS from // WF_ENHANCED to WF_STANDARD. VGA.DRV's physical_enable hangs in // Enhanced mode because it tries to communicate with the VDD. 
// ================================================================ if (handle->gdiInfoValid && handle->gdiInfo.dpBitsPixel == 1 && handle->gdiInfo.dpPlanes == 4) { uint16_t enhFlags = WF_PMODE | WF_CPU386 | WF_ENHANCED; uint16_t stdFlags = WF_PMODE | WF_CPU386 | WF_STANDARD; patchWinFlags(handle, enhFlags, stdFlags); } // ================================================================ // Step 2: Enable(style=0/EnableDevice) — initialize PDEVICE + mode // // lpDevice = the PDEVICE buffer. The driver copies its physical // device template there and calls physical_enable (sets INT 10h // video mode, initializes hardware). // ================================================================ params[0] = dgSel; params[1] = handle->pdevOff; params[2] = ENABLE_INQUIRE; // style = 0 (EnableDevice) params[3] = devTypeSel; // lpDeviceType = "DISPLAY" params[4] = 0; params[5] = 0; // lpOutputFile = NULL params[6] = 0; params[7] = 0; // lpData = NULL params[8] = 0; dbg("windrv: calling Enable(style=0, EnableDevice)\n"); result = thunkCall16(&gThunkCtx, handle->ddiEntry[DDI_ORD_ENABLE].sel, handle->ddiEntry[DDI_ORD_ENABLE].off, params, 9); logErr("windrv: Enable(style=0) returned %u\n", (uint16_t)result); if (devTypeSel) { free16BitBlock(devTypeSel, devTypeLin); } if ((uint16_t)result == 0) { setError(WDRV_ERR_ENABLE_FAILED); return gLastError; } // Log PDEVICE after EnableDevice { uint16_t crtcBase = (inportb(0x3CC) & 0x01) ? 0x3D4 : 0x3B4; outportb(crtcBase, 0x13); uint8_t cr13 = inportb(crtcBase + 1); logErr("windrv: CR13 after Enable(style=0): 0x%02X (pitch=%u)\n", cr13, (uint16_t)cr13 * 8); } { DibPDevice16T *pd = (DibPDevice16T *)handle->pdevLinear; logErr("windrv: PDEVICE: deType=0x%04X deWidth=%u deHeight=%u " "deWidthBytes=%u dePlanes=%u deBitsPixel=%u\n", pd->deType, pd->deWidth, pd->deHeight, pd->deWidthBytes, pd->dePlanes, pd->deBitsPixel); // Dump all PDEVICE bytes uint8_t *pdb = (uint8_t *)handle->pdevLinear; uint32_t pdSize = handle->pdevSize < 64 ? 
handle->pdevSize : 64; logErr("windrv: PDEVICE hex (%lu bytes):", (unsigned long)pdSize); for (uint32_t bi = 0; bi < pdSize; bi++) { logErr(" %02X", pdb[bi]); } logErr("\n"); // If EnableDevice left deWidth/deHeight/deBitsPixel as zero, // fill them from GDIINFO if (pd->deWidth == 0 && handle->gdiInfoValid) { pd->deWidth = (uint16_t)handle->gdiInfo.dpHorzRes; } if (pd->deHeight == 0 && handle->gdiInfoValid) { pd->deHeight = (uint16_t)handle->gdiInfo.dpVertRes; } if (pd->deBitsPixel == 0 && handle->gdiInfoValid) { pd->deBitsPixel = (uint8_t)handle->gdiInfo.dpBitsPixel; } } // Query current VBE mode for diagnostics { __dpmi_regs vr; memset(&vr, 0, sizeof(vr)); vr.x.ax = 0x4F03; // VBE Return Current VBE Mode __dpmi_int(0x10, &vr); logErr("windrv: VBE current mode: AX=%04X BX=%04X (mode=0x%03X)\n", vr.x.ax, vr.x.bx, vr.x.bx & 0x3FFF); uint16_t crtcBase = (inportb(0x3CC) & 0x01) ? 0x3D4 : 0x3B4; outportb(crtcBase, 0x13); uint8_t cr13 = inportb(crtcBase + 1); logErr("windrv: CR13 after Enable complete: 0x%02X (pitch=%u)\n", cr13, (uint16_t)cr13 * 8); // Read display start address (CR0C:CR0D + S3 extensions CR31, CR51, CR69) outportb(crtcBase, 0x0C); uint8_t cr0c = inportb(crtcBase + 1); outportb(crtcBase, 0x0D); uint8_t cr0d = inportb(crtcBase + 1); outportb(crtcBase, 0x31); uint8_t cr31 = inportb(crtcBase + 1); outportb(crtcBase, 0x51); uint8_t cr51 = inportb(crtcBase + 1); uint32_t dispStart = ((uint32_t)cr0c << 8) | cr0d; dispStart |= ((uint32_t)(cr31 & 0x30)) << 12; // bits 17:16 dispStart |= ((uint32_t)(cr51 & 0x03)) << 18; // bits 19:18 logErr("windrv: display start: CR0C=0x%02X CR0D=0x%02X CR31=0x%02X CR51=0x%02X -> offset 0x%lX (byte %lu)\n", cr0c, cr0d, cr31, cr51, (unsigned long)dispStart, (unsigned long)(dispStart * 4)); } // Check that our pre-allocated PDEVICE is large enough if (handle->gdiInfo.dpDEVICEsize > 0 && (uint32_t)handle->gdiInfo.dpDEVICEsize > handle->pdevSize) { logErr("windrv: PDEVICE too small (%u < %d), max is %d\n", 
(unsigned)handle->pdevSize, handle->gdiInfo.dpDEVICEsize, PDEVICE_MAX_SIZE); setError(WDRV_ERR_NO_MEMORY); return gLastError; } // Try to set up a default draw mode DrawMode16T *dm = (DrawMode16T *)handle->drawModeLinear; dm->rop2 = R2_COPYPEN; dm->bkMode = BM_OPAQUE; dm->bkColor = 0x00FFFFFF; dm->textColor = 0x00000000; // Map video RAM for direct access. // Query VBE to get the linear framebuffer physical address and total // VRAM size, then map the FULL VRAM via DPMI 0800h. The driver's own // Enable only maps the visible framebuffer, but other DDI functions // (e.g. SetPalette) access off-screen VRAM areas that need to be mapped. handle->vramPhysAddr = 0xA0000; handle->vramSize = 0x10000; { // Get current VBE mode number __dpmi_regs vr; memset(&vr, 0, sizeof(vr)); vr.x.ax = 0x4F03; __dpmi_int(0x10, &vr); uint16_t curMode = vr.x.bx & 0x3FFF; if (vr.x.ax == 0x004F && curMode >= 0x100) { // Query VBE controller info for total VRAM unsigned long tbuf = __tb & 0xFFFFF; uint16_t tbSeg = (uint16_t)(tbuf >> 4); uint16_t tbOff = (uint16_t)(tbuf & 0x0F); memset(&vr, 0, sizeof(vr)); vr.x.ax = 0x4F00; vr.x.es = tbSeg; vr.x.di = tbOff; // Write "VBE2" signature to get VBE 2.0+ info dosmemput("VBE2", 4, tbuf); __dpmi_int(0x10, &vr); uint32_t totalVram = 0; if (vr.x.ax == 0x004F) { uint16_t mem64k; dosmemget(tbuf + 0x12, 2, &mem64k); totalVram = (uint32_t)mem64k * 65536UL; dbg("windrv: VBE total VRAM: %" PRIu32 " bytes (%" PRIu32 " KB)\n", totalVram, totalVram / 1024); } // Query mode info for LFB physical base memset(&vr, 0, sizeof(vr)); vr.x.ax = 0x4F01; vr.x.cx = curMode; vr.x.es = tbSeg; vr.x.di = tbOff; __dpmi_int(0x10, &vr); if (vr.x.ax == 0x004F) { uint32_t physBase; dosmemget(tbuf + 0x28, 4, &physBase); dbg("windrv: VBE LFB physical base: 0x%08lX\n", (unsigned long)physBase); if (physBase != 0) { handle->vramPhysAddr = physBase; // Map at least 4MB even if VBE reports less — drivers // access off-screen VRAM (cursor masks, palette tables, // pattern caches) beyond the 
visible framebuffer. if (totalVram < 4UL * 1024 * 1024) { totalVram = 4UL * 1024 * 1024; } handle->vramSize = totalVram; dbg("windrv: VRAM size after fixup: 0x%lX\n", (unsigned long)handle->vramSize); } } } } // Map physical VRAM for direct access __dpmi_meminfo mi; mi.address = handle->vramPhysAddr; mi.size = handle->vramSize; if (__dpmi_physical_address_mapping(&mi) == 0) { handle->vramLinear = mi.address; handle->vramPtr = (void *)(mi.address + __djgpp_conventional_base); dbg("windrv: mapped VRAM: phys=0x%08lX size=0x%lX linear=0x%08lX\n", (unsigned long)handle->vramPhysAddr, (unsigned long)handle->vramSize, (unsigned long)handle->vramLinear); } handle->pitch = handle->gdiInfo.dpHorzRes * ((handle->gdiInfo.dpBitsPixel + 7) / 8); // Realize a default white brush if (handle->ddiEntry[DDI_ORD_REALIZEOBJECT].present) { if (!realizeBrush(handle, 0x00FFFFFF)) { dbg("windrv: warning: initial RealizeObject(brush) failed\n"); } } // Check if this is a hardware (S3-style) or software (DIB) driver. // deType == 0xFFFF indicates a DIB engine / software renderer. DibPDevice16T *pd = (DibPDevice16T *)handle->pdevLinear; bool isHardwareDriver = (pd->deType >= 0); // Detect S3 hardware by probing the chip ID register (CR30). // Only S3 chips need cursor disable and display start offset. outportb(0x3D4, 0x38); outportb(0x3D5, 0x48); // unlock S3 registers outportb(0x3D4, 0x30); uint8_t cr30 = inportb(0x3D5); bool isS3 = (cr30 >= 0x81 && cr30 <= 0xE1); handle->isS3 = isS3; gIsS3 = isS3; dbg("windrv: S3 chip ID probe: CR30=0x%02X isS3=%d\n", cr30, isS3); // VGA-class drivers (1bpp, 4 planes) run as basic VGA even on S3 // hardware — they don't use the S3 accelerator or scratch area. bool isVgaClass = handle->gdiInfoValid && handle->gdiInfo.dpBitsPixel == 1 && handle->gdiInfo.dpPlanes == 4; if (isHardwareDriver && isS3 && !isVgaClass) { // Disable the hardware cursor. S3 Trio64 (and compatible) drivers // may enable a default cursor during Enable that we don't manage. 
// CR45 bit 0 = hardware cursor enable on S3. outportb(0x3D4, 0x45); outportb(0x3D5, inportb(0x3D5) & ~0x01); // Shift the visible display down by 10 scanlines so the S3 driver's // pattern scratch area at VRAM (144,1)-(151,8) is off-screen. // All drawing Y coordinates are offset by dispYOffset to compensate. handle->dispYOffset = 10; setDisplayStart(handle, (uint32_t)handle->dispYOffset * handle->pitch); } else { // Non-S3 hardware, VGA-class, or software/DIB driver: no S3 // scratch area, no display start shift. handle->dispYOffset = 0; } handle->enabled = true; setError(WDRV_OK); return WDRV_OK; } int32_t wdrvDisable(WdrvHandleT handle) { if (!handle || !handle->enabled) { return WDRV_ERR_NOT_ENABLED; } // Call Disable(lpDevice) // VOID PASCAL Disable(LPDEVICE lpDevice) // 1 far pointer = 2 words uint16_t params[2]; params[0] = handle->neMod.autoDataSel; // lpDevice seg (DGROUP) params[1] = handle->pdevOff; // lpDevice off dbg("windrv: calling Disable()\n"); // Reset display start to 0 before Disable restores text mode if (handle->dispYOffset != 0) { setDisplayStart(handle, 0); handle->dispYOffset = 0; } waitForEngine(); thunkCall16(&gThunkCtx, handle->ddiEntry[DDI_ORD_DISABLE].sel, handle->ddiEntry[DDI_ORD_DISABLE].off, params, 2); dbg("windrv: Disable() returned\n"); handle->enabled = false; setError(WDRV_OK); return WDRV_OK; } // ============================================================================ // Drawing operations // ============================================================================ int32_t wdrvBitBlt(WdrvHandleT handle, WdrvBitBltParamsT *p) { if (!handle || !handle->enabled) { logErr("windrv: BitBlt: not enabled (handle=%p enabled=%d)\n", (void *)handle, handle ? 
handle->enabled : -1); return WDRV_ERR_NOT_ENABLED; } if (!handle->ddiEntry[DDI_ORD_BITBLT].present) { logErr("windrv: BitBlt: not present\n"); return WDRV_ERR_UNSUPPORTED; } // BOOL PASCAL BitBlt(LPDEVICE lpDstDev, WORD DstX, WORD DstY, // LPDEVICE lpSrcDev, WORD SrcX, WORD SrcY, // WORD xExt, WORD yExt, DWORD Rop3, // LPBRUSH lpBrush, LPDRAWMODE lpDrawMode) // // Pascal push order (left to right): // lpDstDev(2w), DstX(1w), DstY(1w), // lpSrcDev(2w), SrcX(1w), SrcY(1w), // xExt(1w), yExt(1w), Rop3(2w), // lpBrush(2w), lpDrawMode(2w) // Total: 16 words uint16_t dgSel = handle->neMod.autoDataSel; uint16_t params[16]; int i = 0; // Determine if the ROP uses the source. The 8-bit ROP is in bits 23-16. // If flipping the source bit doesn't change any output bit, source is // not used and lpSrcDev must be NULL per the DDI spec. uint8_t rop8 = (uint8_t)(p->rop3 >> 16); bool ropNeedsSrc = (((rop8 >> 2) ^ rop8) & 0x33) != 0; // lpDstDev params[i++] = dgSel; params[i++] = handle->pdevOff; // DstX, DstY (offset Y into hidden-scanline region) params[i++] = (uint16_t)p->dstX; params[i++] = (uint16_t)(p->dstY + handle->dispYOffset); // lpSrcDev (NULL for pattern-only ROPs, screen PDEVICE otherwise) if (ropNeedsSrc) { params[i++] = dgSel; params[i++] = handle->pdevOff; } else { params[i++] = 0; params[i++] = 0; } // SrcX, SrcY (offset Y for screen-to-screen blits) params[i++] = (uint16_t)p->srcX; params[i++] = (uint16_t)(p->srcY + handle->dispYOffset); // xExt, yExt params[i++] = (uint16_t)p->width; params[i++] = (uint16_t)p->height; // Rop3 (DWORD: high word first in Pascal push order) params[i++] = (uint16_t)(p->rop3 >> 16); params[i++] = (uint16_t)(p->rop3 & 0xFFFF); // lpBrush params[i++] = dgSel; params[i++] = handle->brushOff; // lpDrawMode params[i++] = dgSel; params[i++] = handle->drawModeOff; dbg("windrv: BitBlt dst=%04X:%04X (%d,%d) src=%04X:%04X (%d,%d) %dx%d rop=0x%08lX brush=%04X:%04X dm=%04X:%04X\n", dgSel, handle->pdevOff, p->dstX, p->dstY, ropNeedsSrc ? 
dgSel : 0, ropNeedsSrc ? handle->pdevOff : 0, p->srcX, p->srcY, p->width, p->height, (unsigned long)p->rop3, dgSel, handle->brushOff, dgSel, handle->drawModeOff); waitForEngine(); uint32_t result = thunkCall16(&gThunkCtx, handle->ddiEntry[DDI_ORD_BITBLT].sel, handle->ddiEntry[DDI_ORD_BITBLT].off, params, i); waitForEngine(); dbg("windrv: BitBlt returned %lu\n", (unsigned long)(result & 0xFFFF)); return ((int16_t)(result & 0xFFFF)) ? WDRV_OK : WDRV_ERR_UNSUPPORTED; } int32_t wdrvFillRect(WdrvHandleT handle, int16_t x, int16_t y, int16_t w, int16_t h, uint32_t color) { if (!handle || !handle->enabled) { return WDRV_ERR_NOT_ENABLED; } // Realize brush with the requested color if (!handle->brushRealized || handle->brushRealizedColor != color) { if (handle->ddiEntry[DDI_ORD_REALIZEOBJECT].present) { realizeBrush(handle, color); } } // If driver supports BitBlt, use PATCOPY if (handle->ddiEntry[DDI_ORD_BITBLT].present) { WdrvBitBltParamsT bp; memset(&bp, 0, sizeof(bp)); bp.dstX = x; bp.dstY = y; bp.srcX = 0; bp.srcY = 0; bp.width = w; bp.height = h; bp.rop3 = PATCOPY; return wdrvBitBlt(handle, &bp); } // Fall back to Output with rectangle if (handle->ddiEntry[DDI_ORD_OUTPUT].present) { // Allocate 16-bit memory for the point array and pen // Output(lpDstDev, style, count, lpPoints, lpPen, lpBrush, lpDrawMode, lpClipRect) // For rectangle: style=OS_RECTANGLE, count=2 (top-left, bottom-right) // Build 2-point rectangle (offset Y into hidden-scanline region) Point16T pts[2]; pts[0].x = x; pts[0].y = y + handle->dispYOffset; pts[1].x = x + w; pts[1].y = y + h + handle->dispYOffset; uint32_t ptsLinear; uint16_t ptsSel = alloc16BitBlock(sizeof(pts), &ptsLinear); if (ptsSel == 0) { return WDRV_ERR_NO_MEMORY; } memcpy((void *)ptsLinear, pts, sizeof(pts)); // Output params (Pascal order): // lpDstDev(2w), style(1w), count(1w), lpPoints(2w), // lpPen(2w), lpBrush(2w), lpDrawMode(2w), lpClipRect(2w) // Total: 14 words uint16_t dgSel = handle->neMod.autoDataSel; uint16_t params[14]; 
int i = 0; params[i++] = dgSel; // lpDstDev seg params[i++] = handle->pdevOff; // lpDstDev off params[i++] = OS_RECTANGLE; // style params[i++] = 2; // count params[i++] = ptsSel; // lpPoints seg params[i++] = 0; // lpPoints off params[i++] = 0; // lpPen seg (NULL) params[i++] = 0; // lpPen off params[i++] = dgSel; // lpBrush seg params[i++] = handle->brushOff; // lpBrush off params[i++] = dgSel; // lpDrawMode seg params[i++] = handle->drawModeOff; // lpDrawMode off params[i++] = 0; // lpClipRect seg (NULL = no clip) params[i++] = 0; // lpClipRect off waitForEngine(); uint32_t result = thunkCall16(&gThunkCtx, handle->ddiEntry[DDI_ORD_OUTPUT].sel, handle->ddiEntry[DDI_ORD_OUTPUT].off, params, i); free16BitBlock(ptsSel, ptsLinear); return ((int16_t)(result & 0xFFFF)) ? WDRV_OK : WDRV_ERR_UNSUPPORTED; } return WDRV_ERR_UNSUPPORTED; } int32_t wdrvSetPixel(WdrvHandleT handle, int16_t x, int16_t y, uint32_t color) { if (!handle || !handle->enabled) { return WDRV_ERR_NOT_ENABLED; } if (!handle->ddiEntry[DDI_ORD_PIXEL].present) { return WDRV_ERR_UNSUPPORTED; } // DWORD PASCAL Pixel(LPDEVICE lpDevice, WORD x, WORD y, // DWORD color, LPDRAWMODE lpDrawMode) // Pascal push order: // lpDevice(2w), x(1w), y(1w), color(2w), lpDrawMode(2w) // Total: 8 words // Convert COLORREF to physical color via ColorInfo DDI uint32_t physColor = colorToPhys(handle, color); // Set draw mode to COPYPEN for setting pixels DrawMode16T *dm = (DrawMode16T *)handle->drawModeLinear; dm->rop2 = R2_COPYPEN; uint16_t dgSel = handle->neMod.autoDataSel; uint16_t params[8]; int i = 0; params[i++] = dgSel; // lpDevice seg params[i++] = handle->pdevOff; // lpDevice off params[i++] = (uint16_t)x; // x params[i++] = (uint16_t)(y + handle->dispYOffset); // y (offset) params[i++] = (uint16_t)(physColor >> 16); // color high params[i++] = (uint16_t)(physColor); // color low params[i++] = dgSel; // lpDrawMode seg params[i++] = handle->drawModeOff; // lpDrawMode off waitForEngine(); thunkCall16(&gThunkCtx, 
handle->ddiEntry[DDI_ORD_PIXEL].sel, handle->ddiEntry[DDI_ORD_PIXEL].off, params, i); waitForEngine(); return WDRV_OK; } uint32_t wdrvGetPixel(WdrvHandleT handle, int16_t x, int16_t y) { if (!handle || !handle->enabled) { return 0; } if (!handle->ddiEntry[DDI_ORD_PIXEL].present) { return 0; } // Pixel with color = -1 (0xFFFFFFFF) reads instead of writes DrawMode16T *dm = (DrawMode16T *)handle->drawModeLinear; dm->rop2 = R2_COPYPEN; uint16_t dgSel = handle->neMod.autoDataSel; uint16_t params[8]; int i = 0; params[i++] = dgSel; params[i++] = handle->pdevOff; params[i++] = (uint16_t)x; params[i++] = (uint16_t)(y + handle->dispYOffset); params[i++] = 0xFFFF; // color = -1 means "get pixel" params[i++] = 0xFFFF; params[i++] = dgSel; params[i++] = handle->drawModeOff; return thunkCall16(&gThunkCtx, handle->ddiEntry[DDI_ORD_PIXEL].sel, handle->ddiEntry[DDI_ORD_PIXEL].off, params, i); } int32_t wdrvPolyline(WdrvHandleT handle, Point16T *points, int16_t count, uint32_t color) { if (!handle || !handle->enabled) { return WDRV_ERR_NOT_ENABLED; } if (!handle->ddiEntry[DDI_ORD_OUTPUT].present) { return WDRV_ERR_UNSUPPORTED; } // Realize a physical pen (driver expects RealizeObject output, not a logical pen) if (!handle->penRealized || handle->penRealizedColor != color) { if (!realizePen(handle, color)) { return WDRV_ERR_UNSUPPORTED; } } // Allocate 16-bit memory for the point array, offsetting Y coordinates uint32_t ptsSize = count * sizeof(Point16T); uint32_t ptsLinear; uint16_t ptsSel = alloc16BitBlock(ptsSize, &ptsLinear); if (ptsSel == 0) { return WDRV_ERR_NO_MEMORY; } memcpy((void *)ptsLinear, points, ptsSize); { Point16T *dst = (Point16T *)ptsLinear; for (int16_t pi = 0; pi < count; pi++) { dst[pi].y += handle->dispYOffset; } } // Output(lpDstDev, style, count, lpPoints, lpPen, lpBrush, lpDrawMode, lpClipRect) uint16_t dgSel = handle->neMod.autoDataSel; uint16_t params[14]; int i = 0; params[i++] = dgSel; params[i++] = handle->pdevOff; params[i++] = OS_POLYLINE; 
params[i++] = count; params[i++] = ptsSel; params[i++] = 0; params[i++] = dgSel; // lpPen in DGROUP (physical pen) params[i++] = handle->penOff; params[i++] = 0; // lpBrush = NULL params[i++] = 0; params[i++] = dgSel; params[i++] = handle->drawModeOff; params[i++] = 0; // lpClipRect = NULL params[i++] = 0; waitForEngine(); uint32_t result = thunkCall16(&gThunkCtx, handle->ddiEntry[DDI_ORD_OUTPUT].sel, handle->ddiEntry[DDI_ORD_OUTPUT].off, params, i); waitForEngine(); free16BitBlock(ptsSel, ptsLinear); return ((int16_t)(result & 0xFFFF)) ? WDRV_OK : WDRV_ERR_UNSUPPORTED; } int32_t wdrvRectangle(WdrvHandleT handle, int16_t x, int16_t y, int16_t w, int16_t h, uint32_t color) { // Use Output with OS_RECTANGLE for outlined rectangle if (!handle || !handle->enabled) { return WDRV_ERR_NOT_ENABLED; } if (!handle->ddiEntry[DDI_ORD_OUTPUT].present) { return WDRV_ERR_UNSUPPORTED; } // Realize a physical pen (driver expects RealizeObject output, not a logical pen) if (!handle->penRealized || handle->penRealizedColor != color) { if (!realizePen(handle, color)) { return WDRV_ERR_UNSUPPORTED; } } Point16T pts[2]; pts[0].x = x; pts[0].y = y + handle->dispYOffset; pts[1].x = x + w; pts[1].y = y + h + handle->dispYOffset; uint32_t ptsLinear; uint16_t ptsSel = alloc16BitBlock(sizeof(pts), &ptsLinear); if (ptsSel == 0) { return WDRV_ERR_NO_MEMORY; } memcpy((void *)ptsLinear, pts, sizeof(pts)); uint16_t dgSel = handle->neMod.autoDataSel; uint16_t params[14]; int i = 0; params[i++] = dgSel; params[i++] = handle->pdevOff; params[i++] = OS_RECTANGLE; params[i++] = 2; params[i++] = ptsSel; params[i++] = 0; params[i++] = dgSel; // lpPen in DGROUP (physical pen) params[i++] = handle->penOff; params[i++] = dgSel; params[i++] = handle->brushOff; params[i++] = dgSel; params[i++] = handle->drawModeOff; params[i++] = 0; // lpClipRect = NULL params[i++] = 0; waitForEngine(); uint32_t result = thunkCall16(&gThunkCtx, handle->ddiEntry[DDI_ORD_OUTPUT].sel, handle->ddiEntry[DDI_ORD_OUTPUT].off, 
params, i); waitForEngine(); free16BitBlock(ptsSel, ptsLinear); return ((int16_t)(result & 0xFFFF)) ? WDRV_OK : WDRV_ERR_UNSUPPORTED; } // ============================================================================ // Palette operations // ============================================================================ int32_t wdrvSetPalette(WdrvHandleT handle, int32_t startIndex, int32_t count, const uint8_t *colors) { if (!handle || !handle->enabled) { return WDRV_ERR_NOT_ENABLED; } if (!handle->ddiEntry[DDI_ORD_SETPALETTE].present) { return WDRV_ERR_UNSUPPORTED; } // SetPalette(nStartIndex:WORD, nNumEntries:WORD, lpPalette:DWORD) // Pascal order: nStartIndex(1w), nNumEntries(1w), lpPalette(2w) // Total: 4 words // Allocate 16-bit memory for the palette data uint32_t palSize = count * 4; // RGBQUAD per entry uint32_t palLinear; uint16_t palSel = alloc16BitBlock(palSize, &palLinear); if (palSel == 0) { return WDRV_ERR_NO_MEMORY; } memcpy((void *)palLinear, colors, palSize); uint16_t params[4]; params[0] = (uint16_t)startIndex; params[1] = (uint16_t)count; params[2] = palSel; params[3] = 0; thunkCall16(&gThunkCtx, handle->ddiEntry[DDI_ORD_SETPALETTE].sel, handle->ddiEntry[DDI_ORD_SETPALETTE].off, params, 4); free16BitBlock(palSel, palLinear); return WDRV_OK; } // ============================================================================ // Framebuffer access // ============================================================================ void *wdrvGetFramebuffer(WdrvHandleT handle) { if (!handle || !handle->enabled) { return NULL; } return handle->vramPtr; } int32_t wdrvGetPitch(WdrvHandleT handle) { if (!handle || !handle->enabled) { return 0; } return handle->pitch; } // ============================================================================ // Error handling // ============================================================================ int32_t wdrvGetLastError(void) { return gLastError; } const char *wdrvGetLastErrorString(void) { switch (gLastError) { case 
WDRV_OK: return "no error"; case WDRV_ERR_INIT: return "initialization failed"; case WDRV_ERR_NO_DPMI: return "DPMI not available"; case WDRV_ERR_FILE_NOT_FOUND: return "file not found"; case WDRV_ERR_BAD_FORMAT: return "not a valid NE executable"; case WDRV_ERR_LOAD_FAILED: return "failed to load driver"; case WDRV_ERR_NO_MEMORY: return "out of memory"; case WDRV_ERR_RELOC_FAILED: return "relocation failed"; case WDRV_ERR_NO_ENTRY: return "required DDI entry not found"; case WDRV_ERR_ENABLE_FAILED: return "driver Enable() failed"; case WDRV_ERR_THUNK_FAILED: return "thunk setup failed"; case WDRV_ERR_NOT_LOADED: return "no driver loaded"; case WDRV_ERR_NOT_ENABLED: return "driver not enabled"; case WDRV_ERR_UNSUPPORTED: return "operation not supported"; default: return "unknown error"; } } void wdrvSetDebug(bool enable) { gDebug = enable; neSetDebug(enable); thunkSetDebug(enable); stubSetDebug(enable); } void wdrvDumpSegmentBases(WdrvHandleT handle) { if (!handle) { return; } logErr("=== NE Module Segment Bases ===\n"); for (int i = 0; i < handle->neMod.segmentCount; i++) { LoadedSegT *seg = &handle->neMod.segments[i]; unsigned long base = 0; __dpmi_get_segment_base_address(seg->selector, &base); unsigned long limit = __dpmi_get_segment_limit(seg->selector); logErr(" seg[%d] sel=%04X base=0x%08lX limit=0x%08lX size=%" PRIu32 " %s\n", i + 1, seg->selector, base, limit, seg->size, seg->isCode ? 
"CODE" : "DATA"); } unsigned long dgBase = 0; __dpmi_get_segment_base_address(handle->neMod.autoDataSel, &dgBase); logErr(" DGROUP sel=%04X base=0x%08lX\n", handle->neMod.autoDataSel, dgBase); logErr(" pdevOff=%04X brushOff=%04X drawModeOff=%04X\n", handle->pdevOff, handle->brushOff, handle->drawModeOff); logErr(" dgroupObjBase=0x%" PRIX32 " pdevLinear=0x%" PRIX32 "\n", handle->dgroupObjBase, handle->pdevLinear); } // ============================================================================ // Internal implementation // ============================================================================ static FarPtr16T importResolver(const char *moduleName, uint16_t ordinal, const char *funcName) { return stubResolveImport(&gStubCtx, moduleName, ordinal, funcName); } static bool resolveDriverEntries(struct WdrvDriverS *drv) { // Resolve all known DDI ordinals static const uint16_t ddiOrdinals[] = { DDI_ORD_BITBLT, DDI_ORD_COLORINFO, DDI_ORD_CONTROL, DDI_ORD_DISABLE, DDI_ORD_ENABLE, DDI_ORD_ENUMDFFONTS, DDI_ORD_ENUMOBJ, DDI_ORD_OUTPUT, DDI_ORD_PIXEL, DDI_ORD_REALIZEOBJECT, DDI_ORD_STRBLT, DDI_ORD_SCANLR, DDI_ORD_DEVICEMODE, DDI_ORD_EXTTEXTOUT, DDI_ORD_GETCHARWIDTH, DDI_ORD_DEVICEBITMAP, DDI_ORD_FASTBORDER, DDI_ORD_SETATTRIBUTE, DDI_ORD_DIBTODEVICE, DDI_ORD_CREATEBITMAP, DDI_ORD_DELETEBITMAP, DDI_ORD_SELECTBITMAP, DDI_ORD_BITMAPBITS, DDI_ORD_RECLIP, DDI_ORD_GETPALETTE, DDI_ORD_SETPALETTE, DDI_ORD_SETPALETTETRANS, DDI_ORD_UPDATECOLORS, DDI_ORD_STRETCHBLT, DDI_ORD_STRETCHDIBITS, DDI_ORD_SELECTPALETTE, DDI_ORD_INQUIRE, DDI_ORD_SETCURSOR, DDI_ORD_MOVECURSOR, DDI_ORD_CHECKCRSR, 0 // Sentinel }; int found = 0; for (int i = 0; ddiOrdinals[i] != 0; i++) { uint16_t ord = ddiOrdinals[i]; uint16_t seg; uint16_t off; uint16_t sel; if (neLookupExport(&drv->neMod, ord, &seg, &off, &sel)) { drv->ddiEntry[ord].sel = sel; drv->ddiEntry[ord].off = off; drv->ddiEntry[ord].present = true; found++; dbg("windrv: DDI ord %u -> %04X:%04X\n", ord, sel, off); } } dbg("windrv: resolved %d DDI 
entry points\n", found); return found > 0; } // Extend DGROUP to include space for GDI objects. // Layout within the extension area (16-byte aligned): // +0x0000: PDEVICE (4096 bytes) // +0x1000: PhysBrush (128 bytes) // +0x1080: LogBrush (16 bytes) // +0x1090: DrawMode (48 bytes) // +0x10C0: PhysPen (128 bytes) // +0x1140: LogPen (16 bytes) // Total: 0x1150 bytes #define DGROUP_OBJ_PDEV_OFF 0x0000 #define DGROUP_OBJ_BRUSH_OFF 0x1000 #define DGROUP_OBJ_LOGBRUSH_OFF 0x1080 #define DGROUP_OBJ_DRAWMODE_OFF 0x1090 #define DGROUP_OBJ_PEN_OFF 0x10C0 #define DGROUP_OBJ_LOGPEN_OFF 0x1140 #define DGROUP_OBJ_PHYSCOLOR_OFF 0x1150 #define DGROUP_OBJ_TOTAL_SIZE 0x1158 static bool extendDgroupForObjects(struct WdrvDriverS *drv) { int dgIdx = drv->neMod.neHeader.autoDataSegIndex - 1; if (dgIdx < 0 || dgIdx >= drv->neMod.segmentCount) { logErr("windrv: no DGROUP segment\n"); return false; } uint32_t oldSize = drv->neMod.segments[dgIdx].size; // Align object area start to 16 bytes uint32_t objBase = (oldSize + 15) & ~15; // The S3 driver uses DGROUP offsets well beyond the initial data for // graphics engine working buffers (e.g., 0xA6E8, 0xBEE8). In Windows // 3.x, DGROUP is typically the full 64K segment. Extend to 64K to // ensure the driver has all the working space it expects. uint32_t targetSize = 0x10000; if (objBase + DGROUP_OBJ_TOTAL_SIZE > targetSize) { logErr("windrv: DGROUP objects don't fit in 64K\n"); return false; } uint32_t extraBytes = targetSize - oldSize; uint32_t oldSizeOut; if (!neExtendSegment(&drv->neMod, dgIdx, extraBytes, &oldSizeOut)) { return false; } uint32_t dgLinear = drv->neMod.segments[dgIdx].linearAddr; // Initialize DGROUP stack management fields if needed. In real Windows, // KERNEL sets these during module loading. VGA.DRV ships with // [0x0A]=0xFFFF which its stack check function interprets as "no stack // space available", causing all deep functions (BitBlt, etc.) to fail. // Only patch if the original data has the 0xFFFF sentinel. 
{ uint16_t *dgWords = (uint16_t *)dgLinear; if (dgWords[5] == 0xFFFF) { dgWords[5] = (uint16_t)objBase; // [0x0A] pStackBot dbg("windrv: patched DGROUP stack bottom [0x0A] from FFFF to %04X\n", (uint16_t)objBase); } if (dgWords[4] == 0xFFFF) { dgWords[4] = 0xFFFE; // [0x08] pStackMin } } drv->dgroupObjBase = objBase; drv->pdevOff = (uint16_t)(objBase + DGROUP_OBJ_PDEV_OFF); drv->pdevLinear = dgLinear + objBase + DGROUP_OBJ_PDEV_OFF; drv->pdevSize = PDEVICE_MAX_SIZE; drv->brushOff = (uint16_t)(objBase + DGROUP_OBJ_BRUSH_OFF); drv->brushLinear = dgLinear + objBase + DGROUP_OBJ_BRUSH_OFF; drv->logBrushOff = (uint16_t)(objBase + DGROUP_OBJ_LOGBRUSH_OFF); drv->logBrushLinear = dgLinear + objBase + DGROUP_OBJ_LOGBRUSH_OFF; drv->drawModeOff = (uint16_t)(objBase + DGROUP_OBJ_DRAWMODE_OFF); drv->drawModeLinear = dgLinear + objBase + DGROUP_OBJ_DRAWMODE_OFF; drv->penOff = (uint16_t)(objBase + DGROUP_OBJ_PEN_OFF); drv->penLinear = dgLinear + objBase + DGROUP_OBJ_PEN_OFF; drv->logPenOff = (uint16_t)(objBase + DGROUP_OBJ_LOGPEN_OFF); drv->logPenLinear = dgLinear + objBase + DGROUP_OBJ_LOGPEN_OFF; drv->physColorOff = (uint16_t)(objBase + DGROUP_OBJ_PHYSCOLOR_OFF); drv->physColorLinear = dgLinear + objBase + DGROUP_OBJ_PHYSCOLOR_OFF; dbg("windrv: DGROUP extended by %" PRIu32 " bytes (old=%" PRIu32 " new=%" PRIu32 ")\n", extraBytes, oldSize, drv->neMod.segments[dgIdx].size); dbg("windrv: DGROUP objects: pdev=%04X brush=%04X logBrush=%04X drawMode=%04X pen=%04X logPen=%04X\n", drv->pdevOff, drv->brushOff, drv->logBrushOff, drv->drawModeOff, drv->penOff, drv->logPenOff); return true; } static bool allocPDevice(struct WdrvDriverS *drv) { // PDEVICE is pre-allocated within DGROUP by extendDgroupForObjects memset((void *)drv->pdevLinear, 0, drv->pdevSize); return true; } static bool allocDrawMode(struct WdrvDriverS *drv) { // DrawMode is pre-allocated within DGROUP DrawMode16T *dm = (DrawMode16T *)drv->drawModeLinear; memset(dm, 0, sizeof(DrawMode16T)); dm->rop2 = R2_COPYPEN; 
dm->bkMode = BM_OPAQUE; return true; } static bool allocBrushBuffers(struct WdrvDriverS *drv) { // Both brushes are pre-allocated within DGROUP LogBrush16T *lb = (LogBrush16T *)drv->logBrushLinear; memset(lb, 0, sizeof(LogBrush16T)); lb->lbStyle = BS_SOLID; lb->lbColor = 0x00FFFFFF; memset((void *)drv->brushLinear, 0, PHYS_OBJ_MAX_SIZE); drv->brushRealized = false; return true; } static bool allocPenBuffers(struct WdrvDriverS *drv) { // Both pens are pre-allocated within DGROUP LogPen16T *lp = (LogPen16T *)drv->logPenLinear; memset(lp, 0, sizeof(LogPen16T)); lp->lopnStyle = PS_SOLID; lp->lopnWidth.x = 1; lp->lopnWidth.y = 0; lp->lopnColor = 0x00000000; memset((void *)drv->penLinear, 0, PHYS_OBJ_MAX_SIZE); drv->penRealized = false; return true; } static uint32_t colorToPhys(struct WdrvDriverS *drv, uint32_t colorRef) { if (!drv->ddiEntry[DDI_ORD_COLORINFO].present) { return colorRef; } // DWORD PASCAL ColorInfo(LPDEVICE lpDevice, DWORD dwColorIn, // LPDWORD lpPhysColor) // Pascal push order: lpDevice(2w), dwColorIn(2w), lpPhysColor(2w) uint16_t dgSel = drv->neMod.autoDataSel; uint16_t params[6]; params[0] = dgSel; // lpDevice seg params[1] = drv->pdevOff; // lpDevice off params[2] = (uint16_t)(colorRef >> 16); // dwColorIn high params[3] = (uint16_t)(colorRef); // dwColorIn low params[4] = dgSel; // lpPhysColor seg params[5] = drv->physColorOff; // lpPhysColor off // Clear the output buffer *(uint32_t *)drv->physColorLinear = 0; waitForEngine(); thunkCall16(&gThunkCtx, drv->ddiEntry[DDI_ORD_COLORINFO].sel, drv->ddiEntry[DDI_ORD_COLORINFO].off, params, 6); waitForEngine(); uint32_t physColor = *(uint32_t *)drv->physColorLinear; dbg("windrv: ColorInfo(0x%06lX) -> phys 0x%08lX\n", (unsigned long)colorRef, (unsigned long)physColor); return physColor; } static void setDisplayStart(struct WdrvDriverS *drv, uint32_t byteOffset) { (void)drv; // S3 display start address is in units of 4 bytes (DWORDs). 
// CR0C:CR0D = bits 15:0, CR31[5:4] = bits 17:16, CR51[1:0] = bits 19:18 uint32_t startAddr = byteOffset / 4; uint16_t crtcBase = (inportb(0x3CC) & 0x01) ? 0x3D4 : 0x3B4; // Unlock S3 registers outportb(crtcBase, 0x38); outportb(crtcBase + 1, 0x48); outportb(crtcBase, 0x39); outportb(crtcBase + 1, 0xA5); // Write display start address bits 15:0 outportb(crtcBase, 0x0D); outportb(crtcBase + 1, (uint8_t)(startAddr & 0xFF)); outportb(crtcBase, 0x0C); outportb(crtcBase + 1, (uint8_t)((startAddr >> 8) & 0xFF)); // Write bits 17:16 to CR31 outportb(crtcBase, 0x31); uint8_t cr31 = inportb(crtcBase + 1); cr31 = (cr31 & ~0x30) | (uint8_t)(((startAddr >> 16) & 0x03) << 4); outportb(crtcBase + 1, cr31); // Write bits 19:18 to CR51 outportb(crtcBase, 0x51); uint8_t cr51 = inportb(crtcBase + 1); cr51 = (cr51 & ~0x03) | (uint8_t)((startAddr >> 18) & 0x03); outportb(crtcBase + 1, cr51); dbg("windrv: display start set to byte offset %lu (reg=0x%lX)\n", (unsigned long)byteOffset, (unsigned long)startAddr); } static bool realizeBrush(struct WdrvDriverS *drv, uint32_t color) { if (!drv->ddiEntry[DDI_ORD_REALIZEOBJECT].present) { return false; } uint16_t dgSel = drv->neMod.autoDataSel; // Set up the logical brush LogBrush16T *lb = (LogBrush16T *)drv->logBrushLinear; lb->lbStyle = BS_SOLID; lb->lbColor = color; lb->lbHatch = 0; // Clear the physical brush buffer memset((void *)drv->brushLinear, 0, PHYS_OBJ_MAX_SIZE); // RealizeObject(lpDevice, nStyle, lpInObj, lpOutObj, lpTextXForm) // Pascal push order: left-to-right uint16_t params[9]; params[0] = dgSel; // lpDevice seg params[1] = drv->pdevOff; // lpDevice off params[2] = OBJ_BRUSH; // nStyle params[3] = dgSel; // lpInObj seg params[4] = drv->logBrushOff; // lpInObj off params[5] = dgSel; // lpOutObj seg params[6] = drv->brushOff; // lpOutObj off params[7] = 0; // lpTextXForm seg (NULL) params[8] = 0; // lpTextXForm off (NULL) waitForEngine(); uint32_t result = thunkCall16(&gThunkCtx, drv->ddiEntry[DDI_ORD_REALIZEOBJECT].sel, 
drv->ddiEntry[DDI_ORD_REALIZEOBJECT].off, params, 9); waitForEngine(); dbg("windrv: RealizeObject(brush, color=0x%06lX) returned %d\n", (unsigned long)color, (int16_t)(result & 0xFFFF)); if ((int16_t)(result & 0xFFFF) > 0) { drv->brushRealized = true; drv->brushRealizedColor = color; // Dump the first 16 bytes of the realized brush uint8_t *bdata = (uint8_t *)drv->brushLinear; dbg("windrv: brush[0..15]:"); for (int k = 0; k < 16; k++) { dbg(" %02X", bdata[k]); } dbg("\n"); return true; } return false; } static bool realizePen(struct WdrvDriverS *drv, uint32_t color) { if (!drv->ddiEntry[DDI_ORD_REALIZEOBJECT].present) { return false; } uint16_t dgSel = drv->neMod.autoDataSel; // Set up the logical pen LogPen16T *lp = (LogPen16T *)drv->logPenLinear; lp->lopnStyle = PS_SOLID; lp->lopnWidth.x = 1; lp->lopnWidth.y = 0; lp->lopnColor = color; // Clear the physical pen buffer memset((void *)drv->penLinear, 0, PHYS_OBJ_MAX_SIZE); // RealizeObject(lpDevice, nStyle, lpInObj, lpOutObj, lpTextXForm) // Pascal push order: left-to-right uint16_t params[9]; params[0] = dgSel; // lpDevice seg params[1] = drv->pdevOff; // lpDevice off params[2] = OBJ_PEN; // nStyle params[3] = dgSel; // lpInObj seg params[4] = drv->logPenOff; // lpInObj off params[5] = dgSel; // lpOutObj seg params[6] = drv->penOff; // lpOutObj off params[7] = 0; // lpTextXForm seg (NULL) params[8] = 0; // lpTextXForm off (NULL) waitForEngine(); uint32_t result = thunkCall16(&gThunkCtx, drv->ddiEntry[DDI_ORD_REALIZEOBJECT].sel, drv->ddiEntry[DDI_ORD_REALIZEOBJECT].off, params, 9); waitForEngine(); dbg("windrv: RealizeObject(pen, color=0x%06lX) returned %d\n", (unsigned long)color, (int16_t)(result & 0xFFFF)); if ((int16_t)(result & 0xFFFF) > 0) { drv->penRealized = true; drv->penRealizedColor = color; // Dump the first 16 bytes of the realized pen uint8_t *pdata = (uint8_t *)drv->penLinear; dbg("windrv: pen[0..15]:"); for (int k = 0; k < 16; k++) { dbg(" %02X", pdata[k]); } dbg("\n"); return true; } return false; 
} static void freeDrawObjects(struct WdrvDriverS *drv) { // Objects are embedded in DGROUP - freed when module is unloaded drv->brushRealized = false; drv->penRealized = false; } static uint16_t alloc16BitBlock(uint32_t size, uint32_t *linearOut) { uint8_t *mem = (uint8_t *)calloc(1, size); if (!mem) { return 0; } uint32_t ptrVal = (uint32_t)mem; int sel = __dpmi_allocate_ldt_descriptors(1); if (sel < 0) { free(mem); return 0; } // True linear address = DJGPP pointer + DS base __dpmi_set_segment_base_address(sel, ptrVal + __djgpp_base_address); __dpmi_set_segment_limit(sel, size - 1); __dpmi_set_descriptor_access_rights(sel, 0x00F2); // 16-bit data RW *linearOut = ptrVal; return (uint16_t)sel; } static void free16BitBlock(uint16_t sel, uint32_t linear) { if (sel) { __dpmi_free_ldt_descriptor(sel); } if (linear) { free((void *)linear); } } static void setError(int32_t err) { gLastError = err; } static void waitForEngine(void) { if (!gIsS3) { return; } // Wait for the S3 graphics engine to become idle by polling GP_STAT. // Bit 9 (0x0200) = hardware busy. for (int i = 0; i < 100000; i++) { uint16_t stat = inportw(0x9AE8); if (!(stat & 0x0200)) { break; } } } // Declared in file-scope asm above extern void int10hRawHandler(void); static bool installInt10hReflector(void) { // Save DJGPP's DS selector for the assembly stub. // The stub uses CS-relative addressing to load this value since // DS is undefined on PM interrupt handler entry. 
gInt10hDsSel = _my_ds(); gInt10hStackTop = (uint32_t)gInt10hStack + sizeof(gInt10hStack); __dpmi_get_protected_mode_interrupt_vector(0x10, &gOldInt10hVec); __dpmi_paddr newVec; newVec.offset32 = (unsigned long)int10hRawHandler; newVec.selector = _my_cs(); if (__dpmi_set_protected_mode_interrupt_vector(0x10, &newVec) != 0) { return false; } gInt10hInstalled = true; return true; } static void removeInt10hReflector(void) { if (gInt10hInstalled) { __dpmi_set_protected_mode_interrupt_vector(0x10, &gOldInt10hVec); gInt10hInstalled = false; } } static bool installDpmi300Proxy(void) { gDpmi300DsSel = _my_ds(); gDpmi300StackTop = (uint32_t)gDpmi300Stack + sizeof(gDpmi300Stack); __dpmi_get_protected_mode_interrupt_vector(DPMI300_INT_NUM, &gOldDpmi300Vec); __dpmi_paddr newVec; newVec.offset32 = (unsigned long)dpmi300RawHandler; newVec.selector = _my_cs(); if (__dpmi_set_protected_mode_interrupt_vector(DPMI300_INT_NUM, &newVec) != 0) { return false; } gDpmi300Installed = true; dbg("windrv: DPMI 300h proxy installed on INT %02Xh\n", DPMI300_INT_NUM); return true; } // Search a loaded driver's code segments for the DoInt10h INT 31h instruction // and patch it to use our proxy interrupt instead. DoInt10h builds a RMCS on // the stack and then does: // mov ax, 0300h ; B8 00 03 // ... // int 31h ; CD 31 // We find "CD 31" within a small window after "B8 00 03" and change the 0x31 // to DPMI300_INT_NUM (0x64). 
static bool patchDoInt10h(struct WdrvDriverS *drv) { bool patched = false; for (int s = 0; s < drv->neMod.segmentCount; s++) { if (!drv->neMod.segments[s].isCode) { continue; } uint16_t sel = drv->neMod.segments[s].selector; uint32_t lin = drv->neMod.segments[s].linearAddr; uint32_t size = drv->neMod.segments[s].size; // Scan for "B8 00 03" (mov ax, 0300h) for (uint32_t i = 0; i + 2 < size; i++) { uint8_t b0 = *(uint8_t *)(lin + i); uint8_t b1 = *(uint8_t *)(lin + i + 1); uint8_t b2 = *(uint8_t *)(lin + i + 2); if (b0 != 0xB8 || b1 != 0x00 || b2 != 0x03) { continue; } // Found "mov ax, 0300h" at offset i. Search ahead for "CD 31". uint32_t searchEnd = i + 24; if (searchEnd > size - 1) { searchEnd = size - 1; } for (uint32_t j = i + 3; j + 1 <= searchEnd; j++) { uint8_t c0 = *(uint8_t *)(lin + j); uint8_t c1 = *(uint8_t *)(lin + j + 1); if (c0 == 0xCD && c1 == 0x31) { // Create a data alias for the code segment so we can write uint16_t dataSel = __dpmi_create_alias_descriptor(sel); if (dataSel == 0) { logErr("windrv: patchDoInt10h: cannot create alias for seg %d\n", s); break; } // Patch 0x31 -> DPMI300_INT_NUM _farpokeb(dataSel, j + 1, DPMI300_INT_NUM); // Verify uint8_t verify = _farpeekb(sel, j + 1); dbg("windrv: patched INT 31h -> INT %02Xh at seg%d:%04" PRIX32 " (verify: %02X)\n", DPMI300_INT_NUM, s + 1, j, verify); __dpmi_free_ldt_descriptor(dataSel); patched = true; break; } } } } if (!patched) { dbg("windrv: patchDoInt10h: no INT 31h found after MOV AX,0300h\n"); } return patched; } // Patch hardcoded "mov ax, 0040h; mov es, ax" in driver code segments. // // physical_enable in VGA.ASM loads ES with the literal value 0x0040 to // access the BIOS data area. In real Windows 3.1, selector 0x0040 either // maps to 0040:0000 or is trapped by the VDD. Under CWSDPMI, 0x0040 is // an invalid ring-0 GDT selector that causes a GPF. 
// // We scan for the byte pattern B8 40 00 8E C0 (mov ax,0040h; mov es,ax) // and patch the immediate to our biosDataSel from the stub context. static bool patchBiosDataAccess(struct WdrvDriverS *drv) { uint16_t biosSel = gStubCtx.biosDataSel; if (biosSel == 0) { logErr("windrv: patchBiosDataAccess: no biosDataSel\n"); return false; } bool patched = false; for (int s = 0; s < drv->neMod.segmentCount; s++) { if (!drv->neMod.segments[s].isCode) { continue; } uint16_t sel = drv->neMod.segments[s].selector; uint32_t lin = drv->neMod.segments[s].linearAddr; uint32_t size = drv->neMod.segments[s].size; for (uint32_t i = 0; i + 4 < size; i++) { uint8_t *p = (uint8_t *)(lin + i); // B8 40 00 8E C0 = mov ax, 0040h; mov es, ax if (p[0] == 0xB8 && p[1] == 0x40 && p[2] == 0x00 && p[3] == 0x8E && p[4] == 0xC0) { uint16_t dataSel = __dpmi_create_alias_descriptor(sel); if (dataSel == 0) { logErr("windrv: patchBiosDataAccess: cannot create alias for seg %d\n", s); break; } _farpokeb(dataSel, i + 1, (uint8_t)(biosSel & 0xFF)); _farpokeb(dataSel, i + 2, (uint8_t)(biosSel >> 8)); uint8_t v0 = _farpeekb(sel, i + 1); uint8_t v1 = _farpeekb(sel, i + 2); logErr("windrv: patched mov ax,0040h -> mov ax,%04Xh at seg%d:%04" PRIX32 " (verify: %02X %02X)\n", biosSel, s + 1, i, v0, v1); __dpmi_free_ldt_descriptor(dataSel); patched = true; } } } if (!patched) { dbg("windrv: patchBiosDataAccess: pattern not found (OK for some drivers)\n"); } return patched; } // Repatch __WINFLAGS in all driver segments. // // The NE loader patches __WINFLAGS (KERNEL.178) into the driver's code/data // segments at relocation time. After Enable(style=1) reveals the driver type, // we may need to change WF_ENHANCED to WF_STANDARD for VGA-class drivers // whose Enable(style=0) hangs waiting for a VDD that doesn't exist. // // We scan all segments for the 16-bit word pattern and replace it. 
static void patchWinFlags(struct WdrvDriverS *drv, uint16_t oldFlags, uint16_t newFlags) { if (oldFlags == newFlags) { return; } uint8_t oldLo = (uint8_t)(oldFlags & 0xFF); uint8_t oldHi = (uint8_t)(oldFlags >> 8); uint8_t newLo = (uint8_t)(newFlags & 0xFF); uint8_t newHi = (uint8_t)(newFlags >> 8); int count = 0; for (int s = 0; s < drv->neMod.segmentCount; s++) { uint16_t sel = drv->neMod.segments[s].selector; uint32_t lin = drv->neMod.segments[s].linearAddr; uint32_t size = drv->neMod.segments[s].size; bool isCode = drv->neMod.segments[s].isCode; if (size < 2) { continue; } // Need a writable alias for code segments uint16_t dataSel = 0; if (isCode) { dataSel = __dpmi_create_alias_descriptor(sel); if (dataSel == 0) { continue; } } for (uint32_t i = 0; i + 1 < size; i++) { uint8_t *p = (uint8_t *)(lin + i); if (p[0] == oldLo && p[1] == oldHi) { if (isCode) { _farpokeb(dataSel, i, newLo); _farpokeb(dataSel, i + 1, newHi); } else { p[0] = newLo; p[1] = newHi; } count++; } } if (dataSel != 0) { __dpmi_free_ldt_descriptor(dataSel); } } if (count > 0) { dbg("windrv: patched %d __WINFLAGS locations: 0x%04X -> 0x%04X\n", count, oldFlags, newFlags); } } static void removeDpmi300Proxy(void) { if (gDpmi300Installed) { __dpmi_set_protected_mode_interrupt_vector(DPMI300_INT_NUM, &gOldDpmi300Vec); gDpmi300Installed = false; } } // Declared in file-scope asm above extern void exc0dRawHandler(void); extern void exc0eRawHandler(void); static bool installExceptionCapture(void) { // Initialize fault handler stack gFaultStackTop = (uint32_t)gFaultStack + sizeof(gFaultStack); // Get old exception handlers __dpmi_get_processor_exception_handler_vector(0x0D, &gOldExc0D); __dpmi_get_processor_exception_handler_vector(0x0E, &gOldExc0E); // Copy to packed far pointers for asm indirect far jumps gOldExc0dFar.offset = (uint32_t)gOldExc0D.offset32; gOldExc0dFar.selector = (uint16_t)gOldExc0D.selector; gOldExc0eFar.offset = (uint32_t)gOldExc0E.offset32; gOldExc0eFar.selector = 
(uint16_t)gOldExc0E.selector; // Install our handlers __dpmi_paddr newVec; newVec.selector = _my_cs(); newVec.offset32 = (unsigned long)exc0dRawHandler; if (__dpmi_set_processor_exception_handler_vector(0x0D, &newVec) != 0) { return false; } newVec.offset32 = (unsigned long)exc0eRawHandler; if (__dpmi_set_processor_exception_handler_vector(0x0E, &newVec) != 0) { __dpmi_set_processor_exception_handler_vector(0x0D, &gOldExc0D); return false; } gExcCaptureInstalled = true; return true; } static void removeExceptionCapture(void) { if (gExcCaptureInstalled) { __dpmi_set_processor_exception_handler_vector(0x0D, &gOldExc0D); __dpmi_set_processor_exception_handler_vector(0x0E, &gOldExc0E); gExcCaptureInstalled = false; } } static bool installInt2FhHandler(void) { __dpmi_paddr oldVec; __dpmi_get_protected_mode_interrupt_vector(0x2F, &oldVec); gOldInt2FhVec = oldVec; gOldInt2FhFar.offset = oldVec.offset32; gOldInt2FhFar.selector = oldVec.selector; __dpmi_paddr newVec; newVec.offset32 = (unsigned long)int2FhRawHandler; newVec.selector = _my_cs(); if (__dpmi_set_protected_mode_interrupt_vector(0x2F, &newVec) != 0) { return false; } gInt2FhInstalled = true; return true; } static void removeInt2FhHandler(void) { if (gInt2FhInstalled) { __dpmi_set_protected_mode_interrupt_vector(0x2F, &gOldInt2FhVec); gInt2FhInstalled = false; } } static void dbg(const char *fmt, ...) { if (!gDebug) { return; } va_list ap; va_start(ap, fmt); logErrV(fmt, ap); va_end(ap); } // Patch Windows PROLOG_0 sequences in all code segments. // // The Windows 3.x module loader converts the 3-byte function prolog // 8C D8 90 (mov ax, ds ; nop) // to // B8 xx xx (mov ax, ) // // This ensures AX holds the correct DGROUP selector when the function // body executes "push ds ; mov ds, ax" for FAR entry. // // However, NEAR calls enter at offset+3 (skipping the mov ax), so AX // may be clobbered. 
Since DS is always DGROUP at both entry paths // (the relay sets it for far calls, the caller preserves it for near // calls), the "mov ds, ax" is redundant. We NOP it out so the // function simply does "push ds" (saving DGROUP for the epilog) and // continues with DS already correct. // // Full original 10-byte prolog: // 8C D8 90 mov ax, ds ; nop offset+0 (far entry) // 45 inc bp offset+3 (near entry) // 55 push bp offset+4 // 8B EC mov bp, sp offset+5 // 1E push ds offset+7 // 8E D8 mov ds, ax offset+8 // // Patched: // B8 xx xx mov ax, DGROUP offset+0 (for far entry AX) // 45 inc bp offset+3 // 55 push bp offset+4 // 8B EC mov bp, sp offset+5 // 1E push ds offset+7 // 90 90 nop ; nop offset+8 (DS already correct) // Patch Win16 PROLOG_0/PROLOG_1 function prologs and their matching epilogs. // // Win16 PROLOG_0 functions use `inc bp` to mark far frames for stack walking // and `dec bp` in the epilog to undo it. The Windows kernel needs these odd // BP markers for stack traversal and memory management, but our DOS environment // has no such requirement. Leaving them in causes frame pointer corruption // when the odd BP propagates through the call chain. 
// // Prolog pattern (two variants): // 8C D8 90 45 55 8B EC [1E 8E D8] mov ax,ds; nop; inc bp; push bp; mov bp,sp; [push ds; mov ds,ax] // B8 XX XX 45 55 8B EC [1E 8E D8] mov ax,IMMED; inc bp; push bp; mov bp,sp; [push ds; mov ds,ax] // // Epilog pattern: // 5D 4D CB pop bp; dec bp; retf // 5D 4D C3 pop bp; dec bp; ret // // Patches applied: // - 8C D8 90 → B8 DGROUP_LO DGROUP_HI (load correct DGROUP selector) // - 45 → 90 (NOP out inc bp) // - 8E D8 → 90 90 (NOP out mov ds,ax — DS already set by thunk) // - 4D → 90 in epilog (NOP out dec bp) static void patchPrologs(NeModuleT *mod) { uint16_t dgroupSel = mod->autoDataSel; int prologCount = 0; int epilogCount = 0; for (int s = 0; s < mod->segmentCount; s++) { if (!mod->segments[s].isCode) { continue; } uint8_t *base = (uint8_t *)mod->segments[s].linearAddr; uint32_t size = mod->segments[s].size; // Pass 1: Patch prologs — find "45 55 8B EC" (inc bp; push bp; mov bp,sp) for (uint32_t i = 0; i + 3 < size; i++) { if (base[i] != 0x45 || base[i + 1] != 0x55 || base[i + 2] != 0x8B || base[i + 3] != 0xEC) { continue; } // NOP out inc bp base[i] = 0x90; prologCount++; // If preceded by "8C D8 90" (mov ax,ds; nop), patch to mov ax,DGROUP if (i >= 3 && base[i - 3] == 0x8C && base[i - 2] == 0xD8 && base[i - 1] == 0x90) { base[i - 3] = 0xB8; base[i - 2] = (uint8_t)(dgroupSel & 0xFF); base[i - 1] = (uint8_t)(dgroupSel >> 8); } // "1E 8E D8" (push ds; mov ds,ax) must be kept intact! // The driver expects DS = DGROUP for all DS-relative data access. // Do NOT NOP these out. } // Pass 2: Patch epilogs — find "5D 4D" followed by any return: // CB = retf, C3 = ret, CA xx xx = retf N, C2 xx xx = ret N // Pascal calling convention uses retf N (CA) to clean parameters, // so most epilogs are "5D 4D CA xx xx", not "5D 4D CB". 
for (uint32_t i = 0; i + 2 < size; i++) { if (base[i] == 0x5D && base[i + 1] == 0x4D && (base[i + 2] == 0xCB || base[i + 2] == 0xC3 || base[i + 2] == 0xCA || base[i + 2] == 0xC2)) { base[i + 1] = 0x90; epilogCount++; } } } dbg("windrv: patched %d prologs, %d epilogs (DGROUP=0x%04X)\n", prologCount, epilogCount, dgroupSel); } // Patch VFLATD initialization code to avoid a 20-byte stack imbalance. // // The VFLATD init code at seg5:0x2368 is a subroutine (no prolog, near ret // at 0x252B) called from the mode setup function. It allocates DOS memory // via GlobalDOSAlloc/GlobalAlloc/GlobalLock/GetCurrentPDB, pushing 20 bytes // of intermediate values onto the stack. All exit paths converge at 0x2519 // (GlobalFree) -> 0x2522 (SetSwapAreaSize) -> 0x252B (ret) WITHOUT cleaning // these 20 bytes. // // In real Windows 3.x the caller at 0x3613 restores SP from BP, so the // imbalance is harmless. But our thunk returns via a clean `ret`, which // pops 0x2362 (junk) instead of the real return address 0x3613, landing // in the middle of a `lea sp,[bp-2]` instruction -> SIGILL. // // There are TWO entry points to this init code: // 0x22C5: wrapper function that checks [0EE9] and proceeds with init // 0x2368: direct entry from mode setup (after VBE mode set) // // Fix: patch BOTH to C3 (near ret) so neither path executes VFLATD init. // With LFB mode forced via DPMI (patchVflatdBypassCall), VFLATD setup // is unnecessary. 
static void patchVflatdStackBug(NeModuleT *mod) {
    // Locates the code segment whose bytes at 0x02A4 are "E8 1E 20" (a near
    // call targeting 0x22C5) and turns both VFLATD init entry points into an
    // immediate near return.  Does nothing if no segment matches.
    int segIdx = -1;

    for (int s = 0; s < mod->segmentCount; s++) {
        if (mod->segments[s].isCode && mod->segments[s].size > 0x2369) {
            uint8_t *base = (uint8_t *)mod->segments[s].linearAddr;
            // Verify the call at 0x02A4 targets 0x22C5: E8 1E 20
            if (base[0x02A4] == 0xE8 && base[0x02A5] == 0x1E && base[0x02A6] == 0x20) {
                segIdx = s;
                break;
            }
        }
    }

    if (segIdx < 0) {
        dbg("windrv: VFLATD init patch: pattern not found, skipping\n");
        return;
    }

    uint8_t *base    = (uint8_t *)mod->segments[segIdx].linearAddr;
    uint32_t segSize = mod->segments[segIdx].size;

    // Patch wrapper function at 0x22C5 to immediate return
    base[0x22C5] = 0xC3;
    dbg("windrv: patched VFLATD init wrapper at seg %d offset 0x22C5 (ret)\n", segIdx);

    // Patch direct entry at 0x2368 (C6 06 A4 49 00 = mov byte [49A4],0).
    // The signature check reads up to offset 0x236B, but the segment was
    // only required to be larger than 0x2369, so make sure those bytes
    // exist before looking at them.
    if (segSize > 0x236B &&
        base[0x2368] == 0xC6 && base[0x2369] == 0x06 &&
        base[0x236A] == 0xA4 && base[0x236B] == 0x49) {
        base[0x2368] = 0xC3;
        dbg("windrv: patched VFLATD init direct entry at seg %d offset 0x2368 (ret)\n", segIdx);
    } else {
        dbg("windrv: VFLATD init direct entry at 0x2368: unexpected bytes, skipping\n");
    }
}


// Bypass the VFLATD API call at seg5:0x3FD4.
//
// The driver checks [DS:8889] to decide between two framebuffer paths:
//   [8889] == 0xFF: DPMI path (allocate descriptor, map physical via INT 31h)
//   [8889] != 0xFF: VFLATD path (call far through [DS:0D76])
//
// Since VFLATD is not available, the far pointer at [0D76] is null, causing
// a GPF.  Force the DPMI path by patching the conditional jump to unconditional.
//
// Original at 0x3FA9: 80 3E 89 88 FF 74 32  (cmp byte [8889],0xFF; jz +0x32)
// Patched:            EB 37 90 90 90 90 90  (jmp +0x37; nop*5)
//
// Both reach 0x3FE2 which uses DPMI INT 31h functions 0800h/0007h/0008h
// to map the physical framebuffer — fully supported by CWSDPMI.
static void patchVflatdBypassCall(NeModuleT *mod) {
    // Byte signatures, kept as tables so the compare and the patch read the
    // same way as the listing they come from.
    static const uint8_t cmpJz[7]      = { 0x80, 0x3E, 0x89, 0x88, 0xFF, 0x74, 0x32 };  // cmp byte [8889],0xFF; jz +0x32
    static const uint8_t jmpNops[7]    = { 0xEB, 0x37, 0x90, 0x90, 0x90, 0x90, 0x90 };  // jmp 0x3FE2; nop*5
    static const uint8_t vflatdCall[4] = { 0xFF, 0x1E, 0x76, 0x0D };                    // call far [DS:0D76]
    const uint32_t       patchOff      = 0x3FA9;

    // Find the first code segment that is large enough and carries the
    // cmp/jz signature at the expected offset.
    int hit = -1;
    for (int s = 0; s < mod->segmentCount && hit < 0; s++) {
        if (!mod->segments[s].isCode || !(mod->segments[s].size > 0x3FB0)) {
            continue;
        }
        const uint8_t *cand = (const uint8_t *)mod->segments[s].linearAddr;
        bool same = true;
        for (uint32_t k = 0; k < sizeof(cmpJz) && same; k++) {
            same = cand[patchOff + k] == cmpJz[k];
        }
        if (same) {
            hit = s;
        }
    }

    if (hit < 0) {
        dbg("windrv: VFLATD bypass patch: pattern not found, skipping\n");
        return;
    }

    uint8_t *code = (uint8_t *)mod->segments[hit].linearAddr;

    // Make the branch to the DPMI path unconditional.
    for (uint32_t k = 0; k < sizeof(jmpNops); k++) {
        code[patchOff + k] = jmpNops[k];
    }
    dbg("windrv: patched VFLATD bypass at seg %d offset 0x3FA9\n", hit);

    // NOP every "call far [DS:0D76]" in the same segment.  These go through
    // the VFLATD entry point, which is null since VFLATD isn't present.
    // With LFB mode via DPMI, bank switching is unnecessary.
    uint32_t codeLen = mod->segments[hit].size;
    int      removed = 0;
    for (uint32_t pos = 0; pos + 3 < codeLen; pos++) {
        bool isCall = true;
        for (uint32_t k = 0; k < sizeof(vflatdCall) && isCall; k++) {
            isCall = code[pos + k] == vflatdCall[k];
        }
        if (!isCall) {
            continue;
        }
        for (uint32_t k = 0; k < sizeof(vflatdCall); k++) {
            code[pos + k] = 0x90;
        }
        dbg("windrv: NOPed VFLATD call at seg %d offset 0x%04" PRIX32 "\n", hit, pos);
        removed++;
    }
    dbg("windrv: NOPed %d VFLATD call(s) total\n", removed);
}