WinDriver/win31drv/windrv.c
2026-02-21 18:01:54 -06:00

3301 lines
114 KiB
C

// ============================================================================
// windrv.c - Main driver interface
//
// Implements the public windrv.h API by coordinating the NE loader,
// thunking layer, and Windows API stubs to load and use Windows 3.x
// display drivers from DOS programs compiled with DJGPP.
// ============================================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <inttypes.h>
#include <pc.h>
#include <dpmi.h>
#include <go32.h>
#include <sys/nearptr.h>
#include <sys/movedata.h>
#include <sys/farptr.h>
#include "windrv.h"
#include "wintypes.h"
#include "winddi.h"
#include "neformat.h"
#include "neload.h"
#include "thunk.h"
#include "winstub.h"
#include "log.h"
// ============================================================================
// Driver instance structure (opaque handle)
// ============================================================================
// Per-driver state behind the opaque driver handle. wdrvLoadDriver()
// allocates one instance with calloc(), so every field starts out zero/false.
struct WdrvDriverS {
// NE module state; filled in by neLoadModule() from wdrvLoadDriver()
NeModuleT neMod;
// Copy of the path given to wdrvLoadDriver() (at most 255 characters kept)
char filePath[256];
// DDI entry point addresses (16-bit selector:offset)
// DDI_MAX_ORDINAL slots. NOTE(review): presumably filled by
// resolveDriverEntries(), with 'present' marking entries the driver
// actually exports - confirm against that function.
struct {
uint16_t sel;
uint16_t off;
bool present;
} ddiEntry[DDI_MAX_ORDINAL];
// Device info from Enable (style=0) call
GdiInfo16T gdiInfo;
// NOTE(review): presumably set once gdiInfo has been filled in - confirm
bool gdiInfoValid;
// GDI objects embedded within DGROUP.
// Windows 3.x drivers expect all GDI objects (PDEVICE, brush,
// drawMode) to share the same segment, because in Win3.1 they
// are all in the global GDI heap. When the driver does e.g.
// "lds si, lpBrush" it expects DS to still cover DGROUP.
// We achieve this by allocating objects at offsets within the
// DGROUP segment, so every far pointer uses autoDataSel.
//
// Each object below is tracked twice: a 16-bit offset within DGROUP (for
// building far pointers handed to the driver) and a 32-bit linear address
// (for access from C).
uint32_t dgroupObjBase; // Start offset of object area in DGROUP
// Physical device structure (within DGROUP)
uint16_t pdevOff; // Offset within DGROUP
uint32_t pdevLinear; // Linear address for C access
uint32_t pdevSize; // Allocated size
// Logical brush (within DGROUP, input to RealizeObject)
uint16_t logBrushOff;
uint32_t logBrushLinear;
// Physical brush (within DGROUP, output of RealizeObject)
uint16_t brushOff;
uint32_t brushLinear;
uint32_t brushRealizedColor; // Color of last realized brush
bool brushRealized;
// Logical pen (within DGROUP, input to RealizeObject)
uint16_t logPenOff;
uint32_t logPenLinear;
// Physical pen (within DGROUP, output of RealizeObject)
uint16_t penOff;
uint32_t penLinear;
uint32_t penRealizedColor;
bool penRealized;
// Physical color (within DGROUP, output of ColorInfo)
uint16_t physColorOff;
uint32_t physColorLinear;
// Draw mode (within DGROUP)
uint16_t drawModeOff;
uint32_t drawModeLinear;
// Current state
bool enabled;
uint32_t currentColor;
// Video RAM mapping
// NOTE(review): the exact relationship between vramPtr, vramPhysAddr and
// vramLinear, and the unit of 'pitch' (presumably bytes per scanline), is
// established outside this part of the file - confirm before relying on it.
void *vramPtr;
uint32_t vramPhysAddr;
uint32_t vramSize;
uint32_t vramLinear;
int32_t pitch;
// Display Y offset: the S3 driver writes an 8x8 color brush pattern
// to a fixed VRAM location (~(144,1)-(151,8)) during dithered fills.
// We shift the CRTC display start down by this many scanlines so the
// scratch area is off-screen, and add the offset to all Y coordinates.
int16_t dispYOffset;
// NOTE(review): presumably true when the loaded driver is the S3 driver
// that needs the dispYOffset workaround above - confirm where it is set.
bool isS3;
};
// ============================================================================
// Global state
// ============================================================================
// Thunking-layer context: set up by thunkInit() in wdrvInit() and torn down
// by thunkShutdown() in wdrvShutdown().
static ThunkContextT gThunkCtx;
// Windows API stub context: set up by stubInit() (which is also handed
// gThunkCtx) and torn down by stubShutdown().
static StubContextT gStubCtx;
// True between a successful wdrvInit() and the matching wdrvShutdown();
// makes both calls idempotent and gates wdrvLoadDriver().
static bool gInitialized = false;
// Most recent error code. wdrvInit() returns this right after calling
// setError(); presumably setError() is what writes it.
static int32_t gLastError = WDRV_OK;
// When true, wdrvLoadDriver() enables NE loader debug output (neSetDebug).
static bool gDebug = false;
// NOTE(review): not referenced in this part of the file; presumably records
// that an S3 driver was detected - confirm.
static bool gIsS3 = false;
// Forward declarations
// Import callback handed to neLoadModule() by wdrvLoadDriver().
static FarPtr16T importResolver(const char *moduleName, uint16_t ordinal, const char *funcName);
// Per-driver setup helpers. Their definitions lie outside this part of the
// file; the names suggest they populate the matching WdrvDriverS fields.
static bool resolveDriverEntries(struct WdrvDriverS *drv);
static bool extendDgroupForObjects(struct WdrvDriverS *drv);
static bool allocPDevice(struct WdrvDriverS *drv);
static bool allocDrawMode(struct WdrvDriverS *drv);
static bool allocBrushBuffers(struct WdrvDriverS *drv);
static bool allocPenBuffers(struct WdrvDriverS *drv);
static bool realizeBrush(struct WdrvDriverS *drv, uint32_t color);
static bool realizePen(struct WdrvDriverS *drv, uint32_t color);
static uint32_t colorToPhys(struct WdrvDriverS *drv, uint32_t colorRef);
static void setDisplayStart(struct WdrvDriverS *drv, uint32_t byteOffset);
static void freeDrawObjects(struct WdrvDriverS *drv);
static uint16_t alloc16BitBlock(uint32_t size, uint32_t *linearOut);
static void free16BitBlock(uint16_t sel, uint32_t linear);
// Records an error code; wdrvInit() reads it back through gLastError.
static void setError(int32_t err);
static void waitForEngine(void);
// printf-style debug logging (used by dpmi300Worker()).
static void dbg(const char *fmt, ...);
// Driver code patches (definitions outside this part of the file).
static void patchPrologs(NeModuleT *mod);
static void patchVflatdStackBug(NeModuleT *mod);
static void patchVflatdBypassCall(NeModuleT *mod);
// Interrupt/exception hook management. Each install* is called once from
// wdrvInit() (a false return is only logged as a warning); each remove* is
// called from wdrvShutdown() in the reverse order.
static bool installInt10hReflector(void);
static void removeInt10hReflector(void);
static bool installDpmi300Proxy(void);
static void removeDpmi300Proxy(void);
static bool patchDoInt10h(struct WdrvDriverS *drv);
static bool patchBiosDataAccess(struct WdrvDriverS *drv);
static void patchWinFlags(struct WdrvDriverS *drv, uint16_t oldFlags, uint16_t newFlags);
static bool installInt2FhHandler(void);
static void removeInt2FhHandler(void);
static bool installExceptionCapture(void);
static void removeExceptionCapture(void);
// ============================================================================
// INT 10h (Video BIOS) reflector
//
// Win 3.x display drivers call INT 10h for video mode setting and BIOS
// queries. In protected mode, these calls won't reach the real-mode BIOS
// unless we intercept them and use DPMI to simulate a real-mode interrupt.
// ============================================================================
// NOTE(review): presumably the previous INT 10h vector and install flag kept
// by installInt10hReflector()/removeInt10hReflector() - both are defined
// outside this part of the file.
static __dpmi_paddr gOldInt10hVec;
static bool gInt10hInstalled = false;
// Globals for the raw INT 10h handler assembly stub.
// Non-static so the asm symbols (prefixed with _) are accessible.
// gInt10hDsSel is read through a CS override by the INT 10h stub and by both
// exception stubs, which load it into FS/DS/ES/SS; it must therefore be set
// before any of those handlers can run.
uint16_t gInt10hDsSel; // DJGPP DS selector
uint32_t gInt10hSavedSS; // Interrupted SS
uint32_t gInt10hSavedESP; // Interrupted ESP
// Written on entry by the stub; the stub's exit path does not read it back
// (it reloads FS/GS from gCbDgroupSel instead).
uint32_t gInt10hSavedFS; // Interrupted FS
uint8_t gInt10hStack[4096] __attribute__((aligned(16))); // Handler stack
// Loaded into ESP by the stub before it calls int10hWorker().
// NOTE(review): presumably initialised to the end of gInt10hStack by
// installInt10hReflector() - confirm.
uint32_t gInt10hStackTop; // Top of handler stack
// Diagnostic: count INT 10h handler entries
// Updated by the stub on every entry: a running count plus the SS and ESP
// values seen at that point.
volatile uint32_t gInt10hEntryCount = 0;
volatile uint32_t gInt10hLastSS = 0;
volatile uint32_t gInt10hLastESP = 0;
// ============================================================================
// Exception capture - captures primary fault CS:EIP before DJGPP's handler
// (which may itself crash handling exceptions from 16-bit code).
//
// DPMI 0.9 exception frame on the locked exception stack:
// ESP+0x00: Return EIP (to DPMI host, for RETF)
// ESP+0x04: Return CS
// ESP+0x08: Error code
// ESP+0x0C: Faulting EIP
// ESP+0x10: Faulting CS
// ESP+0x14: Faulting EFLAGS
// ESP+0x18: Faulting ESP
// ESP+0x1C: Faulting SS
// ============================================================================
// Latch set to 1 by whichever raw exception stub sees the first fault; while
// it is non-zero both stubs chain straight to the previous handler.
volatile uint32_t gFaultCaptured = 0;
// 13 (GPF) or 14 (page fault), written by the stub that captured the fault.
volatile uint32_t gFaultNum = 0;
// Copies of the exception-frame fields: error code, faulting CS:EIP and
// faulting SS:ESP. The frame's EFLAGS field is not recorded.
volatile uint32_t gFaultErr = 0;
volatile uint32_t gFaultEIP = 0;
volatile uint32_t gFaultCS = 0;
volatile uint32_t gFaultESP = 0;
volatile uint32_t gFaultSS = 0;
// General-purpose registers and DS/ES as they were when the stub was entered
// (segment values are zero-extended to 32 bits).
volatile uint32_t gFaultEAX = 0;
volatile uint32_t gFaultEBX = 0;
volatile uint32_t gFaultECX = 0;
volatile uint32_t gFaultEDX = 0;
volatile uint32_t gFaultESI = 0;
volatile uint32_t gFaultEDI = 0;
volatile uint32_t gFaultEBP = 0;
volatile uint32_t gFaultDS = 0;
volatile uint32_t gFaultES = 0;
// Private stack for faultWorker(); the stubs load ESP from gFaultStackTop.
// NOTE(review): presumably gFaultStackTop is initialised to the end of
// gFaultStack by installExceptionCapture() - confirm.
uint8_t gFaultStack[4096] __attribute__((aligned(16)));
uint32_t gFaultStackTop;
// Packed 48-bit far pointers for chaining to old exception handlers.
// ljmp indirect reads offset32 + selector16 (6 bytes), so no padding allowed.
typedef struct __attribute__((packed)) {
uint32_t offset;
uint16_t selector;
} FarPtr48T;
// Targets of the "ljmp *%cs:..." in the #13 and #14 stubs respectively.
FarPtr48T gOldExc0dFar;
FarPtr48T gOldExc0eFar;
// NOTE(review): presumably the previous exception vectors in DPMI form, kept
// so removeExceptionCapture() can restore them - confirm.
static __dpmi_paddr gOldExc0D;
static __dpmi_paddr gOldExc0E;
static bool gExcCaptureInstalled = false;
// INT 2Fh chaining state: gOldInt2FhFar is the ljmp target used by the raw
// INT 2Fh handler for calls it does not emulate.
static __dpmi_paddr gOldInt2FhVec;
FarPtr48T gOldInt2FhFar;
static bool gInt2FhInstalled = false;
// Saved register state for the raw INT 10h handler.
// Layout matches the save/restore sequence in the assembly stub.
// The byte offsets in the trailing comments are hard-coded in the raw INT 10h
// assembly stub (e.g. "_gInt10Frame+28" for EAX), so fields must not be
// reordered, resized or removed without updating the stub as well.
typedef struct __attribute__((packed)) {
uint32_t edi; // +0
uint32_t esi; // +4
uint32_t ebp; // +8
uint32_t _reserved; // +12 (alignment padding)
uint32_t ebx; // +16
uint32_t edx; // +20
uint32_t ecx; // +24
uint32_t eax; // +28
uint32_t es; // +32 (zero-extended from 16-bit)
uint32_t ds; // +36 (zero-extended from 16-bit)
// The stub copies all three IRET-frame values in, but on exit only eflags is
// written back to the interrupted stack; eip and cs are informational.
uint32_t eip; // +40 (from IRET frame)
uint32_t cs; // +44 (from IRET frame)
uint32_t eflags; // +48 (from IRET frame)
} Int10FrameT;
// Non-static so the asm symbol _gInt10Frame is accessible.
// Single shared instance: the stub saves into it, passes its address to
// int10hWorker(), and restores the registers from it afterwards.
Int10FrameT gInt10Frame;
// Worker function called from the assembly stub.
// Non-static so the asm symbol _int10hWorker is accessible.
// Reflect one protected-mode INT 10h call to the real-mode video BIOS.
//
// frame: register snapshot taken by the raw handler stub. On return the low
//        words of EAX/EBX/ECX/ESI and of EFLAGS hold the BIOS results; the
//        high words are preserved. DI, DX and BP are also passed back,
//        except for the one register (if any) that had to be rewritten to
//        address the real-mode copy of the caller's ES buffer - that
//        register keeps the caller's original protected-mode offset.
//
// If the caller's ES selector cannot be resolved to a linear base, the BIOS
// is not called: CF is set in the returned flags and, for VBE functions,
// AX is set to 014Fh (function supported, call failed).
void int10hWorker(Int10FrameT *frame)
{
    __dpmi_regs rRegs;
    memset(&rRegs, 0, sizeof(rRegs));
    uint16_t func = (uint16_t)frame->eax;

    // NOTE: Do not add dbg()/logErr() calls on the common path — file I/O
    // from the INT 10h handler corrupts callback state (observed:
    // GlobalDOSAlloc params garbled after 4F15h stub). The two logErr()
    // calls below only fire for mode remapping and VBE failures.
    rRegs.x.ax = (uint16_t)frame->eax;
    rRegs.x.bx = (uint16_t)frame->ebx;
    rRegs.x.cx = (uint16_t)frame->ecx;
    rRegs.x.dx = (uint16_t)frame->edx;
    rRegs.x.si = (uint16_t)frame->esi;
    rRegs.x.di = (uint16_t)frame->edi;
    rRegs.x.bp = (uint16_t)frame->ebp;

    // VBE Set Mode: translate S3 OEM modes to VESA standard modes.
    // Bits 14-15 of BX are mode flags and are carried over unchanged.
    if (func == 0x4F02) {
        uint16_t origBX = rRegs.x.bx;
        uint16_t modeNum = origBX & 0x3FFF;
        uint16_t flags = origBX & 0xC000;
        uint16_t vesaMode = modeNum;
        switch (modeNum) {
            case 0x0201: vesaMode = 0x0101; break; // 640x480x256
            case 0x0202: vesaMode = 0x0103; break; // 800x600x256
            case 0x0203: vesaMode = 0x0103; break; // 800x600x256
            case 0x0204: vesaMode = 0x0105; break; // 1024x768x256
            case 0x0205: vesaMode = 0x0105; break; // 1024x768x256
            default: break;                        // already a standard mode
        }
        if (vesaMode != modeNum) {
            rRegs.x.bx = flags | vesaMode;
            logErr("INT10: VBE mode 0x%04X -> 0x%04X (S3 OEM -> VESA)\n",
                   origBX, rRegs.x.bx);
        }
    }

    // Stub out VBE functions we cannot support: report AX=0100h without
    // touching the BIOS.
    if (func == 0x4F0A || func == 0x4F15) {
        frame->eax = (frame->eax & 0xFFFF0000) | 0x0100;
        return;
    }

    // ================================================================
    // Translate ES for real-mode reflection.
    //
    // The driver's ES is a PM selector. Real-mode INT 10h expects a
    // real-mode paragraph segment. If the whole buffer lies in
    // conventional memory (<1MB), compute the real-mode segment
    // directly. Otherwise bounce through the DOS transfer buffer.
    //
    // Only specific sub-functions use ES as a buffer pointer. Each
    // function family uses a different offset register:
    //   VBE 4Fxx: ES:DI
    //   AH=10h:   ES:DX (palette)
    //   AH=11h:   ES:BP (font data)
    //   AH=1Bh:   ES:DI (state info)
    // ================================================================
    uint16_t pmES = (uint16_t)frame->es;
    bool useTB = false;
    bool translated = false;   // true once an offset register was rewritten
    uint32_t tb = 0;
    uint32_t copySize = 0;
    bool copyIn = false;       // PM -> transfer buffer before INT
    bool copyOut = false;      // transfer buffer -> PM after INT

    // pmOff holds the PM-side offset from the register this function
    // uses; offReg says which register that is: 0 = DI, 1 = DX, 2 = BP.
    uint16_t pmOff = 0;
    int offReg = 0;
    bool needsES = false;
    uint8_t ah = (uint8_t)(func >> 8);
    uint8_t al = (uint8_t)(func & 0xFF);

    if (ah == 0x4F) {
        // VBE functions — ES:DI
        offReg = 0;
        pmOff = rRegs.x.di;
        if (al == 0x00) {
            needsES = true; copyIn = true; copyOut = true; copySize = 512;
        } else if (al == 0x01) {
            needsES = true; copyOut = true; copySize = 256;
        } else if (al == 0x04) {
            needsES = true; copyIn = true; copyOut = true; copySize = 1024;
        } else if (al == 0x09) {
            needsES = true; copyIn = true;
            copySize = rRegs.x.cx * 4;
            if (copySize > 4096) {
                copySize = 4096;
            }
        }
    } else if (ah == 0x10) {
        // Palette functions — ES:DX
        offReg = 1;
        pmOff = rRegs.x.dx;
        if (al == 0x02) {
            // Set All Palette Registers: 17 bytes (16 regs + overscan)
            needsES = true; copyIn = true; copySize = 17;
        } else if (al == 0x09) {
            // Read All Palette Registers: 17 bytes
            needsES = true; copyOut = true; copySize = 17;
        } else if (al == 0x12) {
            // Set Block of DAC Color Registers: CX * 3 bytes
            needsES = true; copyIn = true;
            copySize = rRegs.x.cx * 3;
            if (copySize > 4096) {
                copySize = 4096;
            }
        } else if (al == 0x17) {
            // Read Block of DAC Color Registers: CX * 3 bytes
            needsES = true; copyOut = true;
            copySize = rRegs.x.cx * 3;
            if (copySize > 4096) {
                copySize = 4096;
            }
        }
    } else if (ah == 0x11) {
        // Character generator — ES:BP
        offReg = 2;
        pmOff = rRegs.x.bp;
        if (al == 0x00 || al == 0x10) {
            // Load User Font: CX chars * BH bytes/char
            needsES = true; copyIn = true;
            copySize = rRegs.x.cx * (rRegs.x.bx >> 8);
            if (copySize > 8192) {
                copySize = 8192;
            }
        }
        // AL=20/21 set interrupt vectors to ES:BP — the address must
        // point at resident data, not a temporary buffer, so skip.
    } else if (ah == 0x1B) {
        // Functionality/State Info — ES:DI, 64-byte buffer
        offReg = 0;
        pmOff = rRegs.x.di;
        needsES = true; copyOut = true; copySize = 64;
    }

    if (pmES != 0 && needsES && copySize > 0) {
        unsigned long esBase = 0;
        if (__dpmi_get_segment_base_address(pmES, &esBase) != 0) {
            // The caller's buffer cannot be located. Reflecting anyway
            // would hand the BIOS ES=0 (the real-mode IVT), so fail the
            // call instead of risking a stray write.
            if (ah == 0x4F) {
                frame->eax = (frame->eax & 0xFFFF0000) | 0x014F;
            }
            frame->eflags |= 0x0001; // CF = error
            return;
        }

        uint16_t rmOff;
        if (esBase < 0x100000 && esBase + pmOff + copySize <= 0x100000) {
            // Entire buffer is in conventional memory: address it in place.
            uint32_t linear = esBase + pmOff;
            rRegs.x.es = (uint16_t)(linear >> 4);
            rmOff = (uint16_t)(linear & 0x0F);
        } else {
            // Buffer reaches above 1MB: bounce through the DOS transfer
            // buffer.
            tb = _go32_info_block.linear_address_of_transfer_buffer;
            if (copyIn) {
                movedata(pmES, pmOff, _dos_ds, tb, copySize);
            }
            rRegs.x.es = (uint16_t)(tb >> 4);
            rmOff = (uint16_t)(tb & 0x0F);
            useTB = true;
        }

        if (offReg == 0) {
            rRegs.x.di = rmOff;
        } else if (offReg == 1) {
            rRegs.x.dx = rmOff;
        } else {
            rRegs.x.bp = rmOff;
        }
        translated = true;
    }

    __dpmi_simulate_real_mode_interrupt(0x10, &rRegs);

    if (useTB && copyOut) {
        movedata(_dos_ds, tb, pmES, pmOff, copySize);
    }

    // Update return registers.
    frame->eax = (frame->eax & 0xFFFF0000) | rRegs.x.ax;
    frame->ebx = (frame->ebx & 0xFFFF0000) | rRegs.x.bx;
    frame->ecx = (frame->ecx & 0xFFFF0000) | rRegs.x.cx;
    frame->esi = (frame->esi & 0xFFFF0000) | rRegs.x.si;
    frame->eflags = (frame->eflags & 0xFFFF0000) | rRegs.x.flags;

    // DI/DX/BP: pass the real-mode value through unless that register was
    // rewritten above to point at the real-mode copy of the buffer. In that
    // case the caller must get its own protected-mode offset back, not our
    // temporary real-mode offset.
    if (!translated || offReg != 0) {
        frame->edi = (frame->edi & 0xFFFF0000) | rRegs.x.di;
    }
    if (!translated || offReg != 1) {
        frame->edx = (frame->edx & 0xFFFF0000) | rRegs.x.dx;
    }
    if (!translated || offReg != 2) {
        frame->ebp = (frame->ebp & 0xFFFF0000) | rRegs.x.bp;
    }

    // Log VBE failures
    if (ah == 0x4F) {
        uint16_t retAX = (uint16_t)frame->eax;
        if (retAX != 0x004F) {
            logErr("INT10: VBE func %04X returned AX=%04X (FAILED)\n",
                   func, retAX);
        }
    }
}
// Raw INT 10h handler stub in assembly.
//
// The _go32_dpmi_allocate_iret_wrapper mechanism fails when an interrupt
// fires during 16-bit code execution — software interrupts are dispatched
// on the CURRENT stack (DPMI spec), so the wrapper tries to build its
// _go32_dpmi_registers structure on the 16-bit stack with a different
// SS base, producing an invalid pointer (observed: regs=0x7a2, page fault).
//
// This handler avoids the problem by:
// 1. Saving ALL registers to a global structure, written through FS
// (loaded with DJGPP's DS selector) and read back CS-relative
// (CS base == DS base in DJGPP)
// 2. Switching SS:ESP to a dedicated 32-bit handler stack in DJGPP's
// data segment (so SS base == DS base, safe for C library calls)
// 3. Calling the C worker function
// 4. Restoring SS:ESP and all registers from the global structure
// 5. Returning via IRET
//
// NOT re-entrant — uses global state. Acceptable because the handler
// doesn't enable interrupts, and INT 10h is a software interrupt that
// cannot nest (our worker uses DPMI INT 31h, not INT 10h).
//
// Uses FS for writes (code segments are read-only in protected mode).
// CS-relative reads are fine (readable code segment).
__asm__(
" .text\n"
" .p2align 4\n"
" .globl _int10hRawHandler\n"
"_int10hRawHandler:\n"
// ---- Save original FS, then load FS with our writable DS selector ----
// EAX/ECX are borrowed as scratch here (pushed on the interrupted stack)
// and popped again before the register snapshot is taken.
" pushl %eax\n"
" pushl %ecx\n"
" xorl %eax, %eax\n"
" movw %fs, %ax\n"
" movw %cs:_gInt10hDsSel, %cx\n"
" movw %cx, %fs\n"
" movl %eax, %fs:_gInt10hSavedFS\n"
// Diagnostic: increment entry counter, save SS and ESP
// (ESP is recorded while the two scratch pushes are still on the stack,
// so gInt10hLastESP is 8 below the ESP at handler entry.)
" movl %fs:_gInt10hEntryCount, %eax\n"
" incl %eax\n"
" movl %eax, %fs:_gInt10hEntryCount\n"
" xorl %eax, %eax\n"
" movw %ss, %ax\n"
" movl %eax, %fs:_gInt10hLastSS\n"
" movl %esp, %fs:_gInt10hLastESP\n"
" popl %ecx\n"
" popl %eax\n"
// ---- Save all GP registers to global frame via FS (writable) ----
// Offsets are the Int10FrameT field offsets; +12 (_reserved) is not written.
" movl %eax, %fs:_gInt10Frame+28\n"
" movl %ecx, %fs:_gInt10Frame+24\n"
" movl %edx, %fs:_gInt10Frame+20\n"
" movl %ebx, %fs:_gInt10Frame+16\n"
" movl %ebp, %fs:_gInt10Frame+8\n"
" movl %esi, %fs:_gInt10Frame+4\n"
" movl %edi, %fs:_gInt10Frame+0\n"
// ---- Save segment registers (zero-extended to 32 bits) ----
" xorl %eax, %eax\n"
" movw %es, %ax\n"
" movl %eax, %fs:_gInt10Frame+32\n"
" movw %ds, %ax\n"
" movl %eax, %fs:_gInt10Frame+36\n"
// ---- Save IRET frame from interrupted stack (SS:ESP) ----
// Three dwords are read: EIP at 0, CS at 4, EFLAGS at 8.
" movl (%esp), %eax\n"
" movl %eax, %fs:_gInt10Frame+40\n"
" movl 4(%esp), %eax\n"
" movl %eax, %fs:_gInt10Frame+44\n"
" movl 8(%esp), %eax\n"
" movl %eax, %fs:_gInt10Frame+48\n"
// ---- Save SS:ESP and switch to DJGPP handler stack ----
" movl %esp, %fs:_gInt10hSavedESP\n"
" xorl %eax, %eax\n"
" movw %ss, %ax\n"
" movl %eax, %fs:_gInt10hSavedSS\n"
" movw %cs:_gInt10hDsSel, %ax\n"
" movw %ax, %ds\n"
" movw %ax, %es\n"
" movw %ax, %ss\n"
// DS now holds the DJGPP selector, so this load needs no override.
" movl _gInt10hStackTop, %esp\n"
// ---- Call C worker: int10hWorker(&gInt10Frame) ----
// One stack argument, removed by the caller after the call returns.
" leal _gInt10Frame, %eax\n"
" pushl %eax\n"
" call _int10hWorker\n"
" addl $4, %esp\n"
// ---- Restore SS:ESP (back to interrupted code's stack) ----
" movl %cs:_gInt10hSavedESP, %eax\n"
" movl %cs:_gInt10hSavedSS, %ecx\n"
" movw %cx, %ss\n"
" movl %eax, %esp\n"
// ---- Write modified EFLAGS back to IRET frame on stack ----
// The C worker updates frame->eflags with real-mode return flags
// (e.g. CF for VBE success/failure). Write it back so IRET uses it.
// The frame's EIP and CS slots are left as they were.
" movl %cs:_gInt10Frame+48, %eax\n"
" movl %eax, 8(%esp)\n"
// ---- Restore GP registers from global frame (CS reads OK) ----
" movl %cs:_gInt10Frame+0, %edi\n"
" movl %cs:_gInt10Frame+4, %esi\n"
" movl %cs:_gInt10Frame+8, %ebp\n"
" movl %cs:_gInt10Frame+16, %ebx\n"
" movl %cs:_gInt10Frame+20, %edx\n"
" movl %cs:_gInt10Frame+24, %ecx\n"
// ---- Restore segment registers (FS/GS always set to DGROUP) ----
// gCbDgroupSel is defined outside this part of the file. The FS value
// stored in gInt10hSavedFS on entry is not used here, and GS is
// overwritten without having been saved.
" movl %cs:_gCbDgroupSel, %eax\n"
" movw %ax, %fs\n"
" movw %ax, %gs\n"
" movl %cs:_gInt10Frame+32, %eax\n"
" movw %ax, %es\n"
" movl %cs:_gInt10Frame+36, %eax\n"
" movw %ax, %ds\n"
// ---- Restore EAX last (was used as scratch) ----
" movl %cs:_gInt10Frame+28, %eax\n"
" iret\n"
);
// ============================================================================
// DPMI 0x300h (Simulate Real Mode Interrupt) proxy
//
// The VBESVGA driver's DoInt10h calls DPMI INT 31h AX=0300h from 16-bit
// code to perform real-mode INT 10h for VBE BIOS calls. CWSDPMI does
// not correctly service this DPMI function when the INT 31h originates
// from a 16-bit code segment inside a 32-bit DPMI client.
//
// Fix: after the driver's entry point has been called (which patches
// DoInt10h for 386 via SetupInt10h), we change the single "CD 31"
// (INT 31h) instruction in DoInt10h to "CD 64" (INT 64h). Our INT 64h
// handler reads the Real Mode Call Structure (RMCS) that DoInt10h built
// on the 16-bit stack, calls __dpmi_simulate_real_mode_interrupt from
// 32-bit code (which CWSDPMI handles correctly), and writes the results
// back to the RMCS so DoInt10h can unpack them normally.
// ============================================================================
// Software interrupt number that replaces INT 31h in the driver's DoInt10h.
#define DPMI300_INT_NUM 0x64
// NOTE(review): presumably the previous INT 64h vector and install flag kept
// by installDpmi300Proxy()/removeDpmi300Proxy() (defined outside this part
// of the file) - confirm.
static __dpmi_paddr gOldDpmi300Vec;
static bool gDpmi300Installed = false;
// Globals for the raw handler assembly stub
// Non-static because the stub refers to them by their asm symbol names.
// gDpmi300DsSel is read through a CS override and loaded into FS/DS/ES/SS.
uint16_t gDpmi300DsSel;
// Interrupted SS:ESP (ESP is captured after PUSHAL) and the interrupted
// FS/DS/ES/GS, all restored by the stub before IRET.
uint32_t gDpmi300SavedSS;
uint32_t gDpmi300SavedESP;
uint32_t gDpmi300SavedFS;
uint32_t gDpmi300SavedDS;
uint32_t gDpmi300SavedES;
uint32_t gDpmi300SavedGS;
// Inputs handed from the stub to dpmi300Worker().
uint32_t gDpmi300RmcsSel; // ES at time of interrupt (RMCS segment)
uint32_t gDpmi300RmcsEdi; // EDI at time of interrupt (RMCS offset)
uint32_t gDpmi300IntNum; // EBX at time of interrupt (BL=int number)
// Private stack used while dpmi300Worker() runs; the stub loads ESP from
// gDpmi300StackTop. NOTE(review): presumably set to the end of
// gDpmi300Stack by installDpmi300Proxy() - confirm.
uint8_t gDpmi300Stack[4096] __attribute__((aligned(16)));
uint32_t gDpmi300StackTop;
// Worker: reads RMCS, performs real-mode interrupt, writes results back.
// The DPMI RMCS layout is byte-compatible with DJGPP's __dpmi_regs (50 bytes).
// Service one proxied "simulate real-mode interrupt" request.
//
// Inputs come from globals filled in by the raw INT 64h stub:
//   gDpmi300RmcsSel:gDpmi300RmcsEdi - address of the caller's RMCS
//   gDpmi300IntNum (low byte)       - real-mode interrupt number
// The RMCS is copied into a local __dpmi_regs, the interrupt is simulated
// from 32-bit code, and the updated structure is copied back over the
// caller's RMCS. There is no return value; the stub always reports success.
void dpmi300Worker(void)
{
    // Size of a DPMI real-mode call structure; matches __dpmi_regs.
    enum { RMCS_BYTES = 50 };

    uint16_t rmcsSel = (uint16_t)gDpmi300RmcsSel;
    uint32_t rmcsOff = gDpmi300RmcsEdi;
    uint8_t intNum = (uint8_t)gDpmi300IntNum;
    __dpmi_regs regs;
    memset(&regs, 0, sizeof(regs));
    movedata(rmcsSel, rmcsOff, _my_ds(), (unsigned)&regs, RMCS_BYTES);

    // Remember which function was requested: AX is overwritten with the
    // status code by the call below.
    uint16_t requestAX = regs.x.ax;

    dbg("DPMI300: INT %02Xh AX=%04X BX=%04X ES=%04X DI=%04X SS:SP=%04X:%04X\n",
        intNum, regs.x.ax, regs.x.bx, regs.x.es, regs.x.di,
        regs.x.ss, regs.x.sp);
    __dpmi_simulate_real_mode_interrupt(intNum, &regs);
    dbg("DPMI300: result AX=%04X\n", regs.x.ax);

    // Dump VBE info buffer contents, but only for a successful VBE 4F00h
    // (Get Controller Information). Every successful VBE call returns
    // AX=004Fh, so testing the result alone would interpret whatever ES:DI
    // points at as a VbeInfoBlock for unrelated functions.
    if (intNum == 0x10 && requestAX == 0x4F00 && regs.x.ax == 0x004F) {
        uint32_t bufLin = (uint32_t)regs.x.es * 16 + regs.x.di;
        uint8_t hdr[32];
        dosmemget(bufLin, 32, hdr);
        dbg("DPMI300: VBE buf[0..3]=%c%c%c%c ver=%02X%02X modes=%02X%02X:%02X%02X\n",
            hdr[0], hdr[1], hdr[2], hdr[3],
            hdr[5], hdr[4],
            hdr[0x0F], hdr[0x0E], hdr[0x11], hdr[0x10]);

        // Mode list pointer at offset 0x0E: offset(word) + segment(word)
        uint16_t modesOff = hdr[0x0E] | ((uint16_t)hdr[0x0F] << 8);
        uint16_t modesSeg = hdr[0x10] | ((uint16_t)hdr[0x11] << 8);
        dbg("DPMI300: VBE modes ptr %04X:%04X (buf at %04X:%04X)\n",
            modesSeg, modesOff, regs.x.es, regs.x.di);

        // Read first 16 mode numbers (the list is terminated by FFFFh)
        uint32_t modesLin = (uint32_t)modesSeg * 16 + modesOff;
        uint16_t modes[16];
        dosmemget(modesLin, 32, modes);
        dbg("DPMI300: VBE modes:");
        for (int i = 0; i < 16 && modes[i] != 0xFFFF; i++) {
            dbg(" %03X", modes[i]);
        }
        dbg("\n");
    }

    // Hand the results back through the caller's RMCS.
    movedata(_my_ds(), (unsigned)&regs, rmcsSel, rmcsOff, RMCS_BYTES);
}
// C-visible declaration of the asm entry point defined below.
extern void dpmi300RawHandler(void);
// Raw INT 64h handler. Same save/restore pattern as the INT 10h reflector
// but simpler: we only need the RMCS pointer (ES:EDI) and interrupt number
// (BL) from the interrupted context. All GP and segment registers are
// preserved across the call — the only visible side effect is that the
// RMCS on the driver's stack is updated and the carry flag is cleared.
__asm__(
" .text\n"
" .p2align 4\n"
" .globl _dpmi300RawHandler\n"
"_dpmi300RawHandler:\n"
// ---- Save FS, load FS with our DS selector ----
// EAX/ECX are scratch here; both are popped again before PUSHAL.
" pushl %eax\n"
" pushl %ecx\n"
" xorl %eax, %eax\n"
" movw %fs, %ax\n"
" movw %cs:_gDpmi300DsSel, %cx\n"
" movw %cx, %fs\n"
" movl %eax, %fs:_gDpmi300SavedFS\n"
// ---- Save communication values: ES (RMCS sel), EDI, EBX ----
// ES is stored twice: once for restoring on exit, once as worker input.
" xorl %eax, %eax\n"
" movw %es, %ax\n"
" movl %eax, %fs:_gDpmi300SavedES\n"
" movl %eax, %fs:_gDpmi300RmcsSel\n"
" movl %edi, %fs:_gDpmi300RmcsEdi\n"
" movl %ebx, %fs:_gDpmi300IntNum\n"
// ---- Save remaining segment registers ----
" xorl %eax, %eax\n"
" movw %ds, %ax\n"
" movl %eax, %fs:_gDpmi300SavedDS\n"
" movw %gs, %ax\n"
" movl %eax, %fs:_gDpmi300SavedGS\n"
// ---- Restore scratch, then PUSHAL to save all GP regs ----
// The GP registers live on the interrupted stack while the worker runs.
" popl %ecx\n"
" popl %eax\n"
" pushal\n"
// ---- Save interrupted SS:ESP and switch to handler stack ----
// FS is loaded with the same selector a second time here; EAX is free to
// clobber because PUSHAL has already saved it.
" movw %cs:_gDpmi300DsSel, %ax\n"
" movw %ax, %fs\n"
" movl %esp, %fs:_gDpmi300SavedESP\n"
" xorl %eax, %eax\n"
" movw %ss, %ax\n"
" movl %eax, %fs:_gDpmi300SavedSS\n"
" movw %fs:_gDpmi300DsSel, %ax\n"
" movw %ax, %ds\n"
" movw %ax, %es\n"
" movw %ax, %ss\n"
// DS now holds the DJGPP selector, so this load needs no override.
" movl _gDpmi300StackTop, %esp\n"
// ---- Call C worker ----
// No arguments: the worker reads its inputs from the globals stored above.
" call _dpmi300Worker\n"
// ---- Restore interrupted SS:ESP ----
" movl %cs:_gDpmi300SavedSS, %ecx\n"
" movl %cs:_gDpmi300SavedESP, %eax\n"
" movw %cx, %ss\n"
" movl %eax, %esp\n"
// ---- POPAL to restore all GP registers ----
" popal\n"
// ---- Restore segment registers ----
// EAX is used as scratch for the selector loads and restored afterwards.
" pushl %eax\n"
" movl %cs:_gDpmi300SavedFS, %eax\n"
" movw %ax, %fs\n"
" movl %cs:_gDpmi300SavedGS, %eax\n"
" movw %ax, %gs\n"
" movl %cs:_gDpmi300SavedES, %eax\n"
" movw %ax, %es\n"
" movl %cs:_gDpmi300SavedDS, %eax\n"
" movw %ax, %ds\n"
" popl %eax\n"
// ---- Clear carry flag in IRET frame EFLAGS (success) ----
// The saved EFLAGS image is the third dword of the IRET frame; bit 0 is CF.
// Success is reported unconditionally - the worker has no failure path.
" andl $0xFFFFFFFE, 8(%esp)\n"
" iret\n"
);
// Worker function for exception handler — logs full diagnostics and exits.
// Non-static so the asm symbol _faultWorker is accessible.
// Report the fault recorded in the gFault* globals and terminate.
//
// Output, in order: exception number, faulting CS:EIP and error code,
// SS:ESP, general registers, DS/ES, up to 16 code bytes at CS:EIP, the
// DS/ES segment bases, and up to 32 words from the faulting stack. The
// program then exits through DOS with return code FFh; this function
// never returns.
void faultWorker(void)
{
    // ---- Register snapshot ----
    logErr("\n=== EXCEPTION #%" PRIu32 " ===\n", gFaultNum);
    logErr(" CS:EIP = %04" PRIX32 ":%08" PRIX32 " error=%04" PRIX32 "\n",
           gFaultCS, gFaultEIP, gFaultErr);
    logErr(" SS:ESP = %04" PRIX32 ":%08" PRIX32 "\n", gFaultSS, gFaultESP);
    logErr(" eax=%08" PRIX32 " ebx=%08" PRIX32 " ecx=%08" PRIX32 " edx=%08" PRIX32 "\n",
           gFaultEAX, gFaultEBX, gFaultECX, gFaultEDX);
    logErr(" esi=%08" PRIX32 " edi=%08" PRIX32 " ebp=%08" PRIX32 "\n",
           gFaultESI, gFaultEDI, gFaultEBP);
    logErr(" ds=%04" PRIX32 " es=%04" PRIX32 "\n", gFaultDS, gFaultES);

    // ---- Code bytes at the faulting address ----
    // Read with _farpeekb: movedata fails on 16-bit code segments in
    // fault context.
    uint16_t codeSel = (uint16_t)gFaultCS;
    uint32_t codeOff = gFaultEIP;
    unsigned long codeBase;
    if (__dpmi_get_segment_base_address(codeSel, &codeBase) == 0) {
        unsigned codeLimit = __dpmi_get_segment_limit(codeSel);
        logErr(" cs: base=%08lX limit=%04X\n", codeBase, codeLimit);
        logErr(" code:");
        int n = 0;
        while (n < 16) {
            // Never read past the end of the code segment.
            if ((codeOff + n) > codeLimit) {
                break;
            }
            logErr(" %02X", _farpeekb(codeSel, codeOff + n));
            n++;
        }
        logErr("\n");
    }

    // ---- Bases of the data segments in use (null selectors skipped) ----
    if ((uint16_t)gFaultDS != 0) {
        unsigned long dataBase;
        if (__dpmi_get_segment_base_address((uint16_t)gFaultDS, &dataBase) == 0) {
            logErr(" ds: base=%08lX\n", dataBase);
        }
    }
    if ((uint16_t)gFaultES != 0) {
        unsigned long extraBase;
        if (__dpmi_get_segment_base_address((uint16_t)gFaultES, &extraBase) == 0) {
            logErr(" es: base=%08lX\n", extraBase);
        }
    }

    // ---- Top of the faulting stack: 32 words, 16 per output line ----
    if ((uint16_t)gFaultSS != 0) {
        unsigned stackLimit = __dpmi_get_segment_limit((uint16_t)gFaultSS);
        logErr(" ss: limit=%04X\n", stackLimit);
        logErr(" stack:");
        int word = 0;
        while (word < 32) {
            // Both bytes of the word must lie within the segment limit.
            if ((gFaultESP + word * 2 + 1) > stackLimit) {
                break;
            }
            if (word == 16) {
                logErr("\n ");
            }
            logErr(" %04X", _farpeekw((uint16_t)gFaultSS, gFaultESP + word * 2));
            word++;
        }
        logErr("\n");
    }

    // ---- Terminate through DOS (INT 21h AH=4Ch, return code FFh) ----
    __asm__ volatile ("movl $0x4CFF, %%eax; int $0x21" ::: "eax");
    __builtin_unreachable();
}
// Raw exception handlers for GPF (#13) and PF (#14).
//
// These capture fault state (GP registers, DS/ES, and the error code,
// CS:EIP and SS:ESP from the exception frame), then switch to a private
// stack and call faultWorker() to log full diagnostics (including the
// instruction bytes at CS:EIP) and exit cleanly (avoiding secondary crashes
// from DJGPP's handler trying to process faults from 16-bit code).
//
// DPMI exception frame on stack:
// ESP+0x00: Return EIP (to DPMI host, for RETF)
// ESP+0x04: Return CS
// ESP+0x08: Error code
// ESP+0x0C: Faulting EIP
// ESP+0x10: Faulting CS
// ESP+0x14: Faulting EFLAGS
// ESP+0x18: Faulting ESP
// ESP+0x1C: Faulting SS
//
// After pushing EAX, offsets shift by +4.
// General protection fault (#13) capture stub.
__asm__(
" .text\n"
" .p2align 4\n"
" .globl _exc0dRawHandler\n"
"_exc0dRawHandler:\n"
// EAX is scratch for loading FS with the DJGPP data selector. The selector
// comes from gInt10hDsSel, which this stub shares with the INT 10h handler.
" pushl %eax\n"
" movw %cs:_gInt10hDsSel, %ax\n"
" movw %ax, %fs\n"
// Only the first fault is captured; any later one is chained (label 1).
" cmpl $0, %fs:_gFaultCaptured\n"
" jne 1f\n"
// First fault — capture everything
" movl $1, %fs:_gFaultCaptured\n"
" movl $13, %fs:_gFaultNum\n"
// Save GP registers via FS
" popl %eax\n"
" movl %eax, %fs:_gFaultEAX\n"
" movl %ebx, %fs:_gFaultEBX\n"
" movl %ecx, %fs:_gFaultECX\n"
" movl %edx, %fs:_gFaultEDX\n"
" movl %esi, %fs:_gFaultESI\n"
" movl %edi, %fs:_gFaultEDI\n"
" movl %ebp, %fs:_gFaultEBP\n"
" xorl %eax, %eax\n"
" movw %ds, %ax\n"
" movl %eax, %fs:_gFaultDS\n"
" movw %es, %ax\n"
" movl %eax, %fs:_gFaultES\n"
// Save exception frame fields (no pushed EAX shift now)
// The EFLAGS slot at 0x14 is skipped; it is not recorded anywhere.
" movl 0x08(%esp), %eax\n"
" movl %eax, %fs:_gFaultErr\n"
" movl 0x0C(%esp), %eax\n"
" movl %eax, %fs:_gFaultEIP\n"
" movl 0x10(%esp), %eax\n"
" movl %eax, %fs:_gFaultCS\n"
" movl 0x18(%esp), %eax\n"
" movl %eax, %fs:_gFaultESP\n"
" movl 0x1C(%esp), %eax\n"
" movl %eax, %fs:_gFaultSS\n"
// Switch to our private stack and call faultWorker
// The exception stack is abandoned here; nothing returns to the DPMI host.
" movw %fs:_gInt10hDsSel, %ax\n"
" movw %ax, %ds\n"
" movw %ax, %es\n"
" movw %ax, %ss\n"
" movl _gFaultStackTop, %esp\n"
" call _faultWorker\n"
// faultWorker doesn't return, but just in case:
" hlt\n"
"1:\n"
// Secondary fault — chain to old handler
// EAX is restored, but FS still holds the DJGPP selector loaded above when
// control reaches the previous handler.
" popl %eax\n"
" ljmp *%cs:_gOldExc0dFar\n"
);
// Page fault (#14) capture stub. Instruction-for-instruction the same as the
// #13 stub except for the recorded exception number and the chain target.
__asm__(
" .text\n"
" .p2align 4\n"
" .globl _exc0eRawHandler\n"
"_exc0eRawHandler:\n"
// EAX is scratch for loading FS with the DJGPP data selector
// (gInt10hDsSel, shared with the INT 10h handler).
" pushl %eax\n"
" movw %cs:_gInt10hDsSel, %ax\n"
" movw %ax, %fs\n"
// Only the first fault is captured; any later one is chained (label 1).
" cmpl $0, %fs:_gFaultCaptured\n"
" jne 1f\n"
// First fault — capture everything
" movl $1, %fs:_gFaultCaptured\n"
" movl $14, %fs:_gFaultNum\n"
// Save GP registers and DS/ES through FS.
" popl %eax\n"
" movl %eax, %fs:_gFaultEAX\n"
" movl %ebx, %fs:_gFaultEBX\n"
" movl %ecx, %fs:_gFaultECX\n"
" movl %edx, %fs:_gFaultEDX\n"
" movl %esi, %fs:_gFaultESI\n"
" movl %edi, %fs:_gFaultEDI\n"
" movl %ebp, %fs:_gFaultEBP\n"
" xorl %eax, %eax\n"
" movw %ds, %ax\n"
" movl %eax, %fs:_gFaultDS\n"
" movw %es, %ax\n"
" movl %eax, %fs:_gFaultES\n"
// Exception frame fields: error code, faulting EIP, CS, ESP and SS. EAX has
// already been popped, so the offsets are the unshifted frame offsets; the
// EFLAGS slot at 0x14 is skipped.
" movl 0x08(%esp), %eax\n"
" movl %eax, %fs:_gFaultErr\n"
" movl 0x0C(%esp), %eax\n"
" movl %eax, %fs:_gFaultEIP\n"
" movl 0x10(%esp), %eax\n"
" movl %eax, %fs:_gFaultCS\n"
" movl 0x18(%esp), %eax\n"
" movl %eax, %fs:_gFaultESP\n"
" movl 0x1C(%esp), %eax\n"
" movl %eax, %fs:_gFaultSS\n"
// Switch to the private fault stack and report; faultWorker() exits the
// program, so the HLT is only a backstop.
" movw %fs:_gInt10hDsSel, %ax\n"
" movw %ax, %ds\n"
" movw %ax, %es\n"
" movw %ax, %ss\n"
" movl _gFaultStackTop, %esp\n"
" call _faultWorker\n"
" hlt\n"
"1:\n"
// Secondary fault: restore EAX and chain to the previous #14 handler. FS
// still holds the DJGPP selector loaded above at that point.
" popl %eax\n"
" ljmp *%cs:_gOldExc0eFar\n"
);
// Raw INT 2Fh handler for Windows API emulation.
//
// Windows 3.x display drivers call INT 2Fh to check for the Windows
// Enhanced Mode environment. Without this handler, the calls are
// reflected to real mode where DOS returns "not installed", causing
// the driver's initialization to fail.
//
// Handled functions:
// AX=1600h: Windows Enhanced Mode installation check
// Returns AL=03h, AH=0Ah (Windows 3.10 Enhanced Mode)
// AX=4000h-400Ah: Virtual DMA Services (VDS)
// Returns carry clear (success, no-op)
// AX=400Bh and above (AH=40h): Windows/386 VMM API calls
// Returns AX=0 (not present, proceed normally)
//
// NOTE(review): INT 2Fh AX=40xxh is commonly documented as the Windows
// virtual display device (VDD) interface rather than VDS - confirm the
// naming used above.
//
// All other INT 2Fh calls are chained to the previous handler.
// This handler modifies only AX and the carry bit of the caller's saved
// EFLAGS and returns via IRET, so no stack switching is needed (unlike
// the INT 10h handler).
extern void int2FhRawHandler(void);
__asm__(
" .text\n"
" .p2align 4\n"
" .globl _int2FhRawHandler\n"
"_int2FhRawHandler:\n"
" cmpw $0x1600, %ax\n"
" je 1f\n"
" cmpb $0x40, %ah\n"
" je 3f\n"
// Not ours: hand the call to the previous handler with registers intact.
" ljmp *%cs:_gOldInt2FhFar\n"
"1:\n"
// Windows 3.10 Enhanced Mode is "running"
" movw $0x0A03, %ax\n"
" iret\n"
"3:\n"
// AH=40h: VDS and Windows/386 API calls
// VDS calls (AL=00h-0Ah): return carry clear (success, no-op)
// VMM calls (AL above 0Ah): return AX=0 (not present)
" cmpb $0x0A, %al\n"
" jbe 4f\n"
// VMM/Win386 API: not present
" xorw %ax, %ax\n"
" iret\n"
"4:\n"
// VDS: success (carry clear).
// IRET reloads EFLAGS from the interrupt frame, so a CLC here would be
// discarded. Clear CF (bit 0) in the saved EFLAGS image instead, the same
// way the INT 64h proxy reports success.
" andl $0xFFFFFFFE, 8(%esp)\n"
" iret\n"
);
// ============================================================================
// Library initialization
// ============================================================================
// Bring up the library: thunking layer, Windows API stubs, interrupt and
// exception hooks, and near-pointer access.
//
// Returns WDRV_OK on success, or when the library is already initialised.
// A thunk or stub initialisation failure is fatal: the error is recorded
// with setError() and gLastError is returned. A hook or near-pointer
// failure only produces a warning and initialisation carries on.
int32_t wdrvInit(void)
{
    // A second call is a successful no-op.
    if (gInitialized) {
        return WDRV_OK;
    }

    // ---- Mandatory layers ----
    bool thunkReady = thunkInit(&gThunkCtx);
    if (!thunkReady) {
        setError(WDRV_ERR_THUNK_FAILED);
        return gLastError;
    }

    bool stubsReady = stubInit(&gStubCtx, &gThunkCtx);
    if (!stubsReady) {
        // Undo the thunk layer so a later retry starts clean.
        thunkShutdown(&gThunkCtx);
        setError(WDRV_ERR_INIT);
        return gLastError;
    }

    // ---- Best-effort hooks (failures are logged, not fatal) ----
    bool hooked;

    // INT 10h: CWSDPMI's default reflection doesn't work correctly when
    // the interrupt fires from 16-bit code segments (stack frame
    // mismatch), so we reflect it ourselves.
    hooked = installInt10hReflector();
    if (!hooked) {
        logErr("windrv: warning: could not install INT 10h reflector\n");
    }

    // INT 64h: proxy for DPMI INT 31h AX=0300h (simulate real-mode
    // interrupt). CWSDPMI doesn't handle that call correctly when it comes
    // from 16-bit code inside a 32-bit DPMI client, which is how DoInt10h
    // in the VBESVGA driver performs its VBE calls. The proxy performs the
    // same operation from 32-bit code.
    hooked = installDpmi300Proxy();
    if (!hooked) {
        logErr("windrv: warning: could not install DPMI 300h proxy\n");
    }

    // INT 2Fh: the driver checks AX=1600h for Windows Enhanced Mode, and
    // Enable() returns 0 if that check fails. The raw handler intercepts
    // only specific AX values and chains everything else, so
    // DJGPP/CWSDPMI's own INT 2Fh usage is unaffected.
    hooked = installInt2FhHandler();
    if (!hooked) {
        logErr("windrv: warning: could not install INT 2Fh handler\n");
    }

    // Exception capture, to diagnose the primary fault CS:EIP. Must come
    // after installInt10hReflector(), which sets gInt10hDsSel.
    hooked = installExceptionCapture();
    if (!hooked) {
        logErr("windrv: warning: could not install exception capture\n");
    }

    // ---- Near pointers for direct memory operations ----
    if (!__djgpp_nearptr_enable()) {
        logErr("windrv: warning: near pointer access not available\n");
    }

    gInitialized = true;
    setError(WDRV_OK);
    return WDRV_OK;
}
void wdrvShutdown(void)
{
    // Tear down only what wdrvInit set up; a call without a prior
    // successful init does nothing.
    if (gInitialized) {
        // Hooks are removed in the reverse order of their installation.
        removeExceptionCapture();
        removeInt2FhHandler();
        removeDpmi300Proxy();
        removeInt10hReflector();

        // Then the stub layer, then the thunk layer it was built on.
        stubShutdown(&gStubCtx);
        thunkShutdown(&gThunkCtx);

        __djgpp_nearptr_disable();
        gInitialized = false;
    }
}
// ============================================================================
// Driver loading
// ============================================================================
// Load a Windows 3.x display driver from driverPath and prepare it for use.
//
// Steps: allocate the driver instance, load the NE module, extend DGROUP
// for GDI objects, apply the load-time patches, resolve the DDI entry
// points (Enable and Disable are mandatory), and finally run the module's
// own entry point.
//
// Returns the new handle, or NULL with the error recorded via setError():
//   WDRV_ERR_INIT        - wdrvInit has not been called
//   WDRV_ERR_LOAD_FAILED - driverPath is NULL/empty or the NE load failed
//   WDRV_ERR_NO_MEMORY   - allocation or DGROUP extension failed
//   WDRV_ERR_NO_ENTRY    - DDI entries missing (Enable/Disable required)
WdrvHandleT wdrvLoadDriver(const char *driverPath)
{
    if (!gInitialized) {
        setError(WDRV_ERR_INIT);
        return NULL;
    }

    // Reject a missing path up front instead of handing NULL to strncpy
    // and the NE loader.
    if (driverPath == NULL || driverPath[0] == '\0') {
        setError(WDRV_ERR_LOAD_FAILED);
        return NULL;
    }

    struct WdrvDriverS *drv = calloc(1, sizeof *drv);
    if (!drv) {
        setError(WDRV_ERR_NO_MEMORY);
        return NULL;
    }

    // The structure is zero-filled, so copying at most size-1 bytes always
    // leaves the path NUL-terminated (over-long paths are truncated).
    strncpy(drv->filePath, driverPath, sizeof(drv->filePath) - 1);

    // Load the NE module
    if (gDebug) {
        extern void neSetDebug(bool enable);
        neSetDebug(true);
    }

    if (!neLoadModule(&drv->neMod, driverPath, importResolver)) {
        setError(WDRV_ERR_LOAD_FAILED);
        free(drv);
        return NULL;
    }

    stubSetModule(&gStubCtx, &drv->neMod);

    if (gDebug) {
        neDumpModule(&drv->neMod);
    }

    // Extend DGROUP to include space for GDI objects (PDEVICE, brush, etc.)
    if (!extendDgroupForObjects(drv)) {
        setError(WDRV_ERR_NO_MEMORY);
        neUnloadModule(&drv->neMod);
        free(drv);
        return NULL;
    }

    // Set the driver's DGROUP selector so the thunk loads DS correctly
    gThunkCtx.dgroupSel = drv->neMod.autoDataSel;
    dbg("windrv: DGROUP selector = 0x%04X\n", gThunkCtx.dgroupSel);

    // Patch Windows PROLOG_0 sequences in all code segments.
    // In real Windows, the module loader converts the 3-byte prolog
    // "mov ax, ds; nop" (8C D8 90) to "mov ax, <DGROUP_sel>" (B8 xx xx)
    // so that AX always gets the correct DGROUP selector regardless of
    // the current DS value at function entry. Without this, internal
    // near/far calls within the driver (where AX has been clobbered)
    // will fault when the prolog tries to load DS from AX.
    patchPrologs(&drv->neMod);

    // Patch the VFLATD initialization routine's stack imbalance bug.
    // The function at seg5:0x2368 pushes 20 bytes of intermediate values
    // during API calls but never cleans them before ret. In Windows 3.x
    // the caller restores SP from BP so this is harmless, but our thunk
    // relies on a clean ret.
    patchVflatdStackBug(&drv->neMod);

    // Bypass the VFLATD API call for framebuffer mapping.
    // The driver checks [8889] to choose between VFLATD (VxD call through
    // a far pointer at [0D76]) and DPMI (INT 31h to map physical memory).
    // Since VFLATD isn't available, force the DPMI path which uses standard
    // DPMI functions (0800h, 0007h, 0008h) that CWSDPMI supports.
    patchVflatdBypassCall(&drv->neMod);

    // Resolve DDI entry points
    if (!resolveDriverEntries(drv)) {
        setError(WDRV_ERR_NO_ENTRY);
        neUnloadModule(&drv->neMod);
        free(drv);
        return NULL;
    }

    // Verify that at least Enable and Disable are present
    if (!drv->ddiEntry[DDI_ORD_ENABLE].present ||
        !drv->ddiEntry[DDI_ORD_DISABLE].present) {
        logErr("windrv: driver missing Enable (%d) or Disable (%d)\n",
               drv->ddiEntry[DDI_ORD_ENABLE].present,
               drv->ddiEntry[DDI_ORD_DISABLE].present);
        setError(WDRV_ERR_NO_ENTRY);
        neUnloadModule(&drv->neMod);
        free(drv);
        return NULL;
    }

    // Segment integrity diagnostic: read the first bytes of Enable through
    // the flat pointer and through the selector, and log the descriptor.
    // Enable is known to be present at this point. The export's segment
    // index is validated first so a zero or out-of-range index cannot
    // read outside the segment table.
    {
        uint16_t codeSel = drv->ddiEntry[DDI_ORD_ENABLE].sel;
        uint16_t codeOff = drv->ddiEntry[DDI_ORD_ENABLE].off;

        // Find the segment's stored linear address (segIndex is 1-based)
        int segIdx = drv->neMod.exports[DDI_ORD_ENABLE].segIndex - 1;

        if (segIdx >= 0 && segIdx < (int)drv->neMod.segmentCount) {
            uint32_t storedLinear = drv->neMod.segments[segIdx].linearAddr;

            // Read actual descriptor base from DPMI
            uint32_t descBase = 0;
            __dpmi_get_segment_base_address(codeSel, (unsigned long *)&descBase);

            // Read via flat pointer (linearAddr + offset)
            uint8_t *flatPtr = (uint8_t *)(storedLinear + codeOff);
            uint8_t flatBytes[8];
            for (int i = 0; i < 8; i++) {
                flatBytes[i] = flatPtr[i];
            }

            // Read via far pointer (selector:offset)
            uint8_t farBytes[8];
            for (int i = 0; i < 8; i++) {
                farBytes[i] = _farpeekb(codeSel, codeOff + i);
            }

            // Read raw 8-byte LDT descriptor
            uint8_t rawDesc[8];
            __dpmi_get_descriptor(codeSel, rawDesc);
            uint32_t ldtBase = (uint32_t)rawDesc[2] | ((uint32_t)rawDesc[3] << 8) |
                               ((uint32_t)rawDesc[4] << 16) | ((uint32_t)rawDesc[7] << 24);
            uint32_t ldtLimit = (uint32_t)rawDesc[0] | ((uint32_t)rawDesc[1] << 8) |
                                ((uint32_t)(rawDesc[6] & 0x0F) << 16);

            dbg("windrv: dsBase=0x%08X ptrVal=0x%08" PRIX32
                " descBase=0x%08" PRIX32 " ldtBase=0x%08" PRIX32
                " ldtLimit=0x%05" PRIX32 "\n",
                __djgpp_base_address, storedLinear, descBase, ldtBase, ldtLimit);
            dbg("windrv: rawDesc: %02X %02X %02X %02X %02X %02X %02X %02X\n",
                rawDesc[0], rawDesc[1], rawDesc[2], rawDesc[3],
                rawDesc[4], rawDesc[5], rawDesc[6], rawDesc[7]);
            dbg("windrv: flat[%p]: %02X %02X %02X %02X %02X %02X %02X %02X\n",
                flatPtr,
                flatBytes[0], flatBytes[1], flatBytes[2], flatBytes[3],
                flatBytes[4], flatBytes[5], flatBytes[6], flatBytes[7]);
            dbg("windrv: far[%04X:%04X]: %02X %02X %02X %02X %02X %02X %02X %02X\n",
                codeSel, codeOff,
                farBytes[0], farBytes[1], farBytes[2], farBytes[3],
                farBytes[4], farBytes[5], farBytes[6], farBytes[7]);
        } else {
            dbg("windrv: skipping segment check, Enable segment index %d out of range\n",
                segIdx + 1);
        }
    }

    // Patch DoInt10h's INT 31h -> INT 64h BEFORE calling the entry point.
    // The entry point calls SetupInt10h which self-modifies the Code segment
    // (patches PUSHAD/POPAD on 386). We patch first so that when the entry
    // point later calls DoInt10h for VBE queries, it uses our proxy.
    patchDoInt10h(drv);
    patchBiosDataAccess(drv);

    // Call the NE module entry point (driver_initialization).
    // This runs the driver's one-time init code:
    // - SetupInt10h: allocates a real-mode stack for VBE INT 10h calls
    // - dev_initialization: sets ScreenSelector, checks CPU type, VDD query
    // Without this, DoInt10h uses an uninitialized stack and all VBE calls
    // fail, causing the driver's Enable to hit its fatal error path.
    // The entry point's return value is only logged, not acted on.
    if (drv->neMod.neHeader.entryPointCS != 0) {
        uint16_t epSegIdx = drv->neMod.neHeader.entryPointCS - 1;
        if (epSegIdx < drv->neMod.segmentCount) {
            uint16_t epSel = drv->neMod.segments[epSegIdx].selector;
            uint16_t epOff = drv->neMod.neHeader.entryPointIP;
            dbg("windrv: calling entry point at %04X:%04X\n", epSel, epOff);
            uint32_t epResult = thunkCall16(&gThunkCtx, epSel, epOff, NULL, 0);
            dbg("windrv: entry point returned %u\n", (uint16_t)epResult);
        }
    }

    setError(WDRV_OK);
    return drv;
}
void wdrvUnloadDriver(WdrvHandleT handle)
{
    // A NULL handle is accepted and ignored.
    if (handle != NULL) {
        freeDrawObjects(handle);

        // PDEVICE and the other GDI objects live inside DGROUP, so they go
        // away together with the module's segments.
        neUnloadModule(&handle->neMod);

        free(handle);
    }
}
int32_t wdrvGetInfo(WdrvHandleT handle, WdrvInfoT *info)
{
    if (handle == NULL) {
        return WDRV_ERR_NOT_LOADED;
    }

    // Start from an all-zero record, then copy the module name, leaving
    // the final byte as the terminator.
    memset(info, 0, sizeof *info);
    const size_t nameLast = sizeof(info->driverName) - 1;
    memcpy(info->driverName, handle->neMod.moduleName, nameLast);
    info->driverName[nameLast] = '\0';

    // Mode-related fields are only known once GDIINFO has been queried;
    // until then they stay zero.
    if (handle->gdiInfoValid) {
        info->driverVersion = handle->gdiInfo.dpVersion;
        info->maxWidth      = handle->gdiInfo.dpHorzRes;
        info->maxHeight     = handle->gdiInfo.dpVertRes;
        info->maxBpp        = handle->gdiInfo.dpBitsPixel * handle->gdiInfo.dpPlanes;
        info->numColors     = handle->gdiInfo.dpNumColors;
        info->rasterCaps    = handle->gdiInfo.dpRaster;
    }

    // Capability flags mirror which DDI ordinals the driver exports.
    info->hasBitBlt     = handle->ddiEntry[DDI_ORD_BITBLT].present;
    info->hasOutput     = handle->ddiEntry[DDI_ORD_OUTPUT].present;
    info->hasPixel      = handle->ddiEntry[DDI_ORD_PIXEL].present;
    info->hasStretchBlt = handle->ddiEntry[DDI_ORD_STRETCHBLT].present;
    info->hasExtTextOut = handle->ddiEntry[DDI_ORD_EXTTEXTOUT].present;
    info->hasSetPalette = handle->ddiEntry[DDI_ORD_SETPALETTE].present;
    info->hasSetCursor  = handle->ddiEntry[DDI_ORD_SETCURSOR].present;

    return WDRV_OK;
}
// ============================================================================
// Mode setting
// ============================================================================
// Enable the display driver: query GDIINFO, initialize the PDEVICE and
// video mode, map VRAM, and set up default drawing state.
//
// width/height/bpp are currently ignored; the mode is whatever the driver
// selects during Enable.
//
// Returns WDRV_OK on success. On failure the error is also recorded via
// setError() (except for the NULL-handle case):
//   WDRV_ERR_NOT_LOADED    - handle is NULL
//   WDRV_ERR_NO_MEMORY     - object/buffer allocation failed, or the driver
//                            needs a PDEVICE larger than the one reserved
//   WDRV_ERR_ENABLE_FAILED - Enable(style=0) returned 0
int32_t wdrvEnable(WdrvHandleT handle, int32_t width, int32_t height, int32_t bpp)
{
    if (!handle) {
        return WDRV_ERR_NOT_LOADED;
    }

    (void)width;
    (void)height;
    (void)bpp;

    // Allocate the PDEVICE structure
    if (!allocPDevice(handle)) {
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }

    // Allocate draw mode and physical objects
    if (!allocDrawMode(handle)) {
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }
    if (!allocBrushBuffers(handle)) {
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }
    if (!allocPenBuffers(handle)) {
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }

    // ================================================================
    // Enable the display driver (DDK standard order).
    //
    // WORD PASCAL Enable(LPDEVICE lpDevice, WORD style,
    //                    LPSTR lpDeviceType, LPSTR lpOutputFile,
    //                    LPGDIINFO lpData)
    //
    // Per the DDK and VBESVGA source, the correct call order is:
    //
    // Step 1: Enable(gdiInfoBuf, style=1/InquireInfo) — returns GDIINFO
    //   lpDevice is a GDIINFO-sized buffer (NOT the PDEVICE).
    //   The driver reads SYSTEM.INI settings and returns mode info.
    //
    // Step 2: Enable(pdevBuf, style=0/EnableDevice) — initializes device
    //   lpDevice is the PDEVICE buffer. The driver copies its
    //   physical device template there and sets the video mode.
    // ================================================================

    // Allocate a 16-bit "DISPLAY" string for lpDeviceType.
    // If this allocation fails the calls proceed with lpDeviceType = NULL.
    uint32_t devTypeLin;
    uint16_t devTypeSel = alloc16BitBlock(16, &devTypeLin);
    if (devTypeSel) {
        memcpy((void *)devTypeLin, "DISPLAY", 8);
    }

    uint16_t dgSel = handle->neMod.autoDataSel;
    uint16_t params[9];

    // ================================================================
    // Step 1: Enable(style=1/InquireInfo) — get GDIINFO
    //
    // lpDevice = separate GDIINFO buffer (driver writes GDIINFO here).
    // [0x8894] starts at 0x00, so S3 driver runs full mode selection
    // (reads SCREEN-SIZE, COLOR-FORMAT, etc. from SYSTEM.INI).
    // ================================================================
    // Allocate 256 bytes — some drivers (e.g. S3) write extended
    // GDIINFO fields beyond the standard 108-byte structure.
    uint32_t gdiInfoLinear;
    uint16_t gdiInfoSel = alloc16BitBlock(256, &gdiInfoLinear);
    if (gdiInfoSel == 0) {
        if (devTypeSel) {
            free16BitBlock(devTypeSel, devTypeLin);
        }
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }

    params[0] = gdiInfoSel;     // lpDevice = GDIINFO buffer (NOT PDEVICE!)
    params[1] = 0;
    params[2] = ENABLE_ENABLE;  // style = 1 (InquireInfo)
    params[3] = devTypeSel;     // lpDeviceType = "DISPLAY"
    params[4] = 0;
    params[5] = 0;              // lpOutputFile = NULL
    params[6] = 0;
    params[7] = 0;              // lpData = NULL
    params[8] = 0;

    dbg("windrv: calling Enable(style=1, InquireInfo)\n");
    uint32_t result = thunkCall16(&gThunkCtx,
                                  handle->ddiEntry[DDI_ORD_ENABLE].sel,
                                  handle->ddiEntry[DDI_ORD_ENABLE].off,
                                  params, 9);
    logErr("windrv: Enable(style=1) returned %u\n", (uint16_t)result);

    // Read GDIINFO from the buffer
    memcpy(&handle->gdiInfo, (void *)gdiInfoLinear, sizeof(GdiInfo16T));
    handle->gdiInfoValid = true;
    free16BitBlock(gdiInfoSel, gdiInfoLinear);

    logErr("windrv: GDIINFO: %dx%d %dbpp %dplanes, PDEVICE size=%d\n",
           handle->gdiInfo.dpHorzRes, handle->gdiInfo.dpVertRes,
           handle->gdiInfo.dpBitsPixel, handle->gdiInfo.dpPlanes,
           handle->gdiInfo.dpDEVICEsize);

    // Check that our pre-allocated PDEVICE is large enough. This has to
    // happen BEFORE Enable(style=0): that call writes the driver's PDEVICE
    // into our buffer, so checking afterwards would only detect an
    // overflow that has already occurred (and leave the video mode set).
    if (handle->gdiInfo.dpDEVICEsize > 0 &&
        (uint32_t)handle->gdiInfo.dpDEVICEsize > handle->pdevSize) {
        logErr("windrv: PDEVICE too small (%u < %d), max is %d\n",
               (unsigned)handle->pdevSize, handle->gdiInfo.dpDEVICEsize,
               PDEVICE_MAX_SIZE);
        if (devTypeSel) {
            free16BitBlock(devTypeSel, devTypeLin);
        }
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }

    // ================================================================
    // For VGA-class drivers (1bpp, 4 planes), repatch __WINFLAGS from
    // WF_ENHANCED to WF_STANDARD. VGA.DRV's physical_enable hangs in
    // Enhanced mode because it tries to communicate with the VDD.
    // ================================================================
    if (handle->gdiInfoValid &&
        handle->gdiInfo.dpBitsPixel == 1 && handle->gdiInfo.dpPlanes == 4) {
        uint16_t enhFlags = WF_PMODE | WF_CPU386 | WF_ENHANCED;
        uint16_t stdFlags = WF_PMODE | WF_CPU386 | WF_STANDARD;
        patchWinFlags(handle, enhFlags, stdFlags);
    }

    // ================================================================
    // Step 2: Enable(style=0/EnableDevice) — initialize PDEVICE + mode
    //
    // lpDevice = the PDEVICE buffer. The driver copies its physical
    // device template there and calls physical_enable (sets INT 10h
    // video mode, initializes hardware).
    // ================================================================
    params[0] = dgSel;
    params[1] = handle->pdevOff;
    params[2] = ENABLE_INQUIRE; // style = 0 (EnableDevice)
    params[3] = devTypeSel;     // lpDeviceType = "DISPLAY"
    params[4] = 0;
    params[5] = 0;              // lpOutputFile = NULL
    params[6] = 0;
    params[7] = 0;              // lpData = NULL
    params[8] = 0;

    dbg("windrv: calling Enable(style=0, EnableDevice)\n");
    result = thunkCall16(&gThunkCtx,
                         handle->ddiEntry[DDI_ORD_ENABLE].sel,
                         handle->ddiEntry[DDI_ORD_ENABLE].off,
                         params, 9);
    logErr("windrv: Enable(style=0) returned %u\n", (uint16_t)result);

    if (devTypeSel) {
        free16BitBlock(devTypeSel, devTypeLin);
    }

    if ((uint16_t)result == 0) {
        setError(WDRV_ERR_ENABLE_FAILED);
        return gLastError;
    }

    // Log CRTC offset register (CR13) after EnableDevice
    {
        uint16_t crtcBase = (inportb(0x3CC) & 0x01) ? 0x3D4 : 0x3B4;
        outportb(crtcBase, 0x13);
        uint8_t cr13 = inportb(crtcBase + 1);
        logErr("windrv: CR13 after Enable(style=0): 0x%02X (pitch=%u)\n",
               cr13, (uint16_t)cr13 * 8);
    }

    // Log PDEVICE after EnableDevice
    {
        DibPDevice16T *pd = (DibPDevice16T *)handle->pdevLinear;
        logErr("windrv: PDEVICE: deType=0x%04X deWidth=%u deHeight=%u "
               "deWidthBytes=%u dePlanes=%u deBitsPixel=%u\n",
               pd->deType, pd->deWidth, pd->deHeight,
               pd->deWidthBytes, pd->dePlanes, pd->deBitsPixel);

        // Dump the leading PDEVICE bytes (at most 64)
        uint8_t *pdb = (uint8_t *)handle->pdevLinear;
        uint32_t pdSize = handle->pdevSize < 64 ? handle->pdevSize : 64;
        logErr("windrv: PDEVICE hex (%lu bytes):", (unsigned long)pdSize);
        for (uint32_t bi = 0; bi < pdSize; bi++) {
            logErr(" %02X", pdb[bi]);
        }
        logErr("\n");

        // If EnableDevice left deWidth/deHeight/deBitsPixel as zero,
        // fill them from GDIINFO
        if (pd->deWidth == 0 && handle->gdiInfoValid) {
            pd->deWidth = (uint16_t)handle->gdiInfo.dpHorzRes;
        }
        if (pd->deHeight == 0 && handle->gdiInfoValid) {
            pd->deHeight = (uint16_t)handle->gdiInfo.dpVertRes;
        }
        if (pd->deBitsPixel == 0 && handle->gdiInfoValid) {
            pd->deBitsPixel = (uint8_t)handle->gdiInfo.dpBitsPixel;
        }
    }

    // Query current VBE mode for diagnostics
    {
        __dpmi_regs vr;
        memset(&vr, 0, sizeof(vr));
        vr.x.ax = 0x4F03; // VBE Return Current VBE Mode
        __dpmi_int(0x10, &vr);
        logErr("windrv: VBE current mode: AX=%04X BX=%04X (mode=0x%03X)\n",
               vr.x.ax, vr.x.bx, vr.x.bx & 0x3FFF);

        uint16_t crtcBase = (inportb(0x3CC) & 0x01) ? 0x3D4 : 0x3B4;
        outportb(crtcBase, 0x13);
        uint8_t cr13 = inportb(crtcBase + 1);
        logErr("windrv: CR13 after Enable complete: 0x%02X (pitch=%u)\n",
               cr13, (uint16_t)cr13 * 8);

        // Read display start address (CR0C:CR0D + S3 extensions CR31, CR51, CR69)
        outportb(crtcBase, 0x0C);
        uint8_t cr0c = inportb(crtcBase + 1);
        outportb(crtcBase, 0x0D);
        uint8_t cr0d = inportb(crtcBase + 1);
        outportb(crtcBase, 0x31);
        uint8_t cr31 = inportb(crtcBase + 1);
        outportb(crtcBase, 0x51);
        uint8_t cr51 = inportb(crtcBase + 1);
        uint32_t dispStart = ((uint32_t)cr0c << 8) | cr0d;
        dispStart |= ((uint32_t)(cr31 & 0x30)) << 12; // bits 17:16
        dispStart |= ((uint32_t)(cr51 & 0x03)) << 18; // bits 19:18
        logErr("windrv: display start: CR0C=0x%02X CR0D=0x%02X CR31=0x%02X CR51=0x%02X -> offset 0x%lX (byte %lu)\n",
               cr0c, cr0d, cr31, cr51, (unsigned long)dispStart, (unsigned long)(dispStart * 4));
    }

    // Try to set up a default draw mode
    DrawMode16T *dm = (DrawMode16T *)handle->drawModeLinear;
    dm->rop2 = R2_COPYPEN;
    dm->bkMode = BM_OPAQUE;
    dm->bkColor = 0x00FFFFFF;
    dm->textColor = 0x00000000;

    // Map video RAM for direct access.
    // Query VBE to get the linear framebuffer physical address and total
    // VRAM size, then map the FULL VRAM via DPMI 0800h. The driver's own
    // Enable only maps the visible framebuffer, but other DDI functions
    // (e.g. SetPalette) access off-screen VRAM areas that need to be mapped.
    // Defaults (used when no VBE LFB mode is active): the 64 KB VGA window.
    handle->vramPhysAddr = 0xA0000;
    handle->vramSize = 0x10000;
    {
        // Get current VBE mode number
        __dpmi_regs vr;
        memset(&vr, 0, sizeof(vr));
        vr.x.ax = 0x4F03;
        __dpmi_int(0x10, &vr);
        uint16_t curMode = vr.x.bx & 0x3FFF;

        if (vr.x.ax == 0x004F && curMode >= 0x100) {
            // Query VBE controller info for total VRAM
            unsigned long tbuf = __tb & 0xFFFFF;
            uint16_t tbSeg = (uint16_t)(tbuf >> 4);
            uint16_t tbOff = (uint16_t)(tbuf & 0x0F);

            memset(&vr, 0, sizeof(vr));
            vr.x.ax = 0x4F00;
            vr.x.es = tbSeg;
            vr.x.di = tbOff;
            // Write "VBE2" signature to get VBE 2.0+ info
            dosmemput("VBE2", 4, tbuf);
            __dpmi_int(0x10, &vr);

            uint32_t totalVram = 0;
            if (vr.x.ax == 0x004F) {
                uint16_t mem64k;
                dosmemget(tbuf + 0x12, 2, &mem64k);
                totalVram = (uint32_t)mem64k * 65536UL;
                dbg("windrv: VBE total VRAM: %" PRIu32 " bytes (%" PRIu32 " KB)\n",
                    totalVram, totalVram / 1024);
            }

            // Query mode info for LFB physical base
            memset(&vr, 0, sizeof(vr));
            vr.x.ax = 0x4F01;
            vr.x.cx = curMode;
            vr.x.es = tbSeg;
            vr.x.di = tbOff;
            __dpmi_int(0x10, &vr);

            if (vr.x.ax == 0x004F) {
                uint32_t physBase;
                dosmemget(tbuf + 0x28, 4, &physBase);
                dbg("windrv: VBE LFB physical base: 0x%08lX\n", (unsigned long)physBase);

                if (physBase != 0) {
                    handle->vramPhysAddr = physBase;
                    // Map at least 4MB even if VBE reports less — drivers
                    // access off-screen VRAM (cursor masks, palette tables,
                    // pattern caches) beyond the visible framebuffer.
                    if (totalVram < 4UL * 1024 * 1024) {
                        totalVram = 4UL * 1024 * 1024;
                    }
                    handle->vramSize = totalVram;
                    dbg("windrv: VRAM size after fixup: 0x%lX\n",
                        (unsigned long)handle->vramSize);
                }
            }
        }
    }

    // Map physical VRAM for direct access. A mapping failure is not
    // treated as fatal; vramLinear/vramPtr are simply left untouched.
    __dpmi_meminfo mi;
    mi.address = handle->vramPhysAddr;
    mi.size = handle->vramSize;
    if (__dpmi_physical_address_mapping(&mi) == 0) {
        handle->vramLinear = mi.address;
        handle->vramPtr = (void *)(mi.address + __djgpp_conventional_base);
        dbg("windrv: mapped VRAM: phys=0x%08lX size=0x%lX linear=0x%08lX\n",
            (unsigned long)handle->vramPhysAddr,
            (unsigned long)handle->vramSize,
            (unsigned long)handle->vramLinear);
    }

    // Bytes per scanline derived from GDIINFO (whole bytes per pixel)
    handle->pitch = handle->gdiInfo.dpHorzRes *
                    ((handle->gdiInfo.dpBitsPixel + 7) / 8);

    // Realize a default white brush
    if (handle->ddiEntry[DDI_ORD_REALIZEOBJECT].present) {
        if (!realizeBrush(handle, 0x00FFFFFF)) {
            dbg("windrv: warning: initial RealizeObject(brush) failed\n");
        }
    }

    // Check if this is a hardware (S3-style) or software (DIB) driver.
    // deType == 0xFFFF indicates a DIB engine / software renderer.
    // NOTE(review): this test only distinguishes anything if deType is a
    // signed field; with an unsigned deType it is always true — confirm
    // against the DibPDevice16T declaration.
    DibPDevice16T *pd = (DibPDevice16T *)handle->pdevLinear;
    bool isHardwareDriver = (pd->deType >= 0);

    // Detect S3 hardware by probing the chip ID register (CR30).
    // Only S3 chips need cursor disable and display start offset.
    outportb(0x3D4, 0x38);
    outportb(0x3D5, 0x48); // unlock S3 registers
    outportb(0x3D4, 0x30);
    uint8_t cr30 = inportb(0x3D5);
    bool isS3 = (cr30 >= 0x81 && cr30 <= 0xE1);
    handle->isS3 = isS3;
    gIsS3 = isS3;
    dbg("windrv: S3 chip ID probe: CR30=0x%02X isS3=%d\n", cr30, isS3);

    // VGA-class drivers (1bpp, 4 planes) run as basic VGA even on S3
    // hardware — they don't use the S3 accelerator or scratch area.
    bool isVgaClass = handle->gdiInfoValid &&
                      handle->gdiInfo.dpBitsPixel == 1 &&
                      handle->gdiInfo.dpPlanes == 4;

    if (isHardwareDriver && isS3 && !isVgaClass) {
        // Disable the hardware cursor. S3 Trio64 (and compatible) drivers
        // may enable a default cursor during Enable that we don't manage.
        // CR45 bit 0 = hardware cursor enable on S3.
        outportb(0x3D4, 0x45);
        outportb(0x3D5, inportb(0x3D5) & ~0x01);

        // Shift the visible display down by 10 scanlines so the S3 driver's
        // pattern scratch area at VRAM (144,1)-(151,8) is off-screen.
        // All drawing Y coordinates are offset by dispYOffset to compensate.
        handle->dispYOffset = 10;
        setDisplayStart(handle, (uint32_t)handle->dispYOffset * handle->pitch);
    } else {
        // Non-S3 hardware, VGA-class, or software/DIB driver: no S3
        // scratch area, no display start shift.
        handle->dispYOffset = 0;
    }

    handle->enabled = true;

    // Watch the area ~0x4B8 bytes before end-of-.text. Corruption
    // in VBESVGA consistently zeros a byte near this offset.
    {
        extern char etext;
        uint32_t etextOff = (uint32_t)(uintptr_t)&etext;
        uint32_t watchOff = etextOff - 0x4B8;
        dbg("windrv: etext=0x%08" PRIX32 " watch=0x%08" PRIX32 "\n",
            etextOff, watchOff);
        thunkSetWatch(_my_ds(), watchOff);
    }

    setError(WDRV_OK);
    return WDRV_OK;
}
int32_t wdrvDisable(WdrvHandleT handle)
{
    // Nothing to do unless the driver is loaded and currently enabled.
    if (handle == NULL || !handle->enabled) {
        return WDRV_ERR_NOT_ENABLED;
    }

    // VOID PASCAL Disable(LPDEVICE lpDevice)
    // One far pointer = two words: selector (DGROUP) then offset.
    uint16_t args[2] = {
        handle->neMod.autoDataSel,
        handle->pdevOff,
    };

    dbg("windrv: calling Disable()\n");

    // Undo any display-start shift first, so the driver restores text
    // mode from an unshifted display.
    if (handle->dispYOffset != 0) {
        setDisplayStart(handle, 0);
        handle->dispYOffset = 0;
    }

    waitForEngine();
    thunkCall16(&gThunkCtx,
                handle->ddiEntry[DDI_ORD_DISABLE].sel,
                handle->ddiEntry[DDI_ORD_DISABLE].off,
                args, 2);
    dbg("windrv: Disable() returned\n");

    handle->enabled = false;
    setError(WDRV_OK);
    return WDRV_OK;
}
// ============================================================================
// Drawing operations
// ============================================================================
int32_t wdrvBitBlt(WdrvHandleT handle, WdrvBitBltParamsT *p)
{
    if (!handle || !handle->enabled) {
        logErr("windrv: BitBlt: not enabled (handle=%p enabled=%d)\n",
               (void *)handle, handle ? handle->enabled : -1);
        return WDRV_ERR_NOT_ENABLED;
    }
    if (!handle->ddiEntry[DDI_ORD_BITBLT].present) {
        logErr("windrv: BitBlt: not present\n");
        return WDRV_ERR_UNSUPPORTED;
    }

    // BOOL PASCAL BitBlt(LPDEVICE lpDstDev, WORD DstX, WORD DstY,
    //                    LPDEVICE lpSrcDev, WORD SrcX, WORD SrcY,
    //                    WORD xExt, WORD yExt, DWORD Rop3,
    //                    LPBRUSH lpBrush, LPDRAWMODE lpDrawMode)
    // Arguments are passed left to right; far pointers and the DWORD take
    // two words each, for 16 words in total.

    const uint16_t dgroup = handle->neMod.autoDataSel;

    // The 8-bit ROP code sits in bits 23-16 of rop3. The ROP depends on
    // the source only if flipping the source bit changes some output bit;
    // otherwise lpSrcDev must be NULL per the DDI spec.
    const uint8_t ropCode = (uint8_t)(p->rop3 >> 16);
    const bool usesSource = (((ropCode >> 2) ^ ropCode) & 0x33) != 0;

    // Source device: the screen PDEVICE, or a NULL far pointer for
    // pattern-only ROPs.
    const uint16_t srcSel = usesSource ? dgroup : 0;
    const uint16_t srcOff = usesSource ? handle->pdevOff : 0;

    uint16_t words[16] = {
        // lpDstDev
        dgroup,
        handle->pdevOff,
        // DstX, DstY (Y shifted into the hidden-scanline region)
        (uint16_t)p->dstX,
        (uint16_t)(p->dstY + handle->dispYOffset),
        // lpSrcDev
        srcSel,
        srcOff,
        // SrcX, SrcY (Y shifted for screen-to-screen blits)
        (uint16_t)p->srcX,
        (uint16_t)(p->srcY + handle->dispYOffset),
        // xExt, yExt
        (uint16_t)p->width,
        (uint16_t)p->height,
        // Rop3, high word first
        (uint16_t)(p->rop3 >> 16),
        (uint16_t)(p->rop3 & 0xFFFF),
        // lpBrush
        dgroup,
        handle->brushOff,
        // lpDrawMode
        dgroup,
        handle->drawModeOff,
    };
    const int wordCount = (int)(sizeof words / sizeof words[0]);

    dbg("windrv: BitBlt dst=%04X:%04X (%d,%d) src=%04X:%04X (%d,%d) %dx%d rop=0x%08lX brush=%04X:%04X dm=%04X:%04X\n",
        dgroup, handle->pdevOff, p->dstX, p->dstY,
        usesSource ? dgroup : 0, usesSource ? handle->pdevOff : 0,
        p->srcX, p->srcY,
        p->width, p->height, (unsigned long)p->rop3,
        dgroup, handle->brushOff, dgroup, handle->drawModeOff);

    // Let the engine go idle before and after the driver call.
    waitForEngine();
    uint32_t rc = thunkCall16(&gThunkCtx,
                              handle->ddiEntry[DDI_ORD_BITBLT].sel,
                              handle->ddiEntry[DDI_ORD_BITBLT].off,
                              words, wordCount);
    waitForEngine();

    dbg("windrv: BitBlt returned %lu\n", (unsigned long)(rc & 0xFFFF));

    // The driver's BOOL comes back in the low word; zero means failure.
    if ((int16_t)(rc & 0xFFFF)) {
        return WDRV_OK;
    }
    return WDRV_ERR_UNSUPPORTED;
}
// Fill the rectangle (x, y, w, h) with a solid color.
//
// The brush is re-realized when the requested color differs from the one
// currently realized (and the driver exports RealizeObject). The fill uses
// BitBlt with PATCOPY when the driver exports BitBlt, otherwise Output with
// OS_RECTANGLE and no pen.
//
// Returns WDRV_OK on success, WDRV_ERR_NOT_ENABLED if the driver is not
// enabled, WDRV_ERR_NO_MEMORY if the point buffer cannot be allocated, and
// WDRV_ERR_UNSUPPORTED if neither entry point exists or the driver call
// reports failure.
int32_t wdrvFillRect(WdrvHandleT handle, int16_t x, int16_t y, int16_t w, int16_t h, uint32_t color)
{
    if (!handle || !handle->enabled) {
        return WDRV_ERR_NOT_ENABLED;
    }

    // Realize brush with the requested color
    if (!handle->brushRealized || handle->brushRealizedColor != color) {
        if (handle->ddiEntry[DDI_ORD_REALIZEOBJECT].present) {
            realizeBrush(handle, color);
        }
    }

    // If driver supports BitBlt, use PATCOPY. wdrvBitBlt applies the
    // display Y offset itself, so raw coordinates are passed here.
    if (handle->ddiEntry[DDI_ORD_BITBLT].present) {
        WdrvBitBltParamsT bp;
        memset(&bp, 0, sizeof(bp));
        bp.dstX = x;
        bp.dstY = y;
        bp.srcX = 0;
        bp.srcY = 0;
        bp.width = w;
        bp.height = h;
        bp.rop3 = PATCOPY;
        return wdrvBitBlt(handle, &bp);
    }

    // Fall back to Output with rectangle
    if (handle->ddiEntry[DDI_ORD_OUTPUT].present) {
        // Output(lpDstDev, style, count, lpPoints, lpPen, lpBrush, lpDrawMode, lpClipRect)
        // For rectangle: style=OS_RECTANGLE, count=2 (top-left, bottom-right)

        // Build 2-point rectangle (offset Y into hidden-scanline region)
        Point16T pts[2];
        pts[0].x = x;
        pts[0].y = y + handle->dispYOffset;
        pts[1].x = x + w;
        pts[1].y = y + h + handle->dispYOffset;

        // The point array must live in 16-bit addressable memory
        uint32_t ptsLinear;
        uint16_t ptsSel = alloc16BitBlock(sizeof(pts), &ptsLinear);
        if (ptsSel == 0) {
            return WDRV_ERR_NO_MEMORY;
        }
        memcpy((void *)ptsLinear, pts, sizeof(pts));

        // Output params (Pascal order):
        //   lpDstDev(2w), style(1w), count(1w), lpPoints(2w),
        //   lpPen(2w), lpBrush(2w), lpDrawMode(2w), lpClipRect(2w)
        // Total: 14 words
        uint16_t dgSel = handle->neMod.autoDataSel;
        uint16_t params[14];
        int i = 0;
        params[i++] = dgSel;                // lpDstDev seg
        params[i++] = handle->pdevOff;      // lpDstDev off
        params[i++] = OS_RECTANGLE;         // style
        params[i++] = 2;                    // count
        params[i++] = ptsSel;               // lpPoints seg
        params[i++] = 0;                    // lpPoints off
        params[i++] = 0;                    // lpPen seg (NULL)
        params[i++] = 0;                    // lpPen off
        params[i++] = dgSel;                // lpBrush seg
        params[i++] = handle->brushOff;     // lpBrush off
        params[i++] = dgSel;                // lpDrawMode seg
        params[i++] = handle->drawModeOff;  // lpDrawMode off
        params[i++] = 0;                    // lpClipRect seg (NULL = no clip)
        params[i++] = 0;                    // lpClipRect off

        waitForEngine();
        uint32_t result = thunkCall16(&gThunkCtx,
                                      handle->ddiEntry[DDI_ORD_OUTPUT].sel,
                                      handle->ddiEntry[DDI_ORD_OUTPUT].off,
                                      params, i);
        // Same ordering as wdrvPolyline/wdrvRectangle: wait for the engine
        // after the call, then release the point buffer.
        waitForEngine();

        free16BitBlock(ptsSel, ptsLinear);
        return ((int16_t)(result & 0xFFFF)) ? WDRV_OK : WDRV_ERR_UNSUPPORTED;
    }

    return WDRV_ERR_UNSUPPORTED;
}
int32_t wdrvSetPixel(WdrvHandleT handle, int16_t x, int16_t y, uint32_t color)
{
    if (handle == NULL || !handle->enabled) {
        return WDRV_ERR_NOT_ENABLED;
    }
    if (!handle->ddiEntry[DDI_ORD_PIXEL].present) {
        return WDRV_ERR_UNSUPPORTED;
    }

    // DWORD PASCAL Pixel(LPDEVICE lpDevice, WORD x, WORD y,
    //                    DWORD color, LPDRAWMODE lpDrawMode)
    // Words passed, in order: lpDevice(2), x(1), y(1), color(2),
    // lpDrawMode(2) = 8 words.

    // Translate the COLORREF into the driver's physical color first.
    const uint32_t devColor = colorToPhys(handle, color);

    // Plain overwrite: force the raster op to COPYPEN.
    ((DrawMode16T *)handle->drawModeLinear)->rop2 = R2_COPYPEN;

    const uint16_t dgroup = handle->neMod.autoDataSel;
    uint16_t words[8] = {
        dgroup,                                     // lpDevice seg
        handle->pdevOff,                            // lpDevice off
        (uint16_t)x,                                // x
        (uint16_t)(y + handle->dispYOffset),        // y (display-shifted)
        (uint16_t)(devColor >> 16),                 // color high
        (uint16_t)(devColor),                       // color low
        dgroup,                                     // lpDrawMode seg
        handle->drawModeOff,                        // lpDrawMode off
    };
    const int wordCount = (int)(sizeof words / sizeof words[0]);

    // Bracket the driver call with engine waits; the driver's return
    // value is not inspected.
    waitForEngine();
    thunkCall16(&gThunkCtx,
                handle->ddiEntry[DDI_ORD_PIXEL].sel,
                handle->ddiEntry[DDI_ORD_PIXEL].off,
                words, wordCount);
    waitForEngine();

    return WDRV_OK;
}
// Read the pixel at (x, y) through the driver's Pixel entry point.
//
// Returns the raw 32-bit value the driver call yields, or 0 when the driver
// is not enabled or does not export Pixel. No conversion is applied to the
// returned value here.
uint32_t wdrvGetPixel(WdrvHandleT handle, int16_t x, int16_t y)
{
    if (!handle || !handle->enabled) {
        return 0;
    }
    if (!handle->ddiEntry[DDI_ORD_PIXEL].present) {
        return 0;
    }

    // Pixel with color = -1 (0xFFFFFFFF) reads instead of writes
    DrawMode16T *dm = (DrawMode16T *)handle->drawModeLinear;
    dm->rop2 = R2_COPYPEN;

    // Same 8-word layout as the write case:
    //   lpDevice(2w), x(1w), y(1w), color(2w), lpDrawMode(2w)
    uint16_t dgSel = handle->neMod.autoDataSel;
    uint16_t params[8];
    int i = 0;
    params[i++] = dgSel;
    params[i++] = handle->pdevOff;
    params[i++] = (uint16_t)x;
    params[i++] = (uint16_t)(y + handle->dispYOffset);
    params[i++] = 0xFFFF; // color = -1 means "get pixel"
    params[i++] = 0xFFFF;
    params[i++] = dgSel;
    params[i++] = handle->drawModeOff;

    // Like every other drawing call, wait for the engine before entering
    // the driver so the read cannot overtake drawing still in flight.
    waitForEngine();

    return thunkCall16(&gThunkCtx,
                       handle->ddiEntry[DDI_ORD_PIXEL].sel,
                       handle->ddiEntry[DDI_ORD_PIXEL].off,
                       params, i);
}
// Draw a polyline through `count` points using a pen of the given color.
//
// points - caller-owned array of `count` points; not modified (a shifted
//          copy is handed to the driver)
// count  - number of points, must be positive
//
// Returns WDRV_OK on success, WDRV_ERR_NOT_ENABLED if the driver is not
// enabled, WDRV_ERR_NO_MEMORY if the point buffer cannot be allocated, and
// WDRV_ERR_UNSUPPORTED if Output is missing, the arguments are invalid,
// the pen cannot be realized, or the driver call reports failure.
int32_t wdrvPolyline(WdrvHandleT handle, Point16T *points, int16_t count, uint32_t color)
{
    if (!handle || !handle->enabled) {
        return WDRV_ERR_NOT_ENABLED;
    }
    if (!handle->ddiEntry[DDI_ORD_OUTPUT].present) {
        return WDRV_ERR_UNSUPPORTED;
    }

    // Reject a missing array or a non-positive count before any size math:
    // a negative count would otherwise turn into a huge byte count for the
    // allocation and the copy below.
    if (points == NULL || count <= 0) {
        return WDRV_ERR_UNSUPPORTED;
    }

    // Realize a physical pen (driver expects RealizeObject output, not a logical pen)
    if (!handle->penRealized || handle->penRealizedColor != color) {
        if (!realizePen(handle, color)) {
            return WDRV_ERR_UNSUPPORTED;
        }
    }

    // Allocate 16-bit memory for the point array, offsetting Y coordinates
    uint32_t ptsSize = (uint32_t)count * sizeof(Point16T);
    uint32_t ptsLinear;
    uint16_t ptsSel = alloc16BitBlock(ptsSize, &ptsLinear);
    if (ptsSel == 0) {
        return WDRV_ERR_NO_MEMORY;
    }
    memcpy((void *)ptsLinear, points, ptsSize);
    {
        Point16T *dst = (Point16T *)ptsLinear;
        for (int16_t pi = 0; pi < count; pi++) {
            dst[pi].y += handle->dispYOffset;
        }
    }

    // Output(lpDstDev, style, count, lpPoints, lpPen, lpBrush, lpDrawMode, lpClipRect)
    uint16_t dgSel = handle->neMod.autoDataSel;
    uint16_t params[14];
    int i = 0;
    params[i++] = dgSel;                // lpDstDev
    params[i++] = handle->pdevOff;
    params[i++] = OS_POLYLINE;          // style
    params[i++] = (uint16_t)count;      // count
    params[i++] = ptsSel;               // lpPoints
    params[i++] = 0;
    params[i++] = dgSel;                // lpPen in DGROUP (physical pen)
    params[i++] = handle->penOff;
    params[i++] = 0;                    // lpBrush = NULL
    params[i++] = 0;
    params[i++] = dgSel;                // lpDrawMode
    params[i++] = handle->drawModeOff;
    params[i++] = 0;                    // lpClipRect = NULL
    params[i++] = 0;

    waitForEngine();
    uint32_t result = thunkCall16(&gThunkCtx,
                                  handle->ddiEntry[DDI_ORD_OUTPUT].sel,
                                  handle->ddiEntry[DDI_ORD_OUTPUT].off,
                                  params, i);
    waitForEngine();

    free16BitBlock(ptsSel, ptsLinear);
    return ((int16_t)(result & 0xFFFF)) ? WDRV_OK : WDRV_ERR_UNSUPPORTED;
}
int32_t wdrvRectangle(WdrvHandleT handle, int16_t x, int16_t y, int16_t w, int16_t h, uint32_t color)
{
    // Outlined rectangle via Output with OS_RECTANGLE.
    if (handle == NULL || !handle->enabled) {
        return WDRV_ERR_NOT_ENABLED;
    }
    if (!handle->ddiEntry[DDI_ORD_OUTPUT].present) {
        return WDRV_ERR_UNSUPPORTED;
    }

    // The driver wants a physical pen (RealizeObject output), so realize
    // one whenever the cached pen is missing or has a different color.
    const bool penIsCurrent = handle->penRealized &&
                              handle->penRealizedColor == color;
    if (!penIsCurrent && !realizePen(handle, color)) {
        return WDRV_ERR_UNSUPPORTED;
    }

    // Two corner points, Y shifted by the display offset.
    Point16T corners[2];
    corners[0].x = x;
    corners[0].y = y + handle->dispYOffset;
    corners[1].x = x + w;
    corners[1].y = y + h + handle->dispYOffset;

    // Copy the corners into a 16-bit addressable block for the driver.
    uint32_t cornersLinear;
    const uint16_t cornersSel = alloc16BitBlock(sizeof(corners), &cornersLinear);
    if (cornersSel == 0) {
        return WDRV_ERR_NO_MEMORY;
    }
    memcpy((void *)cornersLinear, corners, sizeof(corners));

    // Output(lpDstDev, style, count, lpPoints, lpPen, lpBrush,
    //        lpDrawMode, lpClipRect) = 14 words
    const uint16_t dgroup = handle->neMod.autoDataSel;
    uint16_t words[14] = {
        dgroup, handle->pdevOff,        // lpDstDev
        OS_RECTANGLE,                   // style
        2,                              // count
        cornersSel, 0,                  // lpPoints
        dgroup, handle->penOff,         // lpPen in DGROUP (physical pen)
        dgroup, handle->brushOff,       // lpBrush
        dgroup, handle->drawModeOff,    // lpDrawMode
        0, 0,                           // lpClipRect = NULL
    };
    const int wordCount = (int)(sizeof words / sizeof words[0]);

    waitForEngine();
    const uint32_t rc = thunkCall16(&gThunkCtx,
                                    handle->ddiEntry[DDI_ORD_OUTPUT].sel,
                                    handle->ddiEntry[DDI_ORD_OUTPUT].off,
                                    words, wordCount);
    waitForEngine();

    free16BitBlock(cornersSel, cornersLinear);

    // Low word of the result is the driver's status; zero means failure.
    if ((int16_t)(rc & 0xFFFF)) {
        return WDRV_OK;
    }
    return WDRV_ERR_UNSUPPORTED;
}
// ============================================================================
// Palette operations
// ============================================================================
// Program a range of palette entries through the driver's SetPalette entry.
//
//   handle      enabled driver instance
//   startIndex  first palette index (passed to the driver as a WORD)
//   count       number of entries (passed to the driver as a WORD)
//   colors      count * 4 bytes of palette data (one RGBQUAD per entry)
//
// Returns WDRV_OK after the driver has been called (its return value is not
// inspected) or when count is 0, WDRV_ERR_NOT_ENABLED for a missing/disabled
// handle, WDRV_ERR_NO_MEMORY when the 16-bit buffer cannot be allocated, and
// WDRV_ERR_UNSUPPORTED when SetPalette is not exported or the arguments are
// unusable (NULL colors, negative count, or a count that does not fit the
// WORD the driver receives).
// NOTE(review): no dedicated invalid-parameter error code is visible in this
// file, so WDRV_ERR_UNSUPPORTED is reused for rejected arguments.
int32_t wdrvSetPalette(WdrvHandleT handle, int32_t startIndex, int32_t count, const uint8_t *colors)
{
    if (!handle || !handle->enabled) {
        return WDRV_ERR_NOT_ENABLED;
    }
    if (!handle->ddiEntry[DDI_ORD_SETPALETTE].present) {
        return WDRV_ERR_UNSUPPORTED;
    }

    // Nothing to program: avoid a zero-byte allocation whose segment limit
    // (size - 1) would wrap around.
    if (count == 0) {
        return WDRV_OK;
    }
    // Reject input that would be read through a NULL pointer, that would turn
    // into a huge unsigned size, or that the WORD parameter would truncate.
    if (colors == NULL || count < 0 || count > 0xFFFF) {
        return WDRV_ERR_UNSUPPORTED;
    }

    // SetPalette(nStartIndex:WORD, nNumEntries:WORD, lpPalette:DWORD)
    // Pascal order: nStartIndex(1w), nNumEntries(1w), lpPalette(2w)
    // Total: 4 words

    // Copy the palette into memory reachable through a 16-bit selector.
    uint32_t palSize = (uint32_t)count * 4u; // RGBQUAD per entry
    uint32_t palLinear;
    uint16_t palSel = alloc16BitBlock(palSize, &palLinear);
    if (palSel == 0) {
        return WDRV_ERR_NO_MEMORY;
    }
    memcpy((void *)palLinear, colors, palSize);

    uint16_t params[4];
    params[0] = (uint16_t)startIndex;
    params[1] = (uint16_t)count;
    params[2] = palSel;              // lpPalette selector
    params[3] = 0;                   // lpPalette offset

    thunkCall16(&gThunkCtx,
                handle->ddiEntry[DDI_ORD_SETPALETTE].sel,
                handle->ddiEntry[DDI_ORD_SETPALETTE].off,
                params, 4);

    free16BitBlock(palSel, palLinear);
    return WDRV_OK;
}
// ============================================================================
// Framebuffer access
// ============================================================================
// Return the handle's vramPtr, or NULL when the handle is missing or the
// driver has not been enabled.
void *wdrvGetFramebuffer(WdrvHandleT handle)
{
    if (handle && handle->enabled) {
        return handle->vramPtr;
    }
    return NULL;
}
// Return the handle's pitch value, or 0 when the handle is missing or the
// driver has not been enabled.
int32_t wdrvGetPitch(WdrvHandleT handle)
{
    if (handle && handle->enabled) {
        return handle->pitch;
    }
    return 0;
}
// ============================================================================
// Error handling
// ============================================================================
int32_t wdrvGetLastError(void)
{
return gLastError;
}
const char *wdrvGetLastErrorString(void)
{
switch (gLastError) {
case WDRV_OK: return "no error";
case WDRV_ERR_INIT: return "initialization failed";
case WDRV_ERR_NO_DPMI: return "DPMI not available";
case WDRV_ERR_FILE_NOT_FOUND: return "file not found";
case WDRV_ERR_BAD_FORMAT: return "not a valid NE executable";
case WDRV_ERR_LOAD_FAILED: return "failed to load driver";
case WDRV_ERR_NO_MEMORY: return "out of memory";
case WDRV_ERR_RELOC_FAILED: return "relocation failed";
case WDRV_ERR_NO_ENTRY: return "required DDI entry not found";
case WDRV_ERR_ENABLE_FAILED: return "driver Enable() failed";
case WDRV_ERR_THUNK_FAILED: return "thunk setup failed";
case WDRV_ERR_NOT_LOADED: return "no driver loaded";
case WDRV_ERR_NOT_ENABLED: return "driver not enabled";
case WDRV_ERR_UNSUPPORTED: return "operation not supported";
default: return "unknown error";
}
}
// Turn debug output on or off for this file (gDebug) and forward the same
// setting to the NE loader, the thunk layer and the API stubs.
void wdrvSetDebug(bool enable)
{
    // neSetDebug is declared locally rather than taken from a header.
    extern void neSetDebug(bool enable);

    gDebug = enable;

    neSetDebug(enable);
    thunkSetDebug(enable);
    stubSetDebug(enable);
}
// Diagnostic dump: log the selector, DPMI base, DPMI limit, size and kind of
// every loaded NE segment, followed by the DGROUP selector/base and the
// offsets of the objects placed inside DGROUP.  A NULL handle is ignored.
void wdrvDumpSegmentBases(WdrvHandleT handle)
{
    if (handle == NULL) {
        return;
    }

    logErr("=== NE Module Segment Bases ===\n");

    int segNo = 0;
    while (segNo < handle->neMod.segmentCount) {
        LoadedSegT *seg = &handle->neMod.segments[segNo];
        segNo++;  // segments are reported 1-based

        unsigned long segBase = 0;
        __dpmi_get_segment_base_address(seg->selector, &segBase);
        unsigned long segLimit = __dpmi_get_segment_limit(seg->selector);
        const char *kind = seg->isCode ? "CODE" : "DATA";

        logErr(" seg[%d] sel=%04X base=0x%08lX limit=0x%08lX size=%" PRIu32 " %s\n",
               segNo, seg->selector, segBase, segLimit, seg->size, kind);
    }

    unsigned long dgroupBase = 0;
    __dpmi_get_segment_base_address(handle->neMod.autoDataSel, &dgroupBase);
    logErr(" DGROUP sel=%04X base=0x%08lX\n", handle->neMod.autoDataSel, dgroupBase);

    logErr(" pdevOff=%04X brushOff=%04X drawModeOff=%04X\n",
           handle->pdevOff, handle->brushOff, handle->drawModeOff);
    logErr(" dgroupObjBase=0x%" PRIX32 " pdevLinear=0x%" PRIX32 "\n",
           handle->dgroupObjBase, handle->pdevLinear);
}
// ============================================================================
// Internal implementation
// ============================================================================
// Import-resolution callback: forwards the lookup of (moduleName, ordinal,
// funcName) to stubResolveImport using the global stub context.
static FarPtr16T importResolver(const char *moduleName, uint16_t ordinal, const char *funcName)
{
    FarPtr16T target = stubResolveImport(&gStubCtx, moduleName, ordinal, funcName);
    return target;
}
// Look up every known DDI ordinal in the loaded module's export table and
// record the selector:offset of each one found in drv->ddiEntry[ordinal].
//
// Returns true when at least one entry point was resolved.
static bool resolveDriverEntries(struct WdrvDriverS *drv)
{
    // All DDI ordinals this file knows about, terminated by 0.
    static const uint16_t ddiOrdinals[] = {
        DDI_ORD_BITBLT, DDI_ORD_COLORINFO, DDI_ORD_CONTROL,
        DDI_ORD_DISABLE, DDI_ORD_ENABLE, DDI_ORD_ENUMDFFONTS,
        DDI_ORD_ENUMOBJ, DDI_ORD_OUTPUT, DDI_ORD_PIXEL,
        DDI_ORD_REALIZEOBJECT, DDI_ORD_STRBLT, DDI_ORD_SCANLR,
        DDI_ORD_DEVICEMODE, DDI_ORD_EXTTEXTOUT, DDI_ORD_GETCHARWIDTH,
        DDI_ORD_DEVICEBITMAP, DDI_ORD_FASTBORDER, DDI_ORD_SETATTRIBUTE,
        DDI_ORD_DIBTODEVICE, DDI_ORD_CREATEBITMAP, DDI_ORD_DELETEBITMAP,
        DDI_ORD_SELECTBITMAP, DDI_ORD_BITMAPBITS, DDI_ORD_RECLIP,
        DDI_ORD_GETPALETTE, DDI_ORD_SETPALETTE, DDI_ORD_SETPALETTETRANS,
        DDI_ORD_UPDATECOLORS, DDI_ORD_STRETCHBLT, DDI_ORD_STRETCHDIBITS,
        DDI_ORD_SELECTPALETTE,
        DDI_ORD_INQUIRE, DDI_ORD_SETCURSOR, DDI_ORD_MOVECURSOR,
        DDI_ORD_CHECKCRSR,
        0 // Sentinel
    };

    // Number of slots in the per-ordinal table; ordinals index it directly.
    const size_t slotCount = sizeof(drv->ddiEntry) / sizeof(drv->ddiEntry[0]);

    int found = 0;
    for (int i = 0; ddiOrdinals[i] != 0; i++) {
        uint16_t ord = ddiOrdinals[i];

        // Never write past the table if the ordinal list and the array size
        // ever get out of sync.
        if (ord >= slotCount) {
            dbg("windrv: DDI ord %u exceeds entry table, skipped\n", ord);
            continue;
        }

        uint16_t seg;
        uint16_t off;
        uint16_t sel;
        if (neLookupExport(&drv->neMod, ord, &seg, &off, &sel)) {
            drv->ddiEntry[ord].sel = sel;
            drv->ddiEntry[ord].off = off;
            drv->ddiEntry[ord].present = true;
            found++;
            dbg("windrv: DDI ord %u -> %04X:%04X\n", ord, sel, off);
        }
    }

    dbg("windrv: resolved %d DDI entry points\n", found);
    return found > 0;
}
// Extend DGROUP to include space for GDI objects.
// Layout within the extension area (the area itself starts 16-byte aligned):
// +0x0000: PDEVICE (4096 bytes)
// +0x1000: PhysBrush (128 bytes)
// +0x1080: LogBrush (16 bytes)
// +0x1090: DrawMode (48 bytes)
// +0x10C0: PhysPen (128 bytes)
// +0x1140: LogPen (16 bytes)
// +0x1150: PhysColor (8 bytes reserved; ColorInfo output DWORD)
// Total: 0x1158 bytes
#define DGROUP_OBJ_PDEV_OFF 0x0000
#define DGROUP_OBJ_BRUSH_OFF 0x1000
#define DGROUP_OBJ_LOGBRUSH_OFF 0x1080
#define DGROUP_OBJ_DRAWMODE_OFF 0x1090
#define DGROUP_OBJ_PEN_OFF 0x10C0
#define DGROUP_OBJ_LOGPEN_OFF 0x1140
#define DGROUP_OBJ_PHYSCOLOR_OFF 0x1150
#define DGROUP_OBJ_TOTAL_SIZE 0x1158
// Grow the driver's automatic data segment (DGROUP) to 64K and carve the GDI
// object area out of the space directly after the original data.
//
// On success every *Off field in drv holds the object's offset within DGROUP
// and every *Linear field the matching address for access from C.
// Returns false when the module has no valid DGROUP index, when the object
// area would not fit below 64K, or when neExtendSegment fails.
static bool extendDgroupForObjects(struct WdrvDriverS *drv)
{
// autoDataSegIndex is 1-based; convert to an index into segments[]
int dgIdx = drv->neMod.neHeader.autoDataSegIndex - 1;
if (dgIdx < 0 || dgIdx >= drv->neMod.segmentCount) {
logErr("windrv: no DGROUP segment\n");
return false;
}
uint32_t oldSize = drv->neMod.segments[dgIdx].size;
// Align object area start to 16 bytes
uint32_t objBase = (oldSize + 15) & ~15;
// The S3 driver uses DGROUP offsets well beyond the initial data for
// graphics engine working buffers (e.g., 0xA6E8, 0xBEE8). In Windows
// 3.x, DGROUP is typically the full 64K segment. Extend to 64K to
// ensure the driver has all the working space it expects.
uint32_t targetSize = 0x10000;
// Because objBase >= oldSize, passing this check also guarantees that
// targetSize - oldSize below cannot underflow.
if (objBase + DGROUP_OBJ_TOTAL_SIZE > targetSize) {
logErr("windrv: DGROUP objects don't fit in 64K\n");
return false;
}
uint32_t extraBytes = targetSize - oldSize;
// Required output argument of neExtendSegment; the value is not used here
uint32_t oldSizeOut;
if (!neExtendSegment(&drv->neMod, dgIdx, extraBytes, &oldSizeOut)) {
return false;
}
// Read the linear address only after the extension has been performed
uint32_t dgLinear = drv->neMod.segments[dgIdx].linearAddr;
// Initialize DGROUP stack management fields if needed. In real Windows,
// KERNEL sets these during module loading. VGA.DRV ships with
// [0x0A]=0xFFFF which its stack check function interprets as "no stack
// space available", causing all deep functions (BitBlt, etc.) to fail.
// Only patch if the original data has the 0xFFFF sentinel.
{
uint16_t *dgWords = (uint16_t *)dgLinear;
if (dgWords[5] == 0xFFFF) {
dgWords[5] = (uint16_t)objBase; // [0x0A] pStackBot
dbg("windrv: patched DGROUP stack bottom [0x0A] from FFFF to %04X\n",
(uint16_t)objBase);
}
if (dgWords[4] == 0xFFFF) {
dgWords[4] = 0xFFFE; // [0x08] pStackMin
}
}
// Record offset (for 16-bit far pointers) and linear address (for C
// access) of every object slot.
drv->dgroupObjBase = objBase;
drv->pdevOff = (uint16_t)(objBase + DGROUP_OBJ_PDEV_OFF);
drv->pdevLinear = dgLinear + objBase + DGROUP_OBJ_PDEV_OFF;
drv->pdevSize = PDEVICE_MAX_SIZE;
drv->brushOff = (uint16_t)(objBase + DGROUP_OBJ_BRUSH_OFF);
drv->brushLinear = dgLinear + objBase + DGROUP_OBJ_BRUSH_OFF;
drv->logBrushOff = (uint16_t)(objBase + DGROUP_OBJ_LOGBRUSH_OFF);
drv->logBrushLinear = dgLinear + objBase + DGROUP_OBJ_LOGBRUSH_OFF;
drv->drawModeOff = (uint16_t)(objBase + DGROUP_OBJ_DRAWMODE_OFF);
drv->drawModeLinear = dgLinear + objBase + DGROUP_OBJ_DRAWMODE_OFF;
drv->penOff = (uint16_t)(objBase + DGROUP_OBJ_PEN_OFF);
drv->penLinear = dgLinear + objBase + DGROUP_OBJ_PEN_OFF;
drv->logPenOff = (uint16_t)(objBase + DGROUP_OBJ_LOGPEN_OFF);
drv->logPenLinear = dgLinear + objBase + DGROUP_OBJ_LOGPEN_OFF;
drv->physColorOff = (uint16_t)(objBase + DGROUP_OBJ_PHYSCOLOR_OFF);
drv->physColorLinear = dgLinear + objBase + DGROUP_OBJ_PHYSCOLOR_OFF;
dbg("windrv: DGROUP extended by %" PRIu32 " bytes (old=%" PRIu32 " new=%" PRIu32 ")\n",
extraBytes, oldSize, drv->neMod.segments[dgIdx].size);
dbg("windrv: DGROUP objects: pdev=%04X brush=%04X logBrush=%04X drawMode=%04X pen=%04X logPen=%04X\n",
drv->pdevOff, drv->brushOff, drv->logBrushOff, drv->drawModeOff, drv->penOff, drv->logPenOff);
return true;
}
// Zero the PDEVICE area.  The storage itself already exists inside DGROUP
// (set up by extendDgroupForObjects), so this cannot fail.
static bool allocPDevice(struct WdrvDriverS *drv)
{
    void *pdev = (void *)drv->pdevLinear;

    memset(pdev, 0, drv->pdevSize);
    return true;
}
static bool allocDrawMode(struct WdrvDriverS *drv)
{
// DrawMode is pre-allocated within DGROUP
DrawMode16T *dm = (DrawMode16T *)drv->drawModeLinear;
memset(dm, 0, sizeof(DrawMode16T));
dm->rop2 = R2_COPYPEN;
dm->bkMode = BM_OPAQUE;
return true;
}
static bool allocBrushBuffers(struct WdrvDriverS *drv)
{
// Both brushes are pre-allocated within DGROUP
LogBrush16T *lb = (LogBrush16T *)drv->logBrushLinear;
memset(lb, 0, sizeof(LogBrush16T));
lb->lbStyle = BS_SOLID;
lb->lbColor = 0x00FFFFFF;
memset((void *)drv->brushLinear, 0, PHYS_OBJ_MAX_SIZE);
drv->brushRealized = false;
return true;
}
static bool allocPenBuffers(struct WdrvDriverS *drv)
{
// Both pens are pre-allocated within DGROUP
LogPen16T *lp = (LogPen16T *)drv->logPenLinear;
memset(lp, 0, sizeof(LogPen16T));
lp->lopnStyle = PS_SOLID;
lp->lopnWidth.x = 1;
lp->lopnWidth.y = 0;
lp->lopnColor = 0x00000000;
memset((void *)drv->penLinear, 0, PHYS_OBJ_MAX_SIZE);
drv->penRealized = false;
return true;
}
// Translate a colour reference into the driver's physical colour by calling
// its ColorInfo entry point.  When the driver does not export ColorInfo the
// input value is returned unchanged.
//
// The result is whatever the driver stored in the PhysColor slot inside
// DGROUP; the slot is zeroed before the call.
static uint32_t colorToPhys(struct WdrvDriverS *drv, uint32_t colorRef)
{
    if (!drv->ddiEntry[DDI_ORD_COLORINFO].present) {
        return colorRef;
    }

    uint32_t *physSlot = (uint32_t *)drv->physColorLinear;
    uint16_t  dataSel  = drv->neMod.autoDataSel;

    // DWORD PASCAL ColorInfo(LPDEVICE lpDevice, DWORD dwColorIn,
    //                        LPDWORD lpPhysColor)
    // Pascal push order: lpDevice(2w), dwColorIn(2w), lpPhysColor(2w)
    uint16_t params[6] = {
        dataSel, drv->pdevOff,             // lpDevice seg:off
        (uint16_t)(colorRef >> 16),        // dwColorIn high
        (uint16_t)(colorRef),              // dwColorIn low
        dataSel, drv->physColorOff         // lpPhysColor seg:off
    };

    // Start from a known value so stale data is never returned.
    *physSlot = 0;

    waitForEngine();
    thunkCall16(&gThunkCtx,
                drv->ddiEntry[DDI_ORD_COLORINFO].sel,
                drv->ddiEntry[DDI_ORD_COLORINFO].off,
                params, 6);
    waitForEngine();

    uint32_t physColor = *physSlot;
    dbg("windrv: ColorInfo(0x%06lX) -> phys 0x%08lX\n",
        (unsigned long)colorRef, (unsigned long)physColor);
    return physColor;
}
// Program the S3 CRTC display start address so scan-out begins at
// byteOffset within video memory.
//
//   drv         unused (kept for a uniform helper signature)
//   byteOffset  start position in bytes; converted to DWORD units below
//
// All register accesses use the index/data pair at crtcBase / crtcBase + 1:
// the register number is written to crtcBase, then its value is read or
// written at crtcBase + 1.  The statement order is therefore significant.
static void setDisplayStart(struct WdrvDriverS *drv, uint32_t byteOffset)
{
(void)drv;
// S3 display start address is in units of 4 bytes (DWORDs).
// CR0C:CR0D = bits 15:0, CR31[5:4] = bits 17:16, CR51[1:0] = bits 19:18
uint32_t startAddr = byteOffset / 4;
// Bit 0 of the byte read from port 0x3CC selects the 0x3D4 or 0x3B4 base
uint16_t crtcBase = (inportb(0x3CC) & 0x01) ? 0x3D4 : 0x3B4;
// Unlock S3 registers
outportb(crtcBase, 0x38);
outportb(crtcBase + 1, 0x48);
outportb(crtcBase, 0x39);
outportb(crtcBase + 1, 0xA5);
// Write display start address bits 15:0
outportb(crtcBase, 0x0D);
outportb(crtcBase + 1, (uint8_t)(startAddr & 0xFF));
outportb(crtcBase, 0x0C);
outportb(crtcBase + 1, (uint8_t)((startAddr >> 8) & 0xFF));
// Write bits 17:16 to CR31
// (read-modify-write: only bits 5:4 change, the other bits are preserved)
outportb(crtcBase, 0x31);
uint8_t cr31 = inportb(crtcBase + 1);
cr31 = (cr31 & ~0x30) | (uint8_t)(((startAddr >> 16) & 0x03) << 4);
outportb(crtcBase + 1, cr31);
// Write bits 19:18 to CR51
// (read-modify-write: only bits 1:0 change, the other bits are preserved)
outportb(crtcBase, 0x51);
uint8_t cr51 = inportb(crtcBase + 1);
cr51 = (cr51 & ~0x03) | (uint8_t)((startAddr >> 18) & 0x03);
outportb(crtcBase + 1, cr51);
dbg("windrv: display start set to byte offset %lu (reg=0x%lX)\n",
(unsigned long)byteOffset, (unsigned long)startAddr);
}
static bool realizeBrush(struct WdrvDriverS *drv, uint32_t color)
{
if (!drv->ddiEntry[DDI_ORD_REALIZEOBJECT].present) {
return false;
}
uint16_t dgSel = drv->neMod.autoDataSel;
// Set up the logical brush
LogBrush16T *lb = (LogBrush16T *)drv->logBrushLinear;
lb->lbStyle = BS_SOLID;
lb->lbColor = color;
lb->lbHatch = 0;
// Clear the physical brush buffer
memset((void *)drv->brushLinear, 0, PHYS_OBJ_MAX_SIZE);
// RealizeObject(lpDevice, nStyle, lpInObj, lpOutObj, lpTextXForm)
// Pascal push order: left-to-right
uint16_t params[9];
params[0] = dgSel; // lpDevice seg
params[1] = drv->pdevOff; // lpDevice off
params[2] = OBJ_BRUSH; // nStyle
params[3] = dgSel; // lpInObj seg
params[4] = drv->logBrushOff; // lpInObj off
params[5] = dgSel; // lpOutObj seg
params[6] = drv->brushOff; // lpOutObj off
params[7] = 0; // lpTextXForm seg (NULL)
params[8] = 0; // lpTextXForm off (NULL)
waitForEngine();
uint32_t result = thunkCall16(&gThunkCtx,
drv->ddiEntry[DDI_ORD_REALIZEOBJECT].sel,
drv->ddiEntry[DDI_ORD_REALIZEOBJECT].off,
params, 9);
waitForEngine();
dbg("windrv: RealizeObject(brush, color=0x%06lX) returned %d\n",
(unsigned long)color, (int16_t)(result & 0xFFFF));
if ((int16_t)(result & 0xFFFF) > 0) {
drv->brushRealized = true;
drv->brushRealizedColor = color;
// Dump the first 16 bytes of the realized brush
uint8_t *bdata = (uint8_t *)drv->brushLinear;
dbg("windrv: brush[0..15]:");
for (int k = 0; k < 16; k++) {
dbg(" %02X", bdata[k]);
}
dbg("\n");
return true;
}
return false;
}
static bool realizePen(struct WdrvDriverS *drv, uint32_t color)
{
if (!drv->ddiEntry[DDI_ORD_REALIZEOBJECT].present) {
return false;
}
uint16_t dgSel = drv->neMod.autoDataSel;
// Set up the logical pen
LogPen16T *lp = (LogPen16T *)drv->logPenLinear;
lp->lopnStyle = PS_SOLID;
lp->lopnWidth.x = 1;
lp->lopnWidth.y = 0;
lp->lopnColor = color;
// Clear the physical pen buffer
memset((void *)drv->penLinear, 0, PHYS_OBJ_MAX_SIZE);
// RealizeObject(lpDevice, nStyle, lpInObj, lpOutObj, lpTextXForm)
// Pascal push order: left-to-right
uint16_t params[9];
params[0] = dgSel; // lpDevice seg
params[1] = drv->pdevOff; // lpDevice off
params[2] = OBJ_PEN; // nStyle
params[3] = dgSel; // lpInObj seg
params[4] = drv->logPenOff; // lpInObj off
params[5] = dgSel; // lpOutObj seg
params[6] = drv->penOff; // lpOutObj off
params[7] = 0; // lpTextXForm seg (NULL)
params[8] = 0; // lpTextXForm off (NULL)
waitForEngine();
uint32_t result = thunkCall16(&gThunkCtx,
drv->ddiEntry[DDI_ORD_REALIZEOBJECT].sel,
drv->ddiEntry[DDI_ORD_REALIZEOBJECT].off,
params, 9);
waitForEngine();
dbg("windrv: RealizeObject(pen, color=0x%06lX) returned %d\n",
(unsigned long)color, (int16_t)(result & 0xFFFF));
if ((int16_t)(result & 0xFFFF) > 0) {
drv->penRealized = true;
drv->penRealizedColor = color;
// Dump the first 16 bytes of the realized pen
uint8_t *pdata = (uint8_t *)drv->penLinear;
dbg("windrv: pen[0..15]:");
for (int k = 0; k < 16; k++) {
dbg(" %02X", pdata[k]);
}
dbg("\n");
return true;
}
return false;
}
// Invalidate the realized pen and brush.  No memory is released here: the
// objects are embedded in DGROUP and go away when the module is unloaded.
static void freeDrawObjects(struct WdrvDriverS *drv)
{
    drv->penRealized   = false;
    drv->brushRealized = false;
}
// Allocate a zero-filled heap block and an LDT descriptor that maps it as a
// 16-bit read/write data segment, so the block can be handed to driver code
// as selector:0.
//
//   size       requested size in bytes; a request of 0 is treated as 1 so
//              the segment limit (size - 1) cannot wrap around
//   linearOut  receives the block's address for access from C (only written
//              on success)
//
// Returns the selector, or 0 on failure (out of memory, no free descriptor,
// or the DPMI host rejected the base/limit/access-rights setup).  On failure
// nothing is leaked.  Release with free16BitBlock().
static uint16_t alloc16BitBlock(uint32_t size, uint32_t *linearOut)
{
    if (size == 0) {
        size = 1;
    }

    uint8_t *mem = (uint8_t *)calloc(1, size);
    if (!mem) {
        return 0;
    }
    uint32_t ptrVal = (uint32_t)mem;

    int sel = __dpmi_allocate_ldt_descriptors(1);
    if (sel < 0) {
        free(mem);
        return 0;
    }

    // True linear address = DJGPP pointer + DS base.
    // Access rights 0x00F2 = 16-bit data RW.
    // A descriptor that could not be fully programmed must not be handed to
    // the driver, so any failure unwinds both allocations.
    if (__dpmi_set_segment_base_address(sel, ptrVal + __djgpp_base_address) != 0 ||
        __dpmi_set_segment_limit(sel, size - 1) != 0 ||
        __dpmi_set_descriptor_access_rights(sel, 0x00F2) != 0) {
        __dpmi_free_ldt_descriptor(sel);
        free(mem);
        return 0;
    }

    *linearOut = ptrVal;
    return (uint16_t)sel;
}
// Release a block obtained from alloc16BitBlock: free the LDT descriptor,
// then the heap memory.  A zero selector or zero address is skipped.
static void free16BitBlock(uint16_t sel, uint32_t linear)
{
    if (sel != 0) {
        __dpmi_free_ldt_descriptor(sel);
    }

    if (linear == 0) {
        return;
    }
    free((void *)linear);
}
// Record err as the last error; it is what wdrvGetLastError() and
// wdrvGetLastErrorString() report.
static void setError(int32_t err)
{
    int32_t code = err;
    gLastError = code;
}
// On S3 hardware (gIsS3), poll the graphics engine status port until it
// reports idle.  The wait is bounded: after 100000 polls the function
// returns even if the engine still reports busy.  No-op on other hardware.
static void waitForEngine(void)
{
    if (!gIsS3) {
        return;
    }

    // GP_STAT is read from port 0x9AE8; bit 9 (0x0200) = hardware busy.
    int pollsLeft = 100000;
    while (pollsLeft-- > 0) {
        uint16_t gpStat = inportw(0x9AE8);
        if ((gpStat & 0x0200) == 0) {
            return;
        }
    }
}
// Entry stub for the INT 10h reflector; defined in file-scope asm above.
extern void int10hRawHandler(void);

// Hook protected-mode INT 10h with int10hRawHandler.  The previous vector is
// saved in gOldInt10hVec so removeInt10hReflector() can restore it.
// Returns false when the DPMI host refuses to set the new vector.
static bool installInt10hReflector(void)
{
    // The assembly stub cannot rely on DS at interrupt entry, so it loads
    // DJGPP's data selector from this variable via CS-relative addressing.
    gInt10hDsSel = _my_ds();

    // Top of the dedicated handler stack (one past its last byte).
    gInt10hStackTop = (uint32_t)gInt10hStack + sizeof(gInt10hStack);

    __dpmi_get_protected_mode_interrupt_vector(0x10, &gOldInt10hVec);

    __dpmi_paddr hookVec;
    hookVec.selector = _my_cs();
    hookVec.offset32 = (unsigned long)int10hRawHandler;

    bool hooked = __dpmi_set_protected_mode_interrupt_vector(0x10, &hookVec) == 0;
    if (hooked) {
        gInt10hInstalled = true;
    }
    return hooked;
}
// Restore the protected-mode INT 10h vector saved by
// installInt10hReflector().  Does nothing when the hook is not installed.
static void removeInt10hReflector(void)
{
    if (!gInt10hInstalled) {
        return;
    }

    __dpmi_set_protected_mode_interrupt_vector(0x10, &gOldInt10hVec);
    gInt10hInstalled = false;
}
// Hook protected-mode interrupt DPMI300_INT_NUM with dpmi300RawHandler.  The
// previous vector is saved in gOldDpmi300Vec so removeDpmi300Proxy() can
// restore it.  Returns false when the DPMI host refuses the new vector.
static bool installDpmi300Proxy(void)
{
    // Data selector and private stack top used by the raw handler.
    gDpmi300DsSel    = _my_ds();
    gDpmi300StackTop = (uint32_t)gDpmi300Stack + sizeof(gDpmi300Stack);

    __dpmi_get_protected_mode_interrupt_vector(DPMI300_INT_NUM, &gOldDpmi300Vec);

    __dpmi_paddr hookVec;
    hookVec.selector = _my_cs();
    hookVec.offset32 = (unsigned long)dpmi300RawHandler;

    if (__dpmi_set_protected_mode_interrupt_vector(DPMI300_INT_NUM, &hookVec) == 0) {
        gDpmi300Installed = true;
        dbg("windrv: DPMI 300h proxy installed on INT %02Xh\n", DPMI300_INT_NUM);
        return true;
    }
    return false;
}
// Search a loaded driver's code segments for the DoInt10h INT 31h instruction
// and patch it to use our proxy interrupt instead. DoInt10h builds a RMCS on
// the stack and then does:
//   mov ax, 0300h   ; B8 00 03
//   ...
//   int 31h         ; CD 31
// We find the first "CD 31" within a small window (up to offset +24) after
// each "B8 00 03" and change the 0x31 to DPMI300_INT_NUM.
//
// The write goes through a temporary data alias of the code selector.
// Returns true when at least one instruction was patched.
static bool patchDoInt10h(struct WdrvDriverS *drv)
{
    bool patched = false;

    for (int s = 0; s < drv->neMod.segmentCount; s++) {
        if (!drv->neMod.segments[s].isCode) {
            continue;
        }

        uint16_t sel  = drv->neMod.segments[s].selector;
        uint32_t lin  = drv->neMod.segments[s].linearAddr;
        uint32_t size = drv->neMod.segments[s].size;

        // Scan for "B8 00 03" (mov ax, 0300h)
        for (uint32_t i = 0; i + 2 < size; i++) {
            uint8_t b0 = *(uint8_t *)(lin + i);
            uint8_t b1 = *(uint8_t *)(lin + i + 1);
            uint8_t b2 = *(uint8_t *)(lin + i + 2);
            if (b0 != 0xB8 || b1 != 0x00 || b2 != 0x03) {
                continue;
            }

            // Found "mov ax, 0300h" at offset i. Search ahead for "CD 31",
            // clamped to the last byte of the segment (size >= 3 here).
            uint32_t searchEnd = i + 24;
            if (searchEnd > size - 1) {
                searchEnd = size - 1;
            }

            for (uint32_t j = i + 3; j + 1 <= searchEnd; j++) {
                uint8_t c0 = *(uint8_t *)(lin + j);
                uint8_t c1 = *(uint8_t *)(lin + j + 1);
                if (c0 != 0xCD || c1 != 0x31) {
                    continue;
                }

                // Create a data alias for the code segment so we can write.
                // The call reports failure as -1, so the result must be
                // tested as a signed int before it is narrowed to a selector.
                int alias = __dpmi_create_alias_descriptor(sel);
                if (alias <= 0) {
                    logErr("windrv: patchDoInt10h: cannot create alias for seg %d\n", s);
                    break;
                }
                uint16_t dataSel = (uint16_t)alias;

                // Patch 0x31 -> DPMI300_INT_NUM
                _farpokeb(dataSel, j + 1, DPMI300_INT_NUM);

                // Verify by reading back through the code selector
                uint8_t verify = _farpeekb(sel, j + 1);
                dbg("windrv: patched INT 31h -> INT %02Xh at seg%d:%04" PRIX32
                    " (verify: %02X)\n", DPMI300_INT_NUM, s + 1, j, verify);

                __dpmi_free_ldt_descriptor(dataSel);
                patched = true;
                break;
            }
        }
    }

    if (!patched) {
        dbg("windrv: patchDoInt10h: no INT 31h found after MOV AX,0300h\n");
    }
    return patched;
}
// Patch hardcoded "mov ax, 0040h; mov es, ax" in driver code segments.
//
// physical_enable in VGA.ASM loads ES with the literal value 0x0040 to
// access the BIOS data area. In real Windows 3.1, selector 0x0040 either
// maps to 0040:0000 or is trapped by the VDD. Under CWSDPMI, 0x0040 is
// an invalid ring-0 GDT selector that causes a GPF.
//
// We scan for the byte pattern B8 40 00 8E C0 (mov ax,0040h; mov es,ax)
// and patch the immediate to our biosDataSel from the stub context.
//
// The write goes through a temporary data alias of the code selector.
// Returns true when at least one occurrence was patched; false when the stub
// context has no biosDataSel or the pattern was not found.
static bool patchBiosDataAccess(struct WdrvDriverS *drv)
{
    uint16_t biosSel = gStubCtx.biosDataSel;
    if (biosSel == 0) {
        logErr("windrv: patchBiosDataAccess: no biosDataSel\n");
        return false;
    }

    bool patched = false;

    for (int s = 0; s < drv->neMod.segmentCount; s++) {
        if (!drv->neMod.segments[s].isCode) {
            continue;
        }

        uint16_t sel  = drv->neMod.segments[s].selector;
        uint32_t lin  = drv->neMod.segments[s].linearAddr;
        uint32_t size = drv->neMod.segments[s].size;

        for (uint32_t i = 0; i + 4 < size; i++) {
            uint8_t *p = (uint8_t *)(lin + i);

            // B8 40 00 8E C0 = mov ax, 0040h; mov es, ax
            if (p[0] != 0xB8 || p[1] != 0x40 || p[2] != 0x00 ||
                p[3] != 0x8E || p[4] != 0xC0) {
                continue;
            }

            // The alias call reports failure as -1, so the result must be
            // tested as a signed int before it is narrowed to a selector.
            int alias = __dpmi_create_alias_descriptor(sel);
            if (alias <= 0) {
                logErr("windrv: patchBiosDataAccess: cannot create alias for seg %d\n", s);
                break;
            }
            uint16_t dataSel = (uint16_t)alias;

            // Replace the 16-bit immediate (little-endian) with biosSel
            _farpokeb(dataSel, i + 1, (uint8_t)(biosSel & 0xFF));
            _farpokeb(dataSel, i + 2, (uint8_t)(biosSel >> 8));

            // Read back through the code selector for the log line
            uint8_t v0 = _farpeekb(sel, i + 1);
            uint8_t v1 = _farpeekb(sel, i + 2);
            logErr("windrv: patched mov ax,0040h -> mov ax,%04Xh at seg%d:%04" PRIX32
                   " (verify: %02X %02X)\n", biosSel, s + 1, i, v0, v1);

            __dpmi_free_ldt_descriptor(dataSel);
            patched = true;
        }
    }

    if (!patched) {
        dbg("windrv: patchBiosDataAccess: pattern not found (OK for some drivers)\n");
    }
    return patched;
}
// Repatch __WINFLAGS in all driver segments.
//
// The NE loader patches __WINFLAGS (KERNEL.178) into the driver's code/data
// segments at relocation time. After Enable(style=1) reveals the driver type,
// we may need to change WF_ENHANCED to WF_STANDARD for VGA-class drivers
// whose Enable(style=0) hangs waiting for a VDD that doesn't exist.
//
// We scan all segments for the 16-bit word pattern and replace it.  Every
// byte position is examined, so any two adjacent bytes equal to oldFlags
// (little-endian) are rewritten.  Code segments are written through a
// temporary data alias; data segments are written directly.  A code segment
// whose alias cannot be created is skipped.
static void patchWinFlags(struct WdrvDriverS *drv, uint16_t oldFlags, uint16_t newFlags)
{
    if (oldFlags == newFlags) {
        return;
    }

    uint8_t oldLo = (uint8_t)(oldFlags & 0xFF);
    uint8_t oldHi = (uint8_t)(oldFlags >> 8);
    uint8_t newLo = (uint8_t)(newFlags & 0xFF);
    uint8_t newHi = (uint8_t)(newFlags >> 8);
    int count = 0;

    for (int s = 0; s < drv->neMod.segmentCount; s++) {
        uint16_t sel    = drv->neMod.segments[s].selector;
        uint32_t lin    = drv->neMod.segments[s].linearAddr;
        uint32_t size   = drv->neMod.segments[s].size;
        bool     isCode = drv->neMod.segments[s].isCode;

        if (size < 2) {
            continue;
        }

        // Need a writable alias for code segments.  The alias call reports
        // failure as -1, so the result must be tested as a signed int before
        // it is narrowed to a selector.
        uint16_t dataSel = 0;
        if (isCode) {
            int alias = __dpmi_create_alias_descriptor(sel);
            if (alias <= 0) {
                continue;
            }
            dataSel = (uint16_t)alias;
        }

        for (uint32_t i = 0; i + 1 < size; i++) {
            uint8_t *p = (uint8_t *)(lin + i);
            if (p[0] != oldLo || p[1] != oldHi) {
                continue;
            }
            if (isCode) {
                _farpokeb(dataSel, i, newLo);
                _farpokeb(dataSel, i + 1, newHi);
            } else {
                p[0] = newLo;
                p[1] = newHi;
            }
            count++;
        }

        if (dataSel != 0) {
            __dpmi_free_ldt_descriptor(dataSel);
        }
    }

    if (count > 0) {
        dbg("windrv: patched %d __WINFLAGS locations: 0x%04X -> 0x%04X\n",
            count, oldFlags, newFlags);
    }
}
// Restore the DPMI300_INT_NUM vector saved by installDpmi300Proxy().  Does
// nothing when the proxy is not installed.
static void removeDpmi300Proxy(void)
{
    if (!gDpmi300Installed) {
        return;
    }

    __dpmi_set_protected_mode_interrupt_vector(DPMI300_INT_NUM, &gOldDpmi300Vec);
    gDpmi300Installed = false;
}
// Entry stubs for the exception handlers; defined in file-scope asm above.
extern void exc0dRawHandler(void);
extern void exc0eRawHandler(void);

// Install our handlers for processor exceptions 0x0D and 0x0E.
//
// The previous handlers are saved twice: as __dpmi_paddr (gOldExc0D /
// gOldExc0E) for restoring, and as packed far pointers (gOldExc0dFar /
// gOldExc0eFar) that the asm stubs use for indirect far jumps.
//
// Returns false when either handler cannot be installed; if only the second
// one fails, the first is rolled back to the previous handler.
static bool installExceptionCapture(void)
{
    // Top of the dedicated fault-handler stack (one past its last byte).
    gFaultStackTop = (uint32_t)gFaultStack + sizeof(gFaultStack);

    // Save the old 0x0D handler in both representations.
    __dpmi_get_processor_exception_handler_vector(0x0D, &gOldExc0D);
    gOldExc0dFar.offset   = (uint32_t)gOldExc0D.offset32;
    gOldExc0dFar.selector = (uint16_t)gOldExc0D.selector;

    // Save the old 0x0E handler in both representations.
    __dpmi_get_processor_exception_handler_vector(0x0E, &gOldExc0E);
    gOldExc0eFar.offset   = (uint32_t)gOldExc0E.offset32;
    gOldExc0eFar.selector = (uint16_t)gOldExc0E.selector;

    // Both new handlers live in our code segment.
    __dpmi_paddr handlerVec;
    handlerVec.selector = _my_cs();

    handlerVec.offset32 = (unsigned long)exc0dRawHandler;
    if (__dpmi_set_processor_exception_handler_vector(0x0D, &handlerVec) != 0) {
        return false;
    }

    handlerVec.offset32 = (unsigned long)exc0eRawHandler;
    if (__dpmi_set_processor_exception_handler_vector(0x0E, &handlerVec) == 0) {
        gExcCaptureInstalled = true;
        return true;
    }

    // Second install failed: undo the first one.
    __dpmi_set_processor_exception_handler_vector(0x0D, &gOldExc0D);
    return false;
}
// Restore the exception 0x0D and 0x0E handlers saved by
// installExceptionCapture().  Does nothing when capture is not installed.
static void removeExceptionCapture(void)
{
    if (!gExcCaptureInstalled) {
        return;
    }

    __dpmi_set_processor_exception_handler_vector(0x0D, &gOldExc0D);
    __dpmi_set_processor_exception_handler_vector(0x0E, &gOldExc0E);
    gExcCaptureInstalled = false;
}
// Hook protected-mode INT 2Fh with int2FhRawHandler.
//
// The previous vector is kept as __dpmi_paddr (gOldInt2FhVec) for restoring
// and as a packed far pointer (gOldInt2FhFar).  Returns false when the DPMI
// host refuses to set the new vector.
static bool installInt2FhHandler(void)
{
    __dpmi_paddr previous;
    __dpmi_get_protected_mode_interrupt_vector(0x2F, &previous);

    gOldInt2FhVec          = previous;
    gOldInt2FhFar.selector = previous.selector;
    gOldInt2FhFar.offset   = previous.offset32;

    __dpmi_paddr hookVec;
    hookVec.selector = _my_cs();
    hookVec.offset32 = (unsigned long)int2FhRawHandler;

    if (__dpmi_set_protected_mode_interrupt_vector(0x2F, &hookVec) == 0) {
        gInt2FhInstalled = true;
        return true;
    }
    return false;
}
// Restore the protected-mode INT 2Fh vector saved by installInt2FhHandler().
// Does nothing when the hook is not installed.
static void removeInt2FhHandler(void)
{
    if (!gInt2FhInstalled) {
        return;
    }

    __dpmi_set_protected_mode_interrupt_vector(0x2F, &gOldInt2FhVec);
    gInt2FhInstalled = false;
}
// printf-style debug logging: forwards the message to logErrV, but only
// while debug output is enabled (gDebug).
static void dbg(const char *fmt, ...)
{
    if (gDebug) {
        va_list args;
        va_start(args, fmt);
        logErrV(fmt, args);
        va_end(args);
    }
}
// Patch Win16 PROLOG_0/PROLOG_1 function prologs and their matching epilogs
// in all code segments.
//
// Background: the Windows 3.x module loader converts the 3-byte function
// prolog
// 8C D8 90 (mov ax, ds ; nop)
// to
// B8 xx xx (mov ax, <DGROUP selector>)
// so that AX holds the DGROUP selector when the function body executes
// "push ds ; mov ds, ax" on FAR entry.  This function performs the same
// conversion.
//
// Win16 PROLOG_0 functions also use `inc bp` to mark far frames for stack
// walking and `dec bp` in the epilog to undo it. The Windows kernel needs
// these odd BP markers for stack traversal and memory management, but our DOS
// environment has no such requirement. Leaving them in causes frame pointer
// corruption when the odd BP propagates through the call chain.
//
// Prolog pattern (two variants):
// 8C D8 90 45 55 8B EC [1E 8E D8] mov ax,ds; nop; inc bp; push bp; mov bp,sp; [push ds; mov ds,ax]
// B8 XX XX 45 55 8B EC [1E 8E D8] mov ax,IMMED; inc bp; push bp; mov bp,sp; [push ds; mov ds,ax]
//
// Epilog pattern ("pop bp; dec bp" followed by any return):
// 5D 4D CB pop bp; dec bp; retf
// 5D 4D C3 pop bp; dec bp; ret
// 5D 4D CA xx xx pop bp; dec bp; retf N
// 5D 4D C2 xx xx pop bp; dec bp; ret N
//
// Patches applied:
// - 45 → 90 in prolog (NOP out inc bp)
// - 8C D8 90 → B8 DGROUP_LO DGROUP_HI (only when it directly precedes the
//   matched "45 55 8B EC")
// - 4D → 90 in epilog (NOP out dec bp)
//
// NOT patched: "1E 8E D8" (push ds; mov ds,ax) is deliberately left intact,
// see the comment in pass 1.
//
// The match is a plain byte scan, so any occurrence of these byte sequences
// in a code segment is rewritten.  Bytes are written directly through the
// segment's linear address.
static void patchPrologs(NeModuleT *mod)
{
uint16_t dgroupSel = mod->autoDataSel;
int prologCount = 0;
int epilogCount = 0;
for (int s = 0; s < mod->segmentCount; s++) {
if (!mod->segments[s].isCode) {
continue;
}
uint8_t *base = (uint8_t *)mod->segments[s].linearAddr;
uint32_t size = mod->segments[s].size;
// Pass 1: Patch prologs — find "45 55 8B EC" (inc bp; push bp; mov bp,sp)
for (uint32_t i = 0; i + 3 < size; i++) {
if (base[i] != 0x45 ||
base[i + 1] != 0x55 ||
base[i + 2] != 0x8B ||
base[i + 3] != 0xEC) {
continue;
}
// NOP out inc bp
base[i] = 0x90;
prologCount++;
// If preceded by "8C D8 90" (mov ax,ds; nop), patch to mov ax,DGROUP
// (immediate stored little-endian: low byte first)
if (i >= 3 &&
base[i - 3] == 0x8C &&
base[i - 2] == 0xD8 &&
base[i - 1] == 0x90) {
base[i - 3] = 0xB8;
base[i - 2] = (uint8_t)(dgroupSel & 0xFF);
base[i - 1] = (uint8_t)(dgroupSel >> 8);
}
// "1E 8E D8" (push ds; mov ds,ax) must be kept intact!
// The driver expects DS = DGROUP for all DS-relative data access.
// Do NOT NOP these out.
}
// Pass 2: Patch epilogs — find "5D 4D" followed by any return:
// CB = retf, C3 = ret, CA xx xx = retf N, C2 xx xx = ret N
// Pascal calling convention uses retf N (CA) to clean parameters,
// so most epilogs are "5D 4D CA xx xx", not "5D 4D CB".
// This pass runs on the bytes as already modified by pass 1.
for (uint32_t i = 0; i + 2 < size; i++) {
if (base[i] == 0x5D &&
base[i + 1] == 0x4D &&
(base[i + 2] == 0xCB || base[i + 2] == 0xC3 ||
base[i + 2] == 0xCA || base[i + 2] == 0xC2)) {
base[i + 1] = 0x90;
epilogCount++;
}
}
}
dbg("windrv: patched %d prologs, %d epilogs (DGROUP=0x%04X)\n",
prologCount, epilogCount, dgroupSel);
}
// Patch VFLATD initialization code to avoid a 20-byte stack imbalance.
//
// The VFLATD init code at seg5:0x2368 is a subroutine (no prolog, near ret
// at 0x252B) called from the mode setup function. It allocates DOS memory
// via GlobalDOSAlloc/GlobalAlloc/GlobalLock/GetCurrentPDB, pushing 20 bytes
// of intermediate values onto the stack. All exit paths converge at 0x2519
// (GlobalFree) -> 0x2522 (SetSwapAreaSize) -> 0x252B (ret) WITHOUT cleaning
// these 20 bytes.
//
// In real Windows 3.x the caller at 0x3613 restores SP from BP, so the
// imbalance is harmless. But our thunk returns via a clean `ret`, which
// pops 0x2362 (junk) instead of the real return address 0x3613, landing
// in the middle of a `lea sp,[bp-2]` instruction -> SIGILL.
//
// There are TWO entry points to this init code:
//   0x22C5: wrapper function that checks [0EE9] and proceeds with init
//   0x2368: direct entry from mode setup (after VBE mode set)
//
// Fix: patch BOTH to C3 (near ret) so neither path executes VFLATD init.
// With LFB mode forced via DPMI (patchVflatdBypassCall), VFLATD setup
// is unnecessary.
//
// The target segment is the first code segment larger than 0x2369 bytes
// whose bytes at 0x02A4 are "E8 1E 20".  When no segment matches, nothing
// is patched.
static void patchVflatdStackBug(NeModuleT *mod)
{
    int segIdx = -1;

    for (int s = 0; s < mod->segmentCount; s++) {
        if (mod->segments[s].isCode && mod->segments[s].size > 0x2369) {
            uint8_t *base = (uint8_t *)mod->segments[s].linearAddr;
            // Verify the call at 0x02A4 targets 0x22C5: E8 1E 20
            if (base[0x02A4] == 0xE8 && base[0x02A5] == 0x1E &&
                base[0x02A6] == 0x20) {
                segIdx = s;
                break;
            }
        }
    }

    if (segIdx < 0) {
        dbg("windrv: VFLATD init patch: pattern not found, skipping\n");
        return;
    }

    uint8_t *base    = (uint8_t *)mod->segments[segIdx].linearAddr;
    uint32_t segSize = mod->segments[segIdx].size;

    // Patch wrapper function at 0x22C5 to immediate return
    base[0x22C5] = 0xC3;
    dbg("windrv: patched VFLATD init wrapper at seg %d offset 0x22C5 (ret)\n", segIdx);

    // Patch direct entry at 0x2368 (C6 06 A4 49 00 = mov byte [49A4],0).
    // The segment search above only guarantees size > 0x2369, while this
    // signature check reads through offset 0x236B, so confirm the segment
    // is long enough before touching those bytes.
    if (segSize > 0x236B &&
        base[0x2368] == 0xC6 && base[0x2369] == 0x06 &&
        base[0x236A] == 0xA4 && base[0x236B] == 0x49) {
        base[0x2368] = 0xC3;
        dbg("windrv: patched VFLATD init direct entry at seg %d offset 0x2368 (ret)\n", segIdx);
    } else {
        dbg("windrv: VFLATD init direct entry at 0x2368: unexpected bytes, skipping\n");
    }
}
// Bypass the VFLATD API call at seg5:0x3FD4.
//
// The driver checks [DS:8889] to decide between two framebuffer paths:
// [8889] == 0xFF: DPMI path (allocate descriptor, map physical via INT 31h)
// [8889] != 0xFF: VFLATD path (call far through [DS:0D76])
//
// Since VFLATD is not available, the far pointer at [0D76] is null, causing
// a GPF. Force the DPMI path by patching the conditional jump to unconditional.
//
// Original at 0x3FA9: 80 3E 89 88 FF 74 32 (cmp byte [8889],0xFF; jz +0x32)
// Patched: EB 37 90 90 90 90 90 (jmp +0x37; nop*5)
//
// Both the original jz and the patched jmp target 0x3FE2, which uses DPMI
// INT 31h functions 0800h/0007h/0008h to map the physical framebuffer —
// fully supported by CWSDPMI.
static void patchVflatdBypassCall(NeModuleT *mod)
{
    // Forces the driver onto its DPMI framebuffer path and removes every
    // indirect far call through the VFLATD entry pointer.
    //
    // mod: loaded NE module; the first code segment carrying the expected
    //      branch signature at 0x3FA9 is patched in place.  If no segment
    //      matches, nothing is modified.

    // Expected bytes at 0x3FA9: cmp byte [8889],0xFF ; jz +0x32
    static const uint8_t branchOrig[7] = {
        0x80, 0x3E, 0x89, 0x88, 0xFF, 0x74, 0x32
    };
    // Replacement: jmp 0x3FE2 (EB 37) followed by five NOPs
    static const uint8_t branchNew[7] = {
        0xEB, 0x37, 0x90, 0x90, 0x90, 0x90, 0x90
    };
    // call far [DS:0D76]
    static const uint8_t vflatdCall[4] = { 0xFF, 0x1E, 0x76, 0x0D };

    // Locate the first code segment that is large enough and carries the
    // branch signature.
    int found = -1;
    for (int idx = 0; idx < mod->segmentCount && found < 0; idx++) {
        if (!mod->segments[idx].isCode || mod->segments[idx].size <= 0x3FB0) {
            continue;
        }
        uint8_t *candidate = (uint8_t *)mod->segments[idx].linearAddr;
        if (memcmp(candidate + 0x3FA9, branchOrig, sizeof branchOrig) == 0) {
            found = idx;
        }
    }
    if (found < 0) {
        dbg("windrv: VFLATD bypass patch: pattern not found, skipping\n");
        return;
    }

    uint8_t *code = (uint8_t *)mod->segments[found].linearAddr;

    // Make the conditional branch unconditional so the DPMI path is
    // always taken.
    memcpy(code + 0x3FA9, branchNew, sizeof branchNew);
    dbg("windrv: patched VFLATD bypass at seg %d offset 0x3FA9\n", found);

    // Replace every occurrence of the far-call byte sequence in this
    // segment with NOPs; the pointer it calls through is null because
    // VFLATD is not present.
    uint32_t limit = mod->segments[found].size;
    int patched = 0;
    for (uint32_t pos = 0; pos + 3 < limit; pos++) {
        if (memcmp(code + pos, vflatdCall, sizeof vflatdCall) != 0) {
            continue;
        }
        memset(code + pos, 0x90, sizeof vflatdCall);
        dbg("windrv: NOPed VFLATD call at seg %d offset 0x%04" PRIX32 "\n", found, pos);
        patched++;
    }
    dbg("windrv: NOPed %d VFLATD call(s) total\n", patched);
}