commit 628ef231b99ea2dc7eb2ea3d248ad54dfe46760f Author: Scott Duensing Date: Sat Feb 21 18:01:54 2026 -0600 Initial commit. diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..ef524a1 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,106 @@ +{ + "permissions": { + "allow": [ + "Bash(wc:*)", + "WebSearch", + "WebFetch(domain:wuffs.org)", + "WebFetch(domain:www.os2museum.com)", + "WebFetch(domain:github.com)", + "WebFetch(domain:betawiki.net)", + "WebFetch(domain:raw.githubusercontent.com)", + "Bash(curl:*)", + "WebFetch(domain:techshelps.github.io)", + "WebFetch(domain:fragglet.github.io)", + "WebFetch(domain:dos-help.soulsphere.org)", + "WebFetch(domain:library.thedatadungeon.com)", + "Bash(dpkg:*)", + "Bash(apt list:*)", + "Bash(snap list:*)", + "Bash(flatpak list:*)", + "Bash(apt-cache search:*)", + "WebFetch(domain:packages.debian.org)", + "WebFetch(domain:sourceforge.net)", + "Bash(flatpak --version:*)", + "Bash(flatpak remotes:*)", + "Bash(flatpak remote-add:*)", + "Bash(flatpak search:*)", + "Bash(~/djgpp/bin/i586-pc-msdosdjgpp-gcc:*)", + "Bash(~/djgpp/djgpp/bin/i586-pc-msdosdjgpp-gcc:*)", + "Bash(flatpak install:*)", + "Bash(make:*)", + "Bash(ldd:*)", + "Bash(~/djgpp/djgpp/bin/i586-pc-msdosdjgpp-gcc-ar:*)", + "Bash(apt-get download:*)", + "Bash(dpkg-deb -x:*)", + "Bash(LD_LIBRARY_PATH=/tmp/libfl2-extract/usr/lib/x86_64-linux-gnu ~/djgpp/djgpp/bin/i586-pc-msdosdjgpp-ar:*)", + "Bash(unzip:*)", + "WebFetch(domain:archive.org)", + "WebFetch(domain:theretroweb.com)", + "WebFetch(domain:files.mpoli.fi)", + "WebFetch(domain:www.dosdays.co.uk)", + "Bash(xxd:*)", + "Bash(chmod:*)", + "Bash(pip3 install:*)", + "Bash(objdump:*)", + "Bash(find:*)", + "Bash(ls:*)", + "Bash(~/djgpp/djgpp/bin/i586-pc-msdosdjgpp-objdump:*)", + "Bash(~/djgpp/djgpp/bin/i586-pc-msdosdjgpp-nm:*)", + "Bash(LD_LIBRARY_PATH=/home/scott/claude/windriver/tools/lib /home/scott/djgpp/djgpp/bin/i586-pc-msdosdjgpp-objdump:*)", + 
"Bash(LD_LIBRARY_PATH=/home/scott/claude/windriver/tools/lib /home/scott/djgpp/djgpp/bin/i586-pc-msdosdjgpp-nm:*)", + "Bash(LD_LIBRARY_PATH=tools/lib ~/djgpp/djgpp/bin/i586-pc-msdosdjgpp-objdump:*)", + "Bash(ndisasm:*)", + "Bash(flatpak run:*)", + "Bash(pkill:*)", + "Bash(/tmp/analyze_patterns.py:*)", + "Bash(/tmp/analyze_patterns2.py:*)", + "Bash(/tmp/analyze_patterns3.py:*)", + "Bash(/tmp/analyze_patterns4.py:*)", + "Bash(/tmp/analyze_patterns5.py:*)", + "Bash(/tmp/final_analysis.py:*)", + "Bash(/tmp/detailed_decode.py << 'EOF'\n#!/usr/bin/env python3\nimport re\n\n# Read the ERROR.LOG\nwith open\\('/home/scott/claude/windriver/bin/ERROR.LOG', 'r'\\) as f:\n lines = f.readlines\\(\\)\n\n# Extract hex dumps from DIAG lines\ndiag_lines = {}\nfor line in lines:\n if 'windrv: DIAG seg5' in line:\n match = re.match\\(r'.*windrv: DIAG seg5 \\([0-9A-F]+\\):\\\\s*\\(.*\\)', line\\)\n if match:\n offset = match.group\\(1\\)\n hex_str = match.group\\(2\\).strip\\(\\)\n diag_lines[offset] = hex_str\n\n# Build full hex dump\nhex_dump = {}\nfor offset_str, hex_str in sorted\\(diag_lines.items\\(\\)\\):\n offset = int\\(offset_str, 16\\)\n bytes_list = [int\\(b, 16\\) for b in hex_str.split\\(\\)]\n for i, byte_val in enumerate\\(bytes_list\\):\n hex_dump[offset + i] = byte_val\n\ndef format_byte\\(offset\\):\n b = hex_dump.get\\(offset\\)\n return f\"{b:02X}\" if b is not None else \"??\"\n\ndef show_instructions\\(start, end\\):\n \"\"\"Show hex bytes with manual instruction decode\"\"\"\n offset = start\n while offset < end:\n b1 = hex_dump.get\\(offset\\)\n if b1 is None:\n break\n \n # Get instruction mnemonic\n b2 = hex_dump.get\\(offset + 1\\)\n b3 = hex_dump.get\\(offset + 2\\)\n \n instr_len = 1\n mnemonic = None\n \n # Decode some common instructions\n if b1 == 0x8B and b2 is not None and b3 is not None: # MOV reg, r/m\n mod_rm = b2\n mod = \\(mod_rm >> 6\\) & 3\n reg = \\(mod_rm >> 3\\) & 7\n rm = mod_rm & 7\n regs = ['AX', 'CX', 'DX', 'BX', 'SP', 'BP', 'SI', 
'DI']\n rm_names = ['BX+SI', 'BX+DI', 'BP+SI', 'BP+DI', 'SI', 'DI', 'BP', 'BX']\n \n if mod == 0:\n if rm == 6:\n disp16 = b3 | \\(hex_dump.get\\(offset + 3, 0\\) << 8\\)\n mnemonic = f\"MOV {regs[reg]}, [0x{disp16:04X}]\"\n instr_len = 4\n else:\n mnemonic = f\"MOV {regs[reg]}, [{rm_names[rm]}]\"\n instr_len = 2\n elif mod == 1: # disp8\n disp = b3 if b3 < 128 else b3 - 256\n mnemonic = f\"MOV {regs[reg]}, [{rm_names[rm]}+{disp:+d}]\"\n instr_len = 3\n elif mod == 2: # disp16\n disp16 = b3 | \\(hex_dump.get\\(offset + 3, 0\\) << 8\\)\n mnemonic = f\"MOV {regs[reg]}, [{rm_names[rm]}+0x{disp16:04X}]\"\n instr_len = 4\n elif mod == 3: # register\n mnemonic = f\"MOV {regs[reg]}, {regs[rm]}\"\n instr_len = 2\n \n elif b1 == 0xC7 and b2 is not None and b3 is not None: # MOV r/m, imm16\n mod_rm = b2\n mod = \\(mod_rm >> 6\\) & 3\n rm = mod_rm & 7\n rm_names = ['BX+SI', 'BX+DI', 'BP+SI', 'BP+DI', 'SI', 'DI', 'BP', 'BX']\n \n if mod == 1: # [bp+disp8], imm16\n disp = b3 if b3 < 128 else b3 - 256\n imm_l = hex_dump.get\\(offset + 3\\)\n imm_h = hex_dump.get\\(offset + 4\\)\n imm = \\(imm_l if imm_l else 0\\) | \\(\\(imm_h if imm_h else 0\\) << 8\\)\n mnemonic = f\"MOV [BP+{disp:+d}], 0x{imm:04X}\"\n instr_len = 5\n \n elif b1 == 0xC4 and b2 is not None and b3 is not None: # LES\n mod_rm = b2\n reg = \\(mod_rm >> 3\\) & 7\n rm = mod_rm & 7\n regs = ['AX', 'CX', 'DX', 'BX', 'SP', 'BP', 'SI', 'DI']\n rm_names = ['BX+SI', 'BX+DI', 'BP+SI', 'BP+DI', 'SI', 'DI', 'BP', 'BX']\n mod = \\(mod_rm >> 6\\) & 3\n \n if mod == 1:\n disp = b3 if b3 < 128 else b3 - 256\n mnemonic = f\"LES {regs[reg]}, [{rm_names[rm]}+{disp:+d}]\"\n instr_len = 3\n \n elif b1 == 0xF3 and b2 == 0xA5: # REP MOVSW\n mnemonic = \"REP MOVSW\"\n instr_len = 2\n \n elif b1 == 0xF3 and b2 == 0xA4: # REP MOVSB\n mnemonic = \"REP MOVSB\"\n instr_len = 2\n \n if mnemonic:\n hex_str = ' '.join\\(format_byte\\(offset + i\\) for i in range\\(instr_len\\)\\)\n print\\(f\"0x{offset:04X}: {hex_str:20s} {mnemonic}\"\\)\n 
offset += instr_len\n else:\n hex_str = format_byte\\(offset\\)\n print\\(f\"0x{offset:04X}: {hex_str:20s} \\(unknown\\)\"\\)\n offset += 1\n\nprint\\(\"=\" * 80\\)\nprint\\(\"INSTRUCTION DECODE - KEY REGIONS FOR lpData ACCESS\"\\)\nprint\\(\"=\" * 80\\)\nprint\\(\\)\n\nprint\\(\"REGION 1: REP MOVSW \\(0x02C0-0x02E0\\) - GDIINFO copy\"\\)\nprint\\(\"-\" * 80\\)\nshow_instructions\\(0x02C0, 0x02E0\\)\n\nprint\\(\\)\nprint\\(\"REGION 2: [BP+12] access at 0x0A65 and 0x0A83\"\\)\nprint\\(\"-\" * 80\\)\nshow_instructions\\(0x0A60, 0x0AA0\\)\n\nprint\\(\\)\nprint\\(\"=\" * 80\\)\nprint\\(\"COMPLETE [BP+offset] REFERENCES\"\\)\nprint\\(\"=\" * 80\\)\nprint\\(\\)\n\n# Comprehensive search for all [BP+offset] patterns\nbp_refs = {}\nfor offset in sorted\\(hex_dump.keys\\(\\)\\):\n if offset + 2 <= max\\(hex_dump.keys\\(\\)\\):\n b1 = hex_dump.get\\(offset\\)\n b2 = hex_dump.get\\(offset + 1\\)\n b3 = hex_dump.get\\(offset + 2\\)\n \n if b1 is None or b2 is None or b3 is None:\n continue\n \n # Look for ModRM patterns with mode=01, r/m=101 \\(which is [BP+disp8]\\)\n if \\(b2 & 0xC7\\) == 0x45:\n disp = b3 if b3 < 128 else b3 - 256\n if disp not in bp_refs:\n bp_refs[disp] = []\n bp_refs[disp].append\\(\\(offset, b1\\)\\)\n\n# Show summary\nfor disp in sorted\\(bp_refs.keys\\(\\)\\):\n refs = bp_refs[disp]\n print\\(f\"\\\\n[BP{disp:+d}]:\"\\)\n for offset, b1 in refs:\n print\\(f\" 0x{offset:04X}: {b1:02X}\"\\)\n if disp == 6:\n print\\(f\" <- style parameter \\(WORD\\)\"\\)\n elif disp == 12:\n print\\(f\" <- lpGDIInfo output buffer \\(FAR POINTER\\)\"\\)\n\nEOF)", + "Bash(timeout 120 bash:*)", + "Bash(timeout 60 bash:*)", + "Bash(timeout:*)", + "Bash(LD_LIBRARY_PATH=tools/lib ~/djgpp/djgpp/bin/i586-pc-msdosdjgpp-gcc:*)", + "Bash(pgrep:*)", + "Bash(xargs kill:*)", + "Bash(tail -3 ls -la /tmp/dosbox_caps/screenshot.*)", + "Bash(kill:*)", + "Bash(DISPLAY=:0 ffmpeg:*)", + "Bash(DISPLAY=:0 flatpak run:*)", + "Bash(ffmpeg:*)", + "Bash(sort:*)", + "Bash(wget:*)", + 
"WebFetch(domain:winworldpc.com)", + "WebFetch(domain:www.networkdls.com)", + "Bash(git clone:*)", + "Bash(gh release list:*)", + "WebFetch(domain:api.github.com)", + "WebFetch(domain:pdos.csail.mit.edu)", + "WebFetch(domain:stanislavs.org)", + "WebFetch(domain:en.wikipedia.org)", + "WebFetch(domain:www.thejat.in)", + "WebFetch(domain:wiki.osdev.org)", + "WebFetch(domain:osdev.miraheze.org)", + "WebFetch(domain:mirror.math.princeton.edu)", + "WebFetch(domain:fd.lod.bz)", + "WebFetch(domain:www.delorie.com)", + "WebFetch(domain:helppc.netcore2k.net)", + "WebFetch(domain:grokipedia.com)", + "Bash(head -10 echo \"=== -O2 -fno-gcse \\(working\\) ===\")", + "Bash(/tmp/count_callbacks.txt:*)", + "Bash(/tmp/full_mapping.txt:*)", + "Bash(/tmp/callback_analysis.txt:*)", + "WebFetch(domain:www.bitsavers.org)", + "Bash(~/djgpp/djgpp/bin/i586-pc-msdosdjgpp-size:*)", + "WebFetch(domain:dosbox-x.com)", + "WebFetch(domain:www.vogons.org)", + "Bash(# Check for any Win3x driver files in the broader eXo tree find \"\"/mnt/storage/mnt/pve/cephfs/emulation/ugly/Scanned/eXo/eXoWin3x/\"\" -iname \"\"*.drv\"\" -o -iname \"\"*.dr_\"\")", + "Bash(msexpand:*)", + "Bash(import -window root /tmp/dosbox_video/et4000_demo.png)", + "Bash(git lfs:*)", + "Bash(while read f)" + ] + } +} diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..428cb39 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,10 @@ +*.bmp filter=lfs diff=lfs merge=lfs -text +*.CUR filter=lfs diff=lfs merge=lfs -text +*.cur filter=lfs diff=lfs merge=lfs -text +*.ico filter=lfs diff=lfs merge=lfs -text +*.DRV filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.so.* filter=lfs diff=lfs merge=lfs -text +*.BMP filter=lfs diff=lfs merge=lfs -text +*.ICO filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..abaf4bb --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +# Build artifacts +obj/ 
+win31drv/obj/ +win31drv/libwindrv.a +bin/ + +# Runtime logs +OUTPUT.LOG + +# Editor backups +*~ +*.swp diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..01bc518 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "vbesvga.drv"] + path = vbesvga.drv + url = https://github.com/PluMGMK/vbesvga.drv.git diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..945d02b --- /dev/null +++ b/Makefile @@ -0,0 +1,53 @@ +# ============================================================================ +# Makefile for windrv demo - Windows 3.x display driver loader for DJGPP/DOS +# ============================================================================ + +DJGPP_PREFIX ?= $(HOME)/djgpp/djgpp + +CC = $(DJGPP_PREFIX)/bin/i586-pc-msdosdjgpp-gcc +CFLAGS = -Wall -Wextra -O2 -std=gnu99 -Iwin31drv +LDFLAGS = + +# DJGPP binutils need libfl.so.2 which may not be installed system-wide +export LD_LIBRARY_PATH := $(realpath tools/lib):$(LD_LIBRARY_PATH) + +OBJDIR = obj +BINDIR = bin + +LIBDIR = win31drv +LIB = $(LIBDIR)/libwindrv.a + +DEMO_SRC = demo.c +DEMO_OBJ = $(OBJDIR)/demo.o +DEMO_EXE = $(BINDIR)/demo.exe + +.PHONY: all clean lib demo + +all: lib demo + +lib: + $(MAKE) -C $(LIBDIR) + +demo: $(DEMO_EXE) + +$(DEMO_EXE): $(DEMO_OBJ) lib | $(BINDIR) + $(CC) $(CFLAGS) -o $@ $(DEMO_OBJ) -L$(LIBDIR) -lwindrv $(LDFLAGS) + unzip -oj tools/cwsdpmi.zip bin/CWSDPMI.EXE -d $(BINDIR) 2>/dev/null; true + cp tools/TEST.BAT $(BINDIR)/ + -cp -n drivers/*.DRV $(BINDIR)/ 2>/dev/null; true + +$(OBJDIR)/%.o: %.c | $(OBJDIR) + $(CC) $(CFLAGS) -c -o $@ $< + +$(OBJDIR): + mkdir -p $(OBJDIR) + +$(BINDIR): + mkdir -p $(BINDIR) + +# Dependencies +$(OBJDIR)/demo.o: demo.c win31drv/windrv.h win31drv/wintypes.h + +clean: + $(MAKE) -C $(LIBDIR) clean + -rm -rf $(OBJDIR) $(BINDIR) diff --git a/demo.c b/demo.c new file mode 100644 index 0000000..129a78e --- /dev/null +++ b/demo.c @@ -0,0 +1,323 @@ +// 
============================================================================ +// demo.c - Demonstration of using Windows 3.x display drivers from DOS +// +// This program loads a Windows 3.x accelerated display driver (.DRV file) +// and uses its hardware-accelerated functions to draw on screen. +// +// Usage: demo [-d] <driver.drv> +// -d Enable debug output +// +// Example: +// demo vga.drv +// demo -d s3trio.drv +// ============================================================================ + +#include +#include +#include +#include +#include +#include + +#include "win31drv/windrv.h" +#include "win31drv/wintypes.h" +#include "win31drv/log.h" + +__attribute__((noinline)) +static void demoDrawing(WdrvHandleT drv); +static void printDriverInfo(WdrvHandleT drv); +static void printUsage(const char *progName); +static void setupPalette(WdrvHandleT drv); + + +int main(int argc, char *argv[]) +{ + const char *driverPath = NULL; + bool debug = false; + + // Parse arguments + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-d") == 0) { + debug = true; + } else if (argv[i][0] != '-') { + driverPath = argv[i]; + } else { + printUsage(argv[0]); + return 1; + } + } + + if (!driverPath) { + printUsage(argv[0]); + return 1; + } + + logInit("OUTPUT.LOG"); + + // Initialize the library + logMsg("Initializing windrv library...\n"); + int32_t err = wdrvInit(); + if (err != WDRV_OK) { + logErr("Failed to initialize: %s\n", wdrvGetLastErrorString()); + return 1; + } + + if (debug) { + wdrvSetDebug(true); + } + + // Load the driver + logMsg("Loading driver: %s\n", driverPath); + WdrvHandleT drv = wdrvLoadDriver(driverPath); + if (!drv) { + logErr("Failed to load driver: %s\n", wdrvGetLastErrorString()); + wdrvShutdown(); + return 1; + } + + // Print driver info + printDriverInfo(drv); + + // Redirect stdout to the log file before entering SVGA mode. 
+ // In graphics mode, console output through INT 21h can crash + // because the console handler tries to access VGA text memory + // that's now part of the SVGA framebuffer. + freopen("OUTPUT.LOG", "a", stdout); + + // Enable the driver (set video mode) + logMsg("Enabling driver...\n"); + err = wdrvEnable(drv, 0, 0, 0); // Use driver defaults + if (err != WDRV_OK) { + logErr("Failed to enable driver: %s\n", wdrvGetLastErrorString()); + wdrvUnloadDriver(drv); + wdrvShutdown(); + return 1; + } + + // Palette hook: setupPalette() is currently a no-op that keeps the + // palette the driver programmed during Enable, so the VGA DAC and the + // driver's internal color table (used by RealizeObject) stay in sync. + setupPalette(drv); + + // Run drawing demos + demoDrawing(drv); + + logMsg("Drawing complete. Press any key...\n"); + + // Wait for a keypress so we can see the output + while (!kbhit()) { + // Busy-wait; kbhit() does keyboard I/O which keeps DOSBox-X responsive + } + (void)getkey(); // consume the key + + // Disable the driver (restore text mode) + logMsg("Calling wdrvDisable...\n"); + err = wdrvDisable(drv); + if (err != WDRV_OK) { + logMsg("wdrvDisable failed (err=%d), forcing text mode via INT 10h\n", (int)err); + __dpmi_regs dr; + memset(&dr, 0, sizeof(dr)); + dr.x.ax = 0x0003; // INT 10h AH=00 AL=03: set 80x25 text mode + __dpmi_int(0x10, &dr); + } + + // Clean up + logMsg("Unloading driver...\n"); + wdrvUnloadDriver(drv); + wdrvShutdown(); + + logMsg("Done.\n"); + logShutdown(); + return 0; +} + + +static void printUsage(const char *progName) +{ + fprintf(stderr, "Usage: %s [-d] <driver.drv>\n", progName); // stderr, not logErr(): main calls this before logInit(), when the log layer discards all output + fprintf(stderr, " -d Enable debug output\n"); + fprintf(stderr, "\nLoads a Windows 3.x display driver and demonstrates\n"); + fprintf(stderr, "its accelerated drawing functions from DOS.\n"); +} + + +static void printDriverInfo(WdrvHandleT drv) +{ + WdrvInfoT info; + wdrvGetInfo(drv, &info); + + logMsg("\n=== Driver Information ===\n"); + logMsg(" Name: %s\n", info.driverName); + logMsg(" Version: %u.%u\n", 
info.driverVersion >> 8, + info.driverVersion & 0xFF); + logMsg(" Resolution: %" PRId32 "x%" PRId32 "\n", info.maxWidth, info.maxHeight); + logMsg(" Color depth: %" PRId32 " bpp\n", info.maxBpp); + logMsg(" Colors: %" PRId32 "\n", info.numColors); + logMsg(" Raster caps: 0x%04" PRIX32 "\n", info.rasterCaps); + logMsg(" Capabilities:\n"); + logMsg(" BitBlt: %s\n", info.hasBitBlt ? "yes" : "no"); + logMsg(" Output: %s\n", info.hasOutput ? "yes" : "no"); + logMsg(" Pixel: %s\n", info.hasPixel ? "yes" : "no"); + logMsg(" StretchBlt: %s\n", info.hasStretchBlt ? "yes" : "no"); + logMsg(" ExtTextOut: %s\n", info.hasExtTextOut ? "yes" : "no"); + logMsg(" SetPalette: %s\n", info.hasSetPalette ? "yes" : "no"); + logMsg(" SetCursor: %s\n", info.hasSetCursor ? "yes" : "no"); +} + + +// noinline: when inlined into main with -O2, the optimizer mishandles +// callee-saved registers across the thunk calls in the Demo 2 → Demo 3 +// transition, causing the handle pointer to be corrupted. +__attribute__((noinline)) +static void demoDrawing(WdrvHandleT drv) +{ + WdrvInfoT info; + wdrvGetInfo(drv, &info); + + int16_t screenW = (int16_t)info.maxWidth; + int16_t screenH = (int16_t)info.maxHeight; + + if (screenW == 0) { + screenW = 640; + } + if (screenH == 0) { + screenH = 480; + } + + logMsg("demoDrawing: screenW=%d screenH=%d hasBitBlt=%d hasOutput=%d hasPixel=%d\n", + screenW, screenH, info.hasBitBlt, info.hasOutput, info.hasPixel); + + // Demo 1: Fill rectangles + if (info.hasBitBlt) { + logMsg("Demo 1: Fill rectangles\n"); + + // Clear screen to white + wdrvFillRect(drv, 0, 0, screenW, screenH, MAKE_RGB(255, 255, 255)); + + static const uint32_t vgaColors[] = { + MAKE_RGB( 0, 0, 0), // black + MAKE_RGB( 0, 0, 255), // blue + MAKE_RGB( 0, 255, 0), // green + MAKE_RGB( 0, 255, 255), // cyan + MAKE_RGB(255, 0, 0), // red + MAKE_RGB(255, 0, 255), // magenta + MAKE_RGB(255, 255, 0), // yellow + MAKE_RGB(170, 170, 170), // light gray + MAKE_RGB( 85, 85, 85), // dark gray + MAKE_RGB( 85, 
85, 255), // light blue + MAKE_RGB( 85, 255, 85), // light green + MAKE_RGB( 85, 255, 255), // light cyan + MAKE_RGB(255, 85, 85), // light red + MAKE_RGB(255, 85, 255), // light magenta + MAKE_RGB(255, 255, 85), // yellow-ish + MAKE_RGB(128, 128, 128), // mid gray + }; + + int16_t boxW = screenW / 4; + int16_t boxH = screenH / 4; + + for (int16_t row = 0; row < 4; row++) { + for (int16_t col = 0; col < 4; col++) { + int idx = row * 4 + col; + wdrvFillRect(drv, col * boxW + 4, row * boxH + 4, + boxW - 8, boxH - 8, vgaColors[idx]); + } + } + + logMsg(" Drew %d colored rectangles\n", 16); + } + + // Demo 2: Draw pixel patterns + if (info.hasPixel) { + logMsg("Demo 2: Pixel patterns (8x8)\n"); + int pixCount = 0; + for (int16_t y = 0; y < 8 && y < screenH; y++) { + for (int16_t x = 0; x < 8 && x < screenW; x++) { + uint8_t r = (uint8_t)(x * 32); + uint8_t g = (uint8_t)(y * 32); + uint8_t b = (uint8_t)((x + y) * 16); + wdrvSetPixel(drv, x + screenW - 18, y + 10, MAKE_RGB(r, g, b)); + pixCount++; + } + } + logMsg(" Drew %d pixels\n", pixCount); + } + + // Demo 3: Draw lines using Output (polyline with realized pen) + if (info.hasOutput) { + logMsg("Demo 3: Lines (starburst)\n"); + + int16_t cx = screenW / 2; + int16_t cy = screenH / 2; + int16_t radius = (screenH < screenW ? 
screenH : screenW) / 3; + int lineCount = 0; + + for (int angle = 0; angle < 360; angle += 15) { + int32_t dx = 0; + int32_t dy = 0; + + int a = angle % 360; + int qa = a % 90; + int32_t s = (int32_t)qa * radius / 90; + int32_t c = (int32_t)(90 - qa) * radius / 90; + + if (a < 90) { + dx = s; + dy = -c; + } else if (a < 180) { + dx = c; + dy = s; + } else if (a < 270) { + dx = -s; + dy = c; + } else { + dx = -c; + dy = -s; + } + + Point16T pts[2]; + pts[0].x = cx; + pts[0].y = cy; + pts[1].x = (int16_t)(cx + dx); + pts[1].y = (int16_t)(cy + dy); + + uint32_t lineColor = MAKE_RGB( + (uint8_t)(angle * 255 / 360), + (uint8_t)(255 - angle * 255 / 360), + 128); + + wdrvPolyline(drv, pts, 2, lineColor); + lineCount++; + } + logMsg(" Drew %d lines\n", lineCount); + } + + // Demo 4: Screen-to-screen blit test + if (info.hasBitBlt) { + logMsg("Demo 4: Screen-to-screen blit\n"); + + WdrvBitBltParamsT bp; + memset(&bp, 0, sizeof(bp)); + bp.srcX = 0; + bp.srcY = 0; + bp.dstX = screenW / 2; + bp.dstY = screenH / 2; + bp.width = screenW / 4; + bp.height = screenH / 4; + bp.rop3 = SRCCOPY; + wdrvBitBlt(drv, &bp); + logMsg(" Screen blit done\n"); + } +} + + +static void setupPalette(WdrvHandleT drv) +{ + // The driver sets up its own palette during Enable (via VBE 4F09) + // and stores an internal color table that RealizeObject uses for + // color matching. We leave the palette as-is so the DAC and the + // internal table stay in sync. RealizeObject will find the best + // matching index, and the DAC will display the correct color. 
+ (void)drv; +} + diff --git a/dosbox-x.conf b/dosbox-x.conf new file mode 100644 index 0000000..a148db9 --- /dev/null +++ b/dosbox-x.conf @@ -0,0 +1,40 @@ +# DOSBox-X configuration for win31drv development +# S3 Trio64 SVGA with VESA support + +[sdl] +output = opengl +windowresolution = 1024x768 + +[dosbox] +machine = svga_s3trio64 +memsize = 64 +quit warning = false + +[cpu] +core = normal +cputype = pentium +cycles = max + +[render] +aspect = true +scaler = none + +[video] +vmemsize = 8 +vmemsizekb = 0 +vesa oldvbe = false +vesa oldvbe10 = false + +[dos] +umb = true +xms = true +ems = true + +[autoexec] +@echo off +mount c /home/scott/claude/windriver +c: +cd bin +DEMO.EXE -d VGA.DRV +rem exit + diff --git a/drivers/ET4000.DRV b/drivers/ET4000.DRV new file mode 100644 index 0000000..f04c731 --- /dev/null +++ b/drivers/ET4000.DRV @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:745c3c3258b3fe125bad0105baaf5bf2c80a81a772fca595f235ba7ae5572680 +size 72480 diff --git a/drivers/S3TRIO.DRV b/drivers/S3TRIO.DRV new file mode 100644 index 0000000..3c0b94a --- /dev/null +++ b/drivers/S3TRIO.DRV @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9ae6047ddcd4e61a29cbf5a0b2f7303ef5fc4dc0d49e2485754746950ca2f0 +size 237728 diff --git a/drivers/VBESVGA.DRV b/drivers/VBESVGA.DRV new file mode 100644 index 0000000..ed1d531 --- /dev/null +++ b/drivers/VBESVGA.DRV @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3449c52878d02fd46cb5280b9f3bb3113026ae592c1d78f991f3a561f178bbee +size 138832 diff --git a/drivers/VGA.DRV b/drivers/VGA.DRV new file mode 100644 index 0000000..1c8384a --- /dev/null +++ b/drivers/VGA.DRV @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53312ed013bda513c74e33659f4ee7d1b4f2b1cf1a06aed672432a8fbc4a820a +size 73200 diff --git a/tools/TEST.BAT b/tools/TEST.BAT new file mode 100644 index 0000000..1dae980 --- /dev/null +++ b/tools/TEST.BAT @@ -0,0 +1 @@ +demo.exe -d 
VBESVGA.DRV diff --git a/tools/cwsdpmi.zip b/tools/cwsdpmi.zip new file mode 100644 index 0000000..415ce08 --- /dev/null +++ b/tools/cwsdpmi.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deacda0488e1cdd7c4a9f32fab45662b34c0ed6b2d7d4d13bc07041b62004a8c +size 71339 diff --git a/tools/lib/libfl.so.2 b/tools/lib/libfl.so.2 new file mode 100644 index 0000000..3b4bad1 --- /dev/null +++ b/tools/lib/libfl.so.2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812df699874145b9c7db71f33e870bb70183c01433018970031b57593ef731d5 +size 14256 diff --git a/tools/lib/libfl.so.2.0.0 b/tools/lib/libfl.so.2.0.0 new file mode 100644 index 0000000..3b4bad1 --- /dev/null +++ b/tools/lib/libfl.so.2.0.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812df699874145b9c7db71f33e870bb70183c01433018970031b57593ef731d5 +size 14256 diff --git a/vbesvga.drv b/vbesvga.drv new file mode 160000 index 0000000..2da3782 --- /dev/null +++ b/vbesvga.drv @@ -0,0 +1 @@ +Subproject commit 2da3782d3f17531f32bbd9d67a0c242656bb7b07 diff --git a/win31drv/Makefile b/win31drv/Makefile new file mode 100644 index 0000000..e3ff61b --- /dev/null +++ b/win31drv/Makefile @@ -0,0 +1,51 @@ +# ============================================================================ +# Makefile for win31drv - Windows 3.x display driver library for DJGPP/DOS +# ============================================================================ + +DJGPP_PREFIX ?= $(HOME)/djgpp/djgpp + +CC = $(DJGPP_PREFIX)/bin/i586-pc-msdosdjgpp-gcc +AR = $(DJGPP_PREFIX)/bin/i586-pc-msdosdjgpp-ar +CFLAGS = -Wall -Wextra -O2 -std=gnu99 -I. + +# DOSBox-X's S3 Trio64 emulation corrupts specific memory addresses +# during 16-bit driver calls via thunkCall16. With -O2 GCSE enabled, +# the code layout places stack locals and register spills at addresses +# that overlap the corruption targets, causing wrong values in drawing +# parameters. 
Disabling GCSE for windrv.c changes the layout enough +# to avoid the overlap. Only windrv.c is affected (it has the drawing +# functions that call thunkCall16 with interleaved parameter setup). +WINDRV_CFLAGS = $(CFLAGS) -fno-gcse + +# DJGPP binutils need libfl.so.2 which may not be installed system-wide +export LD_LIBRARY_PATH := $(realpath ../tools/lib):$(LD_LIBRARY_PATH) + +OBJDIR = obj + +SRCS = log.c neload.c thunk.c winstub.c windrv.c +OBJS = $(addprefix $(OBJDIR)/,$(SRCS:.c=.o)) +LIB = libwindrv.a + +.PHONY: all clean + +all: $(LIB) + +$(LIB): $(OBJS) + $(AR) rcs $@ $^ + +$(OBJDIR)/%.o: %.c | $(OBJDIR) + $(CC) $(CFLAGS) -c -o $@ $< + +$(OBJDIR): + mkdir -p $(OBJDIR) + +# Dependencies +$(OBJDIR)/log.o: log.c log.h +$(OBJDIR)/neload.o: neload.c neload.h neformat.h wintypes.h log.h +$(OBJDIR)/thunk.o: thunk.c thunk.h wintypes.h log.h +$(OBJDIR)/winstub.o: winstub.c winstub.h thunk.h wintypes.h log.h +$(OBJDIR)/windrv.o: windrv.c windrv.h wintypes.h winddi.h neformat.h neload.h thunk.h winstub.h log.h | $(OBJDIR) + $(CC) $(WINDRV_CFLAGS) -c -o $@ $< + +clean: + -rm -rf $(OBJDIR) $(LIB) diff --git a/win31drv/log.c b/win31drv/log.c new file mode 100644 index 0000000..3dee254 --- /dev/null +++ b/win31drv/log.c @@ -0,0 +1,83 @@ +// ============================================================================ +// log.c - Logging to file +// ============================================================================ + +#include +#include + +#include "log.h" + +static FILE *gLogFile = NULL; +static LogPreIoFuncT gPreIoHook = NULL; + + +void logInit(const char *filename) +{ + if (filename) { + gLogFile = fopen(filename, "w"); + } +} + + +void logErr(const char *fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + logErrV(fmt, ap); + va_end(ap); +} + + +void logErrV(const char *fmt, va_list ap) +{ + if (gLogFile) { + if (gPreIoHook) { + gPreIoHook(); + } + vfprintf(gLogFile, fmt, ap); + fflush(gLogFile); + } +} + + +FILE *logGetFile(void) +{ + return gLogFile; +} + + +void logMsg(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + logMsgV(fmt, ap); + va_end(ap); +} + + +void logMsgV(const char *fmt, va_list ap) +{ + if (gLogFile) { + if (gPreIoHook) { + gPreIoHook(); + } + vfprintf(gLogFile, fmt, ap); + fflush(gLogFile); + } +} + + +void logSetPreIoHook(LogPreIoFuncT func) +{ + gPreIoHook = func; +} + + +void logShutdown(void) +{ + if (gLogFile) { + fflush(gLogFile); + fclose(gLogFile); + gLogFile = NULL; + } +} diff --git a/win31drv/log.h b/win31drv/log.h new file mode 100644 index 0000000..53d9f28 --- /dev/null +++ b/win31drv/log.h @@ -0,0 +1,37 @@ +#ifndef LOG_H +#define LOG_H + +#include + +// ============================================================================ +// Logging layer - writes to a log file +// +// Call logInit() with a filename to enable logging. +// ============================================================================ + +// Initialize logging. If filename is non-NULL, opens it for writing. +// Can be called before any other log function. If not called (or the +// file cannot be opened), all log output is silently discarded. +void logInit(const char *filename); + +// Shut down logging, flush and close the log file. +void logShutdown(void); + +// Write a printf-style message to the log file only (nothing goes to stdout). +void logMsg(const char *fmt, ...) __attribute__((format(printf, 1, 2))); + +// Write a printf-style error to the log file only (nothing goes to stderr). +void logErr(const char *fmt, ...) __attribute__((format(printf, 1, 2))); + +// Varargs versions for use by other wrappers. +void logMsgV(const char *fmt, va_list ap); +void logErrV(const char *fmt, va_list ap); + +// Get the underlying log file handle (or NULL if not initialized). 
+FILE *logGetFile(void); + +// Register a pre-I/O hook called before every file write. +typedef void (*LogPreIoFuncT)(void); +void logSetPreIoHook(LogPreIoFuncT func); + +#endif // LOG_H diff --git a/win31drv/neformat.h b/win31drv/neformat.h new file mode 100644 index 0000000..a4796f6 --- /dev/null +++ b/win31drv/neformat.h @@ -0,0 +1,214 @@ +#ifndef NEFORMAT_H +#define NEFORMAT_H + +#include + +// ============================================================================ +// MZ (DOS) executable header - precedes the NE header +// ============================================================================ + +#define MZ_SIGNATURE 0x5A4D // 'MZ' + +typedef struct __attribute__((packed)) { + uint16_t signature; // 0x00: 'MZ' + uint16_t lastPageBytes; // 0x02: bytes on last page + uint16_t pageCount; // 0x04: pages in file (512 bytes each) + uint16_t relocationCount; // 0x06: relocation entries + uint16_t headerParagraphs;// 0x08: header size in paragraphs + uint16_t minAlloc; // 0x0A: minimum extra paragraphs + uint16_t maxAlloc; // 0x0C: maximum extra paragraphs + uint16_t initSS; // 0x0E: initial SS + uint16_t initSP; // 0x10: initial SP + uint16_t checksum; // 0x12: checksum + uint16_t initIP; // 0x14: initial IP + uint16_t initCS; // 0x16: initial CS + uint16_t relocationOff; // 0x18: relocation table offset + uint16_t overlayNum; // 0x1A: overlay number + uint16_t reserved1[4]; // 0x1C: reserved + uint16_t oemId; // 0x24: OEM identifier + uint16_t oemInfo; // 0x26: OEM information + uint16_t reserved2[10]; // 0x28: reserved + uint32_t neHeaderOffset; // 0x3C: offset to NE header +} MzHeaderT; + +// ============================================================================ +// NE (New Executable) header +// ============================================================================ + +#define NE_SIGNATURE 0x454E // 'NE' + +typedef struct __attribute__((packed)) { + uint16_t signature; // 0x00: 'NE' + uint8_t linkerMajor; // 0x02: linker version + uint8_t 
linkerMinor; // 0x03: linker revision + uint16_t entryTableOffset; // 0x04: offset to entry table (from NE header) + uint16_t entryTableSize; // 0x06: size of entry table + uint32_t fileCrc; // 0x08: file CRC + uint16_t moduleFlags; // 0x0C: module flags + uint16_t autoDataSegIndex; // 0x0E: auto data segment index (1-based) + uint16_t initialHeapSize; // 0x10: initial heap size + uint16_t initialStackSize; // 0x12: initial stack size + uint16_t entryPointIP; // 0x14: CS:IP entry point (IP) + uint16_t entryPointCS; // 0x16: CS:IP entry point (CS segment index) + uint16_t initialSP; // 0x18: SS:SP initial stack (SP) + uint16_t initialSS; // 0x1A: SS:SP initial stack (SS segment index) + uint16_t segmentCount; // 0x1C: number of segment table entries + uint16_t moduleRefCount; // 0x1E: number of module reference table entries + uint16_t nonResNameSize; // 0x20: size of non-resident name table + uint16_t segmentTableOffset; // 0x22: offset to segment table (from NE header) + uint16_t resourceTableOffset; // 0x24: offset to resource table (from NE header) + uint16_t resNameTableOffset; // 0x26: offset to resident name table (from NE) + uint16_t modRefTableOffset; // 0x28: offset to module reference table (from NE) + uint16_t importNameTableOffset; // 0x2A: offset to imported names table (from NE) + uint32_t nonResNameTableFileOffset; // 0x2C: file offset of non-resident name table + uint16_t movableEntryCount; // 0x30: number of movable entry points + uint16_t sectorAlignShift; // 0x32: sector alignment shift count + uint16_t resourceSegCount; // 0x34: number of resource segments + uint8_t targetOS; // 0x36: target operating system + uint8_t otherFlags; // 0x37: additional flags + uint16_t gangLoadAreaOffset; // 0x38: offset to gang-load area + uint16_t gangLoadAreaSize; // 0x3A: size of gang-load area + uint16_t swapAreaSize; // 0x3C: minimum code swap area size + uint16_t expectedWinVer; // 0x3E: expected Windows version +} NeHeaderT; + +// NE module flags 
(moduleFlags field) +#define NE_FFLAGS_SINGLEDATA 0x0001 // Single shared DGROUP +#define NE_FFLAGS_MULTIPLEDATA 0x0002 // Multiple DGROUP (DLL with per-instance data) +#define NE_FFLAGS_GLOBALINIT 0x0004 // Global initialization +#define NE_FFLAGS_PROTMODE 0x0008 // Protected mode only +#define NE_FFLAGS_8086 0x0010 // 8086 instructions +#define NE_FFLAGS_80286 0x0020 // 80286 instructions +#define NE_FFLAGS_80386 0x0040 // 80386 instructions +#define NE_FFLAGS_80x87 0x0080 // uses 80x87 +#define NE_FFLAGS_FULLSCREEN 0x0100 // full-screen application (not a DLL) +#define NE_FFLAGS_DLL 0x8000 // DLL or driver (not a task) + +// NE target OS values +#define NE_OS_UNKNOWN 0x00 +#define NE_OS_OS2 0x01 +#define NE_OS_WINDOWS 0x02 +#define NE_OS_DOS4 0x03 +#define NE_OS_WIN386 0x04 + +// ============================================================================ +// NE segment table entry +// ============================================================================ + +typedef struct __attribute__((packed)) { + uint16_t fileSectorOffset; // Logical sector offset in file (0 = no data) + uint16_t fileLength; // Length of segment in file (0 = 64K) + uint16_t flags; // Segment flags + uint16_t minAllocSize; // Minimum allocation size (0 = 64K) +} NeSegEntryT; + +// Segment flags +#define NE_SEGF_DATA 0x0001 // Data segment (0 = code) +#define NE_SEGF_ALLOCATED 0x0002 // Loader has allocated memory +#define NE_SEGF_LOADED 0x0004 // Segment is loaded +#define NE_SEGF_MOVEABLE 0x0010 // Moveable segment +#define NE_SEGF_SHAREABLE 0x0020 // Shareable segment +#define NE_SEGF_PRELOAD 0x0040 // Preload segment +#define NE_SEGF_READONLY 0x0080 // Execute-only (code) or read-only (data) +#define NE_SEGF_HASRELOC 0x0100 // Has relocation data +#define NE_SEGF_DISCARD 0x1000 // Discardable + +// ============================================================================ +// NE relocation record +// ============================================================================ + 
+typedef struct __attribute__((packed)) { + uint8_t srcType; // Source (fixup) type + uint8_t flags; // Relocation flags + uint16_t srcOffset; // Offset within segment of the fixup location + uint16_t target1; // Module index (1-based) or segment number + uint16_t target2; // Ordinal/offset or offset within segment +} NeRelocT; + +// Relocation source types (srcType field) +#define NE_RELOC_LOBYTE 0x00 // Low byte fixup +#define NE_RELOC_SEGMENT 0x02 // 16-bit segment fixup +#define NE_RELOC_FAR_ADDR 0x03 // 32-bit far pointer (seg:off) fixup +#define NE_RELOC_OFFSET 0x05 // 16-bit offset fixup +#define NE_RELOC_FAR48_ADDR 0x0B // 48-bit far pointer fixup +#define NE_RELOC_OFFSET32 0x0D // 32-bit offset fixup + +// Relocation target flags (flags field) +#define NE_RELF_INTERNALREF 0x00 // Internal reference +#define NE_RELF_IMPORTORD 0x01 // Import by ordinal +#define NE_RELF_IMPORTNAME 0x02 // Import by name +#define NE_RELF_OSFIXUP 0x03 // OS fixup +#define NE_RELF_TARGET_MASK 0x03 // Mask for target type +#define NE_RELF_ADDITIVE 0x04 // Additive fixup (don't zero target first) + +// ============================================================================ +// NE entry table structures +// ============================================================================ + +// Entry table is a series of bundles. 
Each bundle starts with: +// BYTE count - number of entries in this bundle (0 = end of table) +// BYTE indicator - 0x00 = empty, 0xFF = moveable, else fixed segment number + +// Fixed entry (indicator = segment number 1-254) +typedef struct __attribute__((packed)) { + uint8_t flags; // Entry flags + uint16_t offset; // Offset within segment +} NeFixedEntryT; + +// Moveable entry (indicator = 0xFF) +typedef struct __attribute__((packed)) { + uint8_t flags; // Entry flags + uint16_t int3fh; // INT 3Fh instruction (0xCD3F) + uint8_t segIndex; // Segment number (1-based) + uint16_t offset; // Offset within segment +} NeMoveableEntryT; + +// Entry flags +#define NE_ENTRY_EXPORTED 0x01 // Entry is exported +#define NE_ENTRY_SHDATA 0x02 // Entry uses shared data segment + +// ============================================================================ +// Display driver ordinal numbers (standard DDI exports) +// ============================================================================ + +#define DDI_ORD_BITBLT 1 +#define DDI_ORD_COLORINFO 2 +#define DDI_ORD_CONTROL 3 +#define DDI_ORD_DISABLE 4 +#define DDI_ORD_ENABLE 5 +#define DDI_ORD_ENUMDFFONTS 6 +#define DDI_ORD_ENUMOBJ 7 +#define DDI_ORD_OUTPUT 8 +#define DDI_ORD_PIXEL 9 +#define DDI_ORD_REALIZEOBJECT 10 +#define DDI_ORD_STRBLT 11 +#define DDI_ORD_SCANLR 12 +#define DDI_ORD_DEVICEMODE 13 +#define DDI_ORD_EXTTEXTOUT 14 +#define DDI_ORD_GETCHARWIDTH 15 +#define DDI_ORD_DEVICEBITMAP 16 +#define DDI_ORD_FASTBORDER 17 +#define DDI_ORD_SETATTRIBUTE 18 +#define DDI_ORD_DIBTODEVICE 19 +#define DDI_ORD_CREATEBITMAP 20 +#define DDI_ORD_DELETEBITMAP 21 +#define DDI_ORD_SELECTBITMAP 22 +#define DDI_ORD_BITMAPBITS 23 +#define DDI_ORD_RECLIP 24 +#define DDI_ORD_GETPALETTE 25 +#define DDI_ORD_SETPALETTE 26 +#define DDI_ORD_SETPALETTETRANS 27 +#define DDI_ORD_UPDATECOLORS 28 +#define DDI_ORD_STRETCHBLT 29 +#define DDI_ORD_STRETCHDIBITS 30 +#define DDI_ORD_SELECTPALETTE 31 +#define DDI_ORD_INQUIRE 101 +#define DDI_ORD_SETCURSOR 
102 +#define DDI_ORD_MOVECURSOR 103 +#define DDI_ORD_CHECKCRSR 104 +#define DDI_ORD_GETDRIVERRESID 450 + +// Maximum DDI ordinal we track +#define DDI_MAX_ORDINAL 500 + +#endif // NEFORMAT_H diff --git a/win31drv/neload.c b/win31drv/neload.c new file mode 100644 index 0000000..e86ff92 --- /dev/null +++ b/win31drv/neload.c @@ -0,0 +1,962 @@ +// ============================================================================ +// neload.c - NE (New Executable) format loader +// +// Loads Windows 3.x 16-bit DLLs/drivers into protected mode memory +// using DPMI to allocate LDT descriptors for 16-bit code/data segments. +// ============================================================================ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "neload.h" +#include "wintypes.h" +#include "log.h" + +// Forward declarations +static bool readHeaders(NeModuleT *mod, FILE *fp); +static bool loadSegments(NeModuleT *mod, FILE *fp, ImportResolverT resolver); +static bool allocateSegment(NeModuleT *mod, int segIdx, uint32_t size, bool isCode); +static bool loadSegmentData(NeModuleT *mod, int segIdx, FILE *fp, uint32_t fileOffset, uint32_t fileSize); +static bool processRelocations(NeModuleT *mod, int segIdx, FILE *fp, ImportResolverT resolver); +static bool parseEntryTable(NeModuleT *mod, FILE *fp); +static bool parseModuleReferences(NeModuleT *mod, FILE *fp); +static bool parseResidentNames(NeModuleT *mod, FILE *fp); +static void freeSegment(LoadedSegT *seg); +static uint16_t makeDescriptor16(uint32_t base, uint32_t limit, bool isCode); +static void dbgPrint(const char *fmt, ...); + +static bool gDebug = false; + + +static void dbgPrint(const char *fmt, ...) 
+{ + if (!gDebug) { + return; + } + va_list ap; + va_start(ap, fmt); + logErrV(fmt, ap); + va_end(ap); +} + + +bool neLoadModule(NeModuleT *mod, const char *filePath, ImportResolverT resolver) +{ + memset(mod, 0, sizeof(NeModuleT)); + + FILE *fp = fopen(filePath, "rb"); + if (!fp) { + logErr("neload: cannot open '%s'\n", filePath); + return false; + } + + bool ok = false; + + // Step 1: Read and validate MZ + NE headers + if (!readHeaders(mod, fp)) { + goto done; + } + + // Step 2: Parse module reference table (imported module names) + if (!parseModuleReferences(mod, fp)) { + goto done; + } + + // Step 3: Parse resident name table (module name + exports by name) + if (!parseResidentNames(mod, fp)) { + goto done; + } + + // Step 4: Parse entry table (export ordinals -> segment:offset) + if (!parseEntryTable(mod, fp)) { + goto done; + } + + // Step 5: Load segments, apply relocations + if (!loadSegments(mod, fp, resolver)) { + goto done; + } + + // Resolve auto data segment selector + if (mod->neHeader.autoDataSegIndex > 0 && + mod->neHeader.autoDataSegIndex <= mod->segmentCount) { + mod->autoDataSel = mod->segments[mod->neHeader.autoDataSegIndex - 1].selector; + } + + mod->loaded = true; + ok = true; + +done: + fclose(fp); + if (!ok) { + neUnloadModule(mod); + } + return ok; +} + + +void neUnloadModule(NeModuleT *mod) +{ + for (uint16_t i = 0; i < mod->segmentCount; i++) { + freeSegment(&mod->segments[i]); + } + mod->segmentCount = 0; + mod->loaded = false; +} + + +bool neLookupExport(const NeModuleT *mod, uint16_t ordinal, uint16_t *seg, uint16_t *off, uint16_t *sel) +{ + if (ordinal == 0 || ordinal >= NE_MAX_EXPORTS) { + return false; + } + if (mod->exports[ordinal].segIndex == 0) { + return false; + } + + ExportEntryT *e = (ExportEntryT *)&mod->exports[ordinal]; + uint16_t sIdx = e->segIndex - 1; + + if (sIdx >= mod->segmentCount) { + return false; + } + + if (seg) { + *seg = e->segIndex; + } + if (off) { + *off = e->offset; + } + if (sel) { + *sel = 
mod->segments[sIdx].selector; + } + return true; +} + + +uint16_t neLookupExportByName(const NeModuleT *mod, const char *name, const char *filePath) +{ + // We need to re-read the resident name table from the file to search by name. + // The first entry is the module name; subsequent entries are exports. + FILE *fp = fopen(filePath, "rb"); + if (!fp) { + return 0; + } + + uint32_t tableOff = mod->neHeaderFileOffset + mod->neHeader.resNameTableOffset; + if (fseek(fp, tableOff, SEEK_SET) != 0) { + // Without a valid position the loop below would parse unrelated bytes. + fclose(fp); + return 0; + } + + uint16_t ordinal = 0; + while (1) { + uint8_t nameLen; + if (fread(&nameLen, 1, 1, fp) != 1 || nameLen == 0) { + break; + } + + char entryName[256]; + if (fread(entryName, 1, nameLen, fp) != nameLen) { + break; + } + entryName[nameLen] = '\0'; + + uint16_t ord; + if (fread(&ord, 2, 1, fp) != 1) { + break; + } + + if (strcasecmp(entryName, name) == 0) { + ordinal = ord; + break; + } + } + + fclose(fp); + return ordinal; +} + + +void neDumpModule(const NeModuleT *mod) +{ + logErr("=== NE Module: %s ===\n", mod->moduleName); + logErr(" Segments: %u\n", mod->segmentCount); + logErr(" Auto data segment: %u\n", mod->neHeader.autoDataSegIndex); + logErr(" Module flags: 0x%04X", mod->neHeader.moduleFlags); + if (mod->neHeader.moduleFlags & NE_FFLAGS_DLL) { + logErr(" (DLL)"); + } + logErr("\n"); + logErr(" Target OS: 0x%02X\n", mod->neHeader.targetOS); + logErr(" Expected Windows version: %u.%u\n", + mod->neHeader.expectedWinVer >> 8, + mod->neHeader.expectedWinVer & 0xFF); + + logErr(" Segments:\n"); + for (uint16_t i = 0; i < mod->segmentCount; i++) { + LoadedSegT *s = (LoadedSegT *)&mod->segments[i]; + logErr(" [%u] %s sel=0x%04X base=0x%08" PRIX32 " size=%" PRIu32, + i + 1, + s->isCode ? 
"CODE" : "DATA", + s->selector, + s->linearAddr, + s->size); + if (s->flags & NE_SEGF_PRELOAD) { + logErr(" PRELOAD"); + } + if (s->flags & NE_SEGF_MOVEABLE) { + logErr(" MOVEABLE"); + } + logErr("\n"); + } + + logErr(" Module references:\n"); + for (uint16_t i = 0; i < mod->modRefCount; i++) { + logErr(" [%u] %s\n", i + 1, mod->modRefNames[i]); + } + + logErr(" Exports:\n"); + for (uint16_t i = 1; i < NE_MAX_EXPORTS; i++) { + if (mod->exports[i].segIndex != 0) { + logErr(" ord %u -> seg %u : 0x%04X (sel 0x%04X)\n", + i, + mod->exports[i].segIndex, + mod->exports[i].offset, + mod->exports[i].selector); + } + } +} + + +// ============================================================================ +// Internal implementation +// ============================================================================ + +// Read and validate the MZ and NE headers from fp, filling mod->neHeader, +// mod->neHeaderFileOffset, mod->segmentCount and mod->sectorAlignShift. +// Returns false (after logging) on a short read, a failed seek, a bad +// signature, too many segments, or an out-of-range sector alignment shift. +static bool readHeaders(NeModuleT *mod, FILE *fp) +{ + // Read MZ header + MzHeaderT mz; + if (fread(&mz, sizeof(mz), 1, fp) != 1) { + logErr("neload: failed to read MZ header\n"); + return false; + } + if (mz.signature != MZ_SIGNATURE) { + logErr("neload: not an MZ executable (sig=0x%04X)\n", mz.signature); + return false; + } + + // Read NE header + mod->neHeaderFileOffset = mz.neHeaderOffset; + if (fseek(fp, mz.neHeaderOffset, SEEK_SET) != 0) { + logErr("neload: failed to seek to NE header\n"); + return false; + } + + if (fread(&mod->neHeader, sizeof(NeHeaderT), 1, fp) != 1) { + logErr("neload: failed to read NE header\n"); + return false; + } + if (mod->neHeader.signature != NE_SIGNATURE) { + logErr("neload: not an NE executable (sig=0x%04X)\n", + mod->neHeader.signature); + return false; + } + + // Validate + if (mod->neHeader.segmentCount > NE_MAX_SEGMENTS) { + logErr("neload: too many segments (%u, max %u)\n", + mod->neHeader.segmentCount, NE_MAX_SEGMENTS); + return false; + } + + mod->segmentCount = mod->neHeader.segmentCount; + mod->sectorAlignShift = mod->neHeader.sectorAlignShift; + if (mod->sectorAlignShift == 0) { + mod->sectorAlignShift = 9; // Default: 512-byte sectors + } + if (mod->sectorAlignShift > 16) { + // fileSectorOffset is 16 bits wide; shifting it by more than 16 overflows + // the 32-bit file offset, and a shift count >= 32 is undefined behaviour. + logErr("neload: invalid sector alignment shift (%u)\n", mod->sectorAlignShift); + return false; + } + + dbgPrint("neload: NE header at 0x%08" PRIX32 ", %u 
segments, %u align shift\n", + mod->neHeaderFileOffset, mod->segmentCount, mod->sectorAlignShift); + + return true; +} + + +static bool parseModuleReferences(NeModuleT *mod, FILE *fp) +{ + if (mod->neHeader.moduleRefCount == 0) { + return true; + } + + mod->modRefCount = mod->neHeader.moduleRefCount; + if (mod->modRefCount > NE_MAX_MODREFS) { + logErr("neload: too many module references (%u, max %u)\n", + mod->modRefCount, NE_MAX_MODREFS); + return false; + } + + // Read module reference table (array of offsets into imported name table) + uint32_t modRefTableOff = mod->neHeaderFileOffset + mod->neHeader.modRefTableOffset; + fseek(fp, modRefTableOff, SEEK_SET); + + uint16_t nameOffsets[NE_MAX_MODREFS]; + if (fread(nameOffsets, 2, mod->modRefCount, fp) != mod->modRefCount) { + logErr("neload: failed to read module reference table\n"); + return false; + } + + // Resolve each offset to a name from the imported name table + uint32_t importNameTableOff = mod->neHeaderFileOffset + mod->neHeader.importNameTableOffset; + + for (uint16_t i = 0; i < mod->modRefCount; i++) { + uint32_t nameOff = importNameTableOff + nameOffsets[i]; + fseek(fp, nameOff, SEEK_SET); + + uint8_t nameLen; + if (fread(&nameLen, 1, 1, fp) != 1) { + logErr("neload: failed to read module name length\n"); + return false; + } + + if (nameLen > 31) { + nameLen = 31; + } + if (fread(mod->modRefNames[i], 1, nameLen, fp) != nameLen) { + logErr("neload: failed to read module name\n"); + return false; + } + mod->modRefNames[i][nameLen] = '\0'; + + dbgPrint("neload: module ref [%u] = '%s'\n", i + 1, mod->modRefNames[i]); + } + + return true; +} + + +static bool parseResidentNames(NeModuleT *mod, FILE *fp) +{ + uint32_t tableOff = mod->neHeaderFileOffset + mod->neHeader.resNameTableOffset; + fseek(fp, tableOff, SEEK_SET); + + bool firstEntry = true; + while (1) { + uint8_t nameLen; + if (fread(&nameLen, 1, 1, fp) != 1 || nameLen == 0) { + break; + } + + char name[256]; + if (fread(name, 1, nameLen, fp) != 
nameLen) { + break; + } + name[nameLen] = '\0'; + + uint16_t ordinal; + if (fread(&ordinal, 2, 1, fp) != 1) { + break; + } + + if (firstEntry) { + // First entry is the module name (ordinal 0) + // strncpy stops at the terminator, so no uninitialized bytes of name[] are copied + strncpy(mod->moduleName, name, sizeof(mod->moduleName) - 1); + mod->moduleName[sizeof(mod->moduleName) - 1] = '\0'; + dbgPrint("neload: module name = '%s'\n", mod->moduleName); + firstEntry = false; + } else { + dbgPrint("neload: resident name '%s' = ordinal %u\n", name, ordinal); + } + } + + return true; +} + + +// Walk the entry table bundles and record each entry's segment index, offset +// and flags in mod->exports[], indexed by ordinal. Ordinals at or beyond +// NE_MAX_EXPORTS are logged and skipped. Returns false on a short read or +// failed seek inside a bundle. +static bool parseEntryTable(NeModuleT *mod, FILE *fp) +{ + uint32_t tableOff = mod->neHeaderFileOffset + mod->neHeader.entryTableOffset; + uint32_t tableEnd = tableOff + mod->neHeader.entryTableSize; + + fseek(fp, tableOff, SEEK_SET); + + uint16_t currentOrdinal = 1; + + while (ftell(fp) < (long)tableEnd) { + uint8_t bundleCount; + uint8_t indicator; + + if (fread(&bundleCount, 1, 1, fp) != 1 || bundleCount == 0) { + break; // End of entry table + } + if (fread(&indicator, 1, 1, fp) != 1) { + break; + } + + if (indicator == 0x00) { + // Empty bundle - skip these ordinals + currentOrdinal += bundleCount; + continue; + } + + for (uint8_t i = 0; i < bundleCount; i++) { + if (currentOrdinal >= NE_MAX_EXPORTS) { + logErr("neload: export ordinal %u exceeds max\n", currentOrdinal); + // Still consume this entry's bytes; otherwise the file position + // desynchronizes and later bundle headers are read from entry data. + long skip = (indicator == 0xFF) ? (long)sizeof(NeMoveableEntryT) + : (long)sizeof(NeFixedEntryT); + if (fseek(fp, skip, SEEK_CUR) != 0) { + return false; + } + currentOrdinal++; + continue; + } + + if (indicator == 0xFF) { + // Moveable segment entry + NeMoveableEntryT entry; + if (fread(&entry, sizeof(entry), 1, fp) != 1) { + return false; + } + mod->exports[currentOrdinal].segIndex = entry.segIndex; + mod->exports[currentOrdinal].offset = entry.offset; + mod->exports[currentOrdinal].flags = entry.flags; + mod->exportCount++; + + dbgPrint("neload: entry ord %u -> seg %u : 0x%04X (moveable)\n", + currentOrdinal, entry.segIndex, entry.offset); + } else { + // Fixed segment entry (indicator = segment number) + NeFixedEntryT entry; + if (fread(&entry, sizeof(entry), 1, fp) != 1) { + return false; + } + mod->exports[currentOrdinal].segIndex = indicator; 
+ mod->exports[currentOrdinal].offset = entry.offset; + mod->exports[currentOrdinal].flags = entry.flags; + mod->exportCount++; + + dbgPrint("neload: entry ord %u -> seg %u : 0x%04X (fixed)\n", + currentOrdinal, indicator, entry.offset); + } + + currentOrdinal++; + } + } + + dbgPrint("neload: %u export entries parsed\n", mod->exportCount); + return true; +} + + +static bool loadSegments(NeModuleT *mod, FILE *fp, ImportResolverT resolver) +{ + // Read segment table + uint32_t segTableOff = mod->neHeaderFileOffset + mod->neHeader.segmentTableOffset; + fseek(fp, segTableOff, SEEK_SET); + + NeSegEntryT segTable[NE_MAX_SEGMENTS]; + if (fread(segTable, sizeof(NeSegEntryT), mod->segmentCount, fp) != mod->segmentCount) { + logErr("neload: failed to read segment table\n"); + return false; + } + + // Load each segment + for (uint16_t i = 0; i < mod->segmentCount; i++) { + NeSegEntryT *se = &segTable[i]; + bool isCode = !(se->flags & NE_SEGF_DATA); + + // Determine segment size + uint32_t fileLen = se->fileLength; + if (fileLen == 0 && se->fileSectorOffset != 0) { + fileLen = 0x10000; // 64K + } + + uint32_t allocSize = se->minAllocSize; + if (allocSize == 0) { + allocSize = 0x10000; // 64K + } + if (allocSize < fileLen) { + allocSize = fileLen; + } + + // Allocate memory and create descriptor + if (!allocateSegment(mod, i, allocSize, isCode)) { + logErr("neload: failed to allocate segment %u\n", i + 1); + return false; + } + + mod->segments[i].flags = se->flags; + mod->segments[i].fileSize = fileLen; + + // Load data from file + if (se->fileSectorOffset != 0 && fileLen > 0) { + uint32_t fileOffset = (uint32_t)se->fileSectorOffset << mod->sectorAlignShift; + if (!loadSegmentData(mod, i, fp, fileOffset, fileLen)) { + logErr("neload: failed to load segment %u data\n", i + 1); + return false; + } + } + + dbgPrint("neload: loaded seg %u: %s size=%" PRIu32 " filelen=%" PRIu32 " sel=0x%04X base=0x%08" PRIX32 "\n", + i + 1, isCode ? 
"CODE" : "DATA", + allocSize, fileLen, + mod->segments[i].selector, + mod->segments[i].linearAddr); + } + + // Resolve export selectors now that segments are loaded + for (uint16_t i = 1; i < NE_MAX_EXPORTS; i++) { + if (mod->exports[i].segIndex > 0 && + mod->exports[i].segIndex <= mod->segmentCount) { + mod->exports[i].selector = mod->segments[mod->exports[i].segIndex - 1].selector; + } + } + + // Process relocations for each segment + for (uint16_t i = 0; i < mod->segmentCount; i++) { + if (segTable[i].flags & NE_SEGF_HASRELOC) { + if (!processRelocations(mod, i, fp, resolver)) { + logErr("neload: relocation failed for segment %u\n", i + 1); + return false; + } + } + } + + return true; +} + + +static bool allocateSegment(NeModuleT *mod, int segIdx, uint32_t size, bool isCode) +{ + LoadedSegT *seg = &mod->segments[segIdx]; + + // Allocate memory using DJGPP's malloc (extended memory, flat model) + // The memory is accessible via the flat DS selector, but we also need + // a 16-bit selector pointing to it. + uint8_t *mem = (uint8_t *)calloc(1, size); + if (!mem) { + logErr("neload: failed to allocate %" PRIu32 " bytes for segment %u\n", size, segIdx + 1); + return false; + } + + // In DJGPP, pointer values are offsets from the DS base. + // The true linear address = pointer + __djgpp_base_address. + // We store the pointer value (for C access) but use the linear + // address when setting up the LDT descriptor base. 
+ uint32_t ptrVal = (uint32_t)mem; + uint32_t linearAddr = ptrVal + __djgpp_base_address; + + // Create a 16-bit LDT descriptor for this segment + uint16_t sel = makeDescriptor16(linearAddr, size - 1, isCode); + if (sel == 0) { + free(mem); + return false; + } + + seg->linearAddr = ptrVal; // Store DJGPP pointer (for C access via cast) + seg->selector = sel; + seg->size = size; + seg->isCode = isCode; + + return true; +} + + +static uint16_t makeDescriptor16(uint32_t base, uint32_t limit, bool isCode) +{ + int sel = __dpmi_allocate_ldt_descriptors(1); + if (sel < 0) { + logErr("neload: failed to allocate LDT descriptor\n"); + return 0; + } + + if (__dpmi_set_segment_base_address(sel, base) < 0) { + logErr("neload: failed to set segment base\n"); + __dpmi_free_ldt_descriptor(sel); + return 0; + } + + if (__dpmi_set_segment_limit(sel, limit) < 0) { + logErr("neload: failed to set segment limit\n"); + __dpmi_free_ldt_descriptor(sel); + return 0; + } + + // Set access rights for 16-bit segment + // Code: present, DPL 3, code, readable, non-conforming = 0xFA + // Data: present, DPL 3, data, writable = 0xF2 + // High byte: G=0, D=0 (16-bit), 0, AVL=0 = 0x00 + uint16_t rights = isCode ? 
0x00FA : 0x00F2; + if (__dpmi_set_descriptor_access_rights(sel, rights) < 0) { + logErr("neload: failed to set access rights (0x%04X)\n", rights); + __dpmi_free_ldt_descriptor(sel); + return 0; + } + + return (uint16_t)sel; +} + + +static bool loadSegmentData(NeModuleT *mod, int segIdx, FILE *fp, uint32_t fileOffset, uint32_t fileSize) +{ + LoadedSegT *seg = &mod->segments[segIdx]; + + fseek(fp, fileOffset, SEEK_SET); + + // Read directly into the allocated memory (flat model, so pointer works) + uint8_t *dest = (uint8_t *)seg->linearAddr; + if (fread(dest, 1, fileSize, fp) != fileSize) { + logErr("neload: short read loading segment %u data\n", segIdx + 1); + return false; + } + + return true; +} + + +static bool processRelocations(NeModuleT *mod, int segIdx, FILE *fp, ImportResolverT resolver) +{ + LoadedSegT *seg = &mod->segments[segIdx]; + + // Relocation data follows the segment data in the file. + // The segment table entry gives us the file offset; relocation data + // starts at fileOffset + fileLength. 
+ uint32_t segTableOff = mod->neHeaderFileOffset + mod->neHeader.segmentTableOffset; + fseek(fp, segTableOff + segIdx * sizeof(NeSegEntryT), SEEK_SET); + + NeSegEntryT se; + if (fread(&se, sizeof(se), 1, fp) != 1) { + return false; + } + + uint32_t fileOffset = (uint32_t)se.fileSectorOffset << mod->sectorAlignShift; + uint32_t fileLen = se.fileLength; + if (fileLen == 0 && se.fileSectorOffset != 0) { + fileLen = 0x10000; + } + + // Relocation records follow the segment data + uint32_t relocOff = fileOffset + fileLen; + fseek(fp, relocOff, SEEK_SET); + + // First word is the count of relocation records + uint16_t relocCount; + if (fread(&relocCount, 2, 1, fp) != 1) { + return false; + } + + dbgPrint("neload: segment %u has %u relocations\n", segIdx + 1, relocCount); + + uint8_t *segData = (uint8_t *)seg->linearAddr; + + for (uint16_t i = 0; i < relocCount; i++) { + NeRelocT rec; + if (fread(&rec, sizeof(rec), 1, fp) != 1) { + logErr("neload: failed to read relocation %u/%u\n", i + 1, relocCount); + return false; + } + + uint8_t targetType = rec.flags & NE_RELF_TARGET_MASK; + bool additive = (rec.flags & NE_RELF_ADDITIVE) != 0; + + // Resolve the target address + uint16_t targetSel = 0; + uint16_t targetOff = 0; + bool resolved = false; + + switch (targetType) { + case NE_RELF_INTERNALREF: { + // Internal reference: target1 = segment index (1-based) + // For moveable segments, target2 is an entry table ordinal. + // For fixed segments, target2 is the offset. 
+ uint16_t tSegIdx = rec.target1; + if (rec.target1 == 0xFF) { + // Moveable reference via entry table + uint16_t ordinal = rec.target2; + if (ordinal > 0 && ordinal < NE_MAX_EXPORTS && + mod->exports[ordinal].segIndex > 0) { + tSegIdx = mod->exports[ordinal].segIndex; + targetOff = mod->exports[ordinal].offset; + targetSel = mod->segments[tSegIdx - 1].selector; + resolved = true; + } + } else if (tSegIdx > 0 && tSegIdx <= mod->segmentCount) { + targetSel = mod->segments[tSegIdx - 1].selector; + targetOff = rec.target2; + resolved = true; + } + break; + } + + case NE_RELF_IMPORTORD: { + // Import by ordinal: target1 = module ref index (1-based) + // target2 = ordinal number + uint16_t modIdx = rec.target1; + uint16_t ordinal = rec.target2; + + if (modIdx > 0 && modIdx <= mod->modRefCount && resolver) { + FarPtr16T addr = resolver(mod->modRefNames[modIdx - 1], ordinal, NULL); + if (addr.segment != 0 || addr.offset != 0) { + targetSel = addr.segment; + targetOff = addr.offset; + resolved = true; + } + dbgPrint("neload: RELOC seg %u off 0x%04X srcType=%u: %s.%u -> %04X:%04X\n", + segIdx + 1, rec.srcOffset, rec.srcType, + mod->modRefNames[modIdx - 1], ordinal, + targetSel, targetOff); + } + + if (!resolved) { + dbgPrint("neload: UNRESOLVED import %s.%u in seg %u at 0x%04X\n", + (modIdx > 0 && modIdx <= mod->modRefCount) ? 
+ mod->modRefNames[modIdx - 1] : "???", + ordinal, segIdx + 1, rec.srcOffset); + // Patch in a dummy value (INT 3 / breakpoint) + targetSel = mod->segments[0].selector; // Point to first code seg + targetOff = 0; + resolved = true; + } + break; + } + + case NE_RELF_IMPORTNAME: { + // Import by name: target1 = module ref index (1-based) + // target2 = offset into imported names table + uint16_t modIdx = rec.target1; + uint16_t nameOff16 = rec.target2; + + // Read the function name from the imported names table + char funcName[64] = ""; + uint32_t importNameTableOff = mod->neHeaderFileOffset + + mod->neHeader.importNameTableOffset; + long savedPos = ftell(fp); + fseek(fp, importNameTableOff + nameOff16, SEEK_SET); + + uint8_t nameLen; + if (fread(&nameLen, 1, 1, fp) == 1 && nameLen < 64) { + fread(funcName, 1, nameLen, fp); + funcName[nameLen] = '\0'; + } + fseek(fp, savedPos, SEEK_SET); + + if (modIdx > 0 && modIdx <= mod->modRefCount && resolver) { + FarPtr16T addr = resolver(mod->modRefNames[modIdx - 1], 0, funcName); + if (addr.segment != 0 || addr.offset != 0) { + targetSel = addr.segment; + targetOff = addr.offset; + resolved = true; + } + } + + if (!resolved) { + dbgPrint("neload: UNRESOLVED import %s.%s in seg %u at 0x%04X\n", + (modIdx > 0 && modIdx <= mod->modRefCount) ? + mod->modRefNames[modIdx - 1] : "???", + funcName, segIdx + 1, rec.srcOffset); + targetSel = mod->segments[0].selector; + targetOff = 0; + resolved = true; + } + break; + } + + case NE_RELF_OSFIXUP: { + // OS fixup (floating point emulation, etc.) + // target1 = fixup type (1=FIARQQ, 2=FJARQQ, etc.) + // We just patch in NOPs or a far return. 
+ targetSel = mod->segments[0].selector; + targetOff = 0; + resolved = true; + dbgPrint("neload: OS fixup type %u at seg %u offset 0x%04X\n", + rec.target1, segIdx + 1, rec.srcOffset); + break; + } + } + + if (!resolved) { + logErr("neload: failed to resolve reloc in seg %u at 0x%04X\n", + segIdx + 1, rec.srcOffset); + continue; + } + + // Apply the fixup to the segment data. + // NE relocations can be chained: the word at srcOffset contains + // the offset of the next fixup location (forming a linked list), + // UNLESS the relocation is additive. + uint32_t fixupOff = rec.srcOffset; + + if (additive) { + // Single fixup, add to existing value + switch (rec.srcType) { + case NE_RELOC_LOBYTE: + if (fixupOff < seg->size) { + segData[fixupOff] += (uint8_t)targetOff; + } + break; + case NE_RELOC_OFFSET: + if (fixupOff + 1 < seg->size) { + uint16_t *p = (uint16_t *)(segData + fixupOff); + *p += targetOff; + } + break; + case NE_RELOC_SEGMENT: + if (fixupOff + 1 < seg->size) { + uint16_t *p = (uint16_t *)(segData + fixupOff); + *p += targetSel; + } + break; + case NE_RELOC_FAR_ADDR: + if (fixupOff + 3 < seg->size) { + uint16_t *pOff = (uint16_t *)(segData + fixupOff); + uint16_t *pSeg = (uint16_t *)(segData + fixupOff + 2); + *pOff += targetOff; + *pSeg += targetSel; + } + break; + case NE_RELOC_OFFSET32: + if (fixupOff + 3 < seg->size) { + uint32_t *p = (uint32_t *)(segData + fixupOff); + *p += ((uint32_t)targetSel << 16) | targetOff; + } + break; + } + } else { + // Chained fixups: follow the linked list + int chainLimit = 4096; // Safety limit + int chainCount = 0; + while (fixupOff != 0xFFFF && chainLimit-- > 0) { + if (fixupOff + 1 >= seg->size) { + break; + } + + uint16_t nextOff; + chainCount++; + + switch (rec.srcType) { + case NE_RELOC_LOBYTE: + if (fixupOff < seg->size) { + nextOff = segData[fixupOff]; // Next in chain + segData[fixupOff] = (uint8_t)targetOff; + } else { + nextOff = 0xFFFF; + } + break; + + case NE_RELOC_OFFSET: { + uint16_t *p = (uint16_t 
*)(segData + fixupOff); + nextOff = *p; + *p = targetOff; + break; + } + + case NE_RELOC_SEGMENT: { + uint16_t *p = (uint16_t *)(segData + fixupOff); + nextOff = *p; + *p = targetSel; + break; + } + + case NE_RELOC_FAR_ADDR: { + if (fixupOff + 3 >= seg->size) { + nextOff = 0xFFFF; + break; + } + uint16_t *pOff = (uint16_t *)(segData + fixupOff); + uint16_t *pSeg = (uint16_t *)(segData + fixupOff + 2); + nextOff = *pOff; // Chain is in offset field + *pOff = targetOff; + *pSeg = targetSel; + break; + } + + case NE_RELOC_OFFSET32: { + if (fixupOff + 3 >= seg->size) { + nextOff = 0xFFFF; + break; + } + uint16_t *p16 = (uint16_t *)(segData + fixupOff); + nextOff = *p16; + uint32_t *p32 = (uint32_t *)(segData + fixupOff); + *p32 = ((uint32_t)targetSel << 16) | targetOff; + break; + } + + default: + dbgPrint("neload: unknown reloc srcType 0x%02X\n", rec.srcType); + nextOff = 0xFFFF; + break; + } + + fixupOff = nextOff; + } + + if (chainCount > 1) { + dbgPrint("neload: chain: %d links patched\n", chainCount); + } + } + } + + return true; +} + + +static void freeSegment(LoadedSegT *seg) +{ + if (seg->selector != 0) { + __dpmi_free_ldt_descriptor(seg->selector); + seg->selector = 0; + } + if (seg->linearAddr != 0) { + free((void *)seg->linearAddr); + seg->linearAddr = 0; + } + seg->size = 0; +} + + +// Grow segment segIdx (0-based) by extraSize zeroed bytes, updating its LDT +// descriptor base and limit. *oldSizeOut (if non-NULL) receives the previous +// size. Returns false if segIdx is out of range, the result would exceed 64K, +// realloc fails, or the descriptor cannot be updated. +bool neExtendSegment(NeModuleT *mod, int segIdx, uint32_t extraSize, uint32_t *oldSizeOut) +{ + if (segIdx < 0 || segIdx >= mod->segmentCount) { + return false; + } + + LoadedSegT *seg = &mod->segments[segIdx]; + uint32_t oldSize = seg->size; + uint32_t newSize = oldSize + extraSize; + + // 16-bit segments are limited to 64K. extraSize is tested on its own as + // well because oldSize + extraSize can wrap around 32 bits and look small. + if (extraSize > 0x10000 || newSize > 0x10000) { + logErr("neload: cannot extend segment %d beyond 64K (old=%" PRIu32 " extra=%" PRIu32 ")\n", + segIdx + 1, oldSize, extraSize); + return false; + } + + uint8_t *newMem = (uint8_t *)realloc((void *)seg->linearAddr, newSize); + if (!newMem) { + logErr("neload: realloc failed extending segment %d\n", segIdx + 1); + return false; + } 
+ + // Zero the new space + memset(newMem + oldSize, 0, extraSize); + + uint32_t newPtrVal = (uint32_t)newMem; + uint32_t newLinAddr = newPtrVal + __djgpp_base_address; + + seg->linearAddr = newPtrVal; + seg->size = newSize; + + // Update LDT descriptor base (may have moved) and limit. A failure here may + // leave the selector describing the old block, so report it to the caller. + if (__dpmi_set_segment_base_address(seg->selector, newLinAddr) < 0 || + __dpmi_set_segment_limit(seg->selector, newSize - 1) < 0) { + logErr("neload: failed to update descriptor extending segment %d\n", segIdx + 1); + return false; + } + + if (oldSizeOut) { + *oldSizeOut = oldSize; + } + + return true; +} + + +// Enable debug output for the NE loader +void neSetDebug(bool enable) +{ + gDebug = enable; +} diff --git a/win31drv/neload.h b/win31drv/neload.h new file mode 100644 index 0000000..bc93574 --- /dev/null +++ b/win31drv/neload.h @@ -0,0 +1,115 @@ +#ifndef NELOAD_H +#define NELOAD_H + +#include +#include +#include +#include "neformat.h" +#include "wintypes.h" + +// ============================================================================ +// Loaded segment descriptor +// ============================================================================ + +typedef struct { + uint32_t linearAddr; // DJGPP pointer value of segment data (add __djgpp_base_address for the true linear address) + uint16_t selector; // DPMI selector for this segment + uint16_t flags; // Original NE segment flags + uint32_t size; // Actual size in memory (from minAllocSize or fileLength) + uint32_t fileSize; // Size of data in the file (0 if no file data) + bool isCode; // true = code segment, false = data segment +} LoadedSegT; + +// ============================================================================ +// Resolved export entry +// ============================================================================ + +typedef struct { + uint16_t segIndex; // 1-based segment index + uint16_t offset; // Offset within segment + uint16_t selector; // DPMI selector (resolved at load time) + uint8_t flags; // Entry flags +} ExportEntryT; + +// ============================================================================ +// Import resolution callback +// +// Called by the NE 
loader when it encounters an imported reference. +// The callback should return the far pointer (selector:offset) that the +// import should resolve to. Return FARPTR16_NULL if the import cannot +// be resolved (the loader will log a warning and patch in a stub). +// ============================================================================ + +typedef FarPtr16T (*ImportResolverT)(const char *moduleName, uint16_t ordinal, const char *funcName); + +// ============================================================================ +// Loaded NE module +// ============================================================================ + +#define NE_MAX_SEGMENTS 64 +#define NE_MAX_EXPORTS 2048 +#define NE_MAX_MODREFS 16 + +typedef struct { + // Header info + NeHeaderT neHeader; + uint32_t neHeaderFileOffset; // File offset of NE header + uint16_t sectorAlignShift; // Sector alignment + + // Module name (from resident name table) + char moduleName[64]; + + // Segments + uint16_t segmentCount; + LoadedSegT segments[NE_MAX_SEGMENTS]; + + // Exports (indexed by ordinal) + uint16_t exportCount; + ExportEntryT exports[NE_MAX_EXPORTS]; + + // Module references (imported module names) + uint16_t modRefCount; + char modRefNames[NE_MAX_MODREFS][32]; + + // Auto data segment selector (DGROUP) + uint16_t autoDataSel; + + // DOS memory block (for conventional memory allocations) + int dosMemSeg; // DOS memory segment (real mode) + int dosMemSel; // DOS memory selector (PM) + uint32_t dosMemSize; // Size in bytes + + // Is the module valid/loaded? + bool loaded; +} NeModuleT; + +// ============================================================================ +// NE loader functions +// ============================================================================ + +// Load a NE executable from a file. +// importResolver is called for each imported reference. +// Returns true on success. 
+bool neLoadModule(NeModuleT *mod, const char *filePath, ImportResolverT resolver); + +// Unload a previously loaded module, freeing all segments and selectors. +void neUnloadModule(NeModuleT *mod); + +// Look up an export by ordinal number. +// Returns true and fills seg/off/sel if found. +bool neLookupExport(const NeModuleT *mod, uint16_t ordinal, uint16_t *seg, uint16_t *off, uint16_t *sel); + +// Look up an export by name (searches the resident name table in the loaded file). +// Returns the ordinal if found, 0 if not found. +uint16_t neLookupExportByName(const NeModuleT *mod, const char *name, const char *filePath); + +// Extend a loaded segment by extraSize bytes. +// The new space is zeroed. *oldSizeOut receives the original size +// (i.e., the offset of the new area within the segment). +// Returns true on success. Fails if the result would exceed 64K. +bool neExtendSegment(NeModuleT *mod, int segIdx, uint32_t extraSize, uint32_t *oldSizeOut); + +// Debug: dump module information to stderr. +void neDumpModule(const NeModuleT *mod); + +#endif // NELOAD_H diff --git a/win31drv/thunk.c b/win31drv/thunk.c new file mode 100644 index 0000000..e0413a2 --- /dev/null +++ b/win31drv/thunk.c @@ -0,0 +1,1110 @@ +// ============================================================================ +// thunk.c - 32-bit to 16-bit protected mode thunking layer +// +// This module provides the mechanism for DJGPP 32-bit code to call into +// 16-bit Windows driver code. It uses DPMI to create 16-bit code, data, +// and stack segments, and installs a small relay thunk in the 16-bit code +// segment that handles the 32/16-bit transition. +// +// Architecture: +// The 32-bit caller writes parameters to a shared data area in DOS +// memory, writes configuration (stack and data segment selectors) to +// the relay's CS-relative data area, then does a far call (lcall) to +// the 16-bit relay. 
+// +// The relay code (running in 16-bit mode but with the caller's 32-bit +// SS still active, since lcall doesn't change SS) performs: +// 1. Saves DS and the 32-bit return address +// 2. Saves SS:ESP (32-bit values via operand-size prefixes) +// 3. Loads DS from its config area to point to the shared data +// 4. Switches SS:SP to a dedicated 16-bit stack +// 5. Pushes Pascal-convention parameters from DS onto the 16-bit stack +// 6. Far-calls the target driver function +// 7. Saves DX:AX return value +// 8. Restores SS:ESP to the caller's 32-bit stack +// 9. Restores DS to the caller's flat data segment +// 10. Pushes the 32-bit return address back onto the 32-bit stack +// 11. Does an operand-size-prefixed retf to return to 32-bit code +// +// Key insight: When the 32-bit lcall transfers to the 16-bit relay, +// SS is unchanged (same-privilege far call). DJGPP's SS has B=1 +// (32-bit stack), so ESP is used for all stack operations even in +// 16-bit code. This lets the relay safely save/restore the full +// 32-bit ESP before switching to the 16-bit driver stack. +// +// For 16-to-32 callbacks (Windows API stubs called by the driver), small +// 16-bit stubs use a software interrupt to transfer control to a 32-bit +// DPMI handler that dispatches to registered C callback functions. +// BX is saved/restored around the INT because the stub uses BX to pass +// the slot index, and the driver may depend on BX being preserved across +// the far call (as per Pascal calling convention: BX is not callee-saved, +// but the Windows KERNEL implementations happen to preserve it, and +// driver code like BBLT.ASM depends on this). 
+// ============================================================================ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "thunk.h" +#include "log.h" +#include + +// Forward declarations +static bool installRelayCode(ThunkContextT *ctx); +static uint16_t allocDescriptor16(uint32_t base, uint32_t limit, bool isCode); + +// ============================================================================ +// 16-bit relay thunk machine code +// +// This code runs in a 16-bit code segment (CS D=0). On entry, SS is the +// caller's 32-bit flat data segment (SS B=1), so stack operations use +// the full 32-bit ESP. +// +// DS-relative layout (ThunkDataT shared data, base = dataSegBase): +// [DS:0x00] = target function offset (WORD) +// [DS:0x02] = target function segment selector (WORD) +// [DS:0x04] = parameter count in 16-bit words (WORD) +// [DS:0x06+] = parameters (params[0] pushed first = leftmost param) +// +// CS-relative data area (at RELAY_DATA_START, written by 32-bit caller): +// Configuration (set before each call): +// stack16_ss - 16-bit stack segment selector +// stack16_sp - initial SP value (top of stack) +// ds16 - data segment selector for ThunkDataT +// Scratch (used by relay during execution): +// saved_eip_{lo,hi}, saved_cs_{lo,hi} - 32-bit return address +// saved_ss, saved_esp - caller's SS:ESP +// saved_ds - caller's DS +// retval_ax, retval_dx - driver return value +// ============================================================================ + +#define RELAY_DATA_START 0xC0 + +// Scratch area (used during relay execution) +#define RELAY_SAVED_EIP_LO (RELAY_DATA_START + 0) // 0xC0 +#define RELAY_SAVED_EIP_HI (RELAY_DATA_START + 2) // 0xC2 +#define RELAY_SAVED_CS_LO (RELAY_DATA_START + 4) // 0xC4 +#define RELAY_SAVED_CS_HI (RELAY_DATA_START + 6) // 0xC6 +#define RELAY_SAVED_SS (RELAY_DATA_START + 8) // 0xC8 +#define RELAY_SAVED_ESP (RELAY_DATA_START + 10) // 0xCA (4 bytes) +#define 
RELAY_SAVED_DS (RELAY_DATA_START + 14) // 0xCE + +// Per-call configuration (written by 32-bit caller before lcall) +#define RELAY_STACK16_SS (RELAY_DATA_START + 16) // 0xD0 +#define RELAY_STACK16_SP (RELAY_DATA_START + 18) // 0xD2 +#define RELAY_DS16 (RELAY_DATA_START + 20) // 0xD4 +#define RELAY_DS_DGROUP (RELAY_DATA_START + 22) // 0xD6 +#define RELAY_TARGET_ADDR (RELAY_DATA_START + 24) // 0xD8 (4 bytes: off + seg) + +// Return value storage +#define RELAY_RETVAL_AX (RELAY_DATA_START + 28) // 0xDC +#define RELAY_RETVAL_DX (RELAY_DATA_START + 30) // 0xDE + +// Writable data alias selector (code segments are read-only in PM; +// this selector has the same base but is a writable data segment). +// Set once by installRelayCode. +#define RELAY_SCRATCH_SEL (RELAY_DATA_START + 32) // 0xE0 +#define RELAY_SAVED_EBP (RELAY_DATA_START + 34) // 0xE2 (4 bytes) + +#define RELAY_DATA_SIZE 38 // Bytes from RELAY_DATA_START + +// Hand-assembled 16-bit relay thunk. +// Each instruction is annotated with its offset and encoding. +// +// IMPORTANT: x86 code segments are read-only in protected mode. All writes +// to the scratch data area use ES, which is loaded at entry with a writable +// data alias selector (RELAY_SCRATCH_SEL) that has the same base as CS. +// Reads can use either CS or ES since both have the same base address. +// +// Segment override prefixes: CS=0x2E, ES=0x26. Operand size prefix: 0x66. 
+// +// Register encoding in ModR/M (reg field): +// AX/EAX=000, CX/ECX=001, DX/EDX=010, BX/EBX=011 +// SP/ESP=100, BP/EBP=101, SI/ESI=110, DI/EDI=111 +// Segment register encoding (reg field in 8C/8E): +// ES=000, CS=001, SS=010, DS=011, FS=100, GS=101 +// Addressing mode [disp16]: mod=00, r/m=110 + +static const uint8_t kRelayCode[] = { + // ---- Load ES with writable data alias of code segment ---- + // 0x00: mov es, [cs:SCRATCH_SEL] + // 8E /0 [disp16] with CS override + // ModR/M: mod=00, reg=000(ES), r/m=110([disp16]) = 0x06 + 0x2E, 0x8E, 0x06, + (RELAY_SCRATCH_SEL & 0xFF), (RELAY_SCRATCH_SEL >> 8), // 5 bytes + + // ---- Save caller's DS (write via ES) ---- + // 0x05: mov [es:SAVED_DS], ds + 0x26, 0x8C, 0x1E, + (RELAY_SAVED_DS & 0xFF), (RELAY_SAVED_DS >> 8), // 5 bytes + + // ---- Pop 32-bit return address (4 x 16-bit pops, write via ES) ---- + // The 32-bit lcall pushed 4+4=8 bytes (EIP then CS, each 32-bit). + // With SS B=1, pop uses ESP and reads 16-bit values. + + // 0x0A: pop word [es:SAVED_EIP_LO] + 0x26, 0x8F, 0x06, + (RELAY_SAVED_EIP_LO & 0xFF), (RELAY_SAVED_EIP_LO >> 8), // 5 bytes + + // 0x0F: pop word [es:SAVED_EIP_HI] + 0x26, 0x8F, 0x06, + (RELAY_SAVED_EIP_HI & 0xFF), (RELAY_SAVED_EIP_HI >> 8), // 5 bytes + + // 0x14: pop word [es:SAVED_CS_LO] + 0x26, 0x8F, 0x06, + (RELAY_SAVED_CS_LO & 0xFF), (RELAY_SAVED_CS_LO >> 8), // 5 bytes + + // 0x19: pop word [es:SAVED_CS_HI] + 0x26, 0x8F, 0x06, + (RELAY_SAVED_CS_HI & 0xFF), (RELAY_SAVED_CS_HI >> 8), // 5 bytes + + // ---- Save caller's SS:ESP (write via ES) ---- + + // 0x1E: mov [es:SAVED_SS], ss + 0x26, 0x8C, 0x16, + (RELAY_SAVED_SS & 0xFF), (RELAY_SAVED_SS >> 8), // 5 bytes + + // 0x23: o32 mov [es:SAVED_ESP], esp + // 66 prefix, ES override, 89 /4 [disp16] + // ModR/M: mod=00, reg=100(ESP), r/m=110([disp16]) = 0x26 + 0x66, 0x26, 0x89, 0x26, + (RELAY_SAVED_ESP & 0xFF), (RELAY_SAVED_ESP >> 8), // 6 bytes + + // ---- Save caller's 32-bit EBP (write via ES) ---- + // The XOR block below zeroes EBP for 
safe 16-bit execution, but the + // 32-bit caller uses EBP as its frame pointer. Must save/restore it. + + // 0x29: o32 mov [es:SAVED_EBP], ebp + // ModR/M: mod=00, reg=101(EBP), r/m=110([disp16]) = 0x2E + 0x66, 0x26, 0x89, 0x2E, + (RELAY_SAVED_EBP & 0xFF), (RELAY_SAVED_EBP >> 8), // 6 bytes + + // ---- Load DS with 16-bit data segment (read from CS, OK) ---- + + // 0x2F: mov ds, [cs:DS16] + 0x2E, 0x8E, 0x1E, + (RELAY_DS16 & 0xFF), (RELAY_DS16 >> 8), // 5 bytes + + // ---- Switch to 16-bit stack (reads from CS, OK) ---- + + // 0x34: cli + 0xFA, // 1 byte + + // 0x35: mov ss, [cs:STACK16_SS] + 0x2E, 0x8E, 0x16, + (RELAY_STACK16_SS & 0xFF), (RELAY_STACK16_SS >> 8), // 5 bytes + + // 0x3A: o32 xor esp, esp + // Zero upper 16 bits of ESP. CWSDPMI uses 32-bit interrupt gates, + // so the CPU uses full ESP when pushing interrupt frames. Without + // this, stale upper bits from the 32-bit stack cause corruption. + 0x66, 0x31, 0xE4, // 3 bytes + + // 0x3D: mov sp, [cs:STACK16_SP] + 0x2E, 0x8B, 0x26, + (RELAY_STACK16_SP & 0xFF), (RELAY_STACK16_SP >> 8), // 5 bytes + + // 0x42: sti (re-enable interrupts now that stack switch is complete) + 0xFB, // 1 byte + + // ---- Push parameters from DS onto 16-bit stack ---- + // CX = param count, BX = byte offset into params array. + // Push params[0] first (leftmost, goes deepest = Pascal convention). 
+ + // 0x43: mov cx, [ds:0x0004] + 0x8B, 0x0E, 0x04, 0x00, // 4 bytes + + // 0x47: xor bx, bx + 0x31, 0xDB, // 2 bytes + + // 0x49: test cx, cx + 0x85, 0xC9, // 2 bytes + + // 0x4B: jz +9 -> 0x56 (skip to DGROUP load if no params) + // IP after jz = 0x4D, target = 0x4D + 9 = 0x56 + 0x74, 0x09, // 2 bytes + + // 0x4D: push word [bx+0x0006] + // FF /6 [BX+disp16] + // ModR/M: mod=10, reg=110(/6=push), r/m=111(BX) = 0xB7 + 0xFF, 0xB7, 0x06, 0x00, // 4 bytes + + // 0x51: add bx, 2 + 0x83, 0xC3, 0x02, // 3 bytes + + // 0x54: loop -> 0x49 + // IP after loop = 0x56, relative = 0x49 - 0x56 = -13 = 0xF3 + 0xE2, 0xF3, // 2 bytes + + // ---- Load driver's DGROUP into DS and ES, then far call via CS config ---- + // The target address was written to CS:RELAY_TARGET_ADDR by the 32-bit + // caller. We load DS=DGROUP so the driver runs with its own data segment. + // ES is also set to DGROUP since many Win3.x drivers assume ES=DS on entry. + + // 0x56: mov ds, [cs:DS_DGROUP] + 0x2E, 0x8E, 0x1E, + (RELAY_DS_DGROUP & 0xFF), (RELAY_DS_DGROUP >> 8), // 5 bytes + + // 0x5B: push ds + 0x1E, // 1 byte + + // 0x5C: pop es (ES = DS = DGROUP) + 0x07, // 1 byte + + // ---- Zero upper 16 bits of all GP registers ---- + // DJGPP 32-bit code leaves stale values in the upper halves. + // 16-bit driver code using 67h prefix (32-bit addressing) would + // pick up these stale bits, causing accesses outside segment limits. + // EBP was saved earlier (at 0x29) so the 32-bit caller can recover it. 
+ + // 0x5D: o32 xor eax, eax + 0x66, 0x31, 0xC0, // 3 bytes + // 0x60: o32 xor ebx, ebx + 0x66, 0x31, 0xDB, // 3 bytes + // 0x63: o32 xor ecx, ecx + 0x66, 0x31, 0xC9, // 3 bytes + // 0x66: o32 xor edx, edx + 0x66, 0x31, 0xD2, // 3 bytes + // 0x69: o32 xor ebp, ebp + 0x66, 0x31, 0xED, // 3 bytes + // 0x6C: o32 xor esi, esi + 0x66, 0x31, 0xF6, // 3 bytes + // 0x6F: o32 xor edi, edi + 0x66, 0x31, 0xFF, // 3 bytes + + // ---- Far call to driver function via CS config ---- + + // 0x72: call far [cs:TARGET_ADDR] + // CS override (2E), FF /3 [disp16] + // ModR/M: mod=00, reg=011(/3=call far), r/m=110([disp16]) = 0x1E + 0x2E, 0xFF, 0x1E, + (RELAY_TARGET_ADDR & 0xFF), (RELAY_TARGET_ADDR >> 8), // 5 bytes + + // ---- Reload ES (driver may have clobbered it) ---- + + // 0x77: mov es, [cs:SCRATCH_SEL] + 0x2E, 0x8E, 0x06, + (RELAY_SCRATCH_SEL & 0xFF), (RELAY_SCRATCH_SEL >> 8), // 5 bytes + + // ---- Save return value (DX:AX) via ES ---- + + // 0x7C: mov [es:RETVAL_AX], ax + 0x26, 0xA3, + (RELAY_RETVAL_AX & 0xFF), (RELAY_RETVAL_AX >> 8), // 4 bytes + + // 0x80: mov [es:RETVAL_DX], dx + 0x26, 0x89, 0x16, + (RELAY_RETVAL_DX & 0xFF), (RELAY_RETVAL_DX >> 8), // 5 bytes + + // ---- Restore caller's 32-bit EBP (read from CS, OK) ---- + + // 0x85: o32 mov ebp, [cs:SAVED_EBP] + // ModR/M: mod=00, reg=101(EBP), r/m=110([disp16]) = 0x2E + 0x66, 0x2E, 0x8B, 0x2E, + (RELAY_SAVED_EBP & 0xFF), (RELAY_SAVED_EBP >> 8), // 6 bytes + + // ---- Restore caller's SS:ESP (reads from CS, OK) ---- + + // 0x8B: cli + 0xFA, // 1 byte + + // 0x8C: mov ss, [cs:SAVED_SS] + 0x2E, 0x8E, 0x16, + (RELAY_SAVED_SS & 0xFF), (RELAY_SAVED_SS >> 8), // 5 bytes + + // 0x91: o32 mov esp, [cs:SAVED_ESP] + 0x66, 0x2E, 0x8B, 0x26, + (RELAY_SAVED_ESP & 0xFF), (RELAY_SAVED_ESP >> 8), // 6 bytes + + // 0x97: sti (re-enable interrupts now that stack restore is complete) + 0xFB, // 1 byte + + // ---- Restore caller's DS (read from CS, OK) ---- + + // 0x98: mov ds, [cs:SAVED_DS] + 0x2E, 0x8E, 0x1E, + (RELAY_SAVED_DS & 0xFF), 
(RELAY_SAVED_DS >> 8), // 5 bytes + + // ---- Push 32-bit return address back onto 32-bit stack ---- + // Order: CS_HI deepest, EIP_LO on top, so o32 retf reads them correctly. + + // 0x9D: push word [cs:SAVED_CS_HI] + 0x2E, 0xFF, 0x36, + (RELAY_SAVED_CS_HI & 0xFF), (RELAY_SAVED_CS_HI >> 8), // 5 bytes + + // 0xA2: push word [cs:SAVED_CS_LO] + 0x2E, 0xFF, 0x36, + (RELAY_SAVED_CS_LO & 0xFF), (RELAY_SAVED_CS_LO >> 8), // 5 bytes + + // 0xA7: push word [cs:SAVED_EIP_HI] + 0x2E, 0xFF, 0x36, + (RELAY_SAVED_EIP_HI & 0xFF), (RELAY_SAVED_EIP_HI >> 8), // 5 bytes + + // 0xAC: push word [cs:SAVED_EIP_LO] + 0x2E, 0xFF, 0x36, + (RELAY_SAVED_EIP_LO & 0xFF), (RELAY_SAVED_EIP_LO >> 8), // 5 bytes + + // ---- Restore return value (reads from CS, OK) ---- + + // 0xB1: mov ax, [cs:RETVAL_AX] + 0x2E, 0xA1, + (RELAY_RETVAL_AX & 0xFF), (RELAY_RETVAL_AX >> 8), // 4 bytes + + // 0xB5: mov dx, [cs:RETVAL_DX] + 0x2E, 0x8B, 0x16, + (RELAY_RETVAL_DX & 0xFF), (RELAY_RETVAL_DX >> 8), // 5 bytes + + // ---- 32-bit far return ---- + + // 0xBA: o32 retf + 0x66, 0xCB, // 2 bytes + + // Code ends at 0xBC. Padding to RELAY_DATA_START (0xC0). 
+ 0x90, 0x90, 0x90, 0x90, // 4 NOP + + // ---- Data area (at offset 0xC0) ---- + // Scratch: saved_eip(4), saved_cs(4), saved_ss(2), saved_esp(4), + // saved_ds(2) + // Per-call: stack16_ss(2), stack16_sp(2), ds16(2), ds_dgroup(2), + // target_addr(4) + // Return: retval_ax(2), retval_dx(2) + // Once: scratch_sel(2) + // Caller: saved_ebp(4) + // Total: 38 bytes + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +#define RELAY_CODE_SIZE sizeof(kRelayCode) + +// ============================================================================ +// 16-to-32 callback mechanism +// +// For Windows API stubs that the driver calls, we create small 16-bit code +// snippets that use a software interrupt (INT 0x66, chosen to avoid +// conflicts) to transfer control to a 32-bit DPMI handler. The handler +// looks up the callback by its slot index (passed in BX) and invokes the +// registered C function. +// +// Each callback stub looks like: +// push bx ; 53 (1 byte) - save BX +// mov bx, ; BB xx xx (3 bytes) +// int 0x66 ; CD 66 (2 bytes) +// pop bx ; 5B (1 byte) - restore BX +// retf ; CA xx xx (3 bytes) - Pascal callee cleanup +// Total: 10 bytes per stub +// ============================================================================ + +#define CALLBACK_STUB_SIZE 10 +#define CALLBACK_INT_NUM 0x66 + +// Callback registry +static ThunkCallbackT gCallbacks[THUNK_MAX_CALLBACKS]; +static uint16_t gCallbackParamWords[THUNK_MAX_CALLBACKS]; +static uint16_t gCallbackCount = 0; +static ThunkContextT *gCallbackCtx = NULL; + +// DPMI interrupt handler for callback dispatching +static __dpmi_paddr gOldCbVec; +static volatile bool gHandlerInstalled = false; +static bool gThunkDebug = false; + +// Diagnostic: monitor 3 bytes at gDiagWatchSel:gDiagWatchOff for corruption. 
+// Set gDiagWatchSel nonzero to enable. Logs when bytes change. +static uint16_t gDiagWatchSel = 0; +static uint32_t gDiagWatchOff = 0; +static uint8_t gDiagWatchBytes[3] = {0}; + +// Shared area for passing parameters from the interrupt handler +static uint16_t gCbParams[THUNK_MAX_PARAMS]; +static uint32_t gCbRetVal; + +// Saved register frame for the raw callback handler (same layout as INT 10h). +typedef struct __attribute__((packed)) { + uint32_t edi; // +0 + uint32_t esi; // +4 + uint32_t ebp; // +8 + uint32_t _reserved; // +12 + uint32_t ebx; // +16 + uint32_t edx; // +20 + uint32_t ecx; // +24 + uint32_t eax; // +28 + uint32_t es; // +32 + uint32_t ds; // +36 + uint32_t eip; // +40 + uint32_t cs; // +44 + uint32_t eflags; // +48 +} CbFrameT; + +// Globals for the raw callback handler assembly stub. +// Non-static so the asm symbols are accessible. +uint16_t gCbDsSel; +uint32_t gCbSavedSS; +uint32_t gCbSavedESP; +uint32_t gCbSavedGS; +uint32_t gCbSavedFS; +uint32_t gCbDgroupSel; +CbFrameT gCbFrame; +uint8_t gCbStack[16384] __attribute__((aligned(16))); +uint32_t gCbStackTop; + +// Worker function called from the assembly stub. +// Diagnostic: set by cbIntWorker to prove it was called +volatile uint32_t gCbWorkerCalled = 0; +volatile uint32_t gCbWorkerLastSS = 0; +volatile uint32_t gCbWorkerLastESP = 0; +volatile uint32_t gCbWorkerLastSlot = 0xDEAD; + +void cbIntWorker(CbFrameT *frame) +{ + gCbWorkerCalled++; + gCbWorkerLastSS = gCbSavedSS; + gCbWorkerLastESP = gCbSavedESP; + gCbWorkerLastSlot = (uint16_t)frame->ebx; + + uint16_t slot = (uint16_t)frame->ebx; + + if (slot >= gCallbackCount || !gCallbacks[slot]) { + frame->eax = (frame->eax & 0xFFFF0000); + frame->edx = (frame->edx & 0xFFFF0000); + return; + } + + // The driver far-called our stub, then the stub did push bx; INT 0x66. + // CWSDPMI pushed an IRET frame on the interrupted stack before + // dispatching to us. 
The driver's parameters are above the IRET + // frame, saved BX, and far return address on the interrupted stack. + // + // Stack layout from savedESP upward: + // [+0..11] = IRET frame (EIP, CS, EFLAGS - 32-bit, 12 bytes) + // [+12..13] = saved BX (from push bx in callback stub) + // [+14..15] = return IP (from driver's far call to stub) + // [+16..17] = return CS + // [+18..] = parameters (rightmost/last in Pascal at lowest addr) + + uint16_t paramWords = gCallbackParamWords[slot]; + uint16_t origSS = (uint16_t)gCbSavedSS; + uint32_t origESP = gCbSavedESP; + + if (paramWords > 0 && paramWords <= THUNK_MAX_PARAMS) { + uint32_t paramOffset = origESP + 18; + movedata(origSS, paramOffset, + _my_ds(), (unsigned)gCbParams, + paramWords * 2); + + // Reverse so gCbParams[0] = leftmost param (Pascal declaration order). + for (uint16_t i = 0; i < paramWords / 2; i++) { + uint16_t tmp = gCbParams[i]; + gCbParams[i] = gCbParams[paramWords - 1 - i]; + gCbParams[paramWords - 1 - i] = tmp; + } + } + + // Read return address (IP:CS) from the 16-bit stack above the IRET frame + // (+12 = saved BX, +14 = retIP, +16 = retCS) + uint16_t retIP = 0; + uint16_t retCS = 0; + movedata(origSS, origESP + 14, _my_ds(), (unsigned)&retIP, 2); + movedata(origSS, origESP + 16, _my_ds(), (unsigned)&retCS, 2); + + // Calculate driver's SP after retf N cleanup: + // IRET(12) + saved_bx(2) + retaddr(4) + params(N*2) + uint32_t driverSP = origESP + 18 + paramWords * 2; + + if (gThunkDebug) { + logErr("CB[%u] %u words:", slot, paramWords); + for (uint16_t i = 0; i < paramWords && i < 6; i++) { + logErr(" %04X", gCbParams[i]); + } + logErr(" ret=%04X:%04X SP=%04" PRIX32 " BP=%04X ESP32=%08" PRIX32 "\n", + retCS, retIP, driverSP, (uint16_t)frame->ebp, origESP); + fflush(stderr); + } + + // Diagnostic: check BEFORE callback dispatch + if (gDiagWatchSel != 0) { + uint8_t b0 = _farpeekb(gDiagWatchSel, gDiagWatchOff); + if (b0 != gDiagWatchBytes[0]) { + logErr("WATCH-PRE: %04X:%08" PRIX32 " changed 
%02X->%02X before CB[%u]\n", + gDiagWatchSel, gDiagWatchOff, + gDiagWatchBytes[0], b0, slot); + gDiagWatchBytes[0] = b0; + } + } + + gCbRetVal = gCallbacks[slot](gCbParams, paramWords); + fflush(stderr); + + // Set return value in DX:AX + frame->eax = (frame->eax & 0xFFFF0000) | (gCbRetVal & 0xFFFF); + frame->edx = (frame->edx & 0xFFFF0000) | (gCbRetVal >> 16); + + // Diagnostic: check AFTER callback dispatch + if (gDiagWatchSel != 0) { + uint8_t b0 = _farpeekb(gDiagWatchSel, gDiagWatchOff); + uint8_t b1 = _farpeekb(gDiagWatchSel, gDiagWatchOff + 1); + uint8_t b2 = _farpeekb(gDiagWatchSel, gDiagWatchOff + 2); + if (b0 != gDiagWatchBytes[0] || b1 != gDiagWatchBytes[1] || b2 != gDiagWatchBytes[2]) { + logErr("WATCH-POST: %04X:%08" PRIX32 " changed %02X %02X %02X->%02X %02X %02X after CB[%u]\n", + gDiagWatchSel, gDiagWatchOff, + gDiagWatchBytes[0], gDiagWatchBytes[1], gDiagWatchBytes[2], + b0, b1, b2, slot); + gDiagWatchBytes[0] = b0; + gDiagWatchBytes[1] = b1; + gDiagWatchBytes[2] = b2; + } + } +} + +// Defined in the file-scope asm block below +extern void cbIntRawHandler(void); + +// Raw callback interrupt handler. +// +// Key insight: in x86 protected mode, code segments are NOT writable. +// We use FS (loaded with our DS selector) for all writes. CS-relative +// reads are fine (readable code segment). +__asm__( + " .text\n" + " .p2align 4\n" + " .globl _cbIntRawHandler\n" + "_cbIntRawHandler:\n" + + // Save original FS (may be DGROUP in driver context), then load + // FS with our writable DS selector so we can access C globals. 
+ " pushl %eax\n" + " pushl %ecx\n" + " xorl %eax, %eax\n" + " movw %fs, %ax\n" + " movw %cs:_gCbDsSel, %cx\n" + " movw %cx, %fs\n" + " movl %eax, %fs:_gCbSavedFS\n" + + // Diagnostic: increment gCbWorkerCalled via FS to prove entry + " movl %fs:_gCbWorkerCalled, %eax\n" + " incl %eax\n" + " movl %eax, %fs:_gCbWorkerCalled\n" + + " popl %ecx\n" + " popl %eax\n" + + // Save GP registers via FS (writable data segment) + " movl %eax, %fs:_gCbFrame+28\n" + " movl %ecx, %fs:_gCbFrame+24\n" + " movl %edx, %fs:_gCbFrame+20\n" + " movl %ebx, %fs:_gCbFrame+16\n" + " movl %ebp, %fs:_gCbFrame+8\n" + " movl %esi, %fs:_gCbFrame+4\n" + " movl %edi, %fs:_gCbFrame+0\n" + + // Save segment registers (ES, DS, and GS) + " xorl %eax, %eax\n" + " movw %es, %ax\n" + " movl %eax, %fs:_gCbFrame+32\n" + " movw %ds, %ax\n" + " movl %eax, %fs:_gCbFrame+36\n" + " movw %gs, %ax\n" + " movl %eax, %fs:_gCbSavedGS\n" + + // Save IRET frame from interrupted stack + " movl (%esp), %eax\n" + " movl %eax, %fs:_gCbFrame+40\n" + " movl 4(%esp), %eax\n" + " movl %eax, %fs:_gCbFrame+44\n" + " movl 8(%esp), %eax\n" + " movl %eax, %fs:_gCbFrame+48\n" + + // Save interrupted SS:ESP + " movl %esp, %fs:_gCbSavedESP\n" + " xorl %eax, %eax\n" + " movw %ss, %ax\n" + " movl %eax, %fs:_gCbSavedSS\n" + + // Switch to our handler stack (DS/ES/SS = our DS, ESP = handler stack) + " movw %fs:_gCbDsSel, %ax\n" + " movw %ax, %ds\n" + " movw %ax, %es\n" + " movw %ax, %ss\n" + " movl _gCbStackTop, %esp\n" + + // Call C worker + " leal _gCbFrame, %eax\n" + " pushl %eax\n" + " call _cbIntWorker\n" + " addl $4, %esp\n" + + // Restore interrupted SS:ESP (reads via CS are allowed) + " movl %cs:_gCbSavedESP, %eax\n" + " movl %cs:_gCbSavedSS, %ecx\n" + " movw %cx, %ss\n" + " movl %eax, %esp\n" + + // ---- EFLAGS writeback SKIPPED (writes to original stack may fault) ---- + // cbIntWorker does not modify EFLAGS, so this is safe to skip. 
+ + // Restore GP registers (reads via CS) + " movl %cs:_gCbFrame+0, %edi\n" + " movl %cs:_gCbFrame+4, %esi\n" + " movl %cs:_gCbFrame+8, %ebp\n" + " movl %cs:_gCbFrame+16, %ebx\n" + " movl %cs:_gCbFrame+20, %edx\n" + " movl %cs:_gCbFrame+24, %ecx\n" + + // Restore segment registers (FS, GS, ES, DS) + // Always set FS and GS to DGROUP (not saved values) because the + // DPMI host may modify FS/GS when dispatching interrupts. + " movl %cs:_gCbDgroupSel, %eax\n" + " movw %ax, %fs\n" + " movw %ax, %gs\n" + " movl %cs:_gCbFrame+32, %eax\n" + " movw %ax, %es\n" + " movl %cs:_gCbFrame+36, %eax\n" + " movw %ax, %ds\n" + + // Restore EAX last + " movl %cs:_gCbFrame+28, %eax\n" + + " iret\n" +); + + +// ============================================================================ +// Public functions +// ============================================================================ + +void thunkSanitizeCbFrame(uint16_t freedSel) +{ + if ((uint16_t)gCbFrame.es == freedSel) { + gCbFrame.es = 0; + } + if ((uint16_t)gCbFrame.ds == freedSel) { + gCbFrame.ds = 0; + } +} + + +void thunkSetDebug(bool debug) +{ + gThunkDebug = debug; +} + + +void thunkSetWatch(uint16_t sel, uint32_t off) +{ + gDiagWatchSel = sel; + gDiagWatchOff = off; + if (sel != 0) { + gDiagWatchBytes[0] = _farpeekb(sel, off); + gDiagWatchBytes[1] = _farpeekb(sel, off + 1); + gDiagWatchBytes[2] = _farpeekb(sel, off + 2); + logErr("WATCH: set %04X:%08" PRIX32 " = %02X %02X %02X\n", + sel, off, gDiagWatchBytes[0], gDiagWatchBytes[1], gDiagWatchBytes[2]); + } +} + + +bool thunkInit(ThunkContextT *ctx) +{ + memset(ctx, 0, sizeof(ThunkContextT)); + + // Allocate conventional (DOS) memory for all 16-bit segments. 
+ // + // Layout in DOS memory block: + // Offset 0x0000: Relay code (256 bytes, includes CS-relative data) + // Offset 0x0100: Callback stubs (THUNK_MAX_CALLBACKS * 10 = 1280 bytes) + // Offset 0x0600: Shared data area / ThunkDataT (256 bytes) + // Offset 0x0700: 16-bit stack (8192 bytes) + // Offset 0x2700: (end) + // + // Total: 0x2700 = 9984 bytes = 624 paragraphs + + uint32_t relayOff = 0x0000; + uint32_t callbackOff = 0x0100; + uint32_t dataOff = 0x0600; + uint32_t stackOff = 0x0700; + uint32_t totalSize = 0x2700; + uint16_t paragraphs = (totalSize + 15) / 16; + + int dosSel; + int dosSeg = __dpmi_allocate_dos_memory(paragraphs, &dosSel); + if (dosSeg < 0) { + logErr("thunk: failed to allocate %" PRIu32 " bytes of DOS memory\n", totalSize); + return false; + } + + ctx->dosMemSeg = dosSeg; + ctx->dosMemSel = dosSel; + ctx->dosMemSize = totalSize; + + uint32_t dosBase = (uint32_t)dosSeg * 16; + + logErr("thunk: DOS mem at 0x%05" PRIX32 "-0x%05" PRIX32, + dosBase, dosBase + totalSize - 1); + if (0x8134 >= dosBase && 0x8134 < dosBase + totalSize) { + logErr(" ** 0x8134 INSIDE thunk block at offset 0x%04" PRIX32 " **", + (uint32_t)(0x8134 - dosBase)); + } + logErr("\n"); + + // Zero the entire area + { + uint8_t zeroBuf[256]; + memset(zeroBuf, 0, sizeof(zeroBuf)); + for (uint32_t off = 0; off < totalSize; off += 256) { + uint32_t chunk = totalSize - off; + if (chunk > 256) { + chunk = 256; + } + dosmemput(zeroBuf, chunk, dosBase + off); + } + } + + // Create 16-bit code segment descriptor for relay + callbacks + ctx->relayCodeBase = dosBase + relayOff; + ctx->relayCodeSize = callbackOff + THUNK_MAX_CALLBACKS * CALLBACK_STUB_SIZE; + ctx->relayCodeSel = allocDescriptor16(ctx->relayCodeBase, + ctx->relayCodeSize - 1, true); + if (ctx->relayCodeSel == 0) { + logErr("thunk: failed to create relay code segment\n"); + goto fail; + } + + // Create 16-bit data segment descriptor for shared data (ThunkDataT) + ctx->dataSegBase = dosBase + dataOff; + ctx->dataSegSize = 256; 
+ ctx->dataSegSel = allocDescriptor16(ctx->dataSegBase, + ctx->dataSegSize - 1, false); + if (ctx->dataSegSel == 0) { + logErr("thunk: failed to create data segment\n"); + goto fail; + } + + // Create 16-bit stack segment descriptor + ctx->stackBase = dosBase + stackOff; + ctx->stackSize = totalSize - stackOff; + ctx->stackSel = allocDescriptor16(ctx->stackBase, + ctx->stackSize - 1, false); + if (ctx->stackSel == 0) { + logErr("thunk: failed to create stack segment\n"); + goto fail; + } + + // Install the relay code into the code segment area + if (!installRelayCode(ctx)) { + goto fail; + } + + // Install the interrupt handler for 16-to-32 callbacks + gCallbackCtx = ctx; + gCallbackCount = 0; + memset(gCallbacks, 0, sizeof(gCallbacks)); + + gCbDsSel = _my_ds(); + gCbStackTop = (uint32_t)gCbStack + sizeof(gCbStack); + + __dpmi_get_protected_mode_interrupt_vector(CALLBACK_INT_NUM, &gOldCbVec); + + __dpmi_paddr newVec; + newVec.offset32 = (unsigned long)cbIntRawHandler; + newVec.selector = _my_cs(); + if (__dpmi_set_protected_mode_interrupt_vector(CALLBACK_INT_NUM, &newVec) != 0) { + logErr("thunk: failed to install callback interrupt handler\n"); + goto fail; + } + gHandlerInstalled = true; + + ctx->initialized = true; + return true; + +fail: + thunkShutdown(ctx); + return false; +} + + +void thunkShutdown(ThunkContextT *ctx) +{ + // Restore interrupt handler + if (gHandlerInstalled) { + __dpmi_set_protected_mode_interrupt_vector(CALLBACK_INT_NUM, &gOldCbVec); + gHandlerInstalled = false; + } + + // Free descriptors + if (ctx->relayCodeSel) { + __dpmi_free_ldt_descriptor(ctx->relayCodeSel); + ctx->relayCodeSel = 0; + } + if (ctx->dataSegSel) { + __dpmi_free_ldt_descriptor(ctx->dataSegSel); + ctx->dataSegSel = 0; + } + if (ctx->stackSel) { + __dpmi_free_ldt_descriptor(ctx->stackSel); + ctx->stackSel = 0; + } + + // Free DOS memory + if (ctx->dosMemSel) { + __dpmi_free_dos_memory(ctx->dosMemSel); + ctx->dosMemSeg = 0; + ctx->dosMemSel = 0; + ctx->dosMemSize = 0; + } + 
+ gCallbackCtx = NULL; + gCallbackCount = 0; + ctx->initialized = false; +} + + +uint32_t thunkCall16(ThunkContextT *ctx, uint16_t targetSel, uint16_t targetOff, + const uint16_t *params, uint16_t paramCount) +{ + if (!ctx->initialized) { + logErr("thunk: not initialized\n"); + return 0; + } + if (paramCount > THUNK_MAX_PARAMS) { + logErr("thunk: too many parameters (%u)\n", paramCount); + return 0; + } + + // Build the ThunkDataT in the shared data segment (DOS memory) + ThunkDataT td; + td.targetOff = targetOff; + td.targetSeg = targetSel; + td.paramCount = paramCount; + if (paramCount > 0) { + memcpy(td.params, params, paramCount * 2); + } + dosmemput(&td, 6 + paramCount * 2, ctx->dataSegBase); + + // Write relay configuration to the CS-relative data area. + // The relay reads stack/DS/DGROUP/target config from here. + struct __attribute__((packed)) { + uint16_t stack16Ss; + uint16_t stack16Sp; + uint16_t ds16; + uint16_t dgroupSel; + uint16_t targetOff; + uint16_t targetSeg; + } relayConfig; + // Windows 3.x drivers assume SS == DS == DGROUP. Some drivers + // (VBESVGA BBLT.ASM) do PrestoChangeoSelector(SS, WorkSelector) to + // create a code alias of DGROUP, then retf into compiled blit code + // stored in the data segment. If SS != DGROUP, the code alias has + // the wrong base and the CPU executes garbage, corrupting memory. + // When DGROUP is available, use it as SS with SP near the top of the + // 64K segment (stack grows downward). + if (ctx->dgroupSel) { + relayConfig.stack16Ss = ctx->dgroupSel; + relayConfig.stack16Sp = 0xFFF0; // Top of 64K DGROUP, 16-byte aligned + } else { + relayConfig.stack16Ss = ctx->stackSel; + relayConfig.stack16Sp = ctx->stackSize; + } + relayConfig.ds16 = ctx->dataSegSel; + relayConfig.dgroupSel = ctx->dgroupSel ? 
ctx->dgroupSel : ctx->dataSegSel; + relayConfig.targetOff = targetOff; + relayConfig.targetSeg = targetSel; + dosmemput(&relayConfig, sizeof(relayConfig), + ctx->relayCodeBase + RELAY_STACK16_SS); + + // Build the 48-bit far pointer for lcall: 32-bit offset + 16-bit selector. + // Relay entry point is at offset 0 in the code segment. + struct __attribute__((packed)) { + uint32_t offset; + uint16_t selector; + } farTarget; + farTarget.offset = 0; + farTarget.selector = ctx->relayCodeSel; + + // Far-call to the 16-bit relay. The relay handles everything: + // DS/SS switching, parameter pushing, calling the driver, and returning. + // The 32-bit side just does the lcall and collects the result. + uint32_t result; + + // The S3 driver uses GS and FS segment overrides (0x65/0x64 prefixes) + // to access DGROUP data. The relay sets DS and ES to DGROUP but not + // GS or FS, so we must pre-load both with the DGROUP selector. + // Also store it for the callback handler to restore on exit. + uint16_t dgroupSel = relayConfig.dgroupSel; + gCbDgroupSel = dgroupSel; + + // Diagnostic: check watched byte before lcall + if (gDiagWatchSel != 0) { + uint8_t b = _farpeekb(gDiagWatchSel, gDiagWatchOff); + if (b != gDiagWatchBytes[0]) { + logErr("WATCH-LCALL-PRE: %02X->%02X target=%04X:%04X\n", + gDiagWatchBytes[0], b, targetSel, targetOff); + gDiagWatchBytes[0] = b; + } + } + + __asm__ volatile ( + // Save ES, GS, and FS. NOTE(review): these pushes move ESP before the lcall, so %[farTarget] must not be addressed ESP-relative - confirm the build keeps a frame pointer in this function. + "push %%es\n\t" + "push %%gs\n\t" + "push %%fs\n\t" + + // Set GS and FS = DGROUP + "movw %[dgroup], %%gs\n\t" + "movw %[dgroup], %%fs\n\t" + + "lcall *%[farTarget]\n\t" + + // Combine DX:AX into EAX + "shll $16, %%edx\n\t" + "movzwl %%ax, %%eax\n\t" + "orl %%edx, %%eax\n\t" + + // Restore FS, GS, and ES + "pop %%fs\n\t" + "pop %%gs\n\t" + "pop %%es\n\t" + + : "=a" (result) + : [farTarget] "m" (farTarget), + [dgroup] "r" (dgroupSel) + : "ebx", "ecx", "edx", "esi", "edi", "memory", "cc" + ); + + // Diagnostic: check watched byte after lcall returns + if
(gDiagWatchSel != 0) { + uint8_t b = _farpeekb(gDiagWatchSel, gDiagWatchOff); + if (b != gDiagWatchBytes[0]) { + logErr("WATCH-LCALL-POST: %02X->%02X target=%04X:%04X\n", + gDiagWatchBytes[0], b, targetSel, targetOff); + gDiagWatchBytes[0] = b; + } + } + + return result; +} + + +uint32_t thunkCall16v(ThunkContextT *ctx, uint16_t targetSel, uint16_t targetOff, + uint16_t paramCount, ...) +{ + uint16_t params[THUNK_MAX_PARAMS]; + va_list ap; + + va_start(ap, paramCount); + for (uint16_t i = 0; i < paramCount && i < THUNK_MAX_PARAMS; i++) { + params[i] = (uint16_t)va_arg(ap, unsigned int); + } + va_end(ap); + + return thunkCall16(ctx, targetSel, targetOff, params, paramCount); +} + + +bool thunkRegisterCallback(ThunkContextT *ctx, ThunkCallbackT callback, + uint16_t paramWords, FarPtr16T *result) +{ + if (gCallbackCount >= THUNK_MAX_CALLBACKS) { + logErr("thunk: callback table full\n"); + return false; + } + + uint16_t slot = gCallbackCount; + gCallbacks[slot] = callback; + gCallbackParamWords[slot] = paramWords; + gCallbackCount++; + + // Build the 16-bit stub code: + // 53 push bx (1 byte, save caller's BX) + // BB xx xx mov bx, slot (3 bytes) + // CD 66 int CALLBACK_INT_NUM (2 bytes) + // 5B pop bx (1 byte, restore caller's BX) + // CA xx xx retf param_bytes (3 bytes, Pascal callee cleanup) + // Total: 10 bytes + + uint16_t paramBytes = paramWords * 2; + + uint8_t stub[CALLBACK_STUB_SIZE]; + stub[0] = 0x53; // push bx + stub[1] = 0xBB; // mov bx, imm16 + stub[2] = (uint8_t)(slot & 0xFF); + stub[3] = (uint8_t)(slot >> 8); + stub[4] = 0xCD; // int imm8 + stub[5] = CALLBACK_INT_NUM; + stub[6] = 0x5B; // pop bx + stub[7] = 0xCA; // retf imm16 + stub[8] = (uint8_t)(paramBytes & 0xFF); + stub[9] = (uint8_t)(paramBytes >> 8); + + // Write the stub into the callback area (offset 0x0100 in code segment) + uint32_t stubOffset = 0x0100 + slot * CALLBACK_STUB_SIZE; + uint32_t stubAddr = ctx->relayCodeBase + stubOffset; + dosmemput(stub, CALLBACK_STUB_SIZE, stubAddr); + + // 
Return the 16-bit far pointer to this stub + result->segment = ctx->relayCodeSel; + result->offset = (uint16_t)stubOffset; + + return true; +} + + +// ============================================================================ +// Internal helpers +// ============================================================================ + +static bool installRelayCode(ThunkContextT *ctx) +{ + // Write the hand-assembled relay code into the code segment in DOS memory. + // We use the DOS memory PM selector for writing (the code segment is + // read-only by its descriptor, but the underlying memory is the same). + dosmemput(kRelayCode, RELAY_CODE_SIZE, ctx->relayCodeBase); + + // Write the DOS memory PM selector into RELAY_SCRATCH_SEL so the relay + // can load it into ES for writable access to the data area. + uint16_t scratchSel = (uint16_t)ctx->dosMemSel; + dosmemput(&scratchSel, 2, ctx->relayCodeBase + RELAY_SCRATCH_SEL); + + return true; +} + + +static uint16_t allocDescriptor16(uint32_t base, uint32_t limit, bool isCode) +{ + int sel = __dpmi_allocate_ldt_descriptors(1); + if (sel < 0) { + return 0; + } + + if (__dpmi_set_segment_base_address(sel, base) < 0) { + __dpmi_free_ldt_descriptor(sel); + return 0; + } + + if (__dpmi_set_segment_limit(sel, limit) < 0) { + __dpmi_free_ldt_descriptor(sel); + return 0; + } + + // Access rights for 16-bit segments: + // Code (readable, non-conforming): byte5=0xFA, byte6=0x00 + // Data (writable): byte5=0xF2, byte6=0x00 + // byte5: P=1, DPL=3, S=1, Type=1010(code) or 0010(data) + // byte6: G=0, D=0(16-bit), 0, AVL=0, limit_hi=0 + uint16_t rights = isCode ? 
0x00FA : 0x00F2; + if (__dpmi_set_descriptor_access_rights(sel, rights) < 0) { + __dpmi_free_ldt_descriptor(sel); + return 0; + } + + return (uint16_t)sel; +} diff --git a/win31drv/thunk.h b/win31drv/thunk.h new file mode 100644 index 0000000..f6c4131 --- /dev/null +++ b/win31drv/thunk.h @@ -0,0 +1,135 @@ +#ifndef THUNK_H +#define THUNK_H + +#include +#include +#include "wintypes.h" + +// ============================================================================ +// 32-to-16 bit thunking layer +// +// Provides a mechanism for 32-bit DJGPP code to call into 16-bit Windows +// driver code running in 16-bit protected mode segments. +// +// Architecture: +// - A small 16-bit relay thunk is placed in a 16-bit code segment +// - Parameters are staged in the shared data area; the relay pushes them onto the 16-bit stack +// - The relay handles 32/16-bit return address translation +// - Driver functions use Pascal calling convention (callee cleans stack) +// +// The relay thunk code: +// 1. Pops the 32-bit return address (8 bytes: 32-bit CS + 32-bit EIP) +// 2. Pushes parameters from the shared data area onto the 16-bit stack +// 3. Does a 16-bit far call to the target driver function +// 4. Saves the return value (DX:AX) +// 5. Pushes the 32-bit return address back +// 6. Does a 32-bit far return (o32 retf) +// ============================================================================ + +// ============================================================================ +// Thunk shared data area (in 16-bit addressable memory) +// +// The 32-bit side writes target address and parameters here before +// calling the relay. The relay reads from here.
+// ============================================================================ + +#define THUNK_MAX_PARAMS 32 // Max 16-bit words of parameters per call + +typedef struct __attribute__((packed)) { + uint16_t targetOff; // 0x00: target function offset + uint16_t targetSeg; // 0x02: target function segment (selector) + uint16_t paramCount; // 0x04: number of 16-bit parameter words + uint16_t params[THUNK_MAX_PARAMS]; // 0x06: parameter words +} ThunkDataT; + +// ============================================================================ +// Thunk context (initialized once, used for all calls) +// ============================================================================ + +typedef struct { + // 16-bit relay code segment + uint16_t relayCodeSel; // Selector for relay code segment + uint32_t relayCodeBase; // Linear base address of relay code + uint16_t relayCodeSize; // Size of relay code + + // 16-bit data segment (for ThunkDataT) + uint16_t dataSegSel; // Selector for shared data segment + uint32_t dataSegBase; // Linear base address of data segment + uint16_t dataSegSize; // Size of data segment + + // 16-bit stack segment + uint16_t stackSel; // Selector for 16-bit stack + uint32_t stackBase; // Linear base address of stack + uint16_t stackSize; // Size of stack + + // DOS memory for all 16-bit segments (single allocation) + int dosMemSeg; // Real-mode segment + int dosMemSel; // PM selector from DOS alloc + uint32_t dosMemSize; // Total bytes allocated + + // Driver's auto data segment (DGROUP) selector. + // Set this before calling thunkCall16 so the relay loads DS correctly. + uint16_t dgroupSel; + + bool initialized; +} ThunkContextT; + +// ============================================================================ +// Thunk layer functions +// ============================================================================ + +// Initialize the thunking infrastructure. +// Allocates DOS memory, creates 16-bit segments, installs relay code. 
+bool thunkInit(ThunkContextT *ctx); + +// Enable or disable verbose callback tracing. +void thunkSetDebug(bool debug); + +// Set a watchpoint on 3 bytes at sel:off. Logs any changes during callbacks. +void thunkSetWatch(uint16_t sel, uint32_t off); + +// Shut down the thunking infrastructure and free resources. +void thunkShutdown(ThunkContextT *ctx); + +// Call a 16-bit function via the thunk. +// targetSel:targetOff - far address of the 16-bit function +// params - array of 16-bit parameter words in Pascal order: +// params[0] = leftmost parameter (pushed first, deepest) +// params[N-1] = rightmost parameter (pushed last, top) +// paramCount - number of 16-bit words in params +// +// Returns DX:AX combined as a uint32_t (AX in low 16, DX in high 16). +// For functions returning WORD, just use the low 16 bits. +uint32_t thunkCall16(ThunkContextT *ctx, uint16_t targetSel, uint16_t targetOff, const uint16_t *params, uint16_t paramCount); + +// Convenience: call with individual parameters (up to THUNK_MAX_PARAMS words; each argument is read as unsigned int and truncated to 16 bits). +uint32_t thunkCall16v(ThunkContextT *ctx, uint16_t targetSel, uint16_t targetOff, uint16_t paramCount, ...); + +// ============================================================================ +// 16-bit stub generation +// +// For Windows API stubs that the driver calls back into, we need 16-bit +// entry points that thunk UP to 32-bit code. +// ============================================================================ + +// Callback function type for 16-to-32 callbacks. +// Receives the parameters as an array of 16-bit words. +// Returns DX:AX as uint32_t. +typedef uint32_t (*ThunkCallbackT)(uint16_t *params, uint16_t paramCount); + +// Maximum number of registered callbacks +#define THUNK_MAX_CALLBACKS 128 + +// Register a callback and get a 16-bit far pointer (sel:off) that, +// when called from 16-bit code, will invoke the callback in 32-bit.
+// paramWords = number of 16-bit parameter words the function expects +// (used by the stub to clean the stack with retf N). +// Returns true on success. +bool thunkRegisterCallback(ThunkContextT *ctx, ThunkCallbackT callback, uint16_t paramWords, FarPtr16T *result); + +// Sanitize the callback frame after freeing a selector. +// If the saved ES or DS in the callback interrupt frame matches freedSel, +// zero it out so the IRET doesn't try to load a freed descriptor. +void thunkSanitizeCbFrame(uint16_t freedSel); + +#endif // THUNK_H diff --git a/win31drv/winddi.h b/win31drv/winddi.h new file mode 100644 index 0000000..3ab2fea --- /dev/null +++ b/win31drv/winddi.h @@ -0,0 +1,260 @@ +#ifndef WINDDI_H +#define WINDDI_H + +#include "wintypes.h" + +// ============================================================================ +// GDIINFO - Device capabilities structure filled by Enable() +// This is the 16-bit Windows 3.1 DDK GDIINFO structure. +// ============================================================================ + +typedef struct __attribute__((packed)) { + int16_t dpVersion; // 0x00: driver version (0x030A for 3.10) + int16_t dpTechnology; // 0x02: device technology + int16_t dpHorzSize; // 0x04: horizontal size in mm + int16_t dpVertSize; // 0x06: vertical size in mm + int16_t dpHorzRes; // 0x08: horizontal resolution (pixels) + int16_t dpVertRes; // 0x0A: vertical resolution (pixels) + int16_t dpBitsPixel; // 0x0C: bits per pixel + int16_t dpPlanes; // 0x0E: number of bit planes + int16_t dpNumBrushes; // 0x10: number of device brushes + int16_t dpNumPens; // 0x12: number of device pens + int16_t dpNumFonts; // 0x14: number of device fonts + int16_t dpNumColors; // 0x16: number of colors in color table + int16_t dpDEVICEsize; // 0x18: size of PDEVICE structure + uint16_t dpCurves; // 0x1A: curve capabilities + uint16_t dpLines; // 0x1C: line capabilities + uint16_t dpPolygonals; // 0x1E: polygon capabilities + uint16_t dpText; // 0x20: text 
capabilities + uint16_t dpClip; // 0x22: clipping capabilities + uint16_t dpRaster; // 0x24: raster capabilities + int16_t dpAspectX; // 0x26: x aspect ratio + int16_t dpAspectY; // 0x28: y aspect ratio + int16_t dpAspectXY; // 0x2A: diagonal aspect ratio + int16_t dpStyleLen; // 0x2C: length of styled line segment + Point16T dpMLoWin; // 0x2E: metric lo-res window + Point16T dpMLoVpt; // 0x32: metric lo-res viewport + Point16T dpMHiWin; // 0x36: metric hi-res window + Point16T dpMHiVpt; // 0x3A: metric hi-res viewport + Point16T dpELoWin; // 0x3E: english lo-res window + Point16T dpELoVpt; // 0x42: english lo-res viewport + Point16T dpEHiWin; // 0x46: english hi-res window + Point16T dpEHiVpt; // 0x4A: english hi-res viewport + Point16T dpTwpWin; // 0x4E: twips window + Point16T dpTwpVpt; // 0x52: twips viewport + int16_t dpLogPixelsX; // 0x56: logical pixels per inch X + int16_t dpLogPixelsY; // 0x58: logical pixels per inch Y + int16_t dpDCManage; // 0x5A: DC management flags + uint16_t reserved1[5]; // 0x5C: reserved + uint16_t dpPalColors; // 0x66: number of palette colors + uint16_t dpPalReserved; // 0x68: number of reserved palette entries + uint16_t dpPalResolution; // 0x6A: palette DAC resolution (bits per gun) +} GdiInfo16T; + +// dpTechnology values +#define DT_PLOTTER 0 +#define DT_RASDISPLAY 1 +#define DT_RASPRINTER 2 +#define DT_RASCAMERA 3 +#define DT_CHARSTREAM 4 +#define DT_METAFILE 5 +#define DT_DISPFILE 6 + +// dpRaster capability bits +#define RC_BITBLT 0x0001 +#define RC_BANDING 0x0002 +#define RC_SCALING 0x0004 +#define RC_BITMAP64 0x0008 +#define RC_GDI20_OUTPUT 0x0010 +#define RC_DI_BITMAP 0x0080 +#define RC_PALETTE 0x0100 +#define RC_DIBTODEV 0x0200 +#define RC_BIGFONT 0x0400 +#define RC_STRETCHBLT 0x0800 +#define RC_FLOODFILL 0x1000 +#define RC_STRETCHDIB 0x2000 + +// ============================================================================ +// PDEVICE - Physical device descriptor +// The first word indicates the type. 
The rest is driver-specific. +// We allocate a generous buffer for the driver to fill in. +// ============================================================================ + +#define PDEVICE_MAX_SIZE 4096 + +typedef struct __attribute__((packed)) { + int16_t pdType; // 0 = memory bitmap, nonzero = physical device + uint8_t pdData[PDEVICE_MAX_SIZE - 2]; // driver-specific data +} PDevice16T; + +// ============================================================================ +// DRAWMODE - Drawing mode structure (passed to BitBlt, Output, etc.) +// ============================================================================ + +typedef struct __attribute__((packed)) { + int16_t rop2; // 0x00: raster operation (R2_*) + int16_t bkMode; // 0x02: background mode (TRANSPARENT=1, OPAQUE=2) + uint32_t bkColor; // 0x04: background color (physical) + uint32_t textColor; // 0x08: text color (physical) + int16_t tBreakExtra; // 0x0C: total break extra + int16_t breakExtra; // 0x0E: break extra per char + int16_t breakErr; // 0x10: accumulated break error + int16_t breakRem; // 0x12: break remainder + int16_t breakCount; // 0x14: break count + int16_t charExtra; // 0x16: extra pixels per char + uint32_t lbkColor; // 0x18: logical background color + uint32_t ltextColor; // 0x1C: logical text color + uint16_t icrBk; // 0x20: index to background color + uint16_t icrText; // 0x22: index to text color +} DrawMode16T; + +// Background mode constants +#define BM_TRANSPARENT 1 +#define BM_OPAQUE 2 + +// ============================================================================ +// Logical brush (for RealizeObject) +// ============================================================================ + +typedef struct __attribute__((packed)) { + uint16_t lbStyle; // Brush style + uint32_t lbColor; // Brush color (COLORREF) + int16_t lbHatch; // Hatch pattern + uint32_t lbBkColor; // Background color (Win 3.1) +} LogBrush16T; + +// Brush styles +#define BS_SOLID 0 +#define BS_HOLLOW 1 +#define 
BS_NULL 1 +#define BS_HATCHED 2 +#define BS_PATTERN 3 +#define BS_DIBPATTERN 5 + +// Hatch styles +#define HS_HORIZONTAL 0 +#define HS_VERTICAL 1 +#define HS_FDIAGONAL 2 +#define HS_BDIAGONAL 3 +#define HS_CROSS 4 +#define HS_DIAGCROSS 5 + +// ============================================================================ +// Logical pen (for RealizeObject) +// ============================================================================ + +typedef struct __attribute__((packed)) { + uint16_t lopnStyle; // Pen style + Point16T lopnWidth; // Pen width + uint32_t lopnColor; // Pen color (COLORREF) +} LogPen16T; + +// Pen styles +#define PS_SOLID 0 +#define PS_DASH 1 +#define PS_DOT 2 +#define PS_DASHDOT 3 +#define PS_DASHDOTDOT 4 +#define PS_NULL 5 +#define PS_INSIDEFRAME 6 + +// ============================================================================ +// CURSORINFO - Cursor shape description +// ============================================================================ + +typedef struct __attribute__((packed)) { + int16_t csHotX; // Hotspot X + int16_t csHotY; // Hotspot Y + int16_t csWidth; // Cursor width + int16_t csHeight; // Cursor height + int16_t csWidthB; // Width in bytes + int16_t csColor; // Planes * bitsPixel +} CursorInfo16T; + +// ============================================================================ +// Enable() style parameter values +// ============================================================================ + +#define ENABLE_INQUIRE 0 // First call: fill GDIINFO +#define ENABLE_ENABLE 1 // Second call: initialize PDEVICE + +// ============================================================================ +// Output() style values +// ============================================================================ + +#define OS_ARC 3 +#define OS_SCANLINES 4 +#define OS_RECTANGLE 6 +#define OS_ELLIPSE 7 +#define OS_MARKER 8 +#define OS_POLYLINE 18 +#define OS_ALTPOLYGON 22 +#define OS_WINDPOLYGON 20 +#define OS_PIE 23 +#define OS_POLYMARKER 24 
+#define OS_CHORD 39 +#define OS_CIRCLE 55 +#define OS_ROUNDRECT 72 + +// ============================================================================ +// Control() function codes +// ============================================================================ + +#define CTRL_GETSCALINGFACTOR 14 +#define CTRL_RESETDEVICE 128 +#define CTRL_MOUSETRAILS 39 + +// ============================================================================ +// RealizeObject() styles +// ============================================================================ + +#define OBJ_PEN 1 +#define OBJ_BRUSH 2 +#define OBJ_FONT 3 + +// ============================================================================ +// Physical brush/pen structures (driver-specific, maximum size) +// ============================================================================ + +#define PHYS_OBJ_MAX_SIZE 128 + +typedef struct __attribute__((packed)) { + uint8_t data[PHYS_OBJ_MAX_SIZE]; +} PhysObj16T; + +// ============================================================================ +// DIBENGINE structures (for drivers that use the DIB engine) +// ============================================================================ + +// DIB_BitmapInfo passed to DIB engine functions +typedef struct __attribute__((packed)) { + int16_t bmType; + int16_t bmWidth; + int16_t bmHeight; + int16_t bmWidthBytes; + uint8_t bmPlanes; + uint8_t bmBitsPixel; + uint32_t bmBits; // Far pointer to bits (as DWORD) + uint32_t bmWidthPlanes; + uint32_t bmBitsLong; // Selector:0 far pointer + uint16_t bmSegmentIndex; + uint16_t bmScanSegment; + uint16_t bmFillBytes; + uint16_t reserved1; + uint16_t reserved2; +} DibBitmapInfo16T; + +// DIB engine PDEVICE extension (placed at start of PDEVICE by DIB-based drivers) +typedef struct __attribute__((packed)) { + int16_t deType; // Device type + uint16_t deWidth; // Width in pixels + uint16_t deHeight; // Height in pixels + uint16_t deWidthBytes; // Bytes per scan line + uint8_t dePlanes; // Number 
of planes + uint8_t deBitsPixel; // Bits per pixel + uint32_t delpPDevice; // Pointer to next PDEVICE + uint32_t dlpColorTable; // Pointer to color table + // ... additional fields follow +} DibPDevice16T; + +#endif // WINDDI_H diff --git a/win31drv/windrv.c b/win31drv/windrv.c new file mode 100644 index 0000000..c6aadf3 --- /dev/null +++ b/win31drv/windrv.c @@ -0,0 +1,3301 @@ +// ============================================================================ +// windrv.c - Main driver interface +// +// Implements the public windrv.h API by coordinating the NE loader, +// thunking layer, and Windows API stubs to load and use Windows 3.x +// display drivers from DOS programs compiled with DJGPP. +// ============================================================================ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "windrv.h" +#include "wintypes.h" +#include "winddi.h" +#include "neformat.h" +#include "neload.h" +#include "thunk.h" +#include "winstub.h" +#include "log.h" + +// ============================================================================ +// Driver instance structure (opaque handle) +// ============================================================================ + +struct WdrvDriverS { + NeModuleT neMod; + char filePath[256]; + + // DDI entry point addresses (16-bit selector:offset) + struct { + uint16_t sel; + uint16_t off; + bool present; + } ddiEntry[DDI_MAX_ORDINAL]; + + // Device info from Enable (style=0) call + GdiInfo16T gdiInfo; + bool gdiInfoValid; + + // GDI objects embedded within DGROUP. + // Windows 3.x drivers expect all GDI objects (PDEVICE, brush, + // drawMode) to share the same segment, because in Win3.1 they + // are all in the global GDI heap. When the driver does e.g. + // "lds si, lpBrush" it expects DS to still cover DGROUP. + // We achieve this by allocating objects at offsets within the + // DGROUP segment, so every far pointer uses autoDataSel. 
+ uint32_t dgroupObjBase; // Start offset of object area in DGROUP + + // Physical device structure (within DGROUP) + uint16_t pdevOff; // Offset within DGROUP + uint32_t pdevLinear; // Linear address for C access + uint32_t pdevSize; // Allocated size + + // Logical brush (within DGROUP, input to RealizeObject) + uint16_t logBrushOff; + uint32_t logBrushLinear; + + // Physical brush (within DGROUP, output of RealizeObject) + uint16_t brushOff; + uint32_t brushLinear; + uint32_t brushRealizedColor; // Color of last realized brush + bool brushRealized; + + // Logical pen (within DGROUP, input to RealizeObject) + uint16_t logPenOff; + uint32_t logPenLinear; + + // Physical pen (within DGROUP, output of RealizeObject) + uint16_t penOff; + uint32_t penLinear; + uint32_t penRealizedColor; + bool penRealized; + + // Physical color (within DGROUP, output of ColorInfo) + uint16_t physColorOff; + uint32_t physColorLinear; + + // Draw mode (within DGROUP) + uint16_t drawModeOff; + uint32_t drawModeLinear; + + // Current state + bool enabled; + uint32_t currentColor; + + // Video RAM mapping + void *vramPtr; + uint32_t vramPhysAddr; + uint32_t vramSize; + uint32_t vramLinear; + int32_t pitch; + + // Display Y offset: the S3 driver writes an 8x8 color brush pattern + // to a fixed VRAM location (~(144,1)-(151,8)) during dithered fills. + // We shift the CRTC display start down by this many scanlines so the + // scratch area is off-screen, and add the offset to all Y coordinates. 
+ int16_t dispYOffset; + bool isS3; +}; + +// ============================================================================ +// Global state +// ============================================================================ + +static ThunkContextT gThunkCtx; +static StubContextT gStubCtx; +static bool gInitialized = false; +static int32_t gLastError = WDRV_OK; +static bool gDebug = false; +static bool gIsS3 = false; + + +// Forward declarations +static FarPtr16T importResolver(const char *moduleName, uint16_t ordinal, const char *funcName); +static bool resolveDriverEntries(struct WdrvDriverS *drv); +static bool extendDgroupForObjects(struct WdrvDriverS *drv); +static bool allocPDevice(struct WdrvDriverS *drv); +static bool allocDrawMode(struct WdrvDriverS *drv); +static bool allocBrushBuffers(struct WdrvDriverS *drv); +static bool allocPenBuffers(struct WdrvDriverS *drv); +static bool realizeBrush(struct WdrvDriverS *drv, uint32_t color); +static bool realizePen(struct WdrvDriverS *drv, uint32_t color); +static uint32_t colorToPhys(struct WdrvDriverS *drv, uint32_t colorRef); +static void setDisplayStart(struct WdrvDriverS *drv, uint32_t byteOffset); + +static void freeDrawObjects(struct WdrvDriverS *drv); +static uint16_t alloc16BitBlock(uint32_t size, uint32_t *linearOut); +static void free16BitBlock(uint16_t sel, uint32_t linear); +static void setError(int32_t err); +static void waitForEngine(void); + +static void dbg(const char *fmt, ...); +static void patchPrologs(NeModuleT *mod); +static void patchVflatdStackBug(NeModuleT *mod); +static void patchVflatdBypassCall(NeModuleT *mod); +static bool installInt10hReflector(void); +static void removeInt10hReflector(void); +static bool installDpmi300Proxy(void); +static void removeDpmi300Proxy(void); +static bool patchDoInt10h(struct WdrvDriverS *drv); +static bool patchBiosDataAccess(struct WdrvDriverS *drv); +static void patchWinFlags(struct WdrvDriverS *drv, uint16_t oldFlags, uint16_t newFlags); +static bool 
installInt2FhHandler(void); +static void removeInt2FhHandler(void); +static bool installExceptionCapture(void); +static void removeExceptionCapture(void); + +// ============================================================================ +// INT 10h (Video BIOS) reflector +// +// Win 3.x display drivers call INT 10h for video mode setting and BIOS +// queries. In protected mode, these calls won't reach the real-mode BIOS +// unless we intercept them and use DPMI to simulate a real-mode interrupt. +// ============================================================================ + +static __dpmi_paddr gOldInt10hVec; +static bool gInt10hInstalled = false; + +// Globals for the raw INT 10h handler assembly stub. +// Non-static so the asm symbols (prefixed with _) are accessible. +uint16_t gInt10hDsSel; // DJGPP DS selector +uint32_t gInt10hSavedSS; // Interrupted SS +uint32_t gInt10hSavedESP; // Interrupted ESP +uint32_t gInt10hSavedFS; // Interrupted FS +uint8_t gInt10hStack[4096] __attribute__((aligned(16))); // Handler stack +uint32_t gInt10hStackTop; // Top of handler stack + +// Diagnostic: count INT 10h handler entries +volatile uint32_t gInt10hEntryCount = 0; +volatile uint32_t gInt10hLastSS = 0; +volatile uint32_t gInt10hLastESP = 0; + +// ============================================================================ +// Exception capture - captures primary fault CS:EIP before DJGPP's handler +// (which may itself crash handling exceptions from 16-bit code). 
+// +// DPMI 0.9 exception frame on the locked exception stack: +// ESP+0x00: Return EIP (to DPMI host, for RETF) +// ESP+0x04: Return CS +// ESP+0x08: Error code +// ESP+0x0C: Faulting EIP +// ESP+0x10: Faulting CS +// ESP+0x14: Faulting EFLAGS +// ESP+0x18: Faulting ESP +// ESP+0x1C: Faulting SS +// ============================================================================ + +volatile uint32_t gFaultCaptured = 0; +volatile uint32_t gFaultNum = 0; +volatile uint32_t gFaultErr = 0; +volatile uint32_t gFaultEIP = 0; +volatile uint32_t gFaultCS = 0; +volatile uint32_t gFaultESP = 0; +volatile uint32_t gFaultSS = 0; +volatile uint32_t gFaultEAX = 0; +volatile uint32_t gFaultEBX = 0; +volatile uint32_t gFaultECX = 0; +volatile uint32_t gFaultEDX = 0; +volatile uint32_t gFaultESI = 0; +volatile uint32_t gFaultEDI = 0; +volatile uint32_t gFaultEBP = 0; +volatile uint32_t gFaultDS = 0; +volatile uint32_t gFaultES = 0; +uint8_t gFaultStack[4096] __attribute__((aligned(16))); +uint32_t gFaultStackTop; + +// Packed 48-bit far pointers for chaining to old exception handlers. +// ljmp indirect reads offset32 + selector16 (6 bytes), so no padding allowed. +typedef struct __attribute__((packed)) { + uint32_t offset; + uint16_t selector; +} FarPtr48T; + +FarPtr48T gOldExc0dFar; +FarPtr48T gOldExc0eFar; + +static __dpmi_paddr gOldExc0D; +static __dpmi_paddr gOldExc0E; +static bool gExcCaptureInstalled = false; + +static __dpmi_paddr gOldInt2FhVec; +FarPtr48T gOldInt2FhFar; +static bool gInt2FhInstalled = false; + +// Saved register state for the raw INT 10h handler. +// Layout matches the save/restore sequence in the assembly stub. 
+typedef struct __attribute__((packed)) { + uint32_t edi; // +0 + uint32_t esi; // +4 + uint32_t ebp; // +8 + uint32_t _reserved; // +12 (alignment padding) + uint32_t ebx; // +16 + uint32_t edx; // +20 + uint32_t ecx; // +24 + uint32_t eax; // +28 + uint32_t es; // +32 (zero-extended from 16-bit) + uint32_t ds; // +36 (zero-extended from 16-bit) + uint32_t eip; // +40 (from IRET frame) + uint32_t cs; // +44 (from IRET frame) + uint32_t eflags; // +48 (from IRET frame) +} Int10FrameT; + +// Non-static so the asm symbol _gInt10Frame is accessible. +Int10FrameT gInt10Frame; + +// Worker function called from the assembly stub. +// Non-static so the asm symbol _int10hWorker is accessible. +void int10hWorker(Int10FrameT *frame) +{ + __dpmi_regs rRegs; + memset(&rRegs, 0, sizeof(rRegs)); + + uint16_t func = (uint16_t)frame->eax; + + // NOTE: No dbg()/logErr() here — file I/O from the INT 10h handler corrupts + // callback state (observed: GlobalDOSAlloc params garbled after 4F15h stub). + + rRegs.x.ax = (uint16_t)frame->eax; + rRegs.x.bx = (uint16_t)frame->ebx; + rRegs.x.cx = (uint16_t)frame->ecx; + rRegs.x.dx = (uint16_t)frame->edx; + rRegs.x.si = (uint16_t)frame->esi; + rRegs.x.di = (uint16_t)frame->edi; + rRegs.x.bp = (uint16_t)frame->ebp; + + // VBE Set Mode: translate S3 OEM modes to VESA standard modes. 
+ if (func == 0x4F02) { + uint16_t origBX = rRegs.x.bx; + uint16_t modeNum = origBX & 0x3FFF; + uint16_t flags = origBX & 0xC000; + + uint16_t vesaMode = modeNum; + switch (modeNum) { + case 0x0201: vesaMode = 0x0101; break; // 640x480x256 + case 0x0202: vesaMode = 0x0103; break; // 800x600x256 + case 0x0203: vesaMode = 0x0103; break; // 800x600x256 + case 0x0204: vesaMode = 0x0105; break; // 1024x768x256 + case 0x0205: vesaMode = 0x0105; break; // 1024x768x256 + } + + if (vesaMode != modeNum) { + rRegs.x.bx = flags | vesaMode; + logErr("INT10: VBE mode 0x%04X -> 0x%04X (S3 OEM -> VESA)\n", + origBX, rRegs.x.bx); + } + } + + // Stub out VBE functions we cannot support. + if (func == 0x4F0A || func == 0x4F15) { + frame->eax = (frame->eax & 0xFFFF0000) | 0x0100; + return; + } + + // ================================================================ + // Translate ES for real-mode reflection. + // + // The driver's ES is a PM selector. Real-mode INT 10h expects a + // real-mode paragraph segment. If ES points to conventional memory + // (<1MB), compute the real-mode segment directly. If ES points to + // extended memory (>=1MB), bounce through the DOS transfer buffer. + // + // Only specific sub-functions use ES as a buffer pointer. Each + // function family uses a different offset register: + // VBE 4Fxx: ES:DI + // AH=10h: ES:DX (palette) + // AH=11h: ES:BP (font data) + // AH=1Bh: ES:DI (state info) + // ================================================================ + uint16_t pmES = (uint16_t)frame->es; + bool useTB = false; + uint32_t tb = 0; + uint32_t copySize = 0; + bool copyIn = false; // PM -> transfer buffer before INT + bool copyOut = false; // transfer buffer -> PM after INT + + // Identify which offset register this function uses, and determine + // the exact copy size and direction. pmOff holds the PM-side + // offset from the appropriate register; offReg identifies which + // real-mode register to update after translation. 
+ // 0 = DI, 1 = DX, 2 = BP + uint16_t pmOff = 0; + int offReg = 0; + bool needsES = false; + + uint8_t ah = (uint8_t)(func >> 8); + uint8_t al = (uint8_t)(func & 0xFF); + + if ((func & 0xFF00) == 0x4F00) { + // VBE functions — ES:DI + offReg = 0; + pmOff = rRegs.x.di; + if (al == 0x00) { + needsES = true; copyIn = true; copyOut = true; copySize = 512; + } else if (al == 0x01) { + needsES = true; copyOut = true; copySize = 256; + } else if (al == 0x04) { + needsES = true; copyIn = true; copyOut = true; copySize = 1024; + } else if (al == 0x09) { + needsES = true; copyIn = true; + copySize = rRegs.x.cx * 4; + if (copySize > 4096) { + copySize = 4096; + } + } + } else if (ah == 0x10) { + // Palette functions — ES:DX + offReg = 1; + pmOff = rRegs.x.dx; + if (al == 0x02) { + // Set All Palette Registers: 17 bytes (16 regs + overscan) + needsES = true; copyIn = true; copySize = 17; + } else if (al == 0x09) { + // Read All Palette Registers: 17 bytes + needsES = true; copyOut = true; copySize = 17; + } else if (al == 0x12) { + // Set Block of DAC Color Registers: CX * 3 bytes + needsES = true; copyIn = true; + copySize = rRegs.x.cx * 3; + if (copySize > 4096) { + copySize = 4096; + } + } else if (al == 0x17) { + // Read Block of DAC Color Registers: CX * 3 bytes + needsES = true; copyOut = true; + copySize = rRegs.x.cx * 3; + if (copySize > 4096) { + copySize = 4096; + } + } + } else if (ah == 0x11) { + // Character generator — ES:BP + offReg = 2; + pmOff = rRegs.x.bp; + if (al == 0x00 || al == 0x10) { + // Load User Font: CX chars * BH bytes/char + needsES = true; copyIn = true; + copySize = rRegs.x.cx * (rRegs.x.bx >> 8); + if (copySize > 8192) { + copySize = 8192; + } + } + // AL=20/21 set interrupt vectors to ES:BP — the address must + // point at resident data, not a temporary buffer, so skip. 
+ } else if (ah == 0x1B) { + // Functionality/State Info — ES:DI, 64-byte buffer + offReg = 0; + pmOff = rRegs.x.di; + needsES = true; copyOut = true; copySize = 64; + } + + if (pmES != 0 && needsES && copySize > 0) { + unsigned long esBase; + __dpmi_get_segment_base_address(pmES, &esBase); + + if (esBase < 0x100000) { + // Conventional memory: compute real-mode ES + offset directly. + uint32_t linear = esBase + pmOff; + rRegs.x.es = (uint16_t)(linear >> 4); + uint16_t rmOff = (uint16_t)(linear & 0x0F); + if (offReg == 0) { + rRegs.x.di = rmOff; + } else if (offReg == 1) { + rRegs.x.dx = rmOff; + } else { + rRegs.x.bp = rmOff; + } + } else { + // Extended memory: bounce through the DOS transfer buffer. + tb = _go32_info_block.linear_address_of_transfer_buffer; + + if (copyIn) { + movedata(pmES, pmOff, _dos_ds, tb, copySize); + } + + rRegs.x.es = (uint16_t)(tb >> 4); + uint16_t rmOff = (uint16_t)(tb & 0x0F); + if (offReg == 0) { + rRegs.x.di = rmOff; + } else if (offReg == 1) { + rRegs.x.dx = rmOff; + } else { + rRegs.x.bp = rmOff; + } + useTB = true; + } + } + + __dpmi_simulate_real_mode_interrupt(0x10, &rRegs); + + if (useTB && copyOut) { + movedata(_dos_ds, tb, pmES, pmOff, copySize); + } + + // Update return registers. 
+ frame->eax = (frame->eax & 0xFFFF0000) | rRegs.x.ax; + frame->ebx = (frame->ebx & 0xFFFF0000) | rRegs.x.bx; + frame->ecx = (frame->ecx & 0xFFFF0000) | rRegs.x.cx; + frame->edx = (frame->edx & 0xFFFF0000) | rRegs.x.dx; + frame->esi = (frame->esi & 0xFFFF0000) | rRegs.x.si; + frame->ebp = (frame->ebp & 0xFFFF0000) | rRegs.x.bp; + frame->eflags = (frame->eflags & 0xFFFF0000) | rRegs.x.flags; + + if (!needsES) { + // No ES translation was done — pass through real-mode DI + frame->edi = (frame->edi & 0xFFFF0000) | rRegs.x.di; + } + + // Log VBE failures + if ((func & 0xFF00) == 0x4F00) { + uint16_t retAX = (uint16_t)frame->eax; + if (retAX != 0x004F) { + logErr("INT10: VBE func %04X returned AX=%04X (FAILED)\n", + func, retAX); + } + } +} + +// Raw INT 10h handler stub in assembly. +// +// The _go32_dpmi_allocate_iret_wrapper mechanism fails when an interrupt +// fires during 16-bit code execution — software interrupts are dispatched +// on the CURRENT stack (DPMI spec), so the wrapper tries to build its +// _go32_dpmi_registers structure on the 16-bit stack with a different +// SS base, producing an invalid pointer (observed: regs=0x7a2, page fault). +// +// This handler avoids the problem by: +// 1. Saving ALL registers to a global structure using CS-relative +// addressing (CS base == DS base in DJGPP) +// 2. Switching SS:ESP to a dedicated 32-bit handler stack in DJGPP's +// data segment (so SS base == DS base, safe for C library calls) +// 3. Calling the C worker function +// 4. Restoring SS:ESP and all registers from the global structure +// 5. Returning via IRET +// +// NOT re-entrant — uses global state. Acceptable because the handler +// doesn't enable interrupts, and INT 10h is a software interrupt that +// cannot nest (our worker uses DPMI INT 31h, not INT 10h). +// +// Uses FS for writes (code segments are read-only in protected mode). +// CS-relative reads are fine (readable code segment). 
// Entry: protected-mode INT 10h, running on the interrupted code's stack.
// The numeric offsets added to _gInt10Frame below are the Int10FrameT field
// offsets (edi +0 ... eflags +48).
__asm__(
    " .text\n"
    " .p2align 4\n"
    " .globl _int10hRawHandler\n"
    "_int10hRawHandler:\n"

    // ---- Save original FS, then load FS with our writable DS selector ----
    // EAX/ECX are borrowed as scratch and popped again below before the
    // register snapshot is taken.
    " pushl %eax\n"
    " pushl %ecx\n"
    " xorl %eax, %eax\n"
    " movw %fs, %ax\n"
    " movw %cs:_gInt10hDsSel, %cx\n"
    " movw %cx, %fs\n"
    " movl %eax, %fs:_gInt10hSavedFS\n"

    // Diagnostic: increment entry counter, save SS and ESP
    // (ESP is sampled while the two scratch pushes are still on the stack).
    " movl %fs:_gInt10hEntryCount, %eax\n"
    " incl %eax\n"
    " movl %eax, %fs:_gInt10hEntryCount\n"
    " xorl %eax, %eax\n"
    " movw %ss, %ax\n"
    " movl %eax, %fs:_gInt10hLastSS\n"
    " movl %esp, %fs:_gInt10hLastESP\n"

    " popl %ecx\n"
    " popl %eax\n"

    // ---- Save all GP registers to global frame via FS (writable) ----
    " movl %eax, %fs:_gInt10Frame+28\n"
    " movl %ecx, %fs:_gInt10Frame+24\n"
    " movl %edx, %fs:_gInt10Frame+20\n"
    " movl %ebx, %fs:_gInt10Frame+16\n"
    " movl %ebp, %fs:_gInt10Frame+8\n"
    " movl %esi, %fs:_gInt10Frame+4\n"
    " movl %edi, %fs:_gInt10Frame+0\n"

    // ---- Save segment registers (zero-extended to 32 bits) ----
    " xorl %eax, %eax\n"
    " movw %es, %ax\n"
    " movl %eax, %fs:_gInt10Frame+32\n"
    " movw %ds, %ax\n"
    " movl %eax, %fs:_gInt10Frame+36\n"

    // ---- Save IRET frame from interrupted stack (SS:ESP) ----
    // Read as three dwords: EIP at +0, CS at +4, EFLAGS at +8.
    " movl (%esp), %eax\n"
    " movl %eax, %fs:_gInt10Frame+40\n"
    " movl 4(%esp), %eax\n"
    " movl %eax, %fs:_gInt10Frame+44\n"
    " movl 8(%esp), %eax\n"
    " movl %eax, %fs:_gInt10Frame+48\n"

    // ---- Save SS:ESP and switch to DJGPP handler stack ----
    " movl %esp, %fs:_gInt10hSavedESP\n"
    " xorl %eax, %eax\n"
    " movw %ss, %ax\n"
    " movl %eax, %fs:_gInt10hSavedSS\n"

    // DS, ES and SS all take the selector stored in gInt10hDsSel; the
    // ESP load that follows is the first access through the new DS.
    " movw %cs:_gInt10hDsSel, %ax\n"
    " movw %ax, %ds\n"
    " movw %ax, %es\n"
    " movw %ax, %ss\n"
    " movl _gInt10hStackTop, %esp\n"

    // ---- Call C worker: int10hWorker(&gInt10Frame) ----
    " leal _gInt10Frame, %eax\n"
    " pushl %eax\n"
    " call _int10hWorker\n"
    " addl $4, %esp\n"

    // ---- Restore SS:ESP (back to interrupted code's stack) ----
    " movl %cs:_gInt10hSavedESP, %eax\n"
    " movl %cs:_gInt10hSavedSS, %ecx\n"
    " movw %cx, %ss\n"
    " movl %eax, %esp\n"

    // ---- Write modified EFLAGS back to IRET frame on stack ----
    // The C worker updates frame->eflags with real-mode return flags
    // (e.g. CF for VBE success/failure). Write it back so IRET uses it.
    " movl %cs:_gInt10Frame+48, %eax\n"
    " movl %eax, 8(%esp)\n"

    // ---- Restore GP registers from global frame (CS reads OK) ----
    " movl %cs:_gInt10Frame+0, %edi\n"
    " movl %cs:_gInt10Frame+4, %esi\n"
    " movl %cs:_gInt10Frame+8, %ebp\n"
    " movl %cs:_gInt10Frame+16, %ebx\n"
    " movl %cs:_gInt10Frame+20, %edx\n"
    " movl %cs:_gInt10Frame+24, %ecx\n"

    // ---- Restore segment registers (FS/GS always set to DGROUP) ----
    // The FS value stored in gInt10hSavedFS at entry is not reloaded here;
    // FS and GS both leave with the selector held in gCbDgroupSel.
    " movl %cs:_gCbDgroupSel, %eax\n"
    " movw %ax, %fs\n"
    " movw %ax, %gs\n"
    " movl %cs:_gInt10Frame+32, %eax\n"
    " movw %ax, %es\n"
    " movl %cs:_gInt10Frame+36, %eax\n"
    " movw %ax, %ds\n"

    // ---- Restore EAX last (was used as scratch) ----
    " movl %cs:_gInt10Frame+28, %eax\n"

    " iret\n"
);


// ============================================================================
// DPMI 0x300h (Simulate Real Mode Interrupt) proxy
//
// The VBESVGA driver's DoInt10h calls DPMI INT 31h AX=0300h from 16-bit
// code to perform real-mode INT 10h for VBE BIOS calls. CWSDPMI does
// not correctly service this DPMI function when the INT 31h originates
// from a 16-bit code segment inside a 32-bit DPMI client.
//
// Fix: after the driver's entry point has been called (which patches
// DoInt10h for 386 via SetupInt10h), we change the single "CD 31"
// (INT 31h) instruction in DoInt10h to "CD 64" (INT 64h). Our INT 64h
// handler reads the Real Mode Call Structure (RMCS) that DoInt10h built
// on the 16-bit stack, calls __dpmi_simulate_real_mode_interrupt from
// 32-bit code (which CWSDPMI handles correctly), and writes the results
// back to the RMCS so DoInt10h can unpack them normally.
+// ============================================================================ + +#define DPMI300_INT_NUM 0x64 + +static __dpmi_paddr gOldDpmi300Vec; +static bool gDpmi300Installed = false; + +// Globals for the raw handler assembly stub +uint16_t gDpmi300DsSel; +uint32_t gDpmi300SavedSS; +uint32_t gDpmi300SavedESP; +uint32_t gDpmi300SavedFS; +uint32_t gDpmi300SavedDS; +uint32_t gDpmi300SavedES; +uint32_t gDpmi300SavedGS; +uint32_t gDpmi300RmcsSel; // ES at time of interrupt (RMCS segment) +uint32_t gDpmi300RmcsEdi; // EDI at time of interrupt (RMCS offset) +uint32_t gDpmi300IntNum; // EBX at time of interrupt (BL=int number) +uint8_t gDpmi300Stack[4096] __attribute__((aligned(16))); +uint32_t gDpmi300StackTop; + +// Worker: reads RMCS, performs real-mode interrupt, writes results back. +// The DPMI RMCS layout is byte-compatible with DJGPP's __dpmi_regs (50 bytes). +void dpmi300Worker(void) +{ + uint16_t rmcsSel = (uint16_t)gDpmi300RmcsSel; + uint32_t rmcsOff = gDpmi300RmcsEdi; + uint8_t intNum = (uint8_t)gDpmi300IntNum; + + __dpmi_regs regs; + memset(®s, 0, sizeof(regs)); + movedata(rmcsSel, rmcsOff, _my_ds(), (unsigned)®s, 50); + + dbg("DPMI300: INT %02Xh AX=%04X BX=%04X ES=%04X DI=%04X SS:SP=%04X:%04X\n", + intNum, regs.x.ax, regs.x.bx, regs.x.es, regs.x.di, + regs.x.ss, regs.x.sp); + + __dpmi_simulate_real_mode_interrupt(intNum, ®s); + + dbg("DPMI300: result AX=%04X\n", regs.x.ax); + + // Dump VBE info buffer contents for VBE 4F00h + if (intNum == 0x10 && regs.x.ax == 0x004F) { + uint32_t bufLin = (uint32_t)regs.x.es * 16 + regs.x.di; + uint8_t hdr[32]; + dosmemget(bufLin, 32, hdr); + dbg("DPMI300: VBE buf[0..3]=%c%c%c%c ver=%02X%02X modes=%02X%02X:%02X%02X\n", + hdr[0], hdr[1], hdr[2], hdr[3], + hdr[5], hdr[4], + hdr[0x0F], hdr[0x0E], hdr[0x11], hdr[0x10]); + // Mode list pointer at offset 0x0E: offset(word) + segment(word) + uint16_t modesOff = hdr[0x0E] | ((uint16_t)hdr[0x0F] << 8); + uint16_t modesSeg = hdr[0x10] | ((uint16_t)hdr[0x11] << 8); + 
dbg("DPMI300: VBE modes ptr %04X:%04X (buf at %04X:%04X)\n", + modesSeg, modesOff, regs.x.es, regs.x.di); + // Read first 16 mode numbers + uint32_t modesLin = (uint32_t)modesSeg * 16 + modesOff; + uint16_t modes[16]; + dosmemget(modesLin, 32, modes); + dbg("DPMI300: VBE modes:"); + for (int i = 0; i < 16 && modes[i] != 0xFFFF; i++) { + dbg(" %03X", modes[i]); + } + dbg("\n"); + } + + movedata(_my_ds(), (unsigned)®s, rmcsSel, rmcsOff, 50); +} + +extern void dpmi300RawHandler(void); + +// Raw INT 64h handler. Same save/restore pattern as the INT 10h reflector +// but simpler: we only need the RMCS pointer (ES:EDI) and interrupt number +// (BL) from the interrupted context. All GP and segment registers are +// preserved across the call — the only visible side effect is that the +// RMCS on the driver's stack is updated and the carry flag is cleared. +__asm__( + " .text\n" + " .p2align 4\n" + " .globl _dpmi300RawHandler\n" + "_dpmi300RawHandler:\n" + + // ---- Save FS, load FS with our DS selector ---- + " pushl %eax\n" + " pushl %ecx\n" + " xorl %eax, %eax\n" + " movw %fs, %ax\n" + " movw %cs:_gDpmi300DsSel, %cx\n" + " movw %cx, %fs\n" + " movl %eax, %fs:_gDpmi300SavedFS\n" + + // ---- Save communication values: ES (RMCS sel), EDI, EBX ---- + " xorl %eax, %eax\n" + " movw %es, %ax\n" + " movl %eax, %fs:_gDpmi300SavedES\n" + " movl %eax, %fs:_gDpmi300RmcsSel\n" + " movl %edi, %fs:_gDpmi300RmcsEdi\n" + " movl %ebx, %fs:_gDpmi300IntNum\n" + + // ---- Save remaining segment registers ---- + " xorl %eax, %eax\n" + " movw %ds, %ax\n" + " movl %eax, %fs:_gDpmi300SavedDS\n" + " movw %gs, %ax\n" + " movl %eax, %fs:_gDpmi300SavedGS\n" + + // ---- Restore scratch, then PUSHAL to save all GP regs ---- + " popl %ecx\n" + " popl %eax\n" + " pushal\n" + + // ---- Save interrupted SS:ESP and switch to handler stack ---- + " movw %cs:_gDpmi300DsSel, %ax\n" + " movw %ax, %fs\n" + " movl %esp, %fs:_gDpmi300SavedESP\n" + " xorl %eax, %eax\n" + " movw %ss, %ax\n" + " movl %eax, 
%fs:_gDpmi300SavedSS\n" + + " movw %fs:_gDpmi300DsSel, %ax\n" + " movw %ax, %ds\n" + " movw %ax, %es\n" + " movw %ax, %ss\n" + " movl _gDpmi300StackTop, %esp\n" + + // ---- Call C worker ---- + " call _dpmi300Worker\n" + + // ---- Restore interrupted SS:ESP ---- + " movl %cs:_gDpmi300SavedSS, %ecx\n" + " movl %cs:_gDpmi300SavedESP, %eax\n" + " movw %cx, %ss\n" + " movl %eax, %esp\n" + + // ---- POPAL to restore all GP registers ---- + " popal\n" + + // ---- Restore segment registers ---- + " pushl %eax\n" + " movl %cs:_gDpmi300SavedFS, %eax\n" + " movw %ax, %fs\n" + " movl %cs:_gDpmi300SavedGS, %eax\n" + " movw %ax, %gs\n" + " movl %cs:_gDpmi300SavedES, %eax\n" + " movw %ax, %es\n" + " movl %cs:_gDpmi300SavedDS, %eax\n" + " movw %ax, %ds\n" + " popl %eax\n" + + // ---- Clear carry flag in IRET frame EFLAGS (success) ---- + " andl $0xFFFFFFFE, 8(%esp)\n" + + " iret\n" +); + + +// Worker function for exception handler — logs full diagnostics and exits. +// Non-static so the asm symbol _faultWorker is accessible. 
// Reads the gFault* globals filled in by the raw exception stubs, writes a
// register/code/stack dump through logErr(), then terminates the program
// with INT 21h AX=4CFFh. Does not return.
void faultWorker(void)
{
    logErr("\n=== EXCEPTION #%" PRIu32 " ===\n", gFaultNum);
    logErr(" CS:EIP = %04" PRIX32 ":%08" PRIX32 " error=%04" PRIX32 "\n",
           gFaultCS, gFaultEIP, gFaultErr);
    logErr(" SS:ESP = %04" PRIX32 ":%08" PRIX32 "\n", gFaultSS, gFaultESP);
    logErr(" eax=%08" PRIX32 " ebx=%08" PRIX32 " ecx=%08" PRIX32 " edx=%08" PRIX32 "\n",
           gFaultEAX, gFaultEBX, gFaultECX, gFaultEDX);
    logErr(" esi=%08" PRIX32 " edi=%08" PRIX32 " ebp=%08" PRIX32 "\n",
           gFaultESI, gFaultEDI, gFaultEBP);
    logErr(" ds=%04" PRIX32 " es=%04" PRIX32 "\n", gFaultDS, gFaultES);

    // Dump instruction bytes at CS:EIP using _farpeekb
    // (movedata fails on 16-bit code segments in fault context)
    // The stubs store CS as a full dword; only the low 16 bits are used as
    // the selector here.
    uint16_t faultSel = (uint16_t)gFaultCS;
    uint32_t faultOff = gFaultEIP;
    unsigned long csBase;
    if (__dpmi_get_segment_base_address(faultSel, &csBase) == 0) {
        unsigned csLimit = __dpmi_get_segment_limit(faultSel);
        logErr(" cs: base=%08lX limit=%04X\n", csBase, csLimit);
        logErr(" code:");
        // Up to 16 bytes, stopping at the segment limit so the dump itself
        // cannot fault.
        for (int i = 0; i < 16 && (faultOff + i) <= csLimit; i++) {
            logErr(" %02X", _farpeekb(faultSel, faultOff + i));
        }
        logErr("\n");
    }

    // Dump segment info for DS and ES
    unsigned long dsBase;
    unsigned long esBase;
    if ((uint16_t)gFaultDS != 0 && __dpmi_get_segment_base_address((uint16_t)gFaultDS, &dsBase) == 0) {
        logErr(" ds: base=%08lX\n", dsBase);
    }
    if ((uint16_t)gFaultES != 0 && __dpmi_get_segment_base_address((uint16_t)gFaultES, &esBase) == 0) {
        logErr(" es: base=%08lX\n", esBase);
    }

    // Dump 32 words from the faulting stack using _farpeekw
    if ((uint16_t)gFaultSS != 0) {
        unsigned ssLimit = __dpmi_get_segment_limit((uint16_t)gFaultSS);
        logErr(" ss: limit=%04X\n", ssLimit);
        logErr(" stack:");
        // Each word needs both of its bytes inside the limit (hence the +1).
        for (int i = 0; i < 32 && (gFaultESP + i * 2 + 1) <= ssLimit; i++) {
            if (i == 16) {
                logErr("\n ");
            }
            logErr(" %04X", _farpeekw((uint16_t)gFaultSS, gFaultESP + i * 2));
        }
        logErr("\n");
    }

    // Exit cleanly via DOS
    __asm__ volatile ("movl $0x4CFF, %%eax; int $0x21" ::: "eax");
    __builtin_unreachable();
}

// Raw exception handlers for GPF (#13) and PF (#14).
//
// These capture fault state (GP registers, segment registers, instruction
// bytes) then switch to a private stack and call faultWorker() to log
// full diagnostics and exit cleanly (avoiding secondary crashes from
// DJGPP's handler trying to process faults from 16-bit code).
//
// DPMI exception frame on stack:
//   ESP+0x00: Return EIP (to DPMI host, for RETF)
//   ESP+0x04: Return CS
//   ESP+0x08: Error code
//   ESP+0x0C: Faulting EIP
//   ESP+0x10: Faulting CS
//   ESP+0x14: Faulting EFLAGS
//   ESP+0x18: Faulting ESP
//   ESP+0x1C: Faulting SS
//
// After pushing EAX, offsets shift by +4.
// (The stubs pop EAX again before reading the frame, so the offsets used
// in the code below are the unshifted ones listed above.)
//
// Both stubs load FS from gInt10hDsSel without saving the previous FS, so
// on the chain path (label 1) the old handler is entered with FS changed.

__asm__(
    " .text\n"
    " .p2align 4\n"
    " .globl _exc0dRawHandler\n"
    "_exc0dRawHandler:\n"
    " pushl %eax\n"
    " movw %cs:_gInt10hDsSel, %ax\n"
    " movw %ax, %fs\n"
    " cmpl $0, %fs:_gFaultCaptured\n"
    " jne 1f\n"
    // First fault — capture everything
    " movl $1, %fs:_gFaultCaptured\n"
    " movl $13, %fs:_gFaultNum\n"
    // Save GP registers via FS
    " popl %eax\n"
    " movl %eax, %fs:_gFaultEAX\n"
    " movl %ebx, %fs:_gFaultEBX\n"
    " movl %ecx, %fs:_gFaultECX\n"
    " movl %edx, %fs:_gFaultEDX\n"
    " movl %esi, %fs:_gFaultESI\n"
    " movl %edi, %fs:_gFaultEDI\n"
    " movl %ebp, %fs:_gFaultEBP\n"
    " xorl %eax, %eax\n"
    " movw %ds, %ax\n"
    " movl %eax, %fs:_gFaultDS\n"
    " movw %es, %ax\n"
    " movl %eax, %fs:_gFaultES\n"
    // Save exception frame fields (no pushed EAX shift now)
    " movl 0x08(%esp), %eax\n"
    " movl %eax, %fs:_gFaultErr\n"
    " movl 0x0C(%esp), %eax\n"
    " movl %eax, %fs:_gFaultEIP\n"
    " movl 0x10(%esp), %eax\n"
    " movl %eax, %fs:_gFaultCS\n"
    " movl 0x18(%esp), %eax\n"
    " movl %eax, %fs:_gFaultESP\n"
    " movl 0x1C(%esp), %eax\n"
    " movl %eax, %fs:_gFaultSS\n"
    // Switch to our private stack and call faultWorker
    " movw %fs:_gInt10hDsSel, %ax\n"
    " movw %ax, %ds\n"
    " movw %ax, %es\n"
    " movw %ax, %ss\n"
    " movl _gFaultStackTop, %esp\n"
    " call _faultWorker\n"
    // faultWorker doesn't return, but just in case:
    " hlt\n"
    "1:\n"
    // Secondary fault — chain to old handler
    " popl %eax\n"
    " ljmp *%cs:_gOldExc0dFar\n"
);

// Page-fault (#14) twin of the handler above: identical sequence, records
// fault number 14 and chains to gOldExc0eFar on a secondary fault.
__asm__(
    " .text\n"
    " .p2align 4\n"
    " .globl _exc0eRawHandler\n"
    "_exc0eRawHandler:\n"
    " pushl %eax\n"
    " movw %cs:_gInt10hDsSel, %ax\n"
    " movw %ax, %fs\n"
    " cmpl $0, %fs:_gFaultCaptured\n"
    " jne 1f\n"
    // First fault — capture everything
    " movl $1, %fs:_gFaultCaptured\n"
    " movl $14, %fs:_gFaultNum\n"
    " popl %eax\n"
    " movl %eax, %fs:_gFaultEAX\n"
    " movl %ebx, %fs:_gFaultEBX\n"
    " movl %ecx, %fs:_gFaultECX\n"
    " movl %edx, %fs:_gFaultEDX\n"
    " movl %esi, %fs:_gFaultESI\n"
    " movl %edi, %fs:_gFaultEDI\n"
    " movl %ebp, %fs:_gFaultEBP\n"
    " xorl %eax, %eax\n"
    " movw %ds, %ax\n"
    " movl %eax, %fs:_gFaultDS\n"
    " movw %es, %ax\n"
    " movl %eax, %fs:_gFaultES\n"
    " movl 0x08(%esp), %eax\n"
    " movl %eax, %fs:_gFaultErr\n"
    " movl 0x0C(%esp), %eax\n"
    " movl %eax, %fs:_gFaultEIP\n"
    " movl 0x10(%esp), %eax\n"
    " movl %eax, %fs:_gFaultCS\n"
    " movl 0x18(%esp), %eax\n"
    " movl %eax, %fs:_gFaultESP\n"
    " movl 0x1C(%esp), %eax\n"
    " movl %eax, %fs:_gFaultSS\n"
    " movw %fs:_gInt10hDsSel, %ax\n"
    " movw %ax, %ds\n"
    " movw %ax, %es\n"
    " movw %ax, %ss\n"
    " movl _gFaultStackTop, %esp\n"
    " call _faultWorker\n"
    " hlt\n"
    "1:\n"
    " popl %eax\n"
    " ljmp *%cs:_gOldExc0eFar\n"
);


// Raw INT 2Fh handler for Windows API emulation.
//
// Windows 3.x display drivers call INT 2Fh to check for the Windows
// Enhanced Mode environment. Without this handler, the calls are
// reflected to real mode where DOS returns "not installed", causing
// the driver's initialization to fail.
+// +// Handled functions: +// AX=1600h: Windows Enhanced Mode installation check +// Returns AL=03h, AH=0Ah (Windows 3.10 Enhanced Mode) +// AX=4000h-400Ah: Virtual DMA Services (VDS) +// Returns carry clear (success, no-op) +// AX=4010h+: Windows/386 VMM API calls +// Returns AX=0 (not present, proceed normally) +// +// All other INT 2Fh calls are chained to the previous handler. +// This handler modifies only AX and returns via IRET, so no stack +// switching is needed (unlike the INT 10h handler). +extern void int2FhRawHandler(void); + +__asm__( + " .text\n" + " .p2align 4\n" + " .globl _int2FhRawHandler\n" + "_int2FhRawHandler:\n" + " cmpw $0x1600, %ax\n" + " je 1f\n" + " cmpb $0x40, %ah\n" + " je 3f\n" + " ljmp *%cs:_gOldInt2FhFar\n" + "1:\n" + // Windows 3.10 Enhanced Mode is "running" + " movw $0x0A03, %ax\n" + " iret\n" + "3:\n" + // AH=40h: VDS and Windows/386 API calls + // VDS calls (AL=00h-0Ah): return carry clear (success, no-op) + // VMM calls (AL=10h+): return AX=0 (not present) + " cmpb $0x0A, %al\n" + " jbe 4f\n" + // VMM/Win386 API: not present + " xorw %ax, %ax\n" + " iret\n" + "4:\n" + // VDS: success (carry clear) + " clc\n" + " iret\n" +); + + + + +// ============================================================================ +// Library initialization +// ============================================================================ + +int32_t wdrvInit(void) +{ + if (gInitialized) { + return WDRV_OK; + } + + // Initialize the thunking layer + if (!thunkInit(&gThunkCtx)) { + setError(WDRV_ERR_THUNK_FAILED); + return gLastError; + } + + // Initialize the Windows API stubs + if (!stubInit(&gStubCtx, &gThunkCtx)) { + thunkShutdown(&gThunkCtx); + setError(WDRV_ERR_INIT); + return gLastError; + } + + // Install PM interrupt reflector for INT 10h. + // CWSDPMI's default reflection doesn't work correctly when the + // interrupt fires from 16-bit code segments (stack frame mismatch). 
+ if (!installInt10hReflector()) { + logErr("windrv: warning: could not install INT 10h reflector\n"); + } + + // Install DPMI 0x300h proxy on INT 64h. + // CWSDPMI doesn't correctly handle INT 31h AX=0300h (simulate real-mode + // interrupt) when called from 16-bit code segments within a 32-bit DPMI + // client. DoInt10h in the VBESVGA driver calls INT 31h from 16-bit code + // to perform VBE calls. We redirect those to our proxy which performs the + // same operation from 32-bit code via __dpmi_simulate_real_mode_interrupt. + if (!installDpmi300Proxy()) { + logErr("windrv: warning: could not install DPMI 300h proxy\n"); + } + + // Install PM handler for INT 2Fh (Windows API emulation). + // The driver calls INT 2Fh AX=1600h to check for Windows Enhanced + // Mode. Without this, the check fails and Enable() returns 0. + // This raw handler only intercepts specific AX values and chains + // to the old handler for everything else, so it's safe for + // DJGPP/CWSDPMI internal INT 2Fh usage. 
+ if (!installInt2FhHandler()) { + logErr("windrv: warning: could not install INT 2Fh handler\n"); + } + + // Install exception capture to diagnose primary fault CS:EIP + // (must be after installInt10hReflector which sets gInt10hDsSel) + if (!installExceptionCapture()) { + logErr("windrv: warning: could not install exception capture\n"); + } + + // Enable near pointer access for direct memory operations + if (__djgpp_nearptr_enable() == 0) { + logErr("windrv: warning: near pointer access not available\n"); + } + + gInitialized = true; + setError(WDRV_OK); + return WDRV_OK; +} + + +void wdrvShutdown(void) +{ + if (!gInitialized) { + return; + } + + removeExceptionCapture(); + removeInt2FhHandler(); + removeDpmi300Proxy(); + removeInt10hReflector(); + stubShutdown(&gStubCtx); + thunkShutdown(&gThunkCtx); + __djgpp_nearptr_disable(); + + gInitialized = false; +} + + +// ============================================================================ +// Driver loading +// ============================================================================ + +WdrvHandleT wdrvLoadDriver(const char *driverPath) +{ + if (!gInitialized) { + setError(WDRV_ERR_INIT); + return NULL; + } + + struct WdrvDriverS *drv = (struct WdrvDriverS *)calloc(1, sizeof(struct WdrvDriverS)); + if (!drv) { + setError(WDRV_ERR_NO_MEMORY); + return NULL; + } + + strncpy(drv->filePath, driverPath, sizeof(drv->filePath) - 1); + + // Load the NE module + if (gDebug) { + extern void neSetDebug(bool enable); + neSetDebug(true); + } + + if (!neLoadModule(&drv->neMod, driverPath, importResolver)) { + setError(WDRV_ERR_LOAD_FAILED); + free(drv); + return NULL; + } + + stubSetModule(&gStubCtx, &drv->neMod); + + if (gDebug) { + neDumpModule(&drv->neMod); + } + + // Extend DGROUP to include space for GDI objects (PDEVICE, brush, etc.) 
+ if (!extendDgroupForObjects(drv)) { + setError(WDRV_ERR_NO_MEMORY); + neUnloadModule(&drv->neMod); + free(drv); + return NULL; + } + + // Set the driver's DGROUP selector so the thunk loads DS correctly + gThunkCtx.dgroupSel = drv->neMod.autoDataSel; + dbg("windrv: DGROUP selector = 0x%04X\n", gThunkCtx.dgroupSel); + + // Patch Windows PROLOG_0 sequences in all code segments. + // In real Windows, the module loader converts the 3-byte prolog + // "mov ax, ds; nop" (8C D8 90) to "mov ax, " (B8 xx xx) + // so that AX always gets the correct DGROUP selector regardless of + // the current DS value at function entry. Without this, internal + // near/far calls within the driver (where AX has been clobbered) + // will fault when the prolog tries to load DS from AX. + patchPrologs(&drv->neMod); + + // Patch the VFLATD initialization routine's stack imbalance bug. + // The function at seg5:0x2368 pushes 20 bytes of intermediate values + // during API calls but never cleans them before ret. In Windows 3.x + // the caller restores SP from BP so this is harmless, but our thunk + // relies on a clean ret. + patchVflatdStackBug(&drv->neMod); + + // Bypass the VFLATD API call for framebuffer mapping. + // The driver checks [8889] to choose between VFLATD (VxD call through + // a far pointer at [0D76]) and DPMI (INT 31h to map physical memory). + // Since VFLATD isn't available, force the DPMI path which uses standard + // DPMI functions (0800h, 0007h, 0008h) that CWSDPMI supports. 
+ patchVflatdBypassCall(&drv->neMod); + + // Resolve DDI entry points + if (!resolveDriverEntries(drv)) { + setError(WDRV_ERR_NO_ENTRY); + neUnloadModule(&drv->neMod); + free(drv); + return NULL; + } + + // Verify that at least Enable and Disable are present + if (!drv->ddiEntry[DDI_ORD_ENABLE].present || + !drv->ddiEntry[DDI_ORD_DISABLE].present) { + logErr("windrv: driver missing Enable (%d) or Disable (%d)\n", + drv->ddiEntry[DDI_ORD_ENABLE].present, + drv->ddiEntry[DDI_ORD_DISABLE].present); + setError(WDRV_ERR_NO_ENTRY); + neUnloadModule(&drv->neMod); + free(drv); + return NULL; + } + + // Verify segment integrity after loading + if (drv->ddiEntry[DDI_ORD_ENABLE].present) { + uint16_t codeSel = drv->ddiEntry[DDI_ORD_ENABLE].sel; + uint16_t codeOff = drv->ddiEntry[DDI_ORD_ENABLE].off; + + // Find the segment's stored linear address + int segIdx = drv->neMod.exports[DDI_ORD_ENABLE].segIndex - 1; + uint32_t storedLinear = drv->neMod.segments[segIdx].linearAddr; + + // Read actual descriptor base from DPMI + uint32_t descBase = 0; + __dpmi_get_segment_base_address(codeSel, (unsigned long *)&descBase); + + // Read via flat pointer (linearAddr + offset) + uint8_t *flatPtr = (uint8_t *)(storedLinear + codeOff); + uint8_t flatBytes[8]; + for (int i = 0; i < 8; i++) { + flatBytes[i] = flatPtr[i]; + } + + // Read via far pointer (selector:offset) + uint8_t farBytes[8]; + for (int i = 0; i < 8; i++) { + farBytes[i] = _farpeekb(codeSel, codeOff + i); + } + + // Read raw 8-byte LDT descriptor + uint8_t rawDesc[8]; + __dpmi_get_descriptor(codeSel, rawDesc); + uint32_t ldtBase = (uint32_t)rawDesc[2] | ((uint32_t)rawDesc[3] << 8) | + ((uint32_t)rawDesc[4] << 16) | ((uint32_t)rawDesc[7] << 24); + uint32_t ldtLimit = (uint32_t)rawDesc[0] | ((uint32_t)rawDesc[1] << 8) | + ((uint32_t)(rawDesc[6] & 0x0F) << 16); + + dbg("windrv: dsBase=0x%08X ptrVal=0x%08" PRIX32 + " descBase=0x%08" PRIX32 " ldtBase=0x%08" PRIX32 + " ldtLimit=0x%05" PRIX32 "\n", + __djgpp_base_address, 
storedLinear, descBase, ldtBase, ldtLimit); + dbg("windrv: rawDesc: %02X %02X %02X %02X %02X %02X %02X %02X\n", + rawDesc[0], rawDesc[1], rawDesc[2], rawDesc[3], + rawDesc[4], rawDesc[5], rawDesc[6], rawDesc[7]); + dbg("windrv: flat[%p]: %02X %02X %02X %02X %02X %02X %02X %02X\n", + flatPtr, + flatBytes[0], flatBytes[1], flatBytes[2], flatBytes[3], + flatBytes[4], flatBytes[5], flatBytes[6], flatBytes[7]); + dbg("windrv: far[%04X:%04X]: %02X %02X %02X %02X %02X %02X %02X %02X\n", + codeSel, codeOff, + farBytes[0], farBytes[1], farBytes[2], farBytes[3], + farBytes[4], farBytes[5], farBytes[6], farBytes[7]); + } + + // Patch DoInt10h's INT 31h -> INT 64h BEFORE calling the entry point. + // The entry point calls SetupInt10h which self-modifies the Code segment + // (patches PUSHAD/POPAD on 386). We patch first so that when the entry + // point later calls DoInt10h for VBE queries, it uses our proxy. + patchDoInt10h(drv); + patchBiosDataAccess(drv); + + // Call the NE module entry point (driver_initialization). + // This runs the driver's one-time init code: + // - SetupInt10h: allocates a real-mode stack for VBE INT 10h calls + // - dev_initialization: sets ScreenSelector, checks CPU type, VDD query + // Without this, DoInt10h uses an uninitialized stack and all VBE calls + // fail, causing the driver's Enable to hit its fatal error path. 
+ if (drv->neMod.neHeader.entryPointCS != 0) { + uint16_t epSegIdx = drv->neMod.neHeader.entryPointCS - 1; + if (epSegIdx < drv->neMod.segmentCount) { + uint16_t epSel = drv->neMod.segments[epSegIdx].selector; + uint16_t epOff = drv->neMod.neHeader.entryPointIP; + dbg("windrv: calling entry point at %04X:%04X\n", epSel, epOff); + uint32_t epResult = thunkCall16(&gThunkCtx, epSel, epOff, NULL, 0); + dbg("windrv: entry point returned %u\n", (uint16_t)epResult); + } + } + + setError(WDRV_OK); + return drv; +} + + +void wdrvUnloadDriver(WdrvHandleT handle) +{ + if (!handle) { + return; + } + + freeDrawObjects(handle); + // PDEVICE and other objects are in DGROUP - freed by neUnloadModule + neUnloadModule(&handle->neMod); + free(handle); +} + + +int32_t wdrvGetInfo(WdrvHandleT handle, WdrvInfoT *info) +{ + if (!handle) { + return WDRV_ERR_NOT_LOADED; + } + + memset(info, 0, sizeof(WdrvInfoT)); + memcpy(info->driverName, handle->neMod.moduleName, sizeof(info->driverName) - 1); + info->driverName[sizeof(info->driverName) - 1] = '\0'; + + // If we've queried GDIINFO, fill in from that + if (handle->gdiInfoValid) { + info->driverVersion = handle->gdiInfo.dpVersion; + info->maxWidth = handle->gdiInfo.dpHorzRes; + info->maxHeight = handle->gdiInfo.dpVertRes; + info->maxBpp = handle->gdiInfo.dpBitsPixel * handle->gdiInfo.dpPlanes; + info->numColors = handle->gdiInfo.dpNumColors; + info->rasterCaps = handle->gdiInfo.dpRaster; + } + + info->hasBitBlt = handle->ddiEntry[DDI_ORD_BITBLT].present; + info->hasOutput = handle->ddiEntry[DDI_ORD_OUTPUT].present; + info->hasPixel = handle->ddiEntry[DDI_ORD_PIXEL].present; + info->hasStretchBlt = handle->ddiEntry[DDI_ORD_STRETCHBLT].present; + info->hasExtTextOut = handle->ddiEntry[DDI_ORD_EXTTEXTOUT].present; + info->hasSetPalette = handle->ddiEntry[DDI_ORD_SETPALETTE].present; + info->hasSetCursor = handle->ddiEntry[DDI_ORD_SETCURSOR].present; + + return WDRV_OK; +} + + +// 
// ============================================================================
// Mode setting
// ============================================================================

// Bring the loaded display driver up: initialise the DGROUP-resident GDI
// objects, call the driver's Enable entry twice (first to obtain GDIINFO,
// then to initialise the PDEVICE and set the mode), map video RAM through
// DPMI, realize a default brush and apply the S3-specific fixups.
//
// handle:            driver handle; NULL yields WDRV_ERR_NOT_LOADED.
// width/height/bpp:  currently unused (explicitly cast to void below).
//
// Returns WDRV_OK on success, otherwise the error code that was also stored
// through setError().
// NOTE(review): ddiEntry[DDI_ORD_ENABLE].present is never checked before the
// entry is called - presumably guaranteed by the loader; confirm.
int32_t wdrvEnable(WdrvHandleT handle, int32_t width, int32_t height, int32_t bpp)
{
    if (!handle) {
        return WDRV_ERR_NOT_LOADED;
    }

    (void)width;
    (void)height;
    (void)bpp;

    // Allocate the PDEVICE structure
    if (!allocPDevice(handle)) {
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }

    // Allocate draw mode and physical objects
    if (!allocDrawMode(handle)) {
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }

    if (!allocBrushBuffers(handle)) {
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }

    if (!allocPenBuffers(handle)) {
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }

    // ================================================================
    // Enable the display driver (DDK standard order).
    //
    // WORD PASCAL Enable(LPDEVICE lpDevice, WORD style,
    //                    LPSTR lpDeviceType, LPSTR lpOutputFile,
    //                    LPGDIINFO lpData)
    //
    // Per the DDK and VBESVGA source, the correct call order is:
    //
    // Step 1: Enable(gdiInfoBuf, style=1/InquireInfo) — returns GDIINFO
    //   lpDevice is a GDIINFO-sized buffer (NOT the PDEVICE).
    //   The driver reads SYSTEM.INI settings and returns mode info.
    //
    // Step 2: Enable(pdevBuf, style=0/EnableDevice) — initializes device
    //   lpDevice is the PDEVICE buffer. The driver copies its
    //   physical device template there and sets the video mode.
    // ================================================================

    // Allocate a 16-bit "DISPLAY" string for lpDeviceType
    // (8 bytes copied = 7 characters plus the terminating NUL).
    // If the allocation fails, a NULL selector is passed to the driver.
    uint32_t devTypeLin;
    uint16_t devTypeSel = alloc16BitBlock(16, &devTypeLin);
    if (devTypeSel) {
        memcpy((void *)devTypeLin, "DISPLAY", 8);
    }

    uint16_t dgSel = handle->neMod.autoDataSel;
    // Nine parameter words: four far pointers (2 words each) plus style.
    uint16_t params[9];

    // ================================================================
    // Step 1: Enable(style=1/InquireInfo) — get GDIINFO
    //
    // lpDevice = separate GDIINFO buffer (driver writes GDIINFO here).
    // [0x8894] starts at 0x00, so S3 driver runs full mode selection
    // (reads SCREEN-SIZE, COLOR-FORMAT, etc. from SYSTEM.INI).
    // ================================================================
    // Allocate 256 bytes — some drivers (e.g. S3) write extended
    // GDIINFO fields beyond the standard 108-byte structure.
    uint32_t gdiInfoLinear;
    uint16_t gdiInfoSel = alloc16BitBlock(256, &gdiInfoLinear);
    if (gdiInfoSel == 0) {
        if (devTypeSel) {
            free16BitBlock(devTypeSel, devTypeLin);
        }
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }

    // NOTE(review): the constant used for style=1 is named ENABLE_ENABLE and
    // the one used for style=0 below is named ENABLE_INQUIRE, which reads as
    // swapped relative to the comments. Their definitions are not visible
    // here - confirm the values are 1 and 0 respectively.
    params[0] = gdiInfoSel; // lpDevice = GDIINFO buffer (NOT PDEVICE!)
    params[1] = 0;
    params[2] = ENABLE_ENABLE; // style = 1 (InquireInfo)
    params[3] = devTypeSel; // lpDeviceType = "DISPLAY"
    params[4] = 0;
    params[5] = 0; // lpOutputFile = NULL
    params[6] = 0;
    params[7] = 0; // lpData = NULL
    params[8] = 0;

    dbg("windrv: calling Enable(style=1, InquireInfo)\n");

    uint32_t result = thunkCall16(&gThunkCtx,
                                  handle->ddiEntry[DDI_ORD_ENABLE].sel,
                                  handle->ddiEntry[DDI_ORD_ENABLE].off,
                                  params, 9);

    logErr("windrv: Enable(style=1) returned %u\n", (uint16_t)result);

    // Read GDIINFO from the buffer
    // NOTE(review): the return value of the inquire call is only logged;
    // gdiInfoValid is set unconditionally.
    memcpy(&handle->gdiInfo, (void *)gdiInfoLinear, sizeof(GdiInfo16T));
    handle->gdiInfoValid = true;
    free16BitBlock(gdiInfoSel, gdiInfoLinear);

    logErr("windrv: GDIINFO: %dx%d %dbpp %dplanes, PDEVICE size=%d\n",
           handle->gdiInfo.dpHorzRes, handle->gdiInfo.dpVertRes,
           handle->gdiInfo.dpBitsPixel, handle->gdiInfo.dpPlanes,
           handle->gdiInfo.dpDEVICEsize);

    // ================================================================
    // For VGA-class drivers (1bpp, 4 planes), repatch __WINFLAGS from
    // WF_ENHANCED to WF_STANDARD. VGA.DRV's physical_enable hangs in
    // Enhanced mode because it tries to communicate with the VDD.
    // ================================================================
    if (handle->gdiInfoValid &&
        handle->gdiInfo.dpBitsPixel == 1 && handle->gdiInfo.dpPlanes == 4) {
        uint16_t enhFlags = WF_PMODE | WF_CPU386 | WF_ENHANCED;
        uint16_t stdFlags = WF_PMODE | WF_CPU386 | WF_STANDARD;
        patchWinFlags(handle, enhFlags, stdFlags);
    }

    // ================================================================
    // Step 2: Enable(style=0/EnableDevice) — initialize PDEVICE + mode
    //
    // lpDevice = the PDEVICE buffer. The driver copies its physical
    // device template there and calls physical_enable (sets INT 10h
    // video mode, initializes hardware).
    // ================================================================
    params[0] = dgSel;
    params[1] = handle->pdevOff;
    params[2] = ENABLE_INQUIRE; // style = 0 (EnableDevice)
    params[3] = devTypeSel; // lpDeviceType = "DISPLAY"
    params[4] = 0;
    params[5] = 0; // lpOutputFile = NULL
    params[6] = 0;
    params[7] = 0; // lpData = NULL
    params[8] = 0;

    dbg("windrv: calling Enable(style=0, EnableDevice)\n");

    result = thunkCall16(&gThunkCtx,
                         handle->ddiEntry[DDI_ORD_ENABLE].sel,
                         handle->ddiEntry[DDI_ORD_ENABLE].off,
                         params, 9);

    logErr("windrv: Enable(style=0) returned %u\n", (uint16_t)result);

    // The device-type string is no longer needed after the second call.
    if (devTypeSel) {
        free16BitBlock(devTypeSel, devTypeLin);
    }

    // Only the low 16 bits of the thunk result are treated as Enable's
    // WORD return value; zero is reported as failure.
    if ((uint16_t)result == 0) {
        setError(WDRV_ERR_ENABLE_FAILED);
        return gLastError;
    }

    // Log PDEVICE after EnableDevice
    {
        // Bit 0 of the byte read from port 0x3CC selects the CRTC base
        // (0x3D4 when set, 0x3B4 otherwise).
        uint16_t crtcBase = (inportb(0x3CC) & 0x01) ? 0x3D4 : 0x3B4;
        outportb(crtcBase, 0x13);
        uint8_t cr13 = inportb(crtcBase + 1);
        logErr("windrv: CR13 after Enable(style=0): 0x%02X (pitch=%u)\n",
               cr13, (uint16_t)cr13 * 8);
    }

    {
        DibPDevice16T *pd = (DibPDevice16T *)handle->pdevLinear;
        logErr("windrv: PDEVICE: deType=0x%04X deWidth=%u deHeight=%u "
               "deWidthBytes=%u dePlanes=%u deBitsPixel=%u\n",
               pd->deType, pd->deWidth, pd->deHeight,
               pd->deWidthBytes, pd->dePlanes, pd->deBitsPixel);

        // Dump all PDEVICE bytes
        // (capped at the first 64 bytes).
        uint8_t *pdb = (uint8_t *)handle->pdevLinear;
        uint32_t pdSize = handle->pdevSize < 64 ? handle->pdevSize : 64;
        logErr("windrv: PDEVICE hex (%lu bytes):", (unsigned long)pdSize);
        for (uint32_t bi = 0; bi < pdSize; bi++) {
            logErr(" %02X", pdb[bi]);
        }
        logErr("\n");

        // If EnableDevice left deWidth/deHeight/deBitsPixel as zero,
        // fill them from GDIINFO
        if (pd->deWidth == 0 && handle->gdiInfoValid) {
            pd->deWidth = (uint16_t)handle->gdiInfo.dpHorzRes;
        }
        if (pd->deHeight == 0 && handle->gdiInfoValid) {
            pd->deHeight = (uint16_t)handle->gdiInfo.dpVertRes;
        }
        if (pd->deBitsPixel == 0 && handle->gdiInfoValid) {
            pd->deBitsPixel = (uint8_t)handle->gdiInfo.dpBitsPixel;
        }
    }

    // Query current VBE mode for diagnostics
    {
        __dpmi_regs vr;
        memset(&vr, 0, sizeof(vr));
        vr.x.ax = 0x4F03; // VBE Return Current VBE Mode
        __dpmi_int(0x10, &vr);
        logErr("windrv: VBE current mode: AX=%04X BX=%04X (mode=0x%03X)\n",
               vr.x.ax, vr.x.bx, vr.x.bx & 0x3FFF);

        uint16_t crtcBase = (inportb(0x3CC) & 0x01) ? 0x3D4 : 0x3B4;
        outportb(crtcBase, 0x13);
        uint8_t cr13 = inportb(crtcBase + 1);
        logErr("windrv: CR13 after Enable complete: 0x%02X (pitch=%u)\n",
               cr13, (uint16_t)cr13 * 8);

        // Read display start address (CR0C:CR0D + S3 extensions CR31, CR51, CR69)
        // NOTE(review): CR69 is mentioned above but not read below.
        outportb(crtcBase, 0x0C);
        uint8_t cr0c = inportb(crtcBase + 1);
        outportb(crtcBase, 0x0D);
        uint8_t cr0d = inportb(crtcBase + 1);
        outportb(crtcBase, 0x31);
        uint8_t cr31 = inportb(crtcBase + 1);
        outportb(crtcBase, 0x51);
        uint8_t cr51 = inportb(crtcBase + 1);
        uint32_t dispStart = ((uint32_t)cr0c << 8) | cr0d;
        dispStart |= ((uint32_t)(cr31 & 0x30)) << 12; // bits 17:16
        dispStart |= ((uint32_t)(cr51 & 0x03)) << 18; // bits 19:18
        logErr("windrv: display start: CR0C=0x%02X CR0D=0x%02X CR31=0x%02X CR51=0x%02X -> offset 0x%lX (byte %lu)\n",
               cr0c, cr0d, cr31, cr51, (unsigned long)dispStart, (unsigned long)(dispStart * 4));
    }

    // Check that our pre-allocated PDEVICE is large enough
    // NOTE(review): this check runs after Enable(style=0) has already
    // written into the PDEVICE buffer; dpDEVICEsize is known after step 1,
    // so the check presumably belongs before step 2 - confirm.
    if (handle->gdiInfo.dpDEVICEsize > 0 &&
        (uint32_t)handle->gdiInfo.dpDEVICEsize > handle->pdevSize) {
        logErr("windrv: PDEVICE too small (%u < %d), max is %d\n",
               (unsigned)handle->pdevSize, handle->gdiInfo.dpDEVICEsize,
               PDEVICE_MAX_SIZE);
        setError(WDRV_ERR_NO_MEMORY);
        return gLastError;
    }

    // Try to set up a default draw mode
    DrawMode16T *dm = (DrawMode16T *)handle->drawModeLinear;
    dm->rop2 = R2_COPYPEN;
    dm->bkMode = BM_OPAQUE;
    dm->bkColor = 0x00FFFFFF;
    dm->textColor = 0x00000000;

    // Map video RAM for direct access.
    // Query VBE to get the linear framebuffer physical address and total
    // VRAM size, then map the FULL VRAM via DPMI 0800h. The driver's own
    // Enable only maps the visible framebuffer, but other DDI functions
    // (e.g. SetPalette) access off-screen VRAM areas that need to be mapped.
    // Defaults used when VBE reports no usable mode: 64 KB at 0xA0000.
    handle->vramPhysAddr = 0xA0000;
    handle->vramSize = 0x10000;
    {
        // Get current VBE mode number
        __dpmi_regs vr;
        memset(&vr, 0, sizeof(vr));
        vr.x.ax = 0x4F03;
        __dpmi_int(0x10, &vr);
        uint16_t curMode = vr.x.bx & 0x3FFF;

        if (vr.x.ax == 0x004F && curMode >= 0x100) {
            // Query VBE controller info for total VRAM
            // The DJGPP transfer buffer (__tb) is split into a real-mode
            // segment:offset pair for ES:DI.
            unsigned long tbuf = __tb & 0xFFFFF;
            uint16_t tbSeg = (uint16_t)(tbuf >> 4);
            uint16_t tbOff = (uint16_t)(tbuf & 0x0F);

            memset(&vr, 0, sizeof(vr));
            vr.x.ax = 0x4F00;
            vr.x.es = tbSeg;
            vr.x.di = tbOff;
            // Write "VBE2" signature to get VBE 2.0+ info
            dosmemput("VBE2", 4, tbuf);
            __dpmi_int(0x10, &vr);

            uint32_t totalVram = 0;
            if (vr.x.ax == 0x004F) {
                // The word at offset 0x12 is read as a count of 64 KB blocks.
                uint16_t mem64k;
                dosmemget(tbuf + 0x12, 2, &mem64k);
                totalVram = (uint32_t)mem64k * 65536UL;
                dbg("windrv: VBE total VRAM: %" PRIu32 " bytes (%" PRIu32 " KB)\n",
                    totalVram, totalVram / 1024);
            }

            // Query mode info for LFB physical base
            memset(&vr, 0, sizeof(vr));
            vr.x.ax = 0x4F01;
            vr.x.cx = curMode;
            vr.x.es = tbSeg;
            vr.x.di = tbOff;
            __dpmi_int(0x10, &vr);

            if (vr.x.ax == 0x004F) {
                // The dword at offset 0x28 is read as the LFB physical base.
                uint32_t physBase;
                dosmemget(tbuf + 0x28, 4, &physBase);
                dbg("windrv: VBE LFB physical base: 0x%08lX\n", (unsigned long)physBase);

                if (physBase != 0) {
                    handle->vramPhysAddr = physBase;
                    // Map at least 4MB even if VBE reports less — drivers
                    // access off-screen VRAM (cursor masks, palette tables,
                    // pattern caches) beyond the visible framebuffer.
                    if (totalVram < 4UL * 1024 * 1024) {
                        totalVram = 4UL * 1024 * 1024;
                    }
                    handle->vramSize = totalVram;
                    dbg("windrv: VRAM size after fixup: 0x%lX\n",
                        (unsigned long)handle->vramSize);
                }
            }
        }
    }

    // Map physical VRAM for direct access
    // NOTE(review): if the mapping call fails, vramLinear/vramPtr are left
    // untouched and no error is reported.
    __dpmi_meminfo mi;
    mi.address = handle->vramPhysAddr;
    mi.size = handle->vramSize;
    if (__dpmi_physical_address_mapping(&mi) == 0) {
        handle->vramLinear = mi.address;
        handle->vramPtr = (void *)(mi.address + __djgpp_conventional_base);
        dbg("windrv: mapped VRAM: phys=0x%08lX size=0x%lX linear=0x%08lX\n",
            (unsigned long)handle->vramPhysAddr,
            (unsigned long)handle->vramSize,
            (unsigned long)handle->vramLinear);
    }

    // Pitch is derived from GDIINFO: width times bytes per pixel (rounded up).
    handle->pitch = handle->gdiInfo.dpHorzRes *
                    ((handle->gdiInfo.dpBitsPixel + 7) / 8);

    // Realize a default white brush
    if (handle->ddiEntry[DDI_ORD_REALIZEOBJECT].present) {
        if (!realizeBrush(handle, 0x00FFFFFF)) {
            dbg("windrv: warning: initial RealizeObject(brush) failed\n");
        }
    }

    // Check if this is a hardware (S3-style) or software (DIB) driver.
    // deType == 0xFFFF indicates a DIB engine / software renderer.
    // NOTE(review): the test below only distinguishes 0xFFFF if deType is a
    // signed 16-bit field; if it is unsigned the comparison is always true.
    // The field's declaration is not visible here - confirm.
    DibPDevice16T *pd = (DibPDevice16T *)handle->pdevLinear;
    bool isHardwareDriver = (pd->deType >= 0);

    // Detect S3 hardware by probing the chip ID register (CR30).
    // Only S3 chips need cursor disable and display start offset.
    // NOTE(review): this probe uses 0x3D4/0x3D5 directly rather than the
    // crtcBase selection used by the diagnostics above.
    outportb(0x3D4, 0x38);
    outportb(0x3D5, 0x48); // unlock S3 registers
    outportb(0x3D4, 0x30);
    uint8_t cr30 = inportb(0x3D5);
    bool isS3 = (cr30 >= 0x81 && cr30 <= 0xE1);
    handle->isS3 = isS3;
    gIsS3 = isS3;
    dbg("windrv: S3 chip ID probe: CR30=0x%02X isS3=%d\n", cr30, isS3);

    // VGA-class drivers (1bpp, 4 planes) run as basic VGA even on S3
    // hardware — they don't use the S3 accelerator or scratch area.
    bool isVgaClass = handle->gdiInfoValid &&
                      handle->gdiInfo.dpBitsPixel == 1 &&
                      handle->gdiInfo.dpPlanes == 4;

    if (isHardwareDriver && isS3 && !isVgaClass) {
        // Disable the hardware cursor. S3 Trio64 (and compatible) drivers
        // may enable a default cursor during Enable that we don't manage.
        // CR45 bit 0 = hardware cursor enable on S3.
        outportb(0x3D4, 0x45);
        outportb(0x3D5, inportb(0x3D5) & ~0x01);

        // Shift the visible display down by 10 scanlines so the S3 driver's
        // pattern scratch area at VRAM (144,1)-(151,8) is off-screen.
        // All drawing Y coordinates are offset by dispYOffset to compensate.
        handle->dispYOffset = 10;
        setDisplayStart(handle, (uint32_t)handle->dispYOffset * handle->pitch);
    } else {
        // Non-S3 hardware, VGA-class, or software/DIB driver: no S3
        // scratch area, no display start shift.
        handle->dispYOffset = 0;
    }

    handle->enabled = true;

    // Watch the area ~0x4B8 bytes before end-of-.text. Corruption
    // in VBESVGA consistently zeros a byte near this offset.
    {
        extern char etext;
        uint32_t etextOff = (uint32_t)&etext;
        uint32_t watchOff = etextOff - 0x4B8;
        dbg("windrv: etext=0x%08" PRIX32 " watch=0x%08" PRIX32 "\n",
            etextOff, watchOff);
        thunkSetWatch(_my_ds(), watchOff);
    }

    setError(WDRV_OK);
    return WDRV_OK;
}


// Shut the driver down by calling its Disable entry with the PDEVICE.
// Any display-start shift applied by wdrvEnable is undone first.
//
// Returns WDRV_ERR_NOT_ENABLED for a NULL or not-enabled handle, otherwise
// WDRV_OK (Disable itself returns nothing).
// NOTE(review): ddiEntry[DDI_ORD_DISABLE].present is not checked before the
// call - confirm the loader guarantees it.
int32_t wdrvDisable(WdrvHandleT handle)
{
    if (!handle || !handle->enabled) {
        return WDRV_ERR_NOT_ENABLED;
    }

    // Call Disable(lpDevice)
    // VOID PASCAL Disable(LPDEVICE lpDevice)
    // 1 far pointer = 2 words
    uint16_t params[2];
    params[0] = handle->neMod.autoDataSel; // lpDevice seg (DGROUP)
    params[1] = handle->pdevOff; // lpDevice off

    dbg("windrv: calling Disable()\n");

    // Reset display start to 0 before Disable restores text mode
    if (handle->dispYOffset != 0) {
        setDisplayStart(handle, 0);
        handle->dispYOffset = 0;
    }

    waitForEngine();

    thunkCall16(&gThunkCtx,
                handle->ddiEntry[DDI_ORD_DISABLE].sel,
                handle->ddiEntry[DDI_ORD_DISABLE].off,
                params, 2);

    dbg("windrv: Disable() returned\n");

    handle->enabled = false;

    setError(WDRV_OK);
    return WDRV_OK;
}


// ============================================================================
// Drawing operations
// ============================================================================

int32_t wdrvBitBlt(WdrvHandleT handle, WdrvBitBltParamsT *p)
{
    if (!handle || !handle->enabled) {
        logErr("windrv: BitBlt: not enabled (handle=%p enabled=%d)\n",
               (void *)handle, handle ?
handle->enabled : -1); + return WDRV_ERR_NOT_ENABLED; + } + if (!handle->ddiEntry[DDI_ORD_BITBLT].present) { + logErr("windrv: BitBlt: not present\n"); + return WDRV_ERR_UNSUPPORTED; + } + + // BOOL PASCAL BitBlt(LPDEVICE lpDstDev, WORD DstX, WORD DstY, + // LPDEVICE lpSrcDev, WORD SrcX, WORD SrcY, + // WORD xExt, WORD yExt, DWORD Rop3, + // LPBRUSH lpBrush, LPDRAWMODE lpDrawMode) + // + // Pascal push order (left to right): + // lpDstDev(2w), DstX(1w), DstY(1w), + // lpSrcDev(2w), SrcX(1w), SrcY(1w), + // xExt(1w), yExt(1w), Rop3(2w), + // lpBrush(2w), lpDrawMode(2w) + // Total: 16 words + + uint16_t dgSel = handle->neMod.autoDataSel; + uint16_t params[16]; + int i = 0; + + // Determine if the ROP uses the source. The 8-bit ROP is in bits 23-16. + // If flipping the source bit doesn't change any output bit, source is + // not used and lpSrcDev must be NULL per the DDI spec. + uint8_t rop8 = (uint8_t)(p->rop3 >> 16); + bool ropNeedsSrc = (((rop8 >> 2) ^ rop8) & 0x33) != 0; + + // lpDstDev + params[i++] = dgSel; + params[i++] = handle->pdevOff; + // DstX, DstY (offset Y into hidden-scanline region) + params[i++] = (uint16_t)p->dstX; + params[i++] = (uint16_t)(p->dstY + handle->dispYOffset); + // lpSrcDev (NULL for pattern-only ROPs, screen PDEVICE otherwise) + if (ropNeedsSrc) { + params[i++] = dgSel; + params[i++] = handle->pdevOff; + } else { + params[i++] = 0; + params[i++] = 0; + } + // SrcX, SrcY (offset Y for screen-to-screen blits) + params[i++] = (uint16_t)p->srcX; + params[i++] = (uint16_t)(p->srcY + handle->dispYOffset); + // xExt, yExt + params[i++] = (uint16_t)p->width; + params[i++] = (uint16_t)p->height; + // Rop3 (DWORD: high word first in Pascal push order) + params[i++] = (uint16_t)(p->rop3 >> 16); + params[i++] = (uint16_t)(p->rop3 & 0xFFFF); + // lpBrush + params[i++] = dgSel; + params[i++] = handle->brushOff; + // lpDrawMode + params[i++] = dgSel; + params[i++] = handle->drawModeOff; + + dbg("windrv: BitBlt dst=%04X:%04X (%d,%d) src=%04X:%04X 
(%d,%d) %dx%d rop=0x%08lX brush=%04X:%04X dm=%04X:%04X\n", + dgSel, handle->pdevOff, p->dstX, p->dstY, + ropNeedsSrc ? dgSel : 0, ropNeedsSrc ? handle->pdevOff : 0, + p->srcX, p->srcY, + p->width, p->height, (unsigned long)p->rop3, + dgSel, handle->brushOff, dgSel, handle->drawModeOff); + + waitForEngine(); + + uint32_t result = thunkCall16(&gThunkCtx, + handle->ddiEntry[DDI_ORD_BITBLT].sel, + handle->ddiEntry[DDI_ORD_BITBLT].off, + params, i); + + waitForEngine(); + + dbg("windrv: BitBlt returned %lu\n", (unsigned long)(result & 0xFFFF)); + + return ((int16_t)(result & 0xFFFF)) ? WDRV_OK : WDRV_ERR_UNSUPPORTED; +} + + +int32_t wdrvFillRect(WdrvHandleT handle, int16_t x, int16_t y, int16_t w, int16_t h, uint32_t color) +{ + if (!handle || !handle->enabled) { + return WDRV_ERR_NOT_ENABLED; + } + + // Realize brush with the requested color + if (!handle->brushRealized || handle->brushRealizedColor != color) { + if (handle->ddiEntry[DDI_ORD_REALIZEOBJECT].present) { + realizeBrush(handle, color); + } + } + + // If driver supports BitBlt, use PATCOPY + if (handle->ddiEntry[DDI_ORD_BITBLT].present) { + WdrvBitBltParamsT bp; + memset(&bp, 0, sizeof(bp)); + bp.dstX = x; + bp.dstY = y; + bp.srcX = 0; + bp.srcY = 0; + bp.width = w; + bp.height = h; + bp.rop3 = PATCOPY; + return wdrvBitBlt(handle, &bp); + } + + // Fall back to Output with rectangle + if (handle->ddiEntry[DDI_ORD_OUTPUT].present) { + // Allocate 16-bit memory for the point array and pen + // Output(lpDstDev, style, count, lpPoints, lpPen, lpBrush, lpDrawMode, lpClipRect) + // For rectangle: style=OS_RECTANGLE, count=2 (top-left, bottom-right) + + // Build 2-point rectangle (offset Y into hidden-scanline region) + Point16T pts[2]; + pts[0].x = x; + pts[0].y = y + handle->dispYOffset; + pts[1].x = x + w; + pts[1].y = y + h + handle->dispYOffset; + + uint32_t ptsLinear; + uint16_t ptsSel = alloc16BitBlock(sizeof(pts), &ptsLinear); + if (ptsSel == 0) { + return WDRV_ERR_NO_MEMORY; + } + memcpy((void *)ptsLinear, 
pts, sizeof(pts)); + + // Output params (Pascal order): + // lpDstDev(2w), style(1w), count(1w), lpPoints(2w), + // lpPen(2w), lpBrush(2w), lpDrawMode(2w), lpClipRect(2w) + // Total: 14 words + uint16_t dgSel = handle->neMod.autoDataSel; + uint16_t params[14]; + int i = 0; + params[i++] = dgSel; // lpDstDev seg + params[i++] = handle->pdevOff; // lpDstDev off + params[i++] = OS_RECTANGLE; // style + params[i++] = 2; // count + params[i++] = ptsSel; // lpPoints seg + params[i++] = 0; // lpPoints off + params[i++] = 0; // lpPen seg (NULL) + params[i++] = 0; // lpPen off + params[i++] = dgSel; // lpBrush seg + params[i++] = handle->brushOff; // lpBrush off + params[i++] = dgSel; // lpDrawMode seg + params[i++] = handle->drawModeOff; // lpDrawMode off + params[i++] = 0; // lpClipRect seg (NULL = no clip) + params[i++] = 0; // lpClipRect off + + waitForEngine(); + + uint32_t result = thunkCall16(&gThunkCtx, + handle->ddiEntry[DDI_ORD_OUTPUT].sel, + handle->ddiEntry[DDI_ORD_OUTPUT].off, + params, i); + + free16BitBlock(ptsSel, ptsLinear); + + return ((int16_t)(result & 0xFFFF)) ? 
WDRV_OK : WDRV_ERR_UNSUPPORTED; + } + + return WDRV_ERR_UNSUPPORTED; +} + + +int32_t wdrvSetPixel(WdrvHandleT handle, int16_t x, int16_t y, uint32_t color) +{ + if (!handle || !handle->enabled) { + return WDRV_ERR_NOT_ENABLED; + } + if (!handle->ddiEntry[DDI_ORD_PIXEL].present) { + return WDRV_ERR_UNSUPPORTED; + } + + // DWORD PASCAL Pixel(LPDEVICE lpDevice, WORD x, WORD y, + // DWORD color, LPDRAWMODE lpDrawMode) + // Pascal push order: + // lpDevice(2w), x(1w), y(1w), color(2w), lpDrawMode(2w) + // Total: 8 words + + // Convert COLORREF to physical color via ColorInfo DDI + uint32_t physColor = colorToPhys(handle, color); + + // Set draw mode to COPYPEN for setting pixels + DrawMode16T *dm = (DrawMode16T *)handle->drawModeLinear; + dm->rop2 = R2_COPYPEN; + + uint16_t dgSel = handle->neMod.autoDataSel; + uint16_t params[8]; + int i = 0; + params[i++] = dgSel; // lpDevice seg + params[i++] = handle->pdevOff; // lpDevice off + params[i++] = (uint16_t)x; // x + params[i++] = (uint16_t)(y + handle->dispYOffset); // y (offset) + params[i++] = (uint16_t)(physColor >> 16); // color high + params[i++] = (uint16_t)(physColor); // color low + params[i++] = dgSel; // lpDrawMode seg + params[i++] = handle->drawModeOff; // lpDrawMode off + + waitForEngine(); + + thunkCall16(&gThunkCtx, + handle->ddiEntry[DDI_ORD_PIXEL].sel, + handle->ddiEntry[DDI_ORD_PIXEL].off, + params, i); + + waitForEngine(); + + return WDRV_OK; +} + + +uint32_t wdrvGetPixel(WdrvHandleT handle, int16_t x, int16_t y) +{ + if (!handle || !handle->enabled) { + return 0; + } + if (!handle->ddiEntry[DDI_ORD_PIXEL].present) { + return 0; + } + + // Pixel with color = -1 (0xFFFFFFFF) reads instead of writes + DrawMode16T *dm = (DrawMode16T *)handle->drawModeLinear; + dm->rop2 = R2_COPYPEN; + + uint16_t dgSel = handle->neMod.autoDataSel; + uint16_t params[8]; + int i = 0; + params[i++] = dgSel; + params[i++] = handle->pdevOff; + params[i++] = (uint16_t)x; + params[i++] = (uint16_t)(y + handle->dispYOffset); + 
params[i++] = 0xFFFF; // color = -1 means "get pixel" + params[i++] = 0xFFFF; + params[i++] = dgSel; + params[i++] = handle->drawModeOff; + + return thunkCall16(&gThunkCtx, + handle->ddiEntry[DDI_ORD_PIXEL].sel, + handle->ddiEntry[DDI_ORD_PIXEL].off, + params, i); +} + + +int32_t wdrvPolyline(WdrvHandleT handle, Point16T *points, int16_t count, uint32_t color) +{ + if (!handle || !handle->enabled) { + return WDRV_ERR_NOT_ENABLED; + } + if (!handle->ddiEntry[DDI_ORD_OUTPUT].present) { + return WDRV_ERR_UNSUPPORTED; + } + + // Realize a physical pen (driver expects RealizeObject output, not a logical pen) + if (!handle->penRealized || handle->penRealizedColor != color) { + if (!realizePen(handle, color)) { + return WDRV_ERR_UNSUPPORTED; + } + } + + // Allocate 16-bit memory for the point array, offsetting Y coordinates + uint32_t ptsSize = count * sizeof(Point16T); + uint32_t ptsLinear; + uint16_t ptsSel = alloc16BitBlock(ptsSize, &ptsLinear); + if (ptsSel == 0) { + return WDRV_ERR_NO_MEMORY; + } + memcpy((void *)ptsLinear, points, ptsSize); + { + Point16T *dst = (Point16T *)ptsLinear; + for (int16_t pi = 0; pi < count; pi++) { + dst[pi].y += handle->dispYOffset; + } + } + + // Output(lpDstDev, style, count, lpPoints, lpPen, lpBrush, lpDrawMode, lpClipRect) + uint16_t dgSel = handle->neMod.autoDataSel; + uint16_t params[14]; + int i = 0; + params[i++] = dgSel; + params[i++] = handle->pdevOff; + params[i++] = OS_POLYLINE; + params[i++] = count; + params[i++] = ptsSel; + params[i++] = 0; + params[i++] = dgSel; // lpPen in DGROUP (physical pen) + params[i++] = handle->penOff; + params[i++] = 0; // lpBrush = NULL + params[i++] = 0; + params[i++] = dgSel; + params[i++] = handle->drawModeOff; + params[i++] = 0; // lpClipRect = NULL + params[i++] = 0; + + waitForEngine(); + + uint32_t result = thunkCall16(&gThunkCtx, + handle->ddiEntry[DDI_ORD_OUTPUT].sel, + handle->ddiEntry[DDI_ORD_OUTPUT].off, + params, i); + + waitForEngine(); + + free16BitBlock(ptsSel, ptsLinear); + + 
return ((int16_t)(result & 0xFFFF)) ? WDRV_OK : WDRV_ERR_UNSUPPORTED; +} + + +int32_t wdrvRectangle(WdrvHandleT handle, int16_t x, int16_t y, int16_t w, int16_t h, uint32_t color) +{ + // Use Output with OS_RECTANGLE for outlined rectangle + if (!handle || !handle->enabled) { + return WDRV_ERR_NOT_ENABLED; + } + if (!handle->ddiEntry[DDI_ORD_OUTPUT].present) { + return WDRV_ERR_UNSUPPORTED; + } + + // Realize a physical pen (driver expects RealizeObject output, not a logical pen) + if (!handle->penRealized || handle->penRealizedColor != color) { + if (!realizePen(handle, color)) { + return WDRV_ERR_UNSUPPORTED; + } + } + + Point16T pts[2]; + pts[0].x = x; + pts[0].y = y + handle->dispYOffset; + pts[1].x = x + w; + pts[1].y = y + h + handle->dispYOffset; + + uint32_t ptsLinear; + uint16_t ptsSel = alloc16BitBlock(sizeof(pts), &ptsLinear); + if (ptsSel == 0) { + return WDRV_ERR_NO_MEMORY; + } + memcpy((void *)ptsLinear, pts, sizeof(pts)); + + uint16_t dgSel = handle->neMod.autoDataSel; + uint16_t params[14]; + int i = 0; + params[i++] = dgSel; + params[i++] = handle->pdevOff; + params[i++] = OS_RECTANGLE; + params[i++] = 2; + params[i++] = ptsSel; + params[i++] = 0; + params[i++] = dgSel; // lpPen in DGROUP (physical pen) + params[i++] = handle->penOff; + params[i++] = dgSel; + params[i++] = handle->brushOff; + params[i++] = dgSel; + params[i++] = handle->drawModeOff; + params[i++] = 0; // lpClipRect = NULL + params[i++] = 0; + + waitForEngine(); + + uint32_t result = thunkCall16(&gThunkCtx, + handle->ddiEntry[DDI_ORD_OUTPUT].sel, + handle->ddiEntry[DDI_ORD_OUTPUT].off, + params, i); + + waitForEngine(); + + free16BitBlock(ptsSel, ptsLinear); + + return ((int16_t)(result & 0xFFFF)) ? 
WDRV_OK : WDRV_ERR_UNSUPPORTED; +} + + +// ============================================================================ +// Palette operations +// ============================================================================ + +int32_t wdrvSetPalette(WdrvHandleT handle, int32_t startIndex, int32_t count, const uint8_t *colors) +{ + if (!handle || !handle->enabled) { + return WDRV_ERR_NOT_ENABLED; + } + if (!handle->ddiEntry[DDI_ORD_SETPALETTE].present) { + return WDRV_ERR_UNSUPPORTED; + } + + // SetPalette(nStartIndex:WORD, nNumEntries:WORD, lpPalette:DWORD) + // Pascal order: nStartIndex(1w), nNumEntries(1w), lpPalette(2w) + // Total: 4 words + + // Allocate 16-bit memory for the palette data + uint32_t palSize = count * 4; // RGBQUAD per entry + uint32_t palLinear; + uint16_t palSel = alloc16BitBlock(palSize, &palLinear); + if (palSel == 0) { + return WDRV_ERR_NO_MEMORY; + } + memcpy((void *)palLinear, colors, palSize); + + uint16_t params[4]; + params[0] = (uint16_t)startIndex; + params[1] = (uint16_t)count; + params[2] = palSel; + params[3] = 0; + + thunkCall16(&gThunkCtx, + handle->ddiEntry[DDI_ORD_SETPALETTE].sel, + handle->ddiEntry[DDI_ORD_SETPALETTE].off, + params, 4); + + free16BitBlock(palSel, palLinear); + return WDRV_OK; +} + + +// ============================================================================ +// Framebuffer access +// ============================================================================ + +void *wdrvGetFramebuffer(WdrvHandleT handle) +{ + if (!handle || !handle->enabled) { + return NULL; + } + return handle->vramPtr; +} + + +int32_t wdrvGetPitch(WdrvHandleT handle) +{ + if (!handle || !handle->enabled) { + return 0; + } + return handle->pitch; +} + + +// ============================================================================ +// Error handling +// ============================================================================ + +int32_t wdrvGetLastError(void) +{ + return gLastError; +} + + +const char 
*wdrvGetLastErrorString(void) +{ + switch (gLastError) { + case WDRV_OK: return "no error"; + case WDRV_ERR_INIT: return "initialization failed"; + case WDRV_ERR_NO_DPMI: return "DPMI not available"; + case WDRV_ERR_FILE_NOT_FOUND: return "file not found"; + case WDRV_ERR_BAD_FORMAT: return "not a valid NE executable"; + case WDRV_ERR_LOAD_FAILED: return "failed to load driver"; + case WDRV_ERR_NO_MEMORY: return "out of memory"; + case WDRV_ERR_RELOC_FAILED: return "relocation failed"; + case WDRV_ERR_NO_ENTRY: return "required DDI entry not found"; + case WDRV_ERR_ENABLE_FAILED: return "driver Enable() failed"; + case WDRV_ERR_THUNK_FAILED: return "thunk setup failed"; + case WDRV_ERR_NOT_LOADED: return "no driver loaded"; + case WDRV_ERR_NOT_ENABLED: return "driver not enabled"; + case WDRV_ERR_UNSUPPORTED: return "operation not supported"; + default: return "unknown error"; + } +} + + +void wdrvSetDebug(bool enable) +{ + gDebug = enable; + extern void neSetDebug(bool enable); + neSetDebug(enable); + thunkSetDebug(enable); + stubSetDebug(enable); +} + + +void wdrvDumpSegmentBases(WdrvHandleT handle) +{ + if (!handle) { + return; + } + + logErr("=== NE Module Segment Bases ===\n"); + for (int i = 0; i < handle->neMod.segmentCount; i++) { + LoadedSegT *seg = &handle->neMod.segments[i]; + unsigned long base = 0; + __dpmi_get_segment_base_address(seg->selector, &base); + unsigned long limit = __dpmi_get_segment_limit(seg->selector); + logErr(" seg[%d] sel=%04X base=0x%08lX limit=0x%08lX size=%" PRIu32 " %s\n", + i + 1, seg->selector, base, limit, seg->size, + seg->isCode ? 
"CODE" : "DATA"); + } + + unsigned long dgBase = 0; + __dpmi_get_segment_base_address(handle->neMod.autoDataSel, &dgBase); + logErr(" DGROUP sel=%04X base=0x%08lX\n", handle->neMod.autoDataSel, dgBase); + logErr(" pdevOff=%04X brushOff=%04X drawModeOff=%04X\n", + handle->pdevOff, handle->brushOff, handle->drawModeOff); + logErr(" dgroupObjBase=0x%" PRIX32 " pdevLinear=0x%" PRIX32 "\n", + handle->dgroupObjBase, handle->pdevLinear); +} + + +// ============================================================================ +// Internal implementation +// ============================================================================ + +static FarPtr16T importResolver(const char *moduleName, uint16_t ordinal, const char *funcName) +{ + return stubResolveImport(&gStubCtx, moduleName, ordinal, funcName); +} + + +static bool resolveDriverEntries(struct WdrvDriverS *drv) +{ + // Resolve all known DDI ordinals + static const uint16_t ddiOrdinals[] = { + DDI_ORD_BITBLT, DDI_ORD_COLORINFO, DDI_ORD_CONTROL, + DDI_ORD_DISABLE, DDI_ORD_ENABLE, DDI_ORD_ENUMDFFONTS, + DDI_ORD_ENUMOBJ, DDI_ORD_OUTPUT, DDI_ORD_PIXEL, + DDI_ORD_REALIZEOBJECT, DDI_ORD_STRBLT, DDI_ORD_SCANLR, + DDI_ORD_DEVICEMODE, DDI_ORD_EXTTEXTOUT, DDI_ORD_GETCHARWIDTH, + DDI_ORD_DEVICEBITMAP, DDI_ORD_FASTBORDER, DDI_ORD_SETATTRIBUTE, + DDI_ORD_DIBTODEVICE, DDI_ORD_CREATEBITMAP, DDI_ORD_DELETEBITMAP, + DDI_ORD_SELECTBITMAP, DDI_ORD_BITMAPBITS, DDI_ORD_RECLIP, + DDI_ORD_GETPALETTE, DDI_ORD_SETPALETTE, DDI_ORD_SETPALETTETRANS, + DDI_ORD_UPDATECOLORS, DDI_ORD_STRETCHBLT, DDI_ORD_STRETCHDIBITS, + DDI_ORD_SELECTPALETTE, + DDI_ORD_INQUIRE, DDI_ORD_SETCURSOR, DDI_ORD_MOVECURSOR, + DDI_ORD_CHECKCRSR, + 0 // Sentinel + }; + + int found = 0; + for (int i = 0; ddiOrdinals[i] != 0; i++) { + uint16_t ord = ddiOrdinals[i]; + uint16_t seg; + uint16_t off; + uint16_t sel; + + if (neLookupExport(&drv->neMod, ord, &seg, &off, &sel)) { + drv->ddiEntry[ord].sel = sel; + drv->ddiEntry[ord].off = off; + drv->ddiEntry[ord].present = true; + 
            found++;

            dbg("windrv: DDI ord %u -> %04X:%04X\n", ord, sel, off);
        }
    }

    dbg("windrv: resolved %d DDI entry points\n", found);
    // Success means at least one DDI entry was resolved.
    return found > 0;
}


// Extend DGROUP to include space for GDI objects.
// Layout within the extension area (16-byte aligned):
//   +0x0000: PDEVICE   (4096 bytes)
//   +0x1000: PhysBrush (128 bytes)
//   +0x1080: LogBrush  (16 bytes)
//   +0x1090: DrawMode  (48 bytes)
//   +0x10C0: PhysPen   (128 bytes)
//   +0x1140: LogPen    (16 bytes)
//   +0x1150: PhysColor (8 bytes reserved)
// Total: 0x1158 bytes
#define DGROUP_OBJ_PDEV_OFF 0x0000
#define DGROUP_OBJ_BRUSH_OFF 0x1000
#define DGROUP_OBJ_LOGBRUSH_OFF 0x1080
#define DGROUP_OBJ_DRAWMODE_OFF 0x1090
#define DGROUP_OBJ_PEN_OFF 0x10C0
#define DGROUP_OBJ_LOGPEN_OFF 0x1140
#define DGROUP_OBJ_PHYSCOLOR_OFF 0x1150
#define DGROUP_OBJ_TOTAL_SIZE 0x1158

// Grow the driver's automatic data segment to a full 64K and record, on
// drv, the DGROUP offset and linear address of each GDI object placed in
// the 16-byte-aligned area that follows the original data.
//
// Returns false when the NE header names no valid DGROUP segment, when the
// object area would not fit below 64K, or when neExtendSegment() fails.
static bool extendDgroupForObjects(struct WdrvDriverS *drv)
{
    // autoDataSegIndex is 1-based; 0 therefore yields -1 and is rejected.
    int dgIdx = drv->neMod.neHeader.autoDataSegIndex - 1;
    if (dgIdx < 0 || dgIdx >= drv->neMod.segmentCount) {
        logErr("windrv: no DGROUP segment\n");
        return false;
    }

    uint32_t oldSize = drv->neMod.segments[dgIdx].size;

    // Align object area start to 16 bytes
    uint32_t objBase = (oldSize + 15) & ~15;

    // The S3 driver uses DGROUP offsets well beyond the initial data for
    // graphics engine working buffers (e.g., 0xA6E8, 0xBEE8). In Windows
    // 3.x, DGROUP is typically the full 64K segment. Extend to 64K to
    // ensure the driver has all the working space it expects.
    uint32_t targetSize = 0x10000;
    if (objBase + DGROUP_OBJ_TOTAL_SIZE > targetSize) {
        logErr("windrv: DGROUP objects don't fit in 64K\n");
        return false;
    }
    // Cannot underflow: the check above implies oldSize < targetSize.
    uint32_t extraBytes = targetSize - oldSize;

    // oldSizeOut is only an output slot for neExtendSegment(); it is not
    // read afterwards.
    uint32_t oldSizeOut;
    if (!neExtendSegment(&drv->neMod, dgIdx, extraBytes, &oldSizeOut)) {
        return false;
    }

    // Read linearAddr only after the extension call.
    // (presumably the segment may move when extended - confirm)
    uint32_t dgLinear = drv->neMod.segments[dgIdx].linearAddr;

    // Initialize DGROUP stack management fields if needed. In real Windows,
    // KERNEL sets these during module loading. VGA.DRV ships with
    // [0x0A]=0xFFFF which its stack check function interprets as "no stack
    // space available", causing all deep functions (BitBlt, etc.) to fail.
    // Only patch if the original data has the 0xFFFF sentinel.
    {
        // Word index 5 is byte offset 0x0A, word index 4 is byte offset 0x08.
        uint16_t *dgWords = (uint16_t *)dgLinear;
        if (dgWords[5] == 0xFFFF) {
            dgWords[5] = (uint16_t)objBase; // [0x0A] pStackBot
            dbg("windrv: patched DGROUP stack bottom [0x0A] from FFFF to %04X\n",
                (uint16_t)objBase);
        }
        if (dgWords[4] == 0xFFFF) {
            dgWords[4] = 0xFFFE; // [0x08] pStackMin
        }
    }

    drv->dgroupObjBase = objBase;

    // For each object: *Off is the 16-bit offset within DGROUP, *Linear the
    // flat address used by the host side.
    drv->pdevOff = (uint16_t)(objBase + DGROUP_OBJ_PDEV_OFF);
    drv->pdevLinear = dgLinear + objBase + DGROUP_OBJ_PDEV_OFF;
    drv->pdevSize = PDEVICE_MAX_SIZE;

    drv->brushOff = (uint16_t)(objBase + DGROUP_OBJ_BRUSH_OFF);
    drv->brushLinear = dgLinear + objBase + DGROUP_OBJ_BRUSH_OFF;

    drv->logBrushOff = (uint16_t)(objBase + DGROUP_OBJ_LOGBRUSH_OFF);
    drv->logBrushLinear = dgLinear + objBase + DGROUP_OBJ_LOGBRUSH_OFF;

    drv->drawModeOff = (uint16_t)(objBase + DGROUP_OBJ_DRAWMODE_OFF);
    drv->drawModeLinear = dgLinear + objBase + DGROUP_OBJ_DRAWMODE_OFF;

    drv->penOff = (uint16_t)(objBase + DGROUP_OBJ_PEN_OFF);
    drv->penLinear = dgLinear + objBase + DGROUP_OBJ_PEN_OFF;

    drv->logPenOff = (uint16_t)(objBase + DGROUP_OBJ_LOGPEN_OFF);
    drv->logPenLinear = dgLinear + objBase + DGROUP_OBJ_LOGPEN_OFF;

    drv->physColorOff = (uint16_t)(objBase + DGROUP_OBJ_PHYSCOLOR_OFF);
    drv->physColorLinear = dgLinear + objBase + DGROUP_OBJ_PHYSCOLOR_OFF;

    dbg("windrv: DGROUP extended by %" PRIu32 " bytes (old=%" PRIu32 " new=%" PRIu32 ")\n",
        extraBytes, oldSize, drv->neMod.segments[dgIdx].size);
    dbg("windrv: DGROUP objects: pdev=%04X brush=%04X logBrush=%04X drawMode=%04X pen=%04X logPen=%04X\n",
        drv->pdevOff, drv->brushOff, drv->logBrushOff, drv->drawModeOff, drv->penOff, drv->logPenOff);

    return true;
}


static bool allocPDevice(struct WdrvDriverS *drv)
{
    // PDEVICE is pre-allocated
within DGROUP by extendDgroupForObjects + memset((void *)drv->pdevLinear, 0, drv->pdevSize); + return true; +} + + +static bool allocDrawMode(struct WdrvDriverS *drv) +{ + // DrawMode is pre-allocated within DGROUP + DrawMode16T *dm = (DrawMode16T *)drv->drawModeLinear; + memset(dm, 0, sizeof(DrawMode16T)); + dm->rop2 = R2_COPYPEN; + dm->bkMode = BM_OPAQUE; + return true; +} + + +static bool allocBrushBuffers(struct WdrvDriverS *drv) +{ + // Both brushes are pre-allocated within DGROUP + LogBrush16T *lb = (LogBrush16T *)drv->logBrushLinear; + memset(lb, 0, sizeof(LogBrush16T)); + lb->lbStyle = BS_SOLID; + lb->lbColor = 0x00FFFFFF; + + memset((void *)drv->brushLinear, 0, PHYS_OBJ_MAX_SIZE); + drv->brushRealized = false; + return true; +} + + +static bool allocPenBuffers(struct WdrvDriverS *drv) +{ + // Both pens are pre-allocated within DGROUP + LogPen16T *lp = (LogPen16T *)drv->logPenLinear; + memset(lp, 0, sizeof(LogPen16T)); + lp->lopnStyle = PS_SOLID; + lp->lopnWidth.x = 1; + lp->lopnWidth.y = 0; + lp->lopnColor = 0x00000000; + + memset((void *)drv->penLinear, 0, PHYS_OBJ_MAX_SIZE); + drv->penRealized = false; + return true; +} + + +static uint32_t colorToPhys(struct WdrvDriverS *drv, uint32_t colorRef) +{ + if (!drv->ddiEntry[DDI_ORD_COLORINFO].present) { + return colorRef; + } + + // DWORD PASCAL ColorInfo(LPDEVICE lpDevice, DWORD dwColorIn, + // LPDWORD lpPhysColor) + // Pascal push order: lpDevice(2w), dwColorIn(2w), lpPhysColor(2w) + uint16_t dgSel = drv->neMod.autoDataSel; + uint16_t params[6]; + params[0] = dgSel; // lpDevice seg + params[1] = drv->pdevOff; // lpDevice off + params[2] = (uint16_t)(colorRef >> 16); // dwColorIn high + params[3] = (uint16_t)(colorRef); // dwColorIn low + params[4] = dgSel; // lpPhysColor seg + params[5] = drv->physColorOff; // lpPhysColor off + + // Clear the output buffer + *(uint32_t *)drv->physColorLinear = 0; + + waitForEngine(); + + thunkCall16(&gThunkCtx, + drv->ddiEntry[DDI_ORD_COLORINFO].sel, + 
drv->ddiEntry[DDI_ORD_COLORINFO].off, + params, 6); + + waitForEngine(); + + uint32_t physColor = *(uint32_t *)drv->physColorLinear; + dbg("windrv: ColorInfo(0x%06lX) -> phys 0x%08lX\n", + (unsigned long)colorRef, (unsigned long)physColor); + + return physColor; +} + + +static void setDisplayStart(struct WdrvDriverS *drv, uint32_t byteOffset) +{ + (void)drv; + + // S3 display start address is in units of 4 bytes (DWORDs). + // CR0C:CR0D = bits 15:0, CR31[5:4] = bits 17:16, CR51[1:0] = bits 19:18 + uint32_t startAddr = byteOffset / 4; + uint16_t crtcBase = (inportb(0x3CC) & 0x01) ? 0x3D4 : 0x3B4; + + // Unlock S3 registers + outportb(crtcBase, 0x38); + outportb(crtcBase + 1, 0x48); + outportb(crtcBase, 0x39); + outportb(crtcBase + 1, 0xA5); + + // Write display start address bits 15:0 + outportb(crtcBase, 0x0D); + outportb(crtcBase + 1, (uint8_t)(startAddr & 0xFF)); + outportb(crtcBase, 0x0C); + outportb(crtcBase + 1, (uint8_t)((startAddr >> 8) & 0xFF)); + + // Write bits 17:16 to CR31 + outportb(crtcBase, 0x31); + uint8_t cr31 = inportb(crtcBase + 1); + cr31 = (cr31 & ~0x30) | (uint8_t)(((startAddr >> 16) & 0x03) << 4); + outportb(crtcBase + 1, cr31); + + // Write bits 19:18 to CR51 + outportb(crtcBase, 0x51); + uint8_t cr51 = inportb(crtcBase + 1); + cr51 = (cr51 & ~0x03) | (uint8_t)((startAddr >> 18) & 0x03); + outportb(crtcBase + 1, cr51); + + dbg("windrv: display start set to byte offset %lu (reg=0x%lX)\n", + (unsigned long)byteOffset, (unsigned long)startAddr); +} + + +static bool realizeBrush(struct WdrvDriverS *drv, uint32_t color) +{ + if (!drv->ddiEntry[DDI_ORD_REALIZEOBJECT].present) { + return false; + } + + uint16_t dgSel = drv->neMod.autoDataSel; + + // Set up the logical brush + LogBrush16T *lb = (LogBrush16T *)drv->logBrushLinear; + lb->lbStyle = BS_SOLID; + lb->lbColor = color; + lb->lbHatch = 0; + + // Clear the physical brush buffer + memset((void *)drv->brushLinear, 0, PHYS_OBJ_MAX_SIZE); + + // RealizeObject(lpDevice, nStyle, lpInObj, lpOutObj, 
lpTextXForm) + // Pascal push order: left-to-right + uint16_t params[9]; + params[0] = dgSel; // lpDevice seg + params[1] = drv->pdevOff; // lpDevice off + params[2] = OBJ_BRUSH; // nStyle + params[3] = dgSel; // lpInObj seg + params[4] = drv->logBrushOff; // lpInObj off + params[5] = dgSel; // lpOutObj seg + params[6] = drv->brushOff; // lpOutObj off + params[7] = 0; // lpTextXForm seg (NULL) + params[8] = 0; // lpTextXForm off (NULL) + + waitForEngine(); + + uint32_t result = thunkCall16(&gThunkCtx, + drv->ddiEntry[DDI_ORD_REALIZEOBJECT].sel, + drv->ddiEntry[DDI_ORD_REALIZEOBJECT].off, + params, 9); + + waitForEngine(); + + dbg("windrv: RealizeObject(brush, color=0x%06lX) returned %d\n", + (unsigned long)color, (int16_t)(result & 0xFFFF)); + + if ((int16_t)(result & 0xFFFF) > 0) { + drv->brushRealized = true; + drv->brushRealizedColor = color; + + // Dump the first 16 bytes of the realized brush + uint8_t *bdata = (uint8_t *)drv->brushLinear; + dbg("windrv: brush[0..15]:"); + for (int k = 0; k < 16; k++) { + dbg(" %02X", bdata[k]); + } + dbg("\n"); + + return true; + } + + return false; +} + + +static bool realizePen(struct WdrvDriverS *drv, uint32_t color) +{ + if (!drv->ddiEntry[DDI_ORD_REALIZEOBJECT].present) { + return false; + } + + uint16_t dgSel = drv->neMod.autoDataSel; + + // Set up the logical pen + LogPen16T *lp = (LogPen16T *)drv->logPenLinear; + lp->lopnStyle = PS_SOLID; + lp->lopnWidth.x = 1; + lp->lopnWidth.y = 0; + lp->lopnColor = color; + + // Clear the physical pen buffer + memset((void *)drv->penLinear, 0, PHYS_OBJ_MAX_SIZE); + + // RealizeObject(lpDevice, nStyle, lpInObj, lpOutObj, lpTextXForm) + // Pascal push order: left-to-right + uint16_t params[9]; + params[0] = dgSel; // lpDevice seg + params[1] = drv->pdevOff; // lpDevice off + params[2] = OBJ_PEN; // nStyle + params[3] = dgSel; // lpInObj seg + params[4] = drv->logPenOff; // lpInObj off + params[5] = dgSel; // lpOutObj seg + params[6] = drv->penOff; // lpOutObj off + params[7] = 0; // 
lpTextXForm seg (NULL) + params[8] = 0; // lpTextXForm off (NULL) + + waitForEngine(); + + uint32_t result = thunkCall16(&gThunkCtx, + drv->ddiEntry[DDI_ORD_REALIZEOBJECT].sel, + drv->ddiEntry[DDI_ORD_REALIZEOBJECT].off, + params, 9); + + waitForEngine(); + + dbg("windrv: RealizeObject(pen, color=0x%06lX) returned %d\n", + (unsigned long)color, (int16_t)(result & 0xFFFF)); + + if ((int16_t)(result & 0xFFFF) > 0) { + drv->penRealized = true; + drv->penRealizedColor = color; + + // Dump the first 16 bytes of the realized pen + uint8_t *pdata = (uint8_t *)drv->penLinear; + dbg("windrv: pen[0..15]:"); + for (int k = 0; k < 16; k++) { + dbg(" %02X", pdata[k]); + } + dbg("\n"); + + return true; + } + + return false; +} + + +static void freeDrawObjects(struct WdrvDriverS *drv) +{ + // Objects are embedded in DGROUP - freed when module is unloaded + drv->brushRealized = false; + drv->penRealized = false; +} + + +static uint16_t alloc16BitBlock(uint32_t size, uint32_t *linearOut) +{ + uint8_t *mem = (uint8_t *)calloc(1, size); + if (!mem) { + return 0; + } + + uint32_t ptrVal = (uint32_t)mem; + + int sel = __dpmi_allocate_ldt_descriptors(1); + if (sel < 0) { + free(mem); + return 0; + } + + // True linear address = DJGPP pointer + DS base + __dpmi_set_segment_base_address(sel, ptrVal + __djgpp_base_address); + __dpmi_set_segment_limit(sel, size - 1); + __dpmi_set_descriptor_access_rights(sel, 0x00F2); // 16-bit data RW + + *linearOut = ptrVal; + return (uint16_t)sel; +} + + +static void free16BitBlock(uint16_t sel, uint32_t linear) +{ + if (sel) { + __dpmi_free_ldt_descriptor(sel); + } + if (linear) { + free((void *)linear); + } +} + + +static void setError(int32_t err) +{ + gLastError = err; +} + + +static void waitForEngine(void) +{ + if (!gIsS3) { + return; + } + + // Wait for the S3 graphics engine to become idle by polling GP_STAT. + // Bit 9 (0x0200) = hardware busy. 
+ for (int i = 0; i < 100000; i++) { + uint16_t stat = inportw(0x9AE8); + if (!(stat & 0x0200)) { + break; + } + } +} + + +// Declared in file-scope asm above +extern void int10hRawHandler(void); + +static bool installInt10hReflector(void) +{ + // Save DJGPP's DS selector for the assembly stub. + // The stub uses CS-relative addressing to load this value since + // DS is undefined on PM interrupt handler entry. + gInt10hDsSel = _my_ds(); + gInt10hStackTop = (uint32_t)gInt10hStack + sizeof(gInt10hStack); + + __dpmi_get_protected_mode_interrupt_vector(0x10, &gOldInt10hVec); + + __dpmi_paddr newVec; + newVec.offset32 = (unsigned long)int10hRawHandler; + newVec.selector = _my_cs(); + + if (__dpmi_set_protected_mode_interrupt_vector(0x10, &newVec) != 0) { + return false; + } + + gInt10hInstalled = true; + return true; +} + + +static void removeInt10hReflector(void) +{ + if (gInt10hInstalled) { + __dpmi_set_protected_mode_interrupt_vector(0x10, &gOldInt10hVec); + gInt10hInstalled = false; + } +} + + +static bool installDpmi300Proxy(void) +{ + gDpmi300DsSel = _my_ds(); + gDpmi300StackTop = (uint32_t)gDpmi300Stack + sizeof(gDpmi300Stack); + + __dpmi_get_protected_mode_interrupt_vector(DPMI300_INT_NUM, &gOldDpmi300Vec); + + __dpmi_paddr newVec; + newVec.offset32 = (unsigned long)dpmi300RawHandler; + newVec.selector = _my_cs(); + + if (__dpmi_set_protected_mode_interrupt_vector(DPMI300_INT_NUM, &newVec) != 0) { + return false; + } + + gDpmi300Installed = true; + dbg("windrv: DPMI 300h proxy installed on INT %02Xh\n", DPMI300_INT_NUM); + return true; +} + + +// Search a loaded driver's code segments for the DoInt10h INT 31h instruction +// and patch it to use our proxy interrupt instead. DoInt10h builds a RMCS on +// the stack and then does: +// mov ax, 0300h ; B8 00 03 +// ... +// int 31h ; CD 31 +// We find "CD 31" within a small window after "B8 00 03" and change the 0x31 +// to DPMI300_INT_NUM (0x64). 
+static bool patchDoInt10h(struct WdrvDriverS *drv) +{ + bool patched = false; + + for (int s = 0; s < drv->neMod.segmentCount; s++) { + if (!drv->neMod.segments[s].isCode) { + continue; + } + + uint16_t sel = drv->neMod.segments[s].selector; + uint32_t lin = drv->neMod.segments[s].linearAddr; + uint32_t size = drv->neMod.segments[s].size; + + // Scan for "B8 00 03" (mov ax, 0300h) + for (uint32_t i = 0; i + 2 < size; i++) { + uint8_t b0 = *(uint8_t *)(lin + i); + uint8_t b1 = *(uint8_t *)(lin + i + 1); + uint8_t b2 = *(uint8_t *)(lin + i + 2); + + if (b0 != 0xB8 || b1 != 0x00 || b2 != 0x03) { + continue; + } + + // Found "mov ax, 0300h" at offset i. Search ahead for "CD 31". + uint32_t searchEnd = i + 24; + if (searchEnd > size - 1) { + searchEnd = size - 1; + } + + for (uint32_t j = i + 3; j + 1 <= searchEnd; j++) { + uint8_t c0 = *(uint8_t *)(lin + j); + uint8_t c1 = *(uint8_t *)(lin + j + 1); + + if (c0 == 0xCD && c1 == 0x31) { + // Create a data alias for the code segment so we can write + uint16_t dataSel = __dpmi_create_alias_descriptor(sel); + if (dataSel == 0) { + logErr("windrv: patchDoInt10h: cannot create alias for seg %d\n", s); + break; + } + + // Patch 0x31 -> DPMI300_INT_NUM + _farpokeb(dataSel, j + 1, DPMI300_INT_NUM); + + // Verify + uint8_t verify = _farpeekb(sel, j + 1); + dbg("windrv: patched INT 31h -> INT %02Xh at seg%d:%04" PRIX32 + " (verify: %02X)\n", DPMI300_INT_NUM, s + 1, j, verify); + + __dpmi_free_ldt_descriptor(dataSel); + patched = true; + break; + } + } + } + } + + if (!patched) { + dbg("windrv: patchDoInt10h: no INT 31h found after MOV AX,0300h\n"); + } + return patched; +} + + +// Patch hardcoded "mov ax, 0040h; mov es, ax" in driver code segments. +// +// physical_enable in VGA.ASM loads ES with the literal value 0x0040 to +// access the BIOS data area. In real Windows 3.1, selector 0x0040 either +// maps to 0040:0000 or is trapped by the VDD. Under CWSDPMI, 0x0040 is +// an invalid ring-0 GDT selector that causes a GPF. 
+// +// We scan for the byte pattern B8 40 00 8E C0 (mov ax,0040h; mov es,ax) +// and patch the immediate to our biosDataSel from the stub context. +static bool patchBiosDataAccess(struct WdrvDriverS *drv) +{ + uint16_t biosSel = gStubCtx.biosDataSel; + if (biosSel == 0) { + logErr("windrv: patchBiosDataAccess: no biosDataSel\n"); + return false; + } + + bool patched = false; + + for (int s = 0; s < drv->neMod.segmentCount; s++) { + if (!drv->neMod.segments[s].isCode) { + continue; + } + + uint16_t sel = drv->neMod.segments[s].selector; + uint32_t lin = drv->neMod.segments[s].linearAddr; + uint32_t size = drv->neMod.segments[s].size; + + for (uint32_t i = 0; i + 4 < size; i++) { + uint8_t *p = (uint8_t *)(lin + i); + // B8 40 00 8E C0 = mov ax, 0040h; mov es, ax + if (p[0] == 0xB8 && p[1] == 0x40 && p[2] == 0x00 && + p[3] == 0x8E && p[4] == 0xC0) { + uint16_t dataSel = __dpmi_create_alias_descriptor(sel); + if (dataSel == 0) { + logErr("windrv: patchBiosDataAccess: cannot create alias for seg %d\n", s); + break; + } + + _farpokeb(dataSel, i + 1, (uint8_t)(biosSel & 0xFF)); + _farpokeb(dataSel, i + 2, (uint8_t)(biosSel >> 8)); + + uint8_t v0 = _farpeekb(sel, i + 1); + uint8_t v1 = _farpeekb(sel, i + 2); + logErr("windrv: patched mov ax,0040h -> mov ax,%04Xh at seg%d:%04" PRIX32 + " (verify: %02X %02X)\n", biosSel, s + 1, i, v0, v1); + + __dpmi_free_ldt_descriptor(dataSel); + patched = true; + } + } + } + + if (!patched) { + dbg("windrv: patchBiosDataAccess: pattern not found (OK for some drivers)\n"); + } + return patched; +} + + +// Repatch __WINFLAGS in all driver segments. +// +// The NE loader patches __WINFLAGS (KERNEL.178) into the driver's code/data +// segments at relocation time. After Enable(style=1) reveals the driver type, +// we may need to change WF_ENHANCED to WF_STANDARD for VGA-class drivers +// whose Enable(style=0) hangs waiting for a VDD that doesn't exist. +// +// We scan all segments for the 16-bit word pattern and replace it. 
+static void patchWinFlags(struct WdrvDriverS *drv, uint16_t oldFlags, uint16_t newFlags) +{ + if (oldFlags == newFlags) { + return; + } + + uint8_t oldLo = (uint8_t)(oldFlags & 0xFF); + uint8_t oldHi = (uint8_t)(oldFlags >> 8); + uint8_t newLo = (uint8_t)(newFlags & 0xFF); + uint8_t newHi = (uint8_t)(newFlags >> 8); + int count = 0; + + for (int s = 0; s < drv->neMod.segmentCount; s++) { + uint16_t sel = drv->neMod.segments[s].selector; + uint32_t lin = drv->neMod.segments[s].linearAddr; + uint32_t size = drv->neMod.segments[s].size; + bool isCode = drv->neMod.segments[s].isCode; + + if (size < 2) { + continue; + } + + // Need a writable alias for code segments + uint16_t dataSel = 0; + if (isCode) { + dataSel = __dpmi_create_alias_descriptor(sel); + if (dataSel == 0) { + continue; + } + } + + for (uint32_t i = 0; i + 1 < size; i++) { + uint8_t *p = (uint8_t *)(lin + i); + if (p[0] == oldLo && p[1] == oldHi) { + if (isCode) { + _farpokeb(dataSel, i, newLo); + _farpokeb(dataSel, i + 1, newHi); + } else { + p[0] = newLo; + p[1] = newHi; + } + count++; + } + } + + if (dataSel != 0) { + __dpmi_free_ldt_descriptor(dataSel); + } + } + + if (count > 0) { + dbg("windrv: patched %d __WINFLAGS locations: 0x%04X -> 0x%04X\n", + count, oldFlags, newFlags); + } +} + + +static void removeDpmi300Proxy(void) +{ + if (gDpmi300Installed) { + __dpmi_set_protected_mode_interrupt_vector(DPMI300_INT_NUM, &gOldDpmi300Vec); + gDpmi300Installed = false; + } +} + + +// Declared in file-scope asm above +extern void exc0dRawHandler(void); +extern void exc0eRawHandler(void); + +static bool installExceptionCapture(void) +{ + // Initialize fault handler stack + gFaultStackTop = (uint32_t)gFaultStack + sizeof(gFaultStack); + + // Get old exception handlers + __dpmi_get_processor_exception_handler_vector(0x0D, &gOldExc0D); + __dpmi_get_processor_exception_handler_vector(0x0E, &gOldExc0E); + + // Copy to packed far pointers for asm indirect far jumps + gOldExc0dFar.offset = 
(uint32_t)gOldExc0D.offset32; + gOldExc0dFar.selector = (uint16_t)gOldExc0D.selector; + gOldExc0eFar.offset = (uint32_t)gOldExc0E.offset32; + gOldExc0eFar.selector = (uint16_t)gOldExc0E.selector; + + // Install our handlers + __dpmi_paddr newVec; + newVec.selector = _my_cs(); + + newVec.offset32 = (unsigned long)exc0dRawHandler; + if (__dpmi_set_processor_exception_handler_vector(0x0D, &newVec) != 0) { + return false; + } + + newVec.offset32 = (unsigned long)exc0eRawHandler; + if (__dpmi_set_processor_exception_handler_vector(0x0E, &newVec) != 0) { + __dpmi_set_processor_exception_handler_vector(0x0D, &gOldExc0D); + return false; + } + + gExcCaptureInstalled = true; + return true; +} + + +static void removeExceptionCapture(void) +{ + if (gExcCaptureInstalled) { + __dpmi_set_processor_exception_handler_vector(0x0D, &gOldExc0D); + __dpmi_set_processor_exception_handler_vector(0x0E, &gOldExc0E); + gExcCaptureInstalled = false; + } +} + + +static bool installInt2FhHandler(void) +{ + __dpmi_paddr oldVec; + __dpmi_get_protected_mode_interrupt_vector(0x2F, &oldVec); + gOldInt2FhVec = oldVec; + gOldInt2FhFar.offset = oldVec.offset32; + gOldInt2FhFar.selector = oldVec.selector; + + __dpmi_paddr newVec; + newVec.offset32 = (unsigned long)int2FhRawHandler; + newVec.selector = _my_cs(); + if (__dpmi_set_protected_mode_interrupt_vector(0x2F, &newVec) != 0) { + return false; + } + + gInt2FhInstalled = true; + return true; +} + + +static void removeInt2FhHandler(void) +{ + if (gInt2FhInstalled) { + __dpmi_set_protected_mode_interrupt_vector(0x2F, &gOldInt2FhVec); + gInt2FhInstalled = false; + } +} + + +static void dbg(const char *fmt, ...) +{ + if (!gDebug) { + return; + } + va_list ap; + va_start(ap, fmt); + logErrV(fmt, ap); + va_end(ap); +} + + +// Patch Windows PROLOG_0 sequences in all code segments. 
+// +// The Windows 3.x module loader converts the 3-byte function prolog +// 8C D8 90 (mov ax, ds ; nop) +// to +// B8 xx xx (mov ax, ) +// +// This ensures AX holds the correct DGROUP selector when the function +// body executes "push ds ; mov ds, ax" for FAR entry. +// +// However, NEAR calls enter at offset+3 (skipping the mov ax), so AX +// may be clobbered. Since DS is always DGROUP at both entry paths +// (the relay sets it for far calls, the caller preserves it for near +// calls), the "mov ds, ax" is redundant. We NOP it out so the +// function simply does "push ds" (saving DGROUP for the epilog) and +// continues with DS already correct. +// +// Full original 10-byte prolog: +// 8C D8 90 mov ax, ds ; nop offset+0 (far entry) +// 45 inc bp offset+3 (near entry) +// 55 push bp offset+4 +// 8B EC mov bp, sp offset+5 +// 1E push ds offset+7 +// 8E D8 mov ds, ax offset+8 +// +// Patched: +// B8 xx xx mov ax, DGROUP offset+0 (for far entry AX) +// 45 inc bp offset+3 +// 55 push bp offset+4 +// 8B EC mov bp, sp offset+5 +// 1E push ds offset+7 +// 90 90 nop ; nop offset+8 (DS already correct) +// Patch Win16 PROLOG_0/PROLOG_1 function prologs and their matching epilogs. +// +// Win16 PROLOG_0 functions use `inc bp` to mark far frames for stack walking +// and `dec bp` in the epilog to undo it. The Windows kernel needs these odd +// BP markers for stack traversal and memory management, but our DOS environment +// has no such requirement. Leaving them in causes frame pointer corruption +// when the odd BP propagates through the call chain. 
+// +// Prolog pattern (two variants): +// 8C D8 90 45 55 8B EC [1E 8E D8] mov ax,ds; nop; inc bp; push bp; mov bp,sp; [push ds; mov ds,ax] +// B8 XX XX 45 55 8B EC [1E 8E D8] mov ax,IMMED; inc bp; push bp; mov bp,sp; [push ds; mov ds,ax] +// +// Epilog pattern: +// 5D 4D CB pop bp; dec bp; retf +// 5D 4D C3 pop bp; dec bp; ret +// +// Patches applied: +// - 8C D8 90 → B8 DGROUP_LO DGROUP_HI (load correct DGROUP selector) +// - 45 → 90 (NOP out inc bp) +// - 8E D8 → 90 90 (NOP out mov ds,ax — DS already set by thunk) +// - 4D → 90 in epilog (NOP out dec bp) +static void patchPrologs(NeModuleT *mod) +{ + uint16_t dgroupSel = mod->autoDataSel; + int prologCount = 0; + int epilogCount = 0; + + for (int s = 0; s < mod->segmentCount; s++) { + if (!mod->segments[s].isCode) { + continue; + } + + uint8_t *base = (uint8_t *)mod->segments[s].linearAddr; + uint32_t size = mod->segments[s].size; + + // Pass 1: Patch prologs — find "45 55 8B EC" (inc bp; push bp; mov bp,sp) + for (uint32_t i = 0; i + 3 < size; i++) { + if (base[i] != 0x45 || + base[i + 1] != 0x55 || + base[i + 2] != 0x8B || + base[i + 3] != 0xEC) { + continue; + } + + // NOP out inc bp + base[i] = 0x90; + prologCount++; + + // If preceded by "8C D8 90" (mov ax,ds; nop), patch to mov ax,DGROUP + if (i >= 3 && + base[i - 3] == 0x8C && + base[i - 2] == 0xD8 && + base[i - 1] == 0x90) { + base[i - 3] = 0xB8; + base[i - 2] = (uint8_t)(dgroupSel & 0xFF); + base[i - 1] = (uint8_t)(dgroupSel >> 8); + } + + // "1E 8E D8" (push ds; mov ds,ax) must be kept intact! + // The driver expects DS = DGROUP for all DS-relative data access. + // Do NOT NOP these out. + } + + // Pass 2: Patch epilogs — find "5D 4D" followed by any return: + // CB = retf, C3 = ret, CA xx xx = retf N, C2 xx xx = ret N + // Pascal calling convention uses retf N (CA) to clean parameters, + // so most epilogs are "5D 4D CA xx xx", not "5D 4D CB". 
+ for (uint32_t i = 0; i + 2 < size; i++) { + if (base[i] == 0x5D && + base[i + 1] == 0x4D && + (base[i + 2] == 0xCB || base[i + 2] == 0xC3 || + base[i + 2] == 0xCA || base[i + 2] == 0xC2)) { + base[i + 1] = 0x90; + epilogCount++; + } + } + } + + dbg("windrv: patched %d prologs, %d epilogs (DGROUP=0x%04X)\n", + prologCount, epilogCount, dgroupSel); +} + + +// Patch VFLATD initialization code to avoid a 20-byte stack imbalance. +// +// The VFLATD init code at seg5:0x2368 is a subroutine (no prolog, near ret +// at 0x252B) called from the mode setup function. It allocates DOS memory +// via GlobalDOSAlloc/GlobalAlloc/GlobalLock/GetCurrentPDB, pushing 20 bytes +// of intermediate values onto the stack. All exit paths converge at 0x2519 +// (GlobalFree) -> 0x2522 (SetSwapAreaSize) -> 0x252B (ret) WITHOUT cleaning +// these 20 bytes. +// +// In real Windows 3.x the caller at 0x3613 restores SP from BP, so the +// imbalance is harmless. But our thunk returns via a clean `ret`, which +// pops 0x2362 (junk) instead of the real return address 0x3613, landing +// in the middle of a `lea sp,[bp-2]` instruction -> SIGILL. +// +// There are TWO entry points to this init code: +// 0x22C5: wrapper function that checks [0EE9] and proceeds with init +// 0x2368: direct entry from mode setup (after VBE mode set) +// +// Fix: patch BOTH to C3 (near ret) so neither path executes VFLATD init. +// With LFB mode forced via DPMI (patchVflatdBypassCall), VFLATD setup +// is unnecessary. 
+static void patchVflatdStackBug(NeModuleT *mod) +{ + int segIdx = -1; + for (int s = 0; s < mod->segmentCount; s++) { + if (mod->segments[s].isCode && mod->segments[s].size > 0x2369) { + uint8_t *base = (uint8_t *)mod->segments[s].linearAddr; + // Verify the call at 0x02A4 targets 0x22C5: E8 1E 20 + if (base[0x02A4] == 0xE8 && base[0x02A5] == 0x1E && + base[0x02A6] == 0x20) { + segIdx = s; + break; + } + } + } + + if (segIdx < 0) { + dbg("windrv: VFLATD init patch: pattern not found, skipping\n"); + return; + } + + uint8_t *base = (uint8_t *)mod->segments[segIdx].linearAddr; + + // Patch wrapper function at 0x22C5 to immediate return + base[0x22C5] = 0xC3; + dbg("windrv: patched VFLATD init wrapper at seg %d offset 0x22C5 (ret)\n", segIdx); + + // Patch direct entry at 0x2368 (C6 06 A4 49 00 = mov byte [49A4],0) + if (base[0x2368] == 0xC6 && base[0x2369] == 0x06 && + base[0x236A] == 0xA4 && base[0x236B] == 0x49) { + base[0x2368] = 0xC3; + dbg("windrv: patched VFLATD init direct entry at seg %d offset 0x2368 (ret)\n", segIdx); + } else { + dbg("windrv: VFLATD init direct entry at 0x2368: unexpected bytes, skipping\n"); + } +} + + +// Bypass the VFLATD API call at seg5:0x3FD4. +// +// The driver checks [DS:8889] to decide between two framebuffer paths: +// [8889] == 0xFF: DPMI path (allocate descriptor, map physical via INT 31h) +// [8889] != 0xFF: VFLATD path (call far through [DS:0D76]) +// +// Since VFLATD is not available, the far pointer at [0D76] is null, causing +// a GPF. Force the DPMI path by patching the conditional jump to unconditional. +// +// Original at 0x3FA9: 80 3E 89 88 FF 74 32 (cmp byte [8889],0xFF; jz +0x32) +// Patched: EB 37 90 90 90 90 90 (jmp +0x37; nop*5) +// +// Both reach 0x3FE2 which uses DPMI INT 31h functions 0800h/0007h/0008h +// to map the physical framebuffer — fully supported by CWSDPMI. 
+static void patchVflatdBypassCall(NeModuleT *mod) +{ + int segIdx = -1; + for (int s = 0; s < mod->segmentCount; s++) { + if (mod->segments[s].isCode && mod->segments[s].size > 0x3FB0) { + uint8_t *base = (uint8_t *)mod->segments[s].linearAddr; + if (base[0x3FA9] == 0x80 && base[0x3FAA] == 0x3E && + base[0x3FAB] == 0x89 && base[0x3FAC] == 0x88 && + base[0x3FAD] == 0xFF && base[0x3FAE] == 0x74 && + base[0x3FAF] == 0x32) { + segIdx = s; + break; + } + } + } + + if (segIdx < 0) { + dbg("windrv: VFLATD bypass patch: pattern not found, skipping\n"); + return; + } + + uint8_t *base = (uint8_t *)mod->segments[segIdx].linearAddr; + + // 0x3FA9: EB 37 jmp 0x3FE2 (unconditional -> DPMI path) + // 0x3FAB: 90*5 nop padding + base[0x3FA9] = 0xEB; + base[0x3FAA] = 0x37; + base[0x3FAB] = 0x90; + base[0x3FAC] = 0x90; + base[0x3FAD] = 0x90; + base[0x3FAE] = 0x90; + base[0x3FAF] = 0x90; + + dbg("windrv: patched VFLATD bypass at seg %d offset 0x3FA9\n", segIdx); + + // NOP all "call far [DS:0D76]" (FF 1E 76 0D) in the code segment. + // These call through the VFLATD entry point which is null since VFLATD + // isn't present. With LFB mode via DPMI, bank switching is unnecessary. 
+ uint32_t segSize = mod->segments[segIdx].size; + int nopCount = 0; + for (uint32_t i = 0; i + 3 < segSize; i++) { + if (base[i] == 0xFF && base[i + 1] == 0x1E && + base[i + 2] == 0x76 && base[i + 3] == 0x0D) { + base[i] = 0x90; + base[i + 1] = 0x90; + base[i + 2] = 0x90; + base[i + 3] = 0x90; + dbg("windrv: NOPed VFLATD call at seg %d offset 0x%04" PRIX32 "\n", segIdx, i); + nopCount++; + } + } + dbg("windrv: NOPed %d VFLATD call(s) total\n", nopCount); +} diff --git a/win31drv/windrv.h b/win31drv/windrv.h new file mode 100644 index 0000000..eb7e2cf --- /dev/null +++ b/win31drv/windrv.h @@ -0,0 +1,189 @@ +#ifndef WINDRV_H +#define WINDRV_H + +// ============================================================================ +// windrv.h - Public API for using Windows 3.x display drivers from DOS +// +// This library loads Windows 3.x accelerated display drivers (16-bit NE +// format DLLs) and provides a clean 32-bit C API for DOS programs compiled +// with DJGPP to use their hardware-accelerated drawing functions. +// +// The library handles: +// - NE executable loading with segment relocation +// - 32-bit to 16-bit protected mode thunking via DPMI +// - Windows API stub functions that drivers import +// - DDI (Device Driver Interface) function wrappers +// +// Usage: +// 1. Call wdrvInit() to initialize the library +// 2. Call wdrvLoadDriver() with path to a .DRV file +// 3. Call wdrvEnable() to set a video mode +// 4. Use drawing functions (wdrvBitBlt, wdrvLine, etc.) +// 5. Call wdrvDisable() to restore text mode +// 6. 
Call wdrvUnloadDriver() and wdrvShutdown() to clean up +// ============================================================================ + +#include +#include +#include "wintypes.h" + +// ============================================================================ +// Error codes +// ============================================================================ + +#define WDRV_OK 0 +#define WDRV_ERR_INIT -1 // Initialization failed +#define WDRV_ERR_NO_DPMI -2 // DPMI not available or insufficient +#define WDRV_ERR_FILE_NOT_FOUND -3 // Driver file not found +#define WDRV_ERR_BAD_FORMAT -4 // Not a valid NE executable +#define WDRV_ERR_LOAD_FAILED -5 // Failed to load driver segments +#define WDRV_ERR_NO_MEMORY -6 // Out of memory (conventional or extended) +#define WDRV_ERR_RELOC_FAILED -7 // Relocation processing failed +#define WDRV_ERR_NO_ENTRY -8 // Required DDI entry point not found +#define WDRV_ERR_ENABLE_FAILED -9 // Driver Enable() call failed +#define WDRV_ERR_THUNK_FAILED -10 // Thunk setup failed +#define WDRV_ERR_NOT_LOADED -11 // No driver loaded +#define WDRV_ERR_NOT_ENABLED -12 // Driver not enabled +#define WDRV_ERR_UNSUPPORTED -13 // Operation not supported by driver + +// ============================================================================ +// Opaque driver handle +// ============================================================================ + +typedef struct WdrvDriverS *WdrvHandleT; + +// ============================================================================ +// Driver information (returned by wdrvGetInfo) +// ============================================================================ + +typedef struct { + char driverName[64]; // Module name from NE header + uint16_t driverVersion; // Driver version number + int32_t maxWidth; // Maximum supported width + int32_t maxHeight; // Maximum supported height + int32_t maxBpp; // Maximum bits per pixel + int32_t numColors; // Number of colors + uint32_t rasterCaps; // Raster capability 
bits (RC_*) + bool hasBitBlt; // Driver exports BitBlt + bool hasOutput; // Driver exports Output (lines, shapes) + bool hasPixel; // Driver exports Pixel + bool hasStretchBlt; // Driver exports StretchBlt + bool hasExtTextOut; // Driver exports ExtTextOut + bool hasSetPalette; // Driver exports SetPalette + bool hasSetCursor; // Driver exports SetCursor +} WdrvInfoT; + +// ============================================================================ +// BitBlt parameters +// ============================================================================ + +typedef struct { + int16_t dstX; + int16_t dstY; + int16_t srcX; + int16_t srcY; + int16_t width; + int16_t height; + uint32_t rop3; // Raster operation (SRCCOPY, PATCOPY, etc.) +} WdrvBitBltParamsT; + +// ============================================================================ +// Library initialization / shutdown +// ============================================================================ + +// Initialize the library. Must be called before any other functions. +// Sets up DPMI descriptors, thunk infrastructure, and API stubs. +int32_t wdrvInit(void); + +// Shut down the library and free all resources. +void wdrvShutdown(void); + +// ============================================================================ +// Driver loading +// ============================================================================ + +// Load a Windows 3.x display driver (.DRV file). +// Returns a driver handle on success, NULL on failure. +// Call wdrvGetLastError() for details on failure. +WdrvHandleT wdrvLoadDriver(const char *driverPath); + +// Unload a previously loaded driver. +void wdrvUnloadDriver(WdrvHandleT handle); + +// Get information about a loaded driver. +// The driver must be loaded but need not be enabled. 
+int32_t wdrvGetInfo(WdrvHandleT handle, WdrvInfoT *info); + +// ============================================================================ +// Mode setting +// ============================================================================ + +// Enable the driver (set video mode and initialize hardware). +// width/height/bpp are the requested mode; the driver may adjust. +// Pass 0 for defaults (driver's preferred resolution). +int32_t wdrvEnable(WdrvHandleT handle, int32_t width, int32_t height, int32_t bpp); + +// Disable the driver (restore text mode, release hardware). +int32_t wdrvDisable(WdrvHandleT handle); + +// ============================================================================ +// Drawing operations +// ============================================================================ + +// Block transfer (hardware-accelerated if supported). +int32_t wdrvBitBlt(WdrvHandleT handle, WdrvBitBltParamsT *params); + +// Solid rectangle fill using PatBlt with a solid brush. +int32_t wdrvFillRect(WdrvHandleT handle, int16_t x, int16_t y, int16_t w, int16_t h, uint32_t color); + +// Set a single pixel. +int32_t wdrvSetPixel(WdrvHandleT handle, int16_t x, int16_t y, uint32_t color); + +// Get a single pixel's color. +uint32_t wdrvGetPixel(WdrvHandleT handle, int16_t x, int16_t y); + +// Draw a polyline using the Output DDI function. +int32_t wdrvPolyline(WdrvHandleT handle, Point16T *points, int16_t count, uint32_t color); + +// Draw a rectangle outline. +int32_t wdrvRectangle(WdrvHandleT handle, int16_t x, int16_t y, int16_t w, int16_t h, uint32_t color); + +// ============================================================================ +// Palette operations (for 8bpp modes) +// ============================================================================ + +// Set palette entries. colors is an array of RGBQUAD (R,G,B,flags). 
+int32_t wdrvSetPalette(WdrvHandleT handle, int32_t startIndex, int32_t count, const uint8_t *colors); + +// ============================================================================ +// Direct framebuffer access +// ============================================================================ + +// Get a near pointer to the linear framebuffer (if available). +// Returns NULL if the driver doesn't provide linear access. +void *wdrvGetFramebuffer(WdrvHandleT handle); + +// Get the framebuffer pitch (bytes per scanline). +int32_t wdrvGetPitch(WdrvHandleT handle); + +// ============================================================================ +// Error reporting +// ============================================================================ + +// Get the last error code. +int32_t wdrvGetLastError(void); + +// Get a human-readable description of the last error. +const char *wdrvGetLastErrorString(void); + +// ============================================================================ +// Debugging +// ============================================================================ + +// Enable/disable verbose debug output to stderr. +void wdrvSetDebug(bool enable); + +// Dump all segment base addresses for debugging. +void wdrvDumpSegmentBases(WdrvHandleT handle); + +#endif // WINDRV_H diff --git a/win31drv/winstub.c b/win31drv/winstub.c new file mode 100644 index 0000000..5b7853c --- /dev/null +++ b/win31drv/winstub.c @@ -0,0 +1,1400 @@ +// ============================================================================ +// winstub.c - Windows API function stubs +// +// Provides minimal implementations of KERNEL, GDI, USER, and DIBENG +// functions that Windows 3.x display drivers import. Each stub is +// registered as a 16-bit callback via the thunking layer. +// +// These stubs implement just enough behavior for a display driver to +// initialize and perform basic drawing operations. Many stubs simply +// return success without doing real work. 
+// ============================================================================ + +#include +#include +#include +#include +#include +#include +#include + +#include "thunk.h" +#include "winstub.h" +#include "wintypes.h" +#include "log.h" + +static bool gStubDebug = false; + +// Forward declarations - KERNEL stubs +static uint32_t stubFatalExit(uint16_t *p, uint16_t n); +static uint32_t stubGlobalAlloc(uint16_t *p, uint16_t n); +static uint32_t stubGlobalFree(uint16_t *p, uint16_t n); +static uint32_t stubGlobalLock(uint16_t *p, uint16_t n); +static uint32_t stubGlobalUnlock(uint16_t *p, uint16_t n); +static uint32_t stubGlobalRealloc(uint16_t *p, uint16_t n); +static uint32_t stubGlobalSize(uint16_t *p, uint16_t n); +static uint32_t stubGlobalDOSAlloc(uint16_t *p, uint16_t n); +static uint32_t stubGlobalDOSFree(uint16_t *p, uint16_t n); +static uint32_t stubLocalInit(uint16_t *p, uint16_t n); +static uint32_t stubLocalAlloc(uint16_t *p, uint16_t n); +static uint32_t stubLocalReAlloc(uint16_t *p, uint16_t n); +static uint32_t stubLocalFree(uint16_t *p, uint16_t n); +static uint32_t stubLocalLock(uint16_t *p, uint16_t n); +static uint32_t stubLocalUnlock(uint16_t *p, uint16_t n); +static uint32_t stubLocalSize(uint16_t *p, uint16_t n); +static uint32_t stubGetFreeSpace(uint16_t *p, uint16_t n); +static uint32_t stubGetCurrentPDB(uint16_t *p, uint16_t n); +static uint32_t stubGetModuleHandle(uint16_t *p, uint16_t n); +static uint32_t stubGetModuleUsage(uint16_t *p, uint16_t n); +static uint32_t stubGetProcAddress(uint16_t *p, uint16_t n); +static uint32_t stubGetWinFlags(uint16_t *p, uint16_t n); +static uint32_t stubGetVersion(uint16_t *p, uint16_t n); +static uint32_t stubGetProfileInt(uint16_t *p, uint16_t n); +static uint32_t stubGetPrivateProfileInt(uint16_t *p, uint16_t n); +static uint32_t stubGetPrivateProfileString(uint16_t *p, uint16_t n); +static uint32_t stubGetDOSEnvironment(uint16_t *p, uint16_t n); +static uint32_t stubGetSystemDirectory(uint16_t *p, 
uint16_t n); +static uint32_t stubAllocSelector(uint16_t *p, uint16_t n); +static uint32_t stubFreeSelector(uint16_t *p, uint16_t n); +static uint32_t stubAllocCStoDSAlias(uint16_t *p, uint16_t n); +static uint32_t stubAllocDSToCSAlias(uint16_t *p, uint16_t n); +static uint32_t stubLoadLibrary(uint16_t *p, uint16_t n); +static uint32_t stubPrestoChangoSelector(uint16_t *p, uint16_t n); +static uint32_t stubSetSelectorBase(uint16_t *p, uint16_t n); +static uint32_t stubSetSelectorLimit(uint16_t *p, uint16_t n); +static uint32_t stubGetSelectorLimit(uint16_t *p, uint16_t n); +static uint32_t stubGetSelectorBase(uint16_t *p, uint16_t n); +static uint32_t stubSelectorAccessRights(uint16_t *p, uint16_t n); +static uint32_t stubGetModuleFileName(uint16_t *p, uint16_t n); +static uint32_t stubOutputDebugString(uint16_t *p, uint16_t n); +static uint32_t stubWriteProfileString(uint16_t *p, uint16_t n); +static uint32_t stubGetExePtr(uint16_t *p, uint16_t n); + +// Forward declarations - GDI stubs +static uint32_t stubGetDeviceCaps(uint16_t *p, uint16_t n); +static uint32_t stubDummy(uint16_t *p, uint16_t n); + +// Forward declarations - USER stubs +static uint32_t stubGetSystemMetrics(uint16_t *p, uint16_t n); +static uint32_t stubMessageBox(uint16_t *p, uint16_t n); + +// Helper to register a single stub and add it to the lookup table +static bool registerStub(StubContextT *ctx, const char *module, uint16_t ordinal, + ThunkCallbackT func, uint16_t paramWords); + + +void stubSetDebug(bool debug) +{ + gStubDebug = debug; +} + + +bool stubInit(StubContextT *ctx, ThunkContextT *thunkCtx) +{ + memset(ctx, 0, sizeof(StubContextT)); + ctx->thunkCtx = thunkCtx; + ctx->nextHandle = 0x1000; // Start handle allocation here + + // Create well-known memory region selectors. + // Win 3.x drivers import these as KERNEL "variables" (__A000H, __0040H, etc.) + // and use them for direct hardware access. 
+ + // Helper macro: allocate a 16-bit data selector for a physical address range + #define MAKE_MEM_SEL(field, physAddr, limit) do { \ + int _s = __dpmi_allocate_ldt_descriptors(1); \ + if (_s > 0) { \ + __dpmi_set_segment_base_address(_s, (physAddr)); \ + __dpmi_set_segment_limit(_s, (limit)); \ + __dpmi_set_descriptor_access_rights(_s, 0x00F2); \ + ctx->field = (uint16_t)_s; \ + } \ + } while (0) + + MAKE_MEM_SEL(biosDataSel, 0x00400, 0x00FF); // 0040:0000 (256 bytes) + MAKE_MEM_SEL(vramSel, 0xA0000, 0xFFFF); // A000:0000 (64K) + MAKE_MEM_SEL(monoTextSel, 0xB0000, 0x7FFF); // B000:0000 (32K) + MAKE_MEM_SEL(colorTextSel, 0xB8000, 0x7FFF); // B800:0000 (32K) + MAKE_MEM_SEL(videoBiosSel, 0xC0000, 0x7FFF); // C000:0000 (32K) + MAKE_MEM_SEL(upperMemD000Sel, 0xD0000, 0xFFFF); // D000:0000 (64K) + MAKE_MEM_SEL(upperMemE000Sel, 0xE0000, 0xFFFF); // E000:0000 (64K) + MAKE_MEM_SEL(sysBiosSel, 0xF0000, 0xFFFF); // F000:0000 (64K) + + #undef MAKE_MEM_SEL + + // ================================================================ + // Register KERNEL stubs + // ================================================================ + + // Error handling + registerStub(ctx, "KERNEL", KERNEL_ORD_FATALEXIT, stubFatalExit, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_FATALAPPEXIT, stubFatalExit, 3); + + // Global memory + registerStub(ctx, "KERNEL", KERNEL_ORD_GLOBALALLOC, stubGlobalAlloc, 3); + registerStub(ctx, "KERNEL", KERNEL_ORD_GLOBALREALLOC, stubGlobalRealloc, 4); + registerStub(ctx, "KERNEL", KERNEL_ORD_GLOBALFREE, stubGlobalFree, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_GLOBALLOCK, stubGlobalLock, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_GLOBALUNLOCK, stubGlobalUnlock, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_GLOBALSIZE, stubGlobalSize, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_GLOBALFLAGS, stubDummy, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_GLOBALDOSALLOC, stubGlobalDOSAlloc, 2); + registerStub(ctx, "KERNEL", KERNEL_ORD_GLOBALDOSFREE, stubGlobalDOSFree, 1); + 
registerStub(ctx, "KERNEL", KERNEL_ORD_GLOBALDOSALLOC2, stubGlobalDOSAlloc, 2); + registerStub(ctx, "KERNEL", KERNEL_ORD_GLOBALDOSFREE2, stubGlobalDOSFree, 1); + + // Local memory + registerStub(ctx, "KERNEL", KERNEL_ORD_LOCALINIT, stubLocalInit, 3); + registerStub(ctx, "KERNEL", KERNEL_ORD_LOCALALLOC, stubLocalAlloc, 2); + registerStub(ctx, "KERNEL", KERNEL_ORD_LOCALREALLOC, stubLocalReAlloc, 3); + registerStub(ctx, "KERNEL", KERNEL_ORD_LOCALFREE, stubLocalFree, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_LOCALLOCK, stubLocalLock, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_LOCALUNLOCK, stubLocalUnlock, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_LOCALSIZE, stubLocalSize, 1); + + // Memory info + registerStub(ctx, "KERNEL", KERNEL_ORD_GETFREESPACE, stubGetFreeSpace, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_LOCKSEGMENT, stubDummy, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_UNLOCKSEGMENT, stubDummy, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_SETSWAPAREA, stubDummy, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETCURRENTPDB, stubGetCurrentPDB, 0); + + // Module management + registerStub(ctx, "KERNEL", KERNEL_ORD_GETMODULEHANDLE, stubGetModuleHandle, 2); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETMODULEUSAGE, stubGetModuleUsage, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETPROFILEINT, stubGetProfileInt, 5); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETPROFILEINT2, stubGetProfileInt, 5); + registerStub(ctx, "KERNEL", KERNEL_ORD_WRITEPROFILESTRING, stubWriteProfileString, 6); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETMODULEFILENAME, stubGetModuleFileName, 4); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETPROCADDRESS, stubGetProcAddress, 3); + registerStub(ctx, "KERNEL", KERNEL_ORD_LOADLIBRARY, stubLoadLibrary, 2); + registerStub(ctx, "KERNEL", KERNEL_ORD_FREELIBRARY, stubDummy, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_INITTASK, stubDummy, 0); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETEXEPTR, stubGetExePtr, 1); + + // Resource management (return 0 / 
not found) + registerStub(ctx, "KERNEL", KERNEL_ORD_FINDRESOURCE, stubDummy, 3); + registerStub(ctx, "KERNEL", KERNEL_ORD_LOADRESOURCE, stubDummy, 2); + registerStub(ctx, "KERNEL", KERNEL_ORD_FREERESOURCE, stubDummy, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_LOCKRESOURCE, stubDummy, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_SIZEOFRESOURCE, stubDummy, 3); + + // System info + registerStub(ctx, "KERNEL", KERNEL_ORD_GETWINFLAGS, stubGetWinFlags, 0); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETVERSION, stubGetVersion, 0); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETPRIVATEPROFILEINT, stubGetPrivateProfileInt, 7); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETPRIVATEPROFILESTRING, stubGetPrivateProfileString, 11); + registerStub(ctx, "KERNEL", KERNEL_ORD_WRITEPRIVATEPROFILESTRING, stubDummy, 8); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETDOSENVIRONMENT, stubGetDOSEnvironment, 0); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETSYSTEMDIRECTORY, stubGetSystemDirectory, 3); + + // Selector management + registerStub(ctx, "KERNEL", KERNEL_ORD_ALLOCSELECTOR, stubAllocSelector, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_FREESELECTOR, stubFreeSelector, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_ALLOCCSTODSALIAS, stubAllocCStoDSAlias, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_ALLOCDSTOCSALIAS, stubAllocDSToCSAlias, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_PRESTOCHANGOSELECTOR, stubPrestoChangoSelector, 2); + registerStub(ctx, "KERNEL", KERNEL_ORD_SETSELECTORBASE, stubSetSelectorBase, 3); + registerStub(ctx, "KERNEL", KERNEL_ORD_SETSELECTORLIMIT, stubSetSelectorLimit, 3); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETSELECTORLIMIT, stubGetSelectorLimit, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_GETSELECTORBASE2, stubGetSelectorBase, 1); + registerStub(ctx, "KERNEL", KERNEL_ORD_SELECTORACCESSRIGHTS, stubSelectorAccessRights, 3); + + // Debug + registerStub(ctx, "KERNEL", KERNEL_ORD_OUTPUTDEBUGSTRING, stubOutputDebugString, 2); + + // Variable imports (__WINFLAGS, 
__0040H, __A000H, __AHSHIFT, __AHINCR, etc.) + // are handled as special cases in stubResolveImport, not as callbacks. + + // ================================================================ + // Register GDI stubs + // ================================================================ + + // GetDeviceCaps(hdc:WORD, index:WORD) -> int + registerStub(ctx, "GDI", GDI_ORD_GETDEVICECAPS, stubGetDeviceCaps, 2); + + // CreateDC, DeleteDC, SelectObject, DeleteObject - simple stubs + registerStub(ctx, "GDI", GDI_ORD_CREATEDC, stubDummy, 8); + registerStub(ctx, "GDI", GDI_ORD_DELETEDC, stubDummy, 1); + registerStub(ctx, "GDI", GDI_ORD_SELECTOBJECT, stubDummy, 2); + registerStub(ctx, "GDI", GDI_ORD_DELETEOBJECT, stubDummy, 1); + registerStub(ctx, "GDI", GDI_ORD_SETBKCOLOR, stubDummy, 3); + registerStub(ctx, "GDI", GDI_ORD_SETTEXTCOLOR, stubDummy, 3); + + // ================================================================ + // Register DIBENG stubs (return 0 so driver uses its own code). + // Parameter counts MUST match the real DDI signatures so the + // generated retf N correctly pops the caller's parameters. 
+ // ================================================================ + + // DIBBitBlt: lpDst(2) DstX DstY lpSrc(2) SrcX SrcY xExt yExt Rop3(2) lpBrush(2) lpDM(2) = 16 + registerStub(ctx, "DIBENG", DIBENG_ORD_DIBBITBLT, stubDummy, 16); + // DIBOutput: lpDst(2) style count lpPts(2) lpPen(2) lpBrush(2) lpDM(2) lpClip(2) = 14 + registerStub(ctx, "DIBENG", DIBENG_ORD_DIBOUTPUT, stubDummy, 14); + // DIBPixel: lpDev(2) x y color(2) lpDM(2) = 8 + registerStub(ctx, "DIBENG", DIBENG_ORD_DIBPIXEL, stubDummy, 8); + // DIBStrBlt: lpDst(2) DstX DstY DstXE DstYE lpSrc(2) SrcX SrcY SrcXE SrcYE Rop3(2) lpBrush(2) lpDM(2) lpClip(2) = 20 + registerStub(ctx, "DIBENG", DIBENG_ORD_DIBSTRBLT, stubDummy, 20); + // DIBColorInfo: lpDev(2) color(2) lpPColor(2) = 6 + registerStub(ctx, "DIBENG", DIBENG_ORD_DIBCOLORINFO, stubDummy, 6); + // DIBRealize: lpDev(2) style lpIn(2) lpOut(2) lpTXF(2) = 9 + registerStub(ctx, "DIBENG", DIBENG_ORD_DIBREALIZE, stubDummy, 9); + // DIBCreateBitmap (BitmapBits): lpDev(2) flags(2) count(2) lpBits(2) = 8 + registerStub(ctx, "DIBENG", DIBENG_ORD_DIBCREATEBITMAP, stubDummy, 8); + // DIBScanLR: lpDev(2) x y color(2) style = 7 + registerStub(ctx, "DIBENG", DIBENG_ORD_DIBSCANLR, stubDummy, 7); + // DIBExtOut: lpDev(2) x y lpClip(2) lpStr(2) count lpFont(2) lpDM(2) lpTXF(2) lpWidths(2) lpOpaque(2) opts = 20 + registerStub(ctx, "DIBENG", DIBENG_ORD_DIBEXTOUT, stubDummy, 20); + + // ================================================================ + // Register USER stubs + // ================================================================ + + // GetSystemMetrics(nIndex:WORD) -> int + registerStub(ctx, "USER", USER_ORD_GETSYSTEMMETRICS, stubGetSystemMetrics, 1); + + // MessageBox(hwnd:WORD, lpText:DWORD, lpCaption:DWORD, type:WORD) -> int + registerStub(ctx, "USER", USER_ORD_MESSAGEBOX, stubMessageBox, 6); + + // ================================================================ + // Register KEYBOARD stubs + // 
================================================================ + + // ScreenSwitchEnable(wEnable:WORD) -> void + registerStub(ctx, "KEYBOARD", KEYBOARD_ORD_SCREENSWITCHENABLE, stubDummy, 1); + + // ================================================================ + // Pre-allocate DOS memory pool for GlobalDOSAlloc. + // Sub-allocate from it in stubGlobalDOSAlloc to avoid + // calling __dpmi_allocate_dos_memory at runtime. + // ================================================================ + { + uint16_t poolParas = STUB_DOS_POOL_SIZE / 16; + int poolSel; + int poolSeg = __dpmi_allocate_dos_memory(poolParas, &poolSel); + if (poolSeg < 0) { + logErr("stub: failed to allocate DOS memory pool\n"); + return false; + } + ctx->dosPoolSeg = (uint16_t)poolSeg; + ctx->dosPoolSel = (uint16_t)poolSel; + ctx->dosPoolParas = poolParas; + ctx->dosPoolNextPara = 0; + } + + ctx->initialized = true; + return true; +} + + +void stubShutdown(StubContextT *ctx) +{ + // Free allocated memory blocks + for (int i = 0; i < STUB_MAX_ALLOCS; i++) { + if (ctx->allocs[i].inUse) { + if (ctx->allocs[i].selector) { + __dpmi_free_ldt_descriptor(ctx->allocs[i].selector); + } + if (ctx->allocs[i].linearAddr) { + free((void *)ctx->allocs[i].linearAddr); + } + ctx->allocs[i].inUse = false; + } + } + + // Free DOS memory pool sub-allocation selectors + for (int i = 0; i < STUB_MAX_DOS_ALLOCS; i++) { + if (ctx->dosAllocs[i].inUse) { + if (ctx->dosAllocs[i].selector) { + __dpmi_free_ldt_descriptor(ctx->dosAllocs[i].selector); + } + ctx->dosAllocs[i].inUse = false; + } + } + + // Free the DOS memory pool itself + if (ctx->dosPoolSel) { + __dpmi_free_dos_memory(ctx->dosPoolSel); + ctx->dosPoolSel = 0; + ctx->dosPoolSeg = 0; + } + + // Free extra selectors + for (int i = 0; i < STUB_MAX_SELECTORS; i++) { + if (ctx->selectors[i].inUse) { + __dpmi_free_ldt_descriptor(ctx->selectors[i].selector); + ctx->selectors[i].inUse = false; + } + } + + // Free well-known memory selectors + uint16_t *memSels[] = { + 
&ctx->biosDataSel, &ctx->vramSel, &ctx->monoTextSel, + &ctx->colorTextSel, &ctx->videoBiosSel, &ctx->upperMemD000Sel, + &ctx->upperMemE000Sel, &ctx->sysBiosSel, NULL + }; + for (int i = 0; memSels[i]; i++) { + if (*memSels[i]) { + __dpmi_free_ldt_descriptor(*memSels[i]); + *memSels[i] = 0; + } + } + + ctx->initialized = false; +} + + +void stubSetModule(StubContextT *ctx, NeModuleT *mod) +{ + ctx->neModule = mod; +} + + +FarPtr16T stubResolveImport(StubContextT *ctx, const char *moduleName, + uint16_t ordinal, const char *funcName) +{ + // ================================================================ + // KERNEL "variable" imports + // + // These are not functions - they're resolved as segment selector + // values that get patched into the driver's code/data via + // relocation fixups. The selector goes in the segment field of + // the returned far pointer; the offset is 0. + // ================================================================ + if (strcasecmp(moduleName, "KERNEL") == 0) { + switch (ordinal) { + case KERNEL_ORD___0040H: + // Value import: selector goes in offset field for OFFSET relocations. + return makeFarPtr16(0, ctx->biosDataSel); + case KERNEL_ORD___A000H: + return makeFarPtr16(0, ctx->vramSel); + case KERNEL_ORD___B000H: + return makeFarPtr16(0, ctx->monoTextSel); + case KERNEL_ORD___B800H: + return makeFarPtr16(0, ctx->colorTextSel); + case KERNEL_ORD___C000H: + return makeFarPtr16(0, ctx->videoBiosSel); + case KERNEL_ORD___D000H: + return makeFarPtr16(0, ctx->upperMemD000Sel); + case KERNEL_ORD___E000H: + return makeFarPtr16(0, ctx->upperMemE000Sel); + case KERNEL_ORD___F000H: + case KERNEL_ORD___ROMBIOS: + return makeFarPtr16(0, ctx->sysBiosSel); + case KERNEL_ORD___WINFLAGS: { + // Value import: the NE relocation is OFFSET type, so the + // value must go in the offset field to be patched correctly. 
+ uint16_t flags = WF_PMODE | WF_CPU386 | WF_ENHANCED; + return makeFarPtr16(0, flags); + } + case KERNEL_ORD___AHSHIFT: + // Selector arithmetic shift count. In protected mode this + // is always 3 (selectors are 8 bytes apart in the LDT). + // Value import: goes in offset field for OFFSET relocations. + return makeFarPtr16(0, 3); + case KERNEL_ORD___AHINCR: + // Selector increment value. In protected mode, consecutive + // LDT selectors differ by 8. + // Value import: goes in offset field for OFFSET relocations. + return makeFarPtr16(0, 8); + case KERNEL_ORD___0000H: + return makeFarPtr16(0, 0); + } + } + + // Look up in the stub table + for (uint16_t i = 0; i < ctx->stubCount; i++) { + if (strcasecmp(ctx->stubTable[i].module, moduleName) == 0 && + ctx->stubTable[i].ordinal == ordinal) { + return ctx->stubTable[i].addr; + } + } + + // For DISPLAY (self-reference) imports, return NULL and let the NE loader + // handle it via internal reference + if (strcasecmp(moduleName, "DISPLAY") == 0) { + return FARPTR16_NULL; + } + + // Unknown import - log and return NULL + if (funcName && funcName[0]) { + logErr("winstub: unresolved import %s.%s (ord %u)\n", + moduleName, funcName, ordinal); + } else { + logErr("winstub: unresolved import %s.%u\n", moduleName, ordinal); + } + + return FARPTR16_NULL; +} + + +// ============================================================================ +// Internal helper +// ============================================================================ + +static StubContextT *gStubCtx = NULL; // Global reference for callback functions + +static bool registerStub(StubContextT *ctx, const char *module, uint16_t ordinal, + ThunkCallbackT func, uint16_t paramWords) +{ + if (ctx->stubCount >= 256) { + return false; + } + + FarPtr16T addr; + if (!thunkRegisterCallback(ctx->thunkCtx, func, paramWords, &addr)) { + logErr("winstub: failed to register callback for %s.%u\n", module, ordinal); + return false; + } + + uint16_t slot = ctx->stubCount; + 
strncpy(ctx->stubTable[slot].module, module, 15); + ctx->stubTable[slot].module[15] = '\0'; + ctx->stubTable[slot].ordinal = ordinal; + ctx->stubTable[slot].addr = addr; + ctx->stubCount++; + + if (gStubDebug) { + logErr("winstub: slot %u = %s.%u paramWords=%u -> %04X:%04X\n", + slot, module, ordinal, paramWords, addr.segment, addr.offset); + } + + gStubCtx = ctx; // Keep global reference updated + + return true; +} + + +// ============================================================================ +// KERNEL stub implementations +// ============================================================================ + +// Find a free allocation slot +static int findFreeAllocSlot(void) +{ + for (int i = 0; i < STUB_MAX_ALLOCS; i++) { + if (!gStubCtx->allocs[i].inUse) { + return i; + } + } + return -1; +} + + +// Find allocation by handle +static int findAllocByHandle(uint16_t handle) +{ + for (int i = 0; i < STUB_MAX_ALLOCS; i++) { + if (gStubCtx->allocs[i].inUse && gStubCtx->allocs[i].handle == handle) { + return i; + } + } + return -1; +} + + +// FatalExit(code) -> does not return +static uint32_t stubFatalExit(uint16_t *p, uint16_t n) +{ + (void)n; + logErr("DRIVER: FatalExit(%d)\n", (int16_t)p[0]); + return 0; +} + + +// GlobalAlloc(flags, size_hi, size_lo) -> HGLOBAL +// Pascal params: [0]=flags, [1]=size_hi, [2]=size_lo +// On 16-bit stack (Pascal, rightmost on top): size_lo, size_hi, flags +// But our params array is in push order: params[0]=flags (pushed first, deepest) +static uint32_t stubGlobalAlloc(uint16_t *p, uint16_t n) +{ + (void)n; + uint16_t flags = p[0]; + uint32_t size = ((uint32_t)p[1] << 16) | p[2]; + + (void)flags; + + int slot = findFreeAllocSlot(); + if (slot < 0 || size == 0 || size > 0x100000) { + return 0; // Failure + } + + uint8_t *mem = (uint8_t *)calloc(1, size); + if (!mem) { + return 0; + } + + // Create a 16-bit data selector for this block + int sel = __dpmi_allocate_ldt_descriptors(1); + if (sel < 0) { + free(mem); + return 0; + } + + 
uint32_t linAddr = (uint32_t)mem; + __dpmi_set_segment_base_address(sel, linAddr + __djgpp_base_address); + __dpmi_set_segment_limit(sel, size - 1); + __dpmi_set_descriptor_access_rights(sel, 0x00F2); // 16-bit data RW + + // In Windows 3.x, GlobalAlloc returns a handle that IS the selector. + // Drivers use the handle directly as a segment selector, so we must + // return the actual LDT selector value. + uint16_t handle = (uint16_t)sel; + + gStubCtx->allocs[slot].handle = handle; + gStubCtx->allocs[slot].linearAddr = linAddr; + gStubCtx->allocs[slot].selector = (uint16_t)sel; + gStubCtx->allocs[slot].size = size; + gStubCtx->allocs[slot].lockCount = 0; + gStubCtx->allocs[slot].inUse = true; + + return handle; +} + + +// GlobalFree(hMem) -> HGLOBAL (0 = success) +static uint32_t stubGlobalFree(uint16_t *p, uint16_t n) +{ + (void)n; + uint16_t handle = p[0]; + int slot = findAllocByHandle(handle); + if (slot < 0) { + return handle; // Failure - return handle + } + + if (gStubCtx->allocs[slot].selector) { + uint16_t sel = gStubCtx->allocs[slot].selector; + __dpmi_free_ldt_descriptor(sel); + thunkSanitizeCbFrame(sel); + } + if (gStubCtx->allocs[slot].linearAddr) { + free((void *)gStubCtx->allocs[slot].linearAddr); + } + gStubCtx->allocs[slot].inUse = false; + + return 0; // Success +} + + +// GlobalLock(hMem) -> far pointer (DX:AX = seg:off) +static uint32_t stubGlobalLock(uint16_t *p, uint16_t n) +{ + (void)n; + uint16_t handle = p[0]; + int slot = findAllocByHandle(handle); + if (slot < 0) { + return 0; // NULL pointer + } + + gStubCtx->allocs[slot].lockCount++; + + // Return selector:0000 as the far pointer + uint16_t sel = gStubCtx->allocs[slot].selector; + return ((uint32_t)sel << 16) | 0x0000; // DX=sel, AX=0 +} + + +// GlobalUnlock(hMem) -> BOOL +static uint32_t stubGlobalUnlock(uint16_t *p, uint16_t n) +{ + (void)n; + uint16_t handle = p[0]; + int slot = findAllocByHandle(handle); + if (slot >= 0 && gStubCtx->allocs[slot].lockCount > 0) { + 
gStubCtx->allocs[slot].lockCount--; + } + return 0; // Success (return value is remaining lock count == 0) +} + + +// GlobalRealloc(hMem, size_hi, size_lo, flags) -> HGLOBAL +static uint32_t stubGlobalRealloc(uint16_t *p, uint16_t n) +{ + (void)n; + uint16_t handle = p[0]; + uint32_t newSize = ((uint32_t)p[1] << 16) | p[2]; + + int slot = findAllocByHandle(handle); + if (slot < 0 || newSize == 0) { + return 0; + } + + uint8_t *newMem = (uint8_t *)realloc((void *)gStubCtx->allocs[slot].linearAddr, newSize); + if (!newMem) { + return 0; + } + + gStubCtx->allocs[slot].linearAddr = (uint32_t)newMem; + gStubCtx->allocs[slot].size = newSize; + + // Update the selector (true linear = pointer + DS base) + __dpmi_set_segment_base_address(gStubCtx->allocs[slot].selector, (uint32_t)newMem + __djgpp_base_address); + __dpmi_set_segment_limit(gStubCtx->allocs[slot].selector, newSize - 1); + + return handle; +} + + +// GlobalSize(hMem) -> DWORD +static uint32_t stubGlobalSize(uint16_t *p, uint16_t n) +{ + (void)n; + uint16_t handle = p[0]; + int slot = findAllocByHandle(handle); + if (slot < 0) { + return 0; + } + return gStubCtx->allocs[slot].size; +} + + +// LocalAlloc - treat as a near-pointer allocation within the data segment +// For simplicity, we redirect to GlobalAlloc +static uint32_t stubLocalAlloc(uint16_t *p, uint16_t n) +{ + (void)n; + // LocalAlloc(flags, size) - size is 16-bit + uint16_t localParams[3]; + localParams[0] = p[0]; // flags + localParams[1] = 0; // size_hi + localParams[2] = p[1]; // size_lo + return stubGlobalAlloc(localParams, 3); +} + + +static uint32_t stubLocalFree(uint16_t *p, uint16_t n) +{ + return stubGlobalFree(p, n); +} + + +static uint32_t stubLocalLock(uint16_t *p, uint16_t n) +{ + // LocalLock returns a near pointer (just the offset) + uint32_t farPtr = stubGlobalLock(p, n); + return farPtr & 0xFFFF; // Return just the offset (always 0) +} + + +static uint32_t stubLocalUnlock(uint16_t *p, uint16_t n) +{ + return stubGlobalUnlock(p, n); +} 
+
+
+// LocalInit(segment, start, end) -> BOOL
+static uint32_t stubLocalInit(uint16_t *p, uint16_t n)
+{
+    (void)p; (void)n;
+    return 1; // Success
+}
+
+
+// LocalReAlloc(hMem, newSize, flags) -> HANDLE
+static uint32_t stubLocalReAlloc(uint16_t *p, uint16_t n)
+{
+    (void)n;
+    // Return the same handle (pretend success)
+    return p[0];
+}
+
+
+// LocalSize(hMem) -> UINT
+static uint32_t stubLocalSize(uint16_t *p, uint16_t n)
+{
+    (void)n;
+    // Redirect to GlobalSize
+    return stubGlobalSize(p, 1);
+}
+
+
+// GlobalDOSAlloc(size_hi:WORD, size_lo:WORD) -> DWORD (AX=selector, DX=segment)
+//
+// Sub-allocates from the pre-allocated DOS memory pool.
+//
+// The pool is a bump allocator: stubGlobalDOSFree releases the selector and
+// the tracking slot but never returns paragraphs to the pool. For that
+// reason the bump pointer is only advanced once every step that can fail
+// (slot lookup, capacity check, selector allocation) has succeeded, so a
+// failed call does not permanently consume pool space.
+//
+// Returns 0 on failure (zero/oversized request, no free tracking slot,
+// pool exhausted, or no LDT descriptor available).
+static uint32_t stubGlobalDOSAlloc(uint16_t *p, uint16_t n)
+{
+    (void)n;
+    uint32_t size = ((uint32_t)p[0] << 16) | p[1];
+    if (size == 0 || size > STUB_DOS_POOL_SIZE) {
+        return 0;
+    }
+
+    // Round the request up to whole 16-byte paragraphs
+    uint16_t paragraphs = (uint16_t)((size + 15) / 16);
+
+    // Find a free slot
+    int slot = -1;
+    for (int i = 0; i < STUB_MAX_DOS_ALLOCS; i++) {
+        if (!gStubCtx->dosAllocs[i].inUse) {
+            slot = i;
+            break;
+        }
+    }
+    if (slot < 0) {
+        return 0;
+    }
+
+    // Check pool capacity, but do not commit the bump pointer yet
+    uint16_t paraOff = gStubCtx->dosPoolNextPara;
+    if (paraOff + paragraphs > gStubCtx->dosPoolParas) {
+        return 0;
+    }
+
+    // Real-mode segment for this sub-block
+    uint16_t blockSeg = gStubCtx->dosPoolSeg + paraOff;
+
+    // Create a PM selector for the sub-block
+    int sel = __dpmi_allocate_ldt_descriptors(1);
+    if (sel < 0) {
+        return 0; // Pool untouched: nothing to roll back
+    }
+    uint32_t blockBase = (uint32_t)blockSeg * 16;
+    __dpmi_set_segment_base_address(sel, blockBase);
+    __dpmi_set_segment_limit(sel, (uint32_t)paragraphs * 16 - 1);
+    __dpmi_set_descriptor_access_rights(sel, 0x00F2); // 16-bit data, writable, DPL=3
+
+    // Everything succeeded: now consume the paragraphs from the pool
+    gStubCtx->dosPoolNextPara = paraOff + paragraphs;
+
+    // Track the sub-allocation
+    gStubCtx->dosAllocs[slot].paraOff = paraOff;
+    gStubCtx->dosAllocs[slot].paragraphs = paragraphs;
+    gStubCtx->dosAllocs[slot].selector = (uint16_t)sel;
+    gStubCtx->dosAllocs[slot].inUse = true;
+
+    if (gStubDebug) {
+        logErr("stub: GlobalDOSAlloc(%lu) -> sel=%04X seg=%04X\n",
+               (unsigned long)size, (uint16_t)sel, blockSeg);
+    }
+
+    // Return DX:AX = segment:selector (LOWORD=selector, HIWORD=paragraph)
+    return ((uint32_t)blockSeg << 16) | (uint16_t)sel;
+}
+
+
+// GlobalDOSFree(selector:WORD) -> WORD (0=success)
+static uint32_t stubGlobalDOSFree(uint16_t *p, uint16_t n)
+{
+    (void)n;
+    uint16_t sel = p[0];
+
+    // Find the sub-allocation by selector
+    for (int i = 0; i < STUB_MAX_DOS_ALLOCS; i++) {
+        if (gStubCtx->dosAllocs[i].inUse && gStubCtx->dosAllocs[i].selector == sel) {
+            __dpmi_free_ldt_descriptor(sel);
+            thunkSanitizeCbFrame(sel);
+            gStubCtx->dosAllocs[i].inUse = false;
+            return 0; // Success
+        }
+    }
+    return sel; // Failure — not found
+}
+
+
+// GetFreeSpace(flags) -> DWORD (bytes of free memory)
+static uint32_t stubGetFreeSpace(uint16_t *p, uint16_t n)
+{
+    (void)p; (void)n;
+    return 16 * 1024 * 1024; // Report 16MB free
+}
+
+
+// GetCurrentPDB() -> WORD (PSP segment)
+static uint32_t stubGetCurrentPDB(uint16_t *p, uint16_t n)
+{
+    (void)p; (void)n;
+    // Return a fake PSP segment address. The driver rarely uses this
+    // for anything meaningful - it's mainly for KERNEL internal bookkeeping.
+    return 0x0100;
+}
+
+
+// GetModuleUsage(hModule) -> int (reference count)
+static uint32_t stubGetModuleUsage(uint16_t *p, uint16_t n)
+{
+    (void)p; (void)n;
+    return 1; // Always 1 reference
+}
+
+
+// GetProfileInt(lpApp:DWORD, lpKey:DWORD, nDefault:WORD) -> UINT
+static uint32_t stubGetProfileInt(uint16_t *p, uint16_t n)
+{
+    (void)n;
+    // Return the default value (param index 4 = nDefault)
+    return p[4];
+}
+
+
+// GetDOSEnvironment() -> far pointer to environment block
+static uint32_t stubGetDOSEnvironment(uint16_t *p, uint16_t n)
+{
+    (void)p; (void)n;
+    // Return NULL - driver shouldn't need environment
+    return 0;
+}
+
+
+// GetSystemDirectory(lpBuffer:DWORD, nSize:WORD) -> UINT (chars copied)
+//
+// Writes the fixed path "C:\WINDOWS\SYSTEM" (NUL-terminated) to lpBuffer.
+//   p[0]:p[1] = lpBuffer (selector:offset), p[2] = nSize (buffer size in bytes)
+// Returns the number of characters copied, excluding the terminator.
+// If the buffer is too small nothing is written and the required buffer
+// size (including the terminator) is returned, so the caller can detect
+// the condition by a return value greater than nSize. Returns 0 for a
+// NULL buffer pointer.
+static uint32_t stubGetSystemDirectory(uint16_t *p, uint16_t n)
+{
+    (void)n;
+    uint16_t seg = p[0];
+    uint16_t off = p[1];
+    uint16_t bufSize = p[2];
+    if (seg == 0 && off == 0) {
+        return 0;
+    }
+    const char *sysDir = "C:\\WINDOWS\\SYSTEM";
+    uint16_t len = (uint16_t)strlen(sysDir);
+
+    // Never write past the caller's buffer: it lives in driver memory and
+    // an overrun would silently corrupt whatever follows it.
+    if (bufSize < (uint32_t)len + 1) {
+        return (uint32_t)len + 1;
+    }
+
+    movedata(_my_ds(), (unsigned)sysDir, seg, off, len + 1);
+    return len;
+}
+
+
+// SelectorAccessRights(sel:WORD, op:WORD, rights:WORD) -> WORD
+// op=0: get, op=1: set
+static uint32_t stubSelectorAccessRights(uint16_t *p, uint16_t n)
+{
+    (void)n;
+    uint16_t sel = p[0];
+    uint16_t op = p[1];
+
+    if (op == 0) {
+        // Get access rights
+        uint8_t desc[8];
+        if (__dpmi_get_descriptor(sel, desc) != 0) {
+            return 0;
+        }
+        return ((uint16_t)desc[6] << 8) | desc[5];
+    } else {
+        // Set access rights
+        uint16_t rights = p[2];
+        __dpmi_set_descriptor_access_rights(sel, rights);
+        return 0;
+    }
+}
+
+
+// GetModuleHandle(lpModuleName) -> HMODULE
+// Returns a fake module handle
+static uint32_t stubGetModuleHandle(uint16_t *p, uint16_t n)
+{
+    (void)p; (void)n;
+    // Return a non-zero handle. The driver doesn't really need a valid one.
+    return 0x0100;
+}
+
+
+// GetProcAddress(hModule:WORD, lpProcName:DWORD) -> FARPROC
+// If lpProcName has segment=0, the offset is an ordinal number.
+static uint32_t stubGetProcAddress(uint16_t *p, uint16_t n)
+{
+    (void)n;
+    uint16_t nameSeg = p[1];
+    uint16_t nameOff = p[2];
+
+    // If segment is 0, offset is an ordinal (MAKEINTRESOURCE)
+    if (nameSeg == 0 && gStubCtx && gStubCtx->neModule) {
+        uint16_t ordinal = nameOff;
+        uint16_t seg = 0;
+        uint16_t off = 0;
+        uint16_t sel = 0;
+        if (neLookupExport(gStubCtx->neModule, ordinal, &seg, &off, &sel)) {
+            logErr("winstub: GetProcAddress(ord %u) -> %04X:%04X\n", ordinal, sel, off);
+            return ((uint32_t)sel << 16) | off;
+        }
+    }
+
+    logErr("winstub: GetProcAddress(%04X:%04X) -> NULL\n", nameSeg, nameOff);
+    return 0;
+}
+
+
+// GetModuleFileName(hModule, lpFilename, nSize) -> int
+static uint32_t stubGetModuleFileName(uint16_t *p, uint16_t n)
+{
+    (void)p; (void)n;
+    return 0; // Empty string, 0 chars copied
+}
+
+
+// GetWinFlags() -> DWORD
+static uint32_t stubGetWinFlags(uint16_t *p, uint16_t n)
+{
+    (void)p; (void)n;
+    uint32_t flags = WF_PMODE | WF_CPU386 | WF_ENHANCED;
+    return flags;
+}
+
+
+// GetVersion() -> DWORD (low word = Windows version, high word = DOS version)
+//
+// Reports Windows 3.10 running on MS-DOS 6.22 using the Win16 layout:
+//   Low byte  of low word  = Windows major version = 3
+//   High byte of low word  = Windows minor version = 10
+//   High byte of high word = DOS major version     = 6
+//   Low byte  of high word = DOS minor version     = 22
+// Note the asymmetry: the Windows word is stored minor:major, while the
+// DOS word is stored major:minor.
+static uint32_t stubGetVersion(uint16_t *p, uint16_t n)
+{
+    (void)p; (void)n;
+    const uint32_t winMajor = 3;
+    const uint32_t winMinor = 10;
+    const uint32_t dosMajor = 6;
+    const uint32_t dosMinor = 22;
+
+    // 0x06160A03: DOS 6.22, Windows 3.10
+    return (dosMajor << 24) | (dosMinor << 16) | (winMinor << 8) | winMajor;
+}
+
+
+// GetPrivateProfileInt(lpApp, lpKey, nDefault, lpFile) -> UINT
+// Pascal params: lpApp(2w), lpKey(2w), nDefault(1w), lpFile(2w) = 7 words
+static uint32_t stubGetPrivateProfileInt(uint16_t *p, uint16_t n)
+{
+    (void)n;
+    // Pascal push order: lpApp(2w), lpKey(2w), nDefault(1w), lpFile(2w)
+    uint16_t appSeg = p[0];
+    uint16_t appOff = p[1];
+    uint16_t keySeg = p[2];
+    uint16_t keyOff = p[3];
+    uint16_t def
// GetPrivateProfileString(lpApp, lpKey, lpDefault, lpBuffer, nSize, lpFile) -> int
//
// Answers a fixed set of known [section]/key queries and otherwise echoes
// the caller's default string. The result is copied into the caller's
// buffer, truncated to nSize - 1 characters and always NUL-terminated.
//
// Returns the number of characters copied (excluding the terminator), or 0
// when no buffer was supplied.
static uint32_t stubGetPrivateProfileString(uint16_t *p, uint16_t n)
{
    (void)n;
    // Pascal push order: lpApp(2w), lpKey(2w), lpDefault(2w),
    // lpBuffer(2w), nSize(1w), lpFile(2w)
    uint16_t appSeg  = p[0];
    uint16_t appOff  = p[1];
    uint16_t keySeg  = p[2];
    uint16_t keyOff  = p[3];
    uint16_t defSeg  = p[4];
    uint16_t defOff  = p[5];
    uint16_t bufSeg  = p[6];
    uint16_t bufOff  = p[7];
    uint16_t bufSize = p[8];
    uint16_t fileSeg = p[9];
    uint16_t fileOff = p[10];

    // Local copies of the far strings. Each buffer is zero-filled and at
    // most 63 bytes are copied in, so every string is NUL-terminated.
    char app[64]  = {0};
    char key[64]  = {0};
    char def[64]  = {0};
    char file[64] = {0};
    if (appSeg || appOff) {
        movedata(appSeg, appOff, _my_ds(), (unsigned)app, 63);
    }
    if (keySeg || keyOff) {
        movedata(keySeg, keyOff, _my_ds(), (unsigned)key, 63);
    }
    if (defSeg || defOff) {
        movedata(defSeg, defOff, _my_ds(), (unsigned)def, 63);
    }
    if (fileSeg || fileOff) {
        movedata(fileSeg, fileOff, _my_ds(), (unsigned)file, 63);
    }
    logErr("stub: GetPrivateProfileString([%s] %s, def=\"%s\", buf=%u, %s)\n",
           app, key, def, bufSize, file);

    // Provide SYSTEM.INI [display] values matching the OEMSETUP.INF
    // that ships with the S3 Trio64V driver v1.70.04 (D808L mode:
    // 800x600 256-color, large font).
    const char *result = def;
    if (strcasecmp(app, "DISPLAY") == 0) {
        if (strcasecmp(key, "SCREEN-SIZE") == 0) {
            result = "800";
        } else if (strcasecmp(key, "COLOR-FORMAT") == 0) {
            result = "8";
        } else if (strcasecmp(key, "DAC-TYPE") == 0) {
            result = "nbt";
        } else if (strcasecmp(key, "POLYGON-SUPPORT") == 0) {
            result = "on";
        } else if (strcasecmp(key, "ELLIPSE-SUPPORT") == 0) {
            result = "on";
        } else if (strcasecmp(key, "SCACHE") == 0) {
            result = "on";
        } else if (strcasecmp(key, "TEXTRMW") == 0) {
            result = "0";
        } else if (strcasecmp(key, "FASTMMIO") == 0) {
            result = "on";
        } else if (strcasecmp(key, "DPI") == 0) {
            result = "120";
        }
    } else if (strcasecmp(app, "VBESVGA.DRV") == 0) {
        if (strcasecmp(key, "Width") == 0) {
            result = "800";
        } else if (strcasecmp(key, "Height") == 0) {
            result = "600";
        } else if (strcasecmp(key, "Depth") == 0) {
            result = "8";
        }
    } else if (strcasecmp(app, "Debug") == 0) {
        if (strcasecmp(key, "OutputTo") == 0) {
            result = "NUL";
        }
    }

    // Copy result to the caller's buffer.
    if ((bufSeg || bufOff) && bufSize > 0) {
        char     out[64];
        uint16_t len = (uint16_t)strlen(result);

        if (len >= bufSize) {
            len = bufSize - 1;
        }
        if (len > sizeof(out) - 1) {
            len = sizeof(out) - 1;
        }

        // Stage the (possibly truncated) text locally and terminate it
        // explicitly. Copying len + 1 bytes straight from `result` would
        // carry a non-NUL character into the last slot whenever the string
        // had to be truncated, leaving the driver's buffer unterminated.
        memcpy(out, result, len);
        out[len] = '\0';
        movedata(_my_ds(), (unsigned)out, bufSeg, bufOff, len + 1);
        return len;
    }
    return 0;
}
true; + break; + } + } + + return (uint16_t)newSel; +} + + +// FreeSelector(selector) -> selector (0 = success) +// NOTE: We do NOT actually free the LDT descriptor here. Windows 3.1 drivers +// use an alloc-then-free pattern to "discover" selector numbers, then continue +// using the freed selector. The descriptor stays valid with its stale contents. +// Actual cleanup happens in stubShutdown when all selectors are freed. +static uint32_t stubFreeSelector(uint16_t *p, uint16_t n) +{ + (void)n; + uint16_t sel = p[0]; + + // Mark as freed in our tracking but don't release the LDT slot + for (int i = 0; i < STUB_MAX_SELECTORS; i++) { + if (gStubCtx->selectors[i].inUse && gStubCtx->selectors[i].selector == sel) { + gStubCtx->selectors[i].inUse = false; + break; + } + } + + return 0; +} + + +// AllocCStoDSAlias(codeSelector) -> data selector +// Creates a data segment alias for a code segment +static uint32_t stubAllocCStoDSAlias(uint16_t *p, uint16_t n) +{ + (void)n; + uint16_t codeSel = p[0]; + + int newSel = __dpmi_allocate_ldt_descriptors(1); + if (newSel < 0) { + return 0; + } + + // Copy the code segment descriptor + uint8_t desc[8]; + if (__dpmi_get_descriptor(codeSel, desc) != 0) { + __dpmi_free_ldt_descriptor(newSel); + return 0; + } + + // Change to data segment (clear the code bit in the type field) + // Byte 5 (access rights): change type from code to data + // Code readable: 1010 -> Data writable: 0010 + desc[5] = (desc[5] & 0xF0) | 0x02; // Data, writable + + __dpmi_set_descriptor(newSel, desc); + + for (int i = 0; i < STUB_MAX_SELECTORS; i++) { + if (!gStubCtx->selectors[i].inUse) { + gStubCtx->selectors[i].selector = (uint16_t)newSel; + gStubCtx->selectors[i].inUse = true; + break; + } + } + + return (uint16_t)newSel; +} + + +// AllocDSToCSAlias(dataSel) -> code selector +// Creates a code segment alias for a data segment +static uint32_t stubAllocDSToCSAlias(uint16_t *p, uint16_t n) +{ + (void)n; + uint16_t dataSel = p[0]; + + int newSel = 
__dpmi_allocate_ldt_descriptors(1); + if (newSel < 0) { + return 0; + } + + // Copy the data segment descriptor + uint8_t desc[8]; + if (__dpmi_get_descriptor(dataSel, desc) != 0) { + __dpmi_free_ldt_descriptor(newSel); + return 0; + } + + // Change to code segment (set the code bit in the type field) + // Data writable: 0010 -> Code readable: 1010 + desc[5] = (desc[5] & 0xF0) | 0x0A; // Code, readable + + __dpmi_set_descriptor(newSel, desc); + + for (int i = 0; i < STUB_MAX_SELECTORS; i++) { + if (!gStubCtx->selectors[i].inUse) { + gStubCtx->selectors[i].selector = (uint16_t)newSel; + gStubCtx->selectors[i].inUse = true; + break; + } + } + + return (uint16_t)newSel; +} + + +// LoadLibrary(lpLibFileName) -> hModule +// Returns error code < 32 since we don't load real DLLs +static uint32_t stubLoadLibrary(uint16_t *p, uint16_t n) +{ + (void)n; + uint16_t seg = p[0]; + uint16_t off = p[1]; + (void)seg; + (void)off; + + logErr("stub: LoadLibrary() -> 2 (file not found)\n"); + + // Return 2 = file not found. Values < 32 indicate error. + // The driver checks hModule >= 32 for success. 
// PrestoChangoSelector(srcSel, dstSel) -> dstSel, or 0 on failure
//
// Copies the descriptor of srcSel into dstSel with the code/data bit of the
// access byte flipped, turning a code selector into a data alias and vice
// versa.
static uint32_t stubPrestoChangoSelector(uint16_t *p, uint16_t n)
{
    (void)n;
    uint16_t srcSel = p[0];
    uint16_t dstSel = p[1];

    uint8_t desc[8];
    if (__dpmi_get_descriptor(srcSel, desc) != 0) {
        return 0;
    }

    // Bit 3 of the access-rights byte distinguishes code from data.
    desc[5] ^= 0x08;

    // Report failure instead of returning a selector whose descriptor
    // was not actually updated.
    if (__dpmi_set_descriptor(dstSel, desc) != 0) {
        return 0;
    }

    return dstSel;
}
// OutputDebugString(lpString) -> void
//
// Copies the driver's debug string out of its 16-bit segment and writes it
// to the error log. At most 255 characters are logged. Always returns 0.
static uint32_t stubOutputDebugString(uint16_t *p, uint16_t n)
{
    (void)n;
    // p[0] = segment, p[1] = offset of the string
    uint16_t seg = p[0];
    uint16_t off = p[1];

    if (seg == 0 && off == 0) {
        return 0;
    }

    // Never read past the end of the source segment: a short string that
    // sits near the segment limit would otherwise make the fixed-size copy
    // run off the end of the segment.
    uint32_t limit = stubGetSelectorLimit(&seg, 1);
    if (off > limit) {
        return 0;
    }

    char     buf[256];
    uint32_t room  = limit - off;          // bytes available after `off`
    unsigned count = sizeof(buf) - 1;
    if (room < count) {
        count = (unsigned)room + 1;        // offsets off..limit inclusive
    }

    // Read the string from the 16-bit segment
    movedata(seg, off, _my_ds(), (unsigned)buf, count);
    buf[count] = '\0';

    logErr("DRIVER DEBUG: %s\n", buf);
    return 0;
}
// GetDeviceCaps(hdc, index) -> int
//
// Returns generic values for a handful of device capability indices and 0
// for everything else. The hdc argument (p[0]) is ignored.
//
// Index values are the standard GDI capability constants:
//   BITSPIXEL = 12, PLANES = 14, NUMCOLORS = 24,
//   RASTERCAPS = 38, SIZEPALETTE = 104
// (8 is HORZRES and 88 is LOGPIXELSX, which are not answered here.)
static uint32_t stubGetDeviceCaps(uint16_t *p, uint16_t n)
{
    (void)n;
    uint16_t index = p[1]; // Second param (Pascal: hdc pushed first, index pushed second)

    switch (index) {
        case 12:  return 8;  // BITSPIXEL
        case 14:  return 1;  // PLANES
        case 24:  return 16; // NUMCOLORS
        case 38:  return 0;  // RASTERCAPS - no special caps
        case 104: return 8;  // SIZEPALETTE
        default:  return 0;
    }
}
// State for the Windows API stub layer. Holds the bookkeeping tables for
// global memory blocks, extra LDT selectors and the DOS memory pool, the
// selectors for well-known real-mode memory regions, references to the
// thunk context and the loaded NE module, and the table of registered
// stub entry points.
typedef struct {
    // Memory allocation tracking
    struct {
        uint16_t handle;     // HGLOBAL16
        uint32_t linearAddr; // Linear address
        uint16_t selector;   // PM selector for the block
        uint32_t size;       // Block size
        uint16_t lockCount;  // Lock count
        bool     inUse;      // Slot is occupied
    } allocs[STUB_MAX_ALLOCS];
    uint16_t nextHandle;     // Next handle value to assign

    // Extra selectors (for AllocSelector, AllocCStoDSAlias, etc.)
    struct {
        uint16_t selector;   // Tracked selector value
        bool     inUse;      // Slot is occupied
    } selectors[STUB_MAX_SELECTORS];

    // DOS memory pool for GlobalDOSAlloc (pre-allocated at init)
    struct {
        uint16_t paraOff;    // Paragraph offset from pool base
        uint16_t paragraphs; // Size in paragraphs
        uint16_t selector;   // PM selector for this sub-block
        bool     inUse;      // Slot is occupied
    } dosAllocs[STUB_MAX_DOS_ALLOCS];
    uint16_t dosPoolSeg;      // Real-mode segment of pool
    uint16_t dosPoolSel;      // DPMI selector for pool (for freeing)
    uint16_t dosPoolParas;    // Total pool size in paragraphs
    uint16_t dosPoolNextPara; // Next free paragraph offset (bump allocator)

    // Well-known memory region selectors
    uint16_t biosDataSel;     // 0040:0000 BIOS data area
    uint16_t vramSel;         // A000:0000 VGA graphics RAM
    uint16_t monoTextSel;     // B000:0000 Mono text video
    uint16_t colorTextSel;    // B800:0000 Color text video
    uint16_t videoBiosSel;    // C000:0000 Video BIOS ROM
    uint16_t upperMemD000Sel; // D000:0000 Upper memory
    uint16_t upperMemE000Sel; // E000:0000 Upper memory
    uint16_t sysBiosSel;      // F000:0000 System BIOS ROM

    // Thunk context reference
    ThunkContextT *thunkCtx;

    // NE module reference (for GetProcAddress lookups)
    NeModuleT *neModule;

    // Lookup table: module name + ordinal -> FarPtr16T
    // Built during stub registration
    struct {
        char      module[16]; // Module name the stub belongs to
        uint16_t  ordinal;    // Export ordinal within that module
        FarPtr16T addr;       // 16-bit far pointer for this entry
    } stubTable[256];
    uint16_t stubCount;       // NOTE(review): presumably the number of stubTable entries in use — confirm

    bool initialized;
} StubContextT;
+void stubSetModule(StubContextT *ctx, NeModuleT *mod); + +// Resolve an imported function. Called by the NE loader's import resolver. +// Returns the 16-bit far pointer to the stub function, or FARPTR16_NULL +// if the import is unknown. +FarPtr16T stubResolveImport(StubContextT *ctx, const char *moduleName, uint16_t ordinal, const char *funcName); + +// ============================================================================ +// KERNEL stubs - ordinal numbers +// These are the most commonly imported KERNEL functions by display drivers. +// ============================================================================ + +// Error handling +#define KERNEL_ORD_FATALEXIT 1 +#define KERNEL_ORD_FATALAPPEXIT 137 + +// Memory management - Global +#define KERNEL_ORD_GLOBALALLOC 15 +#define KERNEL_ORD_GLOBALREALLOC 16 +#define KERNEL_ORD_GLOBALFREE 17 +#define KERNEL_ORD_GLOBALLOCK 18 +#define KERNEL_ORD_GLOBALUNLOCK 19 +#define KERNEL_ORD_GLOBALSIZE 20 +#define KERNEL_ORD_GLOBALFLAGS 22 +#define KERNEL_ORD_GLOBALDOSALLOC 84 +#define KERNEL_ORD_GLOBALDOSFREE 85 +#define KERNEL_ORD_GLOBALDOSALLOC2 184 // Duplicate ordinal for GlobalDOSAlloc +#define KERNEL_ORD_GLOBALDOSFREE2 185 // Duplicate ordinal for GlobalDOSFree + +// Memory management - Local +#define KERNEL_ORD_LOCALINIT 4 +#define KERNEL_ORD_LOCALALLOC 5 +#define KERNEL_ORD_LOCALREALLOC 6 +#define KERNEL_ORD_LOCALFREE 7 +#define KERNEL_ORD_LOCALLOCK 8 +#define KERNEL_ORD_LOCALUNLOCK 9 +#define KERNEL_ORD_LOCALSIZE 10 + +// Memory info +#define KERNEL_ORD_GETFREESPACE 102 +#define KERNEL_ORD_LOCKSEGMENT 23 +#define KERNEL_ORD_UNLOCKSEGMENT 24 +#define KERNEL_ORD_SETSWAPAREA 81 +#define KERNEL_ORD_GETCURRENTPDB 82 + +// Module management +#define KERNEL_ORD_GETMODULEHANDLE 47 +#define KERNEL_ORD_GETMODULEUSAGE 35 +#define KERNEL_ORD_GETPROFILEINT 48 +#define KERNEL_ORD_GETPROFILEINT2 57 // Alternate ordinal used by some drivers +#define KERNEL_ORD_WRITEPROFILESTRING 59 +#define KERNEL_ORD_GETMODULEFILENAME 49 
+#define KERNEL_ORD_GETPROCADDRESS 50 +#define KERNEL_ORD_LOADLIBRARY 95 +#define KERNEL_ORD_FREELIBRARY 96 +#define KERNEL_ORD_INITTASK 91 +#define KERNEL_ORD_GETEXEPTR 133 + +// Resource management +#define KERNEL_ORD_FINDRESOURCE 60 +#define KERNEL_ORD_LOADRESOURCE 61 +#define KERNEL_ORD_FREERESOURCE 63 +#define KERNEL_ORD_LOCKRESOURCE 62 +#define KERNEL_ORD_SIZEOFRESOURCE 65 + +// Selector management +#define KERNEL_ORD_ALLOCSELECTOR 175 +#define KERNEL_ORD_FREESELECTOR 176 +#define KERNEL_ORD_ALLOCCSTODSALIAS 170 +#define KERNEL_ORD_ALLOCDSTOCSALIAS 171 +#define KERNEL_ORD_SETSELECTORBASE 187 +#define KERNEL_ORD_GETSELECTORBASE 186 +#define KERNEL_ORD_SETSELECTORLIMIT 189 +#define KERNEL_ORD_PRESTOCHANGOSELECTOR 177 +#define KERNEL_ORD_SELECTORACCESSRIGHTS 196 +#define KERNEL_ORD_ALLOCSELECTORARRAY 206 + +// System info +#define KERNEL_ORD_GETVERSION 3 +#define KERNEL_ORD_GETWINFLAGS 132 +#define KERNEL_ORD_GETSYSTEMDIRECTORY 135 +#define KERNEL_ORD_GETDOSENVIRONMENT 131 +#define KERNEL_ORD_GETPRIVATEPROFILEINT 127 +#define KERNEL_ORD_GETPRIVATEPROFILESTRING 128 +#define KERNEL_ORD_WRITEPRIVATEPROFILESTRING 129 + +// Selector arithmetic (variables, not functions) +#define KERNEL_ORD___AHSHIFT 113 +#define KERNEL_ORD___AHINCR 114 +#define KERNEL_ORD___WINFLAGS 178 + +// Segment selectors to well-known memory regions (variables, not functions) +#define KERNEL_ORD___0000H 183 +#define KERNEL_ORD___0040H 193 +#define KERNEL_ORD___A000H 174 +#define KERNEL_ORD___B000H 181 +#define KERNEL_ORD___B800H 182 +#define KERNEL_ORD___C000H 195 +#define KERNEL_ORD___D000H 179 +#define KERNEL_ORD___E000H 190 +#define KERNEL_ORD___F000H 194 +#define KERNEL_ORD___ROMBIOS 173 + +// Selector query +#define KERNEL_ORD_GETSELECTORLIMIT 188 +#define KERNEL_ORD_GETSELECTORBASE2 186 + +// Debug +#define KERNEL_ORD_OUTPUTDEBUGSTRING 115 + +// ============================================================================ +// GDI stubs - ordinal numbers +// 
============================================================================ + +#define GDI_ORD_CREATEDC 53 +#define GDI_ORD_DELETEDC 68 +#define GDI_ORD_SELECTOBJECT 45 +#define GDI_ORD_DELETEOBJECT 69 +#define GDI_ORD_GETDEVICECAPS 80 +#define GDI_ORD_SETBKCOLOR 1 +#define GDI_ORD_SETTEXTCOLOR 6 +#define GDI_ORD_GETPALETTE_GDI 5 + +// DIB Engine exports (from DIBENG.DLL or built into GDI) +#define DIBENG_ORD_DIBBITBLT 1 +#define DIBENG_ORD_DIBOUTPUT 2 +#define DIBENG_ORD_DIBPIXEL 3 +#define DIBENG_ORD_DIBSTRBLT 4 +#define DIBENG_ORD_DIBCOLORINFO 5 +#define DIBENG_ORD_DIBREALIZE 6 +#define DIBENG_ORD_DIBCREATEBITMAP 7 +#define DIBENG_ORD_DIBSCANLR 8 +#define DIBENG_ORD_DIBEXTOUT 9 + +// ============================================================================ +// USER stubs - ordinal numbers +// ============================================================================ + +#define USER_ORD_GETSYSTEMMETRICS 179 +#define USER_ORD_MESSAGEBOX 1 + +// ============================================================================ +// KEYBOARD stubs - ordinal numbers +// ============================================================================ + +#define KEYBOARD_ORD_SCREENSWITCHENABLE 100 + +#endif // WINSTUB_H diff --git a/win31drv/wintypes.h b/win31drv/wintypes.h new file mode 100644 index 0000000..362edea --- /dev/null +++ b/win31drv/wintypes.h @@ -0,0 +1,152 @@ +#ifndef WINTYPES_H +#define WINTYPES_H + +#include + +// ============================================================================ +// Windows 16-bit basic types +// ============================================================================ + +typedef uint16_t WORD; +typedef int16_t SWORD; +typedef uint32_t DWORD; +typedef int32_t LONG; +typedef uint8_t BYTE; +typedef int16_t BOOL16; + +// 16-bit handle types +typedef uint16_t HANDLE16; +typedef uint16_t HWND16; +typedef uint16_t HDC16; +typedef uint16_t HMODULE16; +typedef uint16_t HINSTANCE16; +typedef uint16_t HGLOBAL16; +typedef uint16_t 
HBITMAP16; +typedef uint16_t HBRUSH16; +typedef uint16_t HPEN16; +typedef uint16_t HPALETTE16; +typedef uint16_t HRGN16; +typedef uint16_t HCURSOR16; +typedef uint16_t ATOM16; + +// ============================================================================ +// Far pointer types (16:16 segment:offset, stored little-endian) +// ============================================================================ + +typedef struct __attribute__((packed)) { + uint16_t offset; + uint16_t segment; +} FarPtr16T; + +#define FARPTR16_NULL ((FarPtr16T){0, 0}) + +static inline FarPtr16T makeFarPtr16(uint16_t seg, uint16_t off) +{ + FarPtr16T fp; + fp.segment = seg; + fp.offset = off; + return fp; +} + +static inline uint32_t farPtr16ToLinear(FarPtr16T fp) +{ + // For real-mode addresses only; PM addresses need descriptor lookup + return ((uint32_t)fp.segment << 4) + fp.offset; +} + +// ============================================================================ +// Color types +// ============================================================================ + +typedef uint32_t ColorRefT; + +#define RGB_RED(c) ((uint8_t)(c)) +#define RGB_GREEN(c) ((uint8_t)((c) >> 8)) +#define RGB_BLUE(c) ((uint8_t)((c) >> 16)) +#define MAKE_RGB(r, g, b) ((ColorRefT)(((BYTE)(r)) | ((WORD)((BYTE)(g)) << 8) | ((DWORD)((BYTE)(b)) << 16))) + +// Palette index color reference +#define PALETTEINDEX(i) ((ColorRefT)(0x01000000L | (DWORD)(WORD)(i))) + +// ============================================================================ +// Geometry types (16-bit, packed for compatibility with Win16 structures) +// ============================================================================ + +typedef struct __attribute__((packed)) { + int16_t x; + int16_t y; +} Point16T; + +typedef struct __attribute__((packed)) { + int16_t left; + int16_t top; + int16_t right; + int16_t bottom; +} Rect16T; + +// ============================================================================ +// Windows constants +// 
============================================================================ + +// GetWinFlags return values +#define WF_PMODE 0x0001 +#define WF_CPU286 0x0002 +#define WF_CPU386 0x0004 +#define WF_CPU486 0x0008 +#define WF_STANDARD 0x0010 +#define WF_ENHANCED 0x0020 +#define WF_80x87 0x0400 + +// GlobalAlloc flags +#define GMEM_FIXED 0x0000 +#define GMEM_MOVEABLE 0x0002 +#define GMEM_ZEROINIT 0x0040 +#define GHND (GMEM_MOVEABLE | GMEM_ZEROINIT) +#define GPTR (GMEM_FIXED | GMEM_ZEROINIT) + +// Boolean +#define WIN_FALSE 0 +#define WIN_TRUE 1 + +// Null handle +#define NULL16 ((uint16_t)0) + +// ============================================================================ +// Raster operation codes (ROP2 and ROP3) +// ============================================================================ + +#define R2_BLACK 1 +#define R2_NOTMERGEPEN 2 +#define R2_MASKNOTPEN 3 +#define R2_NOTCOPYPEN 4 +#define R2_MASKPENNOT 5 +#define R2_NOT 6 +#define R2_XORPEN 7 +#define R2_NOTMASKPEN 8 +#define R2_MASKPEN 9 +#define R2_NOTXORPEN 10 +#define R2_NOP 11 +#define R2_MERGENOTPEN 12 +#define R2_COPYPEN 13 +#define R2_MERGEPENNOT 14 +#define R2_MERGEPEN 15 +#define R2_WHITE 16 + +// Common ROP3 codes +#define SRCCOPY 0x00CC0020L +#define SRCPAINT 0x00EE0086L +#define SRCAND 0x008800C6L +#define SRCINVERT 0x00660046L +#define SRCERASE 0x00440328L +#define NOTSRCCOPY 0x00330008L +#define NOTSRCERASE 0x001100A6L +#define MERGECOPY 0x00C000CAL +#define MERGEPAINT 0x00BB0226L +#define PATCOPY 0x00F00021L +#define PATPAINT 0x00FB0A09L +#define PATINVERT 0x005A0049L +#define DSTINVERT 0x00550009L +#define BLACKNESS 0x00000042L +#define WHITENESS 0x00FF0062L + +#endif // WINTYPES_H