1110 lines
40 KiB
C
1110 lines
40 KiB
C
// ============================================================================
|
|
// thunk.c - 32-bit to 16-bit protected mode thunking layer
|
|
//
|
|
// This module provides the mechanism for DJGPP 32-bit code to call into
|
|
// 16-bit Windows driver code. It uses DPMI to create 16-bit code, data,
|
|
// and stack segments, and installs a small relay thunk in the 16-bit code
|
|
// segment that handles the 32/16-bit transition.
|
|
//
|
|
// Architecture:
|
|
// The 32-bit caller writes parameters to a shared data area in DOS
|
|
// memory, writes configuration (stack and data segment selectors) to
|
|
// the relay's CS-relative data area, then does a far call (lcall) to
|
|
// the 16-bit relay.
|
|
//
|
|
// The relay code (running in 16-bit mode but with the caller's 32-bit
|
|
// SS still active, since lcall doesn't change SS) performs:
|
|
// 1. Saves DS and the 32-bit return address
|
|
// 2. Saves SS:ESP (32-bit values via operand-size prefixes)
|
|
// 3. Loads DS from its config area to point to the shared data
|
|
// 4. Switches SS:SP to a dedicated 16-bit stack
|
|
// 5. Pushes Pascal-convention parameters from DS onto the 16-bit stack
|
|
// 6. Far-calls the target driver function
|
|
// 7. Saves DX:AX return value
|
|
// 8. Restores SS:ESP to the caller's 32-bit stack
|
|
// 9. Restores DS to the caller's flat data segment
|
|
// 10. Pushes the 32-bit return address back onto the 32-bit stack
|
|
// 11. Does an operand-size-prefixed retf to return to 32-bit code
|
|
//
|
|
// Key insight: When the 32-bit lcall transfers to the 16-bit relay,
|
|
// SS is unchanged (same-privilege far call). DJGPP's SS has B=1
|
|
// (32-bit stack), so ESP is used for all stack operations even in
|
|
// 16-bit code. This lets the relay safely save/restore the full
|
|
// 32-bit ESP before switching to the 16-bit driver stack.
|
|
//
|
|
// For 16-to-32 callbacks (Windows API stubs called by the driver), small
|
|
// 16-bit stubs use a software interrupt to transfer control to a 32-bit
|
|
// DPMI handler that dispatches to registered C callback functions.
|
|
// BX is saved/restored around the INT because the stub uses BX to pass
// the slot index, and the driver may depend on BX surviving the far call.
// (The Pascal calling convention does not make BX callee-saved, but the
// Windows KERNEL implementations happen to preserve it, and driver code
// such as BBLT.ASM depends on this.)
|
|
// ============================================================================
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdarg.h>
|
|
#include <inttypes.h>
|
|
#include <pc.h>
|
|
#include <dpmi.h>
|
|
#include <go32.h>
|
|
#include <sys/movedata.h>
|
|
|
|
#include "thunk.h"
|
|
#include "log.h"
|
|
#include <sys/farptr.h>
|
|
|
|
// Forward declarations
|
|
static bool installRelayCode(ThunkContextT *ctx);
|
|
static uint16_t allocDescriptor16(uint32_t base, uint32_t limit, bool isCode);
|
|
|
|
// ============================================================================
|
|
// 16-bit relay thunk machine code
|
|
//
|
|
// This code runs in a 16-bit code segment (CS D=0). On entry, SS is the
|
|
// caller's 32-bit flat data segment (SS B=1), so stack operations use
|
|
// the full 32-bit ESP.
|
|
//
|
|
// DS-relative layout (ThunkDataT shared data, base = dataSegBase):
|
|
// [DS:0x00] = target function offset (WORD)
|
|
// [DS:0x02] = target function segment selector (WORD)
|
|
// [DS:0x04] = parameter count in 16-bit words (WORD)
|
|
// [DS:0x06+] = parameters (params[0] pushed first = leftmost param)
|
|
//
|
|
// CS-relative data area (at RELAY_DATA_START, written by 32-bit caller):
|
|
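//   Configuration (written by the 32-bit caller before each call):
//     stack16_ss            - 16-bit stack segment selector
//     stack16_sp            - initial SP value (top of stack)
//     ds16                  - data segment selector for ThunkDataT
//     ds_dgroup             - selector loaded into DS and ES before the driver call
//     target_addr           - far pointer (offset, then selector) of the driver function
//   Scratch (used by relay during execution):
//     saved_eip_{lo,hi}, saved_cs_{lo,hi} - 32-bit return address
//     saved_ss, saved_esp   - caller's SS:ESP
//     saved_ds              - caller's DS
//     saved_ebp             - caller's 32-bit EBP
//     retval_ax, retval_dx  - driver return value
//   Set once by installRelayCode:
//     scratch_sel           - writable data alias of the relay code segment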
|
|
// ============================================================================
|
|
|
|
// Offset (within the relay code segment) at which the CS-relative data area
// begins. The code bytes of kRelayCode must end at or before this offset.
#define RELAY_DATA_START    0xC0

// Scratch area (used during relay execution)
#define RELAY_SAVED_EIP_LO  (RELAY_DATA_START +  0)  // 0xC0
#define RELAY_SAVED_EIP_HI  (RELAY_DATA_START +  2)  // 0xC2
#define RELAY_SAVED_CS_LO   (RELAY_DATA_START +  4)  // 0xC4
#define RELAY_SAVED_CS_HI   (RELAY_DATA_START +  6)  // 0xC6
#define RELAY_SAVED_SS      (RELAY_DATA_START +  8)  // 0xC8
#define RELAY_SAVED_ESP     (RELAY_DATA_START + 10)  // 0xCA (4 bytes)
#define RELAY_SAVED_DS      (RELAY_DATA_START + 14)  // 0xCE

// Per-call configuration (written by 32-bit caller before lcall)
#define RELAY_STACK16_SS    (RELAY_DATA_START + 16)  // 0xD0
#define RELAY_STACK16_SP    (RELAY_DATA_START + 18)  // 0xD2
#define RELAY_DS16          (RELAY_DATA_START + 20)  // 0xD4
#define RELAY_DS_DGROUP     (RELAY_DATA_START + 22)  // 0xD6
#define RELAY_TARGET_ADDR   (RELAY_DATA_START + 24)  // 0xD8 (4 bytes: off + seg)

// Return value storage
#define RELAY_RETVAL_AX     (RELAY_DATA_START + 28)  // 0xDC
#define RELAY_RETVAL_DX     (RELAY_DATA_START + 30)  // 0xDE

// Writable data alias selector (code segments are read-only in PM;
// this selector has the same base but is a writable data segment).
// Set once by installRelayCode.
#define RELAY_SCRATCH_SEL   (RELAY_DATA_START + 32)  // 0xE0
#define RELAY_SAVED_EBP     (RELAY_DATA_START + 34)  // 0xE2 (4 bytes)

#define RELAY_DATA_SIZE     38  // Bytes from RELAY_DATA_START

// Emits a 16-bit displacement as two little-endian initializer bytes.
#define RELAY_LE16(v)       ((v) & 0xFF), (((v) >> 8) & 0xFF)

// Hand-assembled 16-bit relay thunk.
// Each instruction is annotated with its offset and mnemonic.
//
// IMPORTANT: x86 code segments are read-only in protected mode. All writes
// to the scratch data area use ES, which is loaded at entry with a writable
// data alias selector (RELAY_SCRATCH_SEL) that has the same base as CS.
// Reads can use either CS or ES since both have the same base address.
//
// Segment override prefixes: CS=0x2E, ES=0x26. Operand size prefix: 0x66.
//
// Register encoding in ModR/M (reg field):
//   AX/EAX=000, CX/ECX=001, DX/EDX=010, BX/EBX=011
//   SP/ESP=100, BP/EBP=101, SI/ESI=110, DI/EDI=111
// Segment register encoding (reg field in 8C/8E):
//   ES=000, CS=001, SS=010, DS=011, FS=100, GS=101
// Addressing mode [disp16]: mod=00, r/m=110
static const uint8_t kRelayCode[] = {
    // ---- Load ES with writable data alias of code segment ----
    /* 0x00 */ 0x2E, 0x8E, 0x06, RELAY_LE16(RELAY_SCRATCH_SEL),      // mov es, [cs:SCRATCH_SEL]

    // ---- Save caller's DS (write via ES) ----
    /* 0x05 */ 0x26, 0x8C, 0x1E, RELAY_LE16(RELAY_SAVED_DS),         // mov [es:SAVED_DS], ds

    // ---- Pop 32-bit return address (4 x 16-bit pops, write via ES) ----
    // The 32-bit lcall pushed 4+4=8 bytes (EIP then CS, each 32-bit).
    // With SS B=1, pop uses ESP and reads 16-bit values.
    /* 0x0A */ 0x26, 0x8F, 0x06, RELAY_LE16(RELAY_SAVED_EIP_LO),     // pop word [es:SAVED_EIP_LO]
    /* 0x0F */ 0x26, 0x8F, 0x06, RELAY_LE16(RELAY_SAVED_EIP_HI),     // pop word [es:SAVED_EIP_HI]
    /* 0x14 */ 0x26, 0x8F, 0x06, RELAY_LE16(RELAY_SAVED_CS_LO),      // pop word [es:SAVED_CS_LO]
    /* 0x19 */ 0x26, 0x8F, 0x06, RELAY_LE16(RELAY_SAVED_CS_HI),      // pop word [es:SAVED_CS_HI]

    // ---- Save caller's SS:ESP (write via ES) ----
    /* 0x1E */ 0x26, 0x8C, 0x16, RELAY_LE16(RELAY_SAVED_SS),         // mov [es:SAVED_SS], ss
    /* 0x23 */ 0x66, 0x26, 0x89, 0x26, RELAY_LE16(RELAY_SAVED_ESP),  // o32 mov [es:SAVED_ESP], esp

    // ---- Save caller's 32-bit EBP (write via ES) ----
    // EBP is cleared before the driver call below, but the 32-bit caller
    // uses it as its frame pointer, so it is saved here and restored at 0x85.
    /* 0x29 */ 0x66, 0x26, 0x89, 0x2E, RELAY_LE16(RELAY_SAVED_EBP),  // o32 mov [es:SAVED_EBP], ebp

    // ---- Load DS with 16-bit data segment (read from CS, OK) ----
    /* 0x2F */ 0x2E, 0x8E, 0x1E, RELAY_LE16(RELAY_DS16),             // mov ds, [cs:DS16]

    // ---- Switch to 16-bit stack (reads from CS, OK) ----
    /* 0x34 */ 0xFA,                                                 // cli
    /* 0x35 */ 0x2E, 0x8E, 0x16, RELAY_LE16(RELAY_STACK16_SS),       // mov ss, [cs:STACK16_SS]
    // Clear ESP so its upper 16 bits are zero. CWSDPMI uses 32-bit interrupt
    // gates, so the CPU uses full ESP when pushing interrupt frames. Without
    // this, stale upper bits from the 32-bit stack cause corruption.
    /* 0x3A */ 0x66, 0x31, 0xE4,                                     // o32 xor esp, esp
    /* 0x3D */ 0x2E, 0x8B, 0x26, RELAY_LE16(RELAY_STACK16_SP),       // mov sp, [cs:STACK16_SP]
    /* 0x42 */ 0xFB,                                                 // sti (stack switch complete)

    // ---- Push parameters from DS onto 16-bit stack ----
    // CX = param count, BX = byte offset into params array.
    // Push params[0] first (leftmost, goes deepest = Pascal convention).
    /* 0x43 */ 0x8B, 0x0E, 0x04, 0x00,                               // mov cx, [ds:0x0004]
    /* 0x47 */ 0x31, 0xDB,                                           // xor bx, bx
    /* 0x49 */ 0x85, 0xC9,                                           // test cx, cx
    // IP after jz = 0x4D, target = 0x4D + 9 = 0x56 (skip loop if no params)
    /* 0x4B */ 0x74, 0x09,                                           // jz 0x56
    // FF /6 [BX+disp16]: mod=10, reg=110, r/m=111(BX) = 0xB7
    /* 0x4D */ 0xFF, 0xB7, 0x06, 0x00,                               // push word [bx+0x0006]
    /* 0x51 */ 0x83, 0xC3, 0x02,                                     // add bx, 2
    // IP after loop = 0x56, relative = 0x49 - 0x56 = -13 = 0xF3
    /* 0x54 */ 0xE2, 0xF3,                                           // loop 0x49

    // ---- Load driver's DGROUP into DS and ES ----
    // We load DS=DGROUP so the driver runs with its own data segment.
    // ES is also set to DGROUP since many Win3.x drivers assume ES=DS on entry.
    /* 0x56 */ 0x2E, 0x8E, 0x1E, RELAY_LE16(RELAY_DS_DGROUP),        // mov ds, [cs:DS_DGROUP]
    /* 0x5B */ 0x1E,                                                 // push ds
    /* 0x5C */ 0x07,                                                 // pop es (ES = DS = DGROUP)

    // ---- Clear the general-purpose registers ----
    // Each XOR clears the full 32-bit register, so no stale upper halves
    // from DJGPP 32-bit code remain. 16-bit driver code using the 67h prefix
    // (32-bit addressing) would otherwise pick up those stale bits, causing
    // accesses outside segment limits. EBP was saved earlier (at 0x29).
    /* 0x5D */ 0x66, 0x31, 0xC0,                                     // o32 xor eax, eax
    /* 0x60 */ 0x66, 0x31, 0xDB,                                     // o32 xor ebx, ebx
    /* 0x63 */ 0x66, 0x31, 0xC9,                                     // o32 xor ecx, ecx
    /* 0x66 */ 0x66, 0x31, 0xD2,                                     // o32 xor edx, edx
    /* 0x69 */ 0x66, 0x31, 0xED,                                     // o32 xor ebp, ebp
    /* 0x6C */ 0x66, 0x31, 0xF6,                                     // o32 xor esi, esi
    /* 0x6F */ 0x66, 0x31, 0xFF,                                     // o32 xor edi, edi

    // ---- Far call to driver function via CS config ----
    // The target address was written to CS:RELAY_TARGET_ADDR by the caller.
    // FF /3 [disp16]: mod=00, reg=011, r/m=110 = 0x1E
    /* 0x72 */ 0x2E, 0xFF, 0x1E, RELAY_LE16(RELAY_TARGET_ADDR),      // call far [cs:TARGET_ADDR]

    // ---- Reload ES (driver may have clobbered it) ----
    /* 0x77 */ 0x2E, 0x8E, 0x06, RELAY_LE16(RELAY_SCRATCH_SEL),      // mov es, [cs:SCRATCH_SEL]

    // ---- Save return value (DX:AX) via ES ----
    /* 0x7C */ 0x26, 0xA3, RELAY_LE16(RELAY_RETVAL_AX),              // mov [es:RETVAL_AX], ax
    /* 0x80 */ 0x26, 0x89, 0x16, RELAY_LE16(RELAY_RETVAL_DX),        // mov [es:RETVAL_DX], dx

    // ---- Restore caller's 32-bit EBP (read from CS, OK) ----
    /* 0x85 */ 0x66, 0x2E, 0x8B, 0x2E, RELAY_LE16(RELAY_SAVED_EBP),  // o32 mov ebp, [cs:SAVED_EBP]

    // ---- Restore caller's SS:ESP (reads from CS, OK) ----
    /* 0x8B */ 0xFA,                                                 // cli
    /* 0x8C */ 0x2E, 0x8E, 0x16, RELAY_LE16(RELAY_SAVED_SS),         // mov ss, [cs:SAVED_SS]
    /* 0x91 */ 0x66, 0x2E, 0x8B, 0x26, RELAY_LE16(RELAY_SAVED_ESP),  // o32 mov esp, [cs:SAVED_ESP]
    /* 0x97 */ 0xFB,                                                 // sti (stack restore complete)

    // ---- Restore caller's DS (read from CS, OK) ----
    /* 0x98 */ 0x2E, 0x8E, 0x1E, RELAY_LE16(RELAY_SAVED_DS),         // mov ds, [cs:SAVED_DS]

    // ---- Push 32-bit return address back onto 32-bit stack ----
    // Order: CS_HI deepest, EIP_LO on top, so o32 retf reads them correctly.
    /* 0x9D */ 0x2E, 0xFF, 0x36, RELAY_LE16(RELAY_SAVED_CS_HI),      // push word [cs:SAVED_CS_HI]
    /* 0xA2 */ 0x2E, 0xFF, 0x36, RELAY_LE16(RELAY_SAVED_CS_LO),      // push word [cs:SAVED_CS_LO]
    /* 0xA7 */ 0x2E, 0xFF, 0x36, RELAY_LE16(RELAY_SAVED_EIP_HI),     // push word [cs:SAVED_EIP_HI]
    /* 0xAC */ 0x2E, 0xFF, 0x36, RELAY_LE16(RELAY_SAVED_EIP_LO),     // push word [cs:SAVED_EIP_LO]

    // ---- Restore return value (reads from CS, OK) ----
    /* 0xB1 */ 0x2E, 0xA1, RELAY_LE16(RELAY_RETVAL_AX),              // mov ax, [cs:RETVAL_AX]
    /* 0xB5 */ 0x2E, 0x8B, 0x16, RELAY_LE16(RELAY_RETVAL_DX),        // mov dx, [cs:RETVAL_DX]

    // ---- 32-bit far return ----
    /* 0xBA */ 0x66, 0xCB,                                           // o32 retf

    // Code ends at 0xBC. Padding to RELAY_DATA_START (0xC0).
    /* 0xBC */ 0x90, 0x90, 0x90, 0x90,                               // 4 x nop

    // ---- Data area (at offset 0xC0) ----
    // Scratch:  saved_eip(4), saved_cs(4), saved_ss(2), saved_esp(4),
    //           saved_ds(2)
    // Per-call: stack16_ss(2), stack16_sp(2), ds16(2), ds_dgroup(2),
    //           target_addr(4)
    // Return:   retval_ax(2), retval_dx(2)
    // Once:     scratch_sel(2)
    // Caller:   saved_ebp(4)
    // Total:    38 bytes
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};

#define RELAY_CODE_SIZE sizeof(kRelayCode)

// Compile-time guard: the jump displacements and every RELAY_* offset above
// assume the code occupies exactly RELAY_DATA_START bytes (including padding)
// followed by RELAY_DATA_SIZE bytes of data. Adding or removing an instruction
// byte without adjusting the padding makes this array size negative and
// breaks the build instead of silently shifting the data area.
typedef char RelayLayoutCheckT[
    (sizeof(kRelayCode) == RELAY_DATA_START + RELAY_DATA_SIZE) ? 1 : -1];
|
|
|
|
// ============================================================================
|
|
// 16-to-32 callback mechanism
|
|
//
|
|
// For Windows API stubs that the driver calls, we create small 16-bit code
|
|
// snippets that use a software interrupt (INT 0x66, chosen to avoid
|
|
// conflicts) to transfer control to a 32-bit DPMI handler. The handler
|
|
// looks up the callback by its slot index (passed in BX) and invokes the
|
|
// registered C function.
|
|
//
|
|
// Each callback stub looks like:
|
|
// push bx ; 53 (1 byte) - save BX
|
|
// mov bx, <slot_index> ; BB xx xx (3 bytes)
|
|
// int 0x66 ; CD 66 (2 bytes)
|
|
// pop bx ; 5B (1 byte) - restore BX
|
|
// retf <param_bytes> ; CA xx xx (3 bytes) - Pascal callee cleanup
|
|
// Total: 10 bytes per stub
|
|
// ============================================================================
|
|
|
|
#define CALLBACK_STUB_SIZE 10
|
|
#define CALLBACK_INT_NUM 0x66
|
|
|
|
// Callback registry
|
|
static ThunkCallbackT gCallbacks[THUNK_MAX_CALLBACKS];
|
|
static uint16_t gCallbackParamWords[THUNK_MAX_CALLBACKS];
|
|
static uint16_t gCallbackCount = 0;
|
|
static ThunkContextT *gCallbackCtx = NULL;
|
|
|
|
// DPMI interrupt handler for callback dispatching
|
|
static __dpmi_paddr gOldCbVec;
|
|
static volatile bool gHandlerInstalled = false;
|
|
static bool gThunkDebug = false;
|
|
|
|
// Diagnostic: monitor 3 bytes at gDiagWatchSel:gDiagWatchOff for corruption.
|
|
// Set gDiagWatchSel nonzero to enable. Logs when bytes change.
|
|
static uint16_t gDiagWatchSel = 0;
|
|
static uint32_t gDiagWatchOff = 0;
|
|
static uint8_t gDiagWatchBytes[3] = {0};
|
|
|
|
// Shared area for passing parameters from the interrupt handler
|
|
static uint16_t gCbParams[THUNK_MAX_PARAMS];
|
|
static uint32_t gCbRetVal;
|
|
|
|
// Saved register frame for the raw callback handler (same layout as INT 10h).
// The assembly handler in this file reads and writes these fields by literal
// byte offset, so the field order and sizes must not change.
typedef struct __attribute__((packed)) {
    uint32_t edi;        // +0
    uint32_t esi;        // +4
    uint32_t ebp;        // +8
    uint32_t _reserved;  // +12
    uint32_t ebx;        // +16
    uint32_t edx;        // +20
    uint32_t ecx;        // +24
    uint32_t eax;        // +28
    uint32_t es;         // +32
    uint32_t ds;         // +36
    uint32_t eip;        // +40
    uint32_t cs;         // +44
    uint32_t eflags;     // +48
} CbFrameT;

// Compile-time guard for the offsets hard-coded in the assembly handler:
// a reordered, added, or removed field makes this array size negative.
typedef char CbFrameLayoutCheckT[
    (sizeof(CbFrameT) == 52 &&
     __builtin_offsetof(CbFrameT, edi)    ==  0 &&
     __builtin_offsetof(CbFrameT, ebx)    == 16 &&
     __builtin_offsetof(CbFrameT, eax)    == 28 &&
     __builtin_offsetof(CbFrameT, es)     == 32 &&
     __builtin_offsetof(CbFrameT, ds)     == 36 &&
     __builtin_offsetof(CbFrameT, eip)    == 40 &&
     __builtin_offsetof(CbFrameT, eflags) == 48) ? 1 : -1];
|
|
|
|
// Globals for the raw callback handler assembly stub.
|
|
// Non-static so the asm symbols are accessible.
|
|
uint16_t gCbDsSel;
|
|
uint32_t gCbSavedSS;
|
|
uint32_t gCbSavedESP;
|
|
uint32_t gCbSavedGS;
|
|
uint32_t gCbSavedFS;
|
|
uint32_t gCbDgroupSel;
|
|
CbFrameT gCbFrame;
|
|
uint8_t gCbStack[16384] __attribute__((aligned(16)));
|
|
uint32_t gCbStackTop;
|
|
|
|
// Worker function called from the assembly stub.
|
|
// Diagnostic: set by cbIntWorker to prove it was called
|
|
volatile uint32_t gCbWorkerCalled = 0;
|
|
volatile uint32_t gCbWorkerLastSS = 0;
|
|
volatile uint32_t gCbWorkerLastESP = 0;
|
|
volatile uint32_t gCbWorkerLastSlot = 0xDEAD;
|
|
|
|
void cbIntWorker(CbFrameT *frame)
|
|
{
|
|
gCbWorkerCalled++;
|
|
gCbWorkerLastSS = gCbSavedSS;
|
|
gCbWorkerLastESP = gCbSavedESP;
|
|
gCbWorkerLastSlot = (uint16_t)frame->ebx;
|
|
|
|
uint16_t slot = (uint16_t)frame->ebx;
|
|
|
|
if (slot >= gCallbackCount || !gCallbacks[slot]) {
|
|
frame->eax = (frame->eax & 0xFFFF0000);
|
|
frame->edx = (frame->edx & 0xFFFF0000);
|
|
return;
|
|
}
|
|
|
|
// The driver far-called our stub, then the stub did push bx; INT 0x66.
|
|
// CWSDPMI pushed an IRET frame on the interrupted stack before
|
|
// dispatching to us. The driver's parameters are above the IRET
|
|
// frame, saved BX, and far return address on the interrupted stack.
|
|
//
|
|
// Stack layout from savedESP upward:
|
|
// [+0..11] = IRET frame (EIP, CS, EFLAGS - 32-bit, 12 bytes)
|
|
// [+12..13] = saved BX (from push bx in callback stub)
|
|
// [+14..15] = return IP (from driver's far call to stub)
|
|
// [+16..17] = return CS
|
|
// [+18..] = parameters (rightmost/last in Pascal at lowest addr)
|
|
|
|
uint16_t paramWords = gCallbackParamWords[slot];
|
|
uint16_t origSS = (uint16_t)gCbSavedSS;
|
|
uint32_t origESP = gCbSavedESP;
|
|
|
|
if (paramWords > 0 && paramWords <= THUNK_MAX_PARAMS) {
|
|
uint32_t paramOffset = origESP + 18;
|
|
movedata(origSS, paramOffset,
|
|
_my_ds(), (unsigned)gCbParams,
|
|
paramWords * 2);
|
|
|
|
// Reverse so gCbParams[0] = leftmost param (Pascal declaration order).
|
|
for (uint16_t i = 0; i < paramWords / 2; i++) {
|
|
uint16_t tmp = gCbParams[i];
|
|
gCbParams[i] = gCbParams[paramWords - 1 - i];
|
|
gCbParams[paramWords - 1 - i] = tmp;
|
|
}
|
|
}
|
|
|
|
// Read return address (IP:CS) from the 16-bit stack above the IRET frame
|
|
// (+12 = saved BX, +14 = retIP, +16 = retCS)
|
|
uint16_t retIP = 0;
|
|
uint16_t retCS = 0;
|
|
movedata(origSS, origESP + 14, _my_ds(), (unsigned)&retIP, 2);
|
|
movedata(origSS, origESP + 16, _my_ds(), (unsigned)&retCS, 2);
|
|
|
|
// Calculate driver's SP after retf N cleanup:
|
|
// IRET(12) + saved_bx(2) + retaddr(4) + params(N*2)
|
|
uint32_t driverSP = origESP + 18 + paramWords * 2;
|
|
|
|
if (gThunkDebug) {
|
|
logErr("CB[%u] %u words:", slot, paramWords);
|
|
for (uint16_t i = 0; i < paramWords && i < 6; i++) {
|
|
logErr(" %04X", gCbParams[i]);
|
|
}
|
|
logErr(" ret=%04X:%04X SP=%04" PRIX32 " BP=%04X ESP32=%08" PRIX32 "\n",
|
|
retCS, retIP, driverSP, (uint16_t)frame->ebp, origESP);
|
|
fflush(stderr);
|
|
}
|
|
|
|
// Diagnostic: check BEFORE callback dispatch
|
|
if (gDiagWatchSel != 0) {
|
|
uint8_t b0 = _farpeekb(gDiagWatchSel, gDiagWatchOff);
|
|
if (b0 != gDiagWatchBytes[0]) {
|
|
logErr("WATCH-PRE: %04X:%08" PRIX32 " changed %02X->%02X before CB[%u]\n",
|
|
gDiagWatchSel, gDiagWatchOff,
|
|
gDiagWatchBytes[0], b0, slot);
|
|
gDiagWatchBytes[0] = b0;
|
|
}
|
|
}
|
|
|
|
gCbRetVal = gCallbacks[slot](gCbParams, paramWords);
|
|
fflush(stderr);
|
|
|
|
// Set return value in DX:AX
|
|
frame->eax = (frame->eax & 0xFFFF0000) | (gCbRetVal & 0xFFFF);
|
|
frame->edx = (frame->edx & 0xFFFF0000) | (gCbRetVal >> 16);
|
|
|
|
// Diagnostic: check AFTER callback dispatch
|
|
if (gDiagWatchSel != 0) {
|
|
uint8_t b0 = _farpeekb(gDiagWatchSel, gDiagWatchOff);
|
|
uint8_t b1 = _farpeekb(gDiagWatchSel, gDiagWatchOff + 1);
|
|
uint8_t b2 = _farpeekb(gDiagWatchSel, gDiagWatchOff + 2);
|
|
if (b0 != gDiagWatchBytes[0] || b1 != gDiagWatchBytes[1] || b2 != gDiagWatchBytes[2]) {
|
|
logErr("WATCH-POST: %04X:%08" PRIX32 " changed %02X %02X %02X->%02X %02X %02X after CB[%u]\n",
|
|
gDiagWatchSel, gDiagWatchOff,
|
|
gDiagWatchBytes[0], gDiagWatchBytes[1], gDiagWatchBytes[2],
|
|
b0, b1, b2, slot);
|
|
gDiagWatchBytes[0] = b0;
|
|
gDiagWatchBytes[1] = b1;
|
|
gDiagWatchBytes[2] = b2;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Entry point of the raw callback interrupt handler; defined in the
// file-scope asm block immediately below and installed by thunkInit.
extern void cbIntRawHandler(void);

// Raw callback interrupt handler.
//
// Code segments are not writable in x86 protected mode, so every store to a
// C global goes through FS, which is loaded with our data selector
// (gCbDsSel). Loads through CS are fine because the code segment is readable.
//
// Sequence: save registers into gCbFrame, record the interrupted SS:ESP,
// switch to gCbStack, call cbIntWorker(&gCbFrame), switch back, reload the
// registers from gCbFrame (cbIntWorker stores the result in eax/edx there),
// and iret.
//
// NOTE(review): all state lives in single global buffers (gCbFrame,
// gCbSavedSS/ESP, gCbStack), so a nested INT 0x66 arriving while a callback
// is running would overwrite it - confirm callbacks never re-enter.
__asm__(
    "    .text\n"
    "    .p2align 4\n"
    "    .globl _cbIntRawHandler\n"
    "_cbIntRawHandler:\n"

    // Borrow EAX/ECX (saved on the interrupted stack) to record the
    // incoming FS and point FS at our writable data segment.
    "    pushl %eax\n"
    "    pushl %ecx\n"
    "    xorl  %eax, %eax\n"
    "    movw  %fs, %ax\n"
    "    movw  %cs:_gCbDsSel, %cx\n"
    "    movw  %cx, %fs\n"
    "    movl  %eax, %fs:_gCbSavedFS\n"

    // Diagnostic: bump gCbWorkerCalled through FS to prove the stub ran.
    "    movl  %fs:_gCbWorkerCalled, %eax\n"
    "    incl  %eax\n"
    "    movl  %eax, %fs:_gCbWorkerCalled\n"

    "    popl  %ecx\n"
    "    popl  %eax\n"

    // General-purpose registers -> gCbFrame (offsets match CbFrameT).
    "    movl  %eax, %fs:_gCbFrame+28\n"
    "    movl  %ecx, %fs:_gCbFrame+24\n"
    "    movl  %edx, %fs:_gCbFrame+20\n"
    "    movl  %ebx, %fs:_gCbFrame+16\n"
    "    movl  %ebp, %fs:_gCbFrame+8\n"
    "    movl  %esi, %fs:_gCbFrame+4\n"
    "    movl  %edi, %fs:_gCbFrame+0\n"

    // Segment registers: ES and DS into the frame, GS into its own global.
    "    xorl  %eax, %eax\n"
    "    movw  %es, %ax\n"
    "    movl  %eax, %fs:_gCbFrame+32\n"
    "    movw  %ds, %ax\n"
    "    movl  %eax, %fs:_gCbFrame+36\n"
    "    movw  %gs, %ax\n"
    "    movl  %eax, %fs:_gCbSavedGS\n"

    // Copy the IRET frame (EIP, CS, EFLAGS) from the interrupted stack.
    "    movl  (%esp), %eax\n"
    "    movl  %eax, %fs:_gCbFrame+40\n"
    "    movl  4(%esp), %eax\n"
    "    movl  %eax, %fs:_gCbFrame+44\n"
    "    movl  8(%esp), %eax\n"
    "    movl  %eax, %fs:_gCbFrame+48\n"

    // Remember the interrupted SS:ESP for the way back.
    "    movl  %esp, %fs:_gCbSavedESP\n"
    "    xorl  %eax, %eax\n"
    "    movw  %ss, %ax\n"
    "    movl  %eax, %fs:_gCbSavedSS\n"

    // Enter the C environment: DS = ES = SS = our data selector,
    // ESP = top of the dedicated handler stack.
    "    movw  %fs:_gCbDsSel, %ax\n"
    "    movw  %ax, %ds\n"
    "    movw  %ax, %es\n"
    "    movw  %ax, %ss\n"
    "    movl  _gCbStackTop, %esp\n"

    // cbIntWorker(&gCbFrame)
    "    leal  _gCbFrame, %eax\n"
    "    pushl %eax\n"
    "    call  _cbIntWorker\n"
    "    addl  $4, %esp\n"

    // Back onto the interrupted stack (CS-relative reads are permitted).
    "    movl  %cs:_gCbSavedESP, %eax\n"
    "    movl  %cs:_gCbSavedSS, %ecx\n"
    "    movw  %cx, %ss\n"
    "    movl  %eax, %esp\n"

    // EFLAGS is intentionally not written back to the IRET frame (writes to
    // the original stack may fault); cbIntWorker does not modify EFLAGS.

    // Reload general-purpose registers from the frame (EAX comes last).
    "    movl  %cs:_gCbFrame+0, %edi\n"
    "    movl  %cs:_gCbFrame+4, %esi\n"
    "    movl  %cs:_gCbFrame+8, %ebp\n"
    "    movl  %cs:_gCbFrame+16, %ebx\n"
    "    movl  %cs:_gCbFrame+20, %edx\n"
    "    movl  %cs:_gCbFrame+24, %ecx\n"

    // FS and GS are always set to DGROUP rather than the saved values,
    // because the DPMI host may modify FS/GS when dispatching interrupts.
    // ES and DS come back from the frame.
    "    movl  %cs:_gCbDgroupSel, %eax\n"
    "    movw  %ax, %fs\n"
    "    movw  %ax, %gs\n"
    "    movl  %cs:_gCbFrame+32, %eax\n"
    "    movw  %ax, %es\n"
    "    movl  %cs:_gCbFrame+36, %eax\n"
    "    movw  %ax, %ds\n"

    // EAX was used as scratch above, so it is restored last.
    "    movl  %cs:_gCbFrame+28, %eax\n"

    "    iret\n"
);
|
|
|
|
|
|
// ============================================================================
|
|
// Public functions
|
|
// ============================================================================
|
|
|
|
void thunkSanitizeCbFrame(uint16_t freedSel)
|
|
{
|
|
if ((uint16_t)gCbFrame.es == freedSel) {
|
|
gCbFrame.es = 0;
|
|
}
|
|
if ((uint16_t)gCbFrame.ds == freedSel) {
|
|
gCbFrame.ds = 0;
|
|
}
|
|
}
|
|
|
|
|
|
void thunkSetDebug(bool debug)
|
|
{
|
|
gThunkDebug = debug;
|
|
}
|
|
|
|
|
|
void thunkSetWatch(uint16_t sel, uint32_t off)
|
|
{
|
|
gDiagWatchSel = sel;
|
|
gDiagWatchOff = off;
|
|
if (sel != 0) {
|
|
gDiagWatchBytes[0] = _farpeekb(sel, off);
|
|
gDiagWatchBytes[1] = _farpeekb(sel, off + 1);
|
|
gDiagWatchBytes[2] = _farpeekb(sel, off + 2);
|
|
logErr("WATCH: set %04X:%08" PRIX32 " = %02X %02X %02X\n",
|
|
sel, off, gDiagWatchBytes[0], gDiagWatchBytes[1], gDiagWatchBytes[2]);
|
|
}
|
|
}
|
|
|
|
|
|
// Initializes the thunking layer: allocates one DOS memory block, creates
// 16-bit code/data/stack descriptors over it, installs the relay code, and
// hooks INT CALLBACK_INT_NUM for 16-to-32 callbacks.
//
// ctx is zeroed and then filled in. Returns true on success. On failure,
// returns false; if the failure occurs after the DOS block was allocated,
// everything acquired so far is released via thunkShutdown.
bool thunkInit(ThunkContextT *ctx)
{
    memset(ctx, 0, sizeof(ThunkContextT));

    // Allocate conventional (DOS) memory for all 16-bit segments.
    //
    // Layout in DOS memory block:
    //   Offset 0x0000: Relay code (256 bytes, includes CS-relative data)
    //   Offset 0x0100: Callback stubs (THUNK_MAX_CALLBACKS * 10 = 1280 bytes)
    //   Offset 0x0600: Shared data area / ThunkDataT (256 bytes)
    //   Offset 0x0700: 16-bit stack (8192 bytes)
    //   Offset 0x2700: (end)
    //
    // Total: 0x2700 = 9984 bytes = 624 paragraphs
    uint32_t relayOff    = 0x0000;
    uint32_t callbackOff = 0x0100;
    uint32_t dataOff     = 0x0600;
    uint32_t stackOff    = 0x0700;
    uint32_t totalSize   = 0x2700;
    uint16_t paragraphs  = (totalSize + 15) / 16;

    // The layout above is fixed, so verify that the relay image fits below
    // the callback stubs and that the stubs fit below the shared data area.
    // Otherwise a larger relay or a larger THUNK_MAX_CALLBACKS would silently
    // overlap the next region.
    uint32_t codeRegionSize = callbackOff + THUNK_MAX_CALLBACKS * CALLBACK_STUB_SIZE;
    if (relayOff + RELAY_CODE_SIZE > callbackOff || codeRegionSize > dataOff) {
        logErr("thunk: 16-bit code region does not fit its DOS memory layout\n");
        return false;
    }

    int dosSel;
    int dosSeg = __dpmi_allocate_dos_memory(paragraphs, &dosSel);
    if (dosSeg < 0) {
        logErr("thunk: failed to allocate %" PRIu32 " bytes of DOS memory\n", totalSize);
        return false;
    }

    ctx->dosMemSeg  = dosSeg;
    ctx->dosMemSel  = dosSel;
    ctx->dosMemSize = totalSize;

    uint32_t dosBase = (uint32_t)dosSeg * 16;

    logErr("thunk: DOS mem at 0x%05" PRIX32 "-0x%05" PRIX32,
           dosBase, dosBase + totalSize - 1);
    // Debugging aid: flag when linear address 0x8134 lies inside the block.
    if (0x8134 >= dosBase && 0x8134 < dosBase + totalSize) {
        logErr(" ** 0x8134 INSIDE thunk block at offset 0x%04" PRIX32 " **",
               (uint32_t)(0x8134 - dosBase));
    }
    logErr("\n");

    // Zero the entire area, 256 bytes at a time
    {
        uint8_t zeroBuf[256];
        memset(zeroBuf, 0, sizeof(zeroBuf));
        for (uint32_t off = 0; off < totalSize; off += 256) {
            uint32_t chunk = totalSize - off;
            if (chunk > 256) {
                chunk = 256;
            }
            dosmemput(zeroBuf, chunk, dosBase + off);
        }
    }

    // Create 16-bit code segment descriptor for relay + callbacks
    ctx->relayCodeBase = dosBase + relayOff;
    ctx->relayCodeSize = codeRegionSize;
    ctx->relayCodeSel  = allocDescriptor16(ctx->relayCodeBase,
                                           ctx->relayCodeSize - 1, true);
    if (ctx->relayCodeSel == 0) {
        logErr("thunk: failed to create relay code segment\n");
        goto fail;
    }

    // Create 16-bit data segment descriptor for shared data (ThunkDataT)
    ctx->dataSegBase = dosBase + dataOff;
    ctx->dataSegSize = 256;
    ctx->dataSegSel  = allocDescriptor16(ctx->dataSegBase,
                                         ctx->dataSegSize - 1, false);
    if (ctx->dataSegSel == 0) {
        logErr("thunk: failed to create data segment\n");
        goto fail;
    }

    // Create 16-bit stack segment descriptor
    ctx->stackBase = dosBase + stackOff;
    ctx->stackSize = totalSize - stackOff;
    ctx->stackSel  = allocDescriptor16(ctx->stackBase,
                                       ctx->stackSize - 1, false);
    if (ctx->stackSel == 0) {
        logErr("thunk: failed to create stack segment\n");
        goto fail;
    }

    // Install the relay code into the code segment area
    if (!installRelayCode(ctx)) {
        goto fail;
    }

    // Reset the callback registry (both the function table and the
    // per-slot parameter counts).
    gCallbackCtx   = ctx;
    gCallbackCount = 0;
    memset(gCallbacks, 0, sizeof(gCallbacks));
    memset(gCallbackParamWords, 0, sizeof(gCallbackParamWords));

    // State the raw handler needs before the first interrupt can arrive.
    gCbDsSel    = _my_ds();
    gCbStackTop = (uint32_t)gCbStack + sizeof(gCbStack);

    // Install the interrupt handler for 16-to-32 callbacks. The old vector
    // must be read successfully, because thunkShutdown restores it.
    if (__dpmi_get_protected_mode_interrupt_vector(CALLBACK_INT_NUM, &gOldCbVec) != 0) {
        logErr("thunk: failed to read callback interrupt vector\n");
        goto fail;
    }

    __dpmi_paddr newVec;
    newVec.offset32 = (unsigned long)cbIntRawHandler;
    newVec.selector = _my_cs();
    if (__dpmi_set_protected_mode_interrupt_vector(CALLBACK_INT_NUM, &newVec) != 0) {
        logErr("thunk: failed to install callback interrupt handler\n");
        goto fail;
    }
    gHandlerInstalled = true;

    ctx->initialized = true;
    return true;

fail:
    thunkShutdown(ctx);
    return false;
}
|
|
|
|
|
|
// Releases everything thunkInit acquired: restores the previous interrupt
// vector (if ours is installed), frees the three LDT descriptors and the DOS
// memory block, and clears the callback registry state. Each resource is
// released only if its handle in ctx is nonzero, so this is also used on a
// partially initialized context.
//
// NOTE(review): the writable scratch alias selector that the relay loads from
// RELAY_SCRATCH_SEL is not freed here - confirm it is released elsewhere.
void thunkShutdown(ThunkContextT *ctx)
{
    // Put the previous handler back on the callback interrupt.
    if (gHandlerInstalled) {
        __dpmi_set_protected_mode_interrupt_vector(CALLBACK_INT_NUM, &gOldCbVec);
        gHandlerInstalled = false;
    }

    // Release the code, data, and stack descriptors.
    if (ctx->relayCodeSel != 0) {
        __dpmi_free_ldt_descriptor(ctx->relayCodeSel);
        ctx->relayCodeSel = 0;
    }
    if (ctx->dataSegSel != 0) {
        __dpmi_free_ldt_descriptor(ctx->dataSegSel);
        ctx->dataSegSel = 0;
    }
    if (ctx->stackSel != 0) {
        __dpmi_free_ldt_descriptor(ctx->stackSel);
        ctx->stackSel = 0;
    }

    // Release the DOS memory block backing all three segments.
    if (ctx->dosMemSel != 0) {
        __dpmi_free_dos_memory(ctx->dosMemSel);
        ctx->dosMemSize = 0;
        ctx->dosMemSel  = 0;
        ctx->dosMemSeg  = 0;
    }

    ctx->initialized = false;
    gCallbackCount   = 0;
    gCallbackCtx     = NULL;
}
|
|
|
|
|
|
// Call a 16-bit far function at targetSel:targetOff through the relay thunk.
//
// The call is staged in DOS memory and then entered with a far call to
// offset 0 of ctx->relayCodeSel:
//   - target and parameter words go to the shared data area at
//     ctx->dataSegBase;
//   - stack/DS/DGROUP/target configuration goes to the relay's CS-relative
//     config area at ctx->relayCodeBase + RELAY_STACK16_SS.
//
// Parameters:
//   ctx        - thunk context; ctx->initialized must be set.
//   targetSel  - selector of the 16-bit function to call.
//   targetOff  - offset of the function inside targetSel.
//   params     - paramCount 16-bit words, copied verbatim to the shared area.
//   paramCount - number of words in params; at most THUNK_MAX_PARAMS.
//
// Returns the callee's DX:AX as one 32-bit value (DX in the high word).
// Returns 0 if the context is not initialized or paramCount is too large,
// so a failure cannot be told apart from a callee that returned 0.
uint32_t thunkCall16(ThunkContextT *ctx, uint16_t targetSel, uint16_t targetOff,
                     const uint16_t *params, uint16_t paramCount)
{
    if (!ctx->initialized) {
        logErr("thunk: not initialized\n");
        return 0;
    }
    if (paramCount > THUNK_MAX_PARAMS) {
        logErr("thunk: too many parameters (%u)\n", paramCount);
        return 0;
    }

    // Build the ThunkDataT in the shared data segment (DOS memory).
    ThunkDataT td;
    td.targetOff = targetOff;
    td.targetSeg = targetSel;
    td.paramCount = paramCount;
    if (paramCount > 0) {
        memcpy(td.params, params, paramCount * 2);
    }
    // Only the header plus the parameter words actually in use are copied.
    // NOTE(review): the literal 6 assumes targetOff/targetSeg/paramCount are
    // three contiguous 16-bit fields placed directly before params in
    // ThunkDataT - confirm against the struct definition.
    dosmemput(&td, 6 + paramCount * 2, ctx->dataSegBase);

    // Write relay configuration to the CS-relative data area.
    // The relay reads stack/DS/DGROUP/target config from here.
    struct __attribute__((packed)) {
        uint16_t stack16Ss;
        uint16_t stack16Sp;
        uint16_t ds16;
        uint16_t dgroupSel;
        uint16_t targetOff;
        uint16_t targetSeg;
    } relayConfig;

    // Windows 3.x drivers assume SS == DS == DGROUP. Some drivers
    // (VBESVGA BBLT.ASM) do PrestoChangeoSelector(SS, WorkSelector) to
    // create a code alias of DGROUP, then retf into compiled blit code
    // stored in the data segment. If SS != DGROUP, the code alias has
    // the wrong base and the CPU executes garbage, corrupting memory.
    // When DGROUP is available, use it as SS with SP near the top of the
    // 64K segment (stack grows downward).
    if (ctx->dgroupSel) {
        relayConfig.stack16Ss = ctx->dgroupSel;
        relayConfig.stack16Sp = 0xFFF0;  // Top of 64K DGROUP, 16-byte aligned
    } else {
        relayConfig.stack16Ss = ctx->stackSel;
        relayConfig.stack16Sp = ctx->stackSize;
    }
    relayConfig.ds16      = ctx->dataSegSel;
    relayConfig.dgroupSel = ctx->dgroupSel ? ctx->dgroupSel : ctx->dataSegSel;
    relayConfig.targetOff = targetOff;
    relayConfig.targetSeg = targetSel;
    dosmemput(&relayConfig, sizeof(relayConfig),
              ctx->relayCodeBase + RELAY_STACK16_SS);

    // Build the 48-bit far pointer for lcall: 32-bit offset + 16-bit selector.
    // Relay entry point is at offset 0 in the code segment.
    struct __attribute__((packed)) {
        uint32_t offset;
        uint16_t selector;
    } farTarget;
    farTarget.offset   = 0;
    farTarget.selector = ctx->relayCodeSel;

    // The far pointer is handed to the asm by address, in a register. The asm
    // pushes three segment registers before the lcall; an "m" operand that the
    // compiler chose to address relative to ESP would by then point 12 bytes
    // away from farTarget. A register-held address is unaffected by the pushes.
    void *relayEntry = &farTarget;

    // Far-call to the 16-bit relay. The relay handles everything:
    // DS/SS switching, parameter pushing, calling the driver, and returning.
    // The 32-bit side just does the lcall and collects the result.
    uint32_t result;

    // The S3 driver uses GS and FS segment overrides (0x65/0x64 prefixes)
    // to access DGROUP data. The relay sets DS and ES to DGROUP but not
    // GS or FS, so we must pre-load both with the DGROUP selector.
    // Also store it for the callback handler to restore on exit.
    uint16_t dgroupSel = relayConfig.dgroupSel;
    gCbDgroupSel = dgroupSel;

    // Diagnostic: check watched byte before lcall
    if (gDiagWatchSel != 0) {
        uint8_t b = _farpeekb(gDiagWatchSel, gDiagWatchOff);
        if (b != gDiagWatchBytes[0]) {
            logErr("WATCH-LCALL-PRE: %02X->%02X target=%04X:%04X\n",
                   gDiagWatchBytes[0], b, targetSel, targetOff);
            gDiagWatchBytes[0] = b;
        }
    }

    __asm__ volatile (
        // Save ES, GS, and FS
        "push %%es\n\t"
        "push %%gs\n\t"
        "push %%fs\n\t"

        // Set GS and FS = DGROUP
        "movw %[dgroup], %%gs\n\t"
        "movw %[dgroup], %%fs\n\t"

        // Indirect far call through the register-held address of farTarget
        "lcall *(%[entry])\n\t"

        // Combine DX:AX into EAX
        "shll $16, %%edx\n\t"
        "movzwl %%ax, %%eax\n\t"
        "orl %%edx, %%eax\n\t"

        // Restore FS, GS, and ES
        "pop %%fs\n\t"
        "pop %%gs\n\t"
        "pop %%es\n\t"

        : "=a" (result),
          // In/out: ESI carries the address in and is treated as destroyed
          // by the call, exactly as when it was listed as a clobber.
          [entry] "+S" (relayEntry)
        : [dgroup] "r" (dgroupSel),
          // Not used in the template; states that farTarget's bytes are read.
          "m" (farTarget)
        : "ebx", "ecx", "edx", "edi", "memory", "cc"
    );

    // Diagnostic: check watched byte after lcall returns
    if (gDiagWatchSel != 0) {
        uint8_t b = _farpeekb(gDiagWatchSel, gDiagWatchOff);
        if (b != gDiagWatchBytes[0]) {
            logErr("WATCH-LCALL-POST: %02X->%02X target=%04X:%04X\n",
                   gDiagWatchBytes[0], b, targetSel, targetOff);
            gDiagWatchBytes[0] = b;
        }
    }

    return result;
}
|
|
|
|
|
|
// Variadic convenience wrapper around thunkCall16().
//
// Each variadic argument is fetched as an unsigned int and truncated to a
// 16-bit word. At most THUNK_MAX_PARAMS arguments are consumed; paramCount
// itself is forwarded unchanged, so the range check is left to thunkCall16().
//
// Returns whatever thunkCall16() returns.
uint32_t thunkCall16v(ThunkContextT *ctx, uint16_t targetSel, uint16_t targetOff,
                      uint16_t paramCount, ...)
{
    uint16_t words[THUNK_MAX_PARAMS];
    va_list  args;
    uint16_t idx = 0;

    va_start(args, paramCount);
    while (idx < paramCount && idx < THUNK_MAX_PARAMS) {
        words[idx] = (uint16_t)va_arg(args, unsigned int);
        idx++;
    }
    va_end(args);

    return thunkCall16(ctx, targetSel, targetOff, words, paramCount);
}
|
|
|
|
|
|
// Register a 32-bit callback and emit the 16-bit stub that reaches it.
//
// Parameters:
//   ctx        - thunk context; relayCodeBase and relayCodeSel are used.
//   callback   - handler stored in the next free callback slot.
//   paramWords - number of 16-bit parameter words the 16-bit caller pushes;
//                the stub pops paramWords * 2 bytes on return.
//   result     - receives the selector:offset of the generated stub.
//
// Returns true on success, false (after logging) when all
// THUNK_MAX_CALLBACKS slots are taken.
bool thunkRegisterCallback(ThunkContextT *ctx, ThunkCallbackT callback,
                           uint16_t paramWords, FarPtr16T *result)
{
    if (gCallbackCount >= THUNK_MAX_CALLBACKS) {
        logErr("thunk: callback table full\n");
        return false;
    }

    // Claim the next slot and record the handler with its parameter size.
    uint16_t slot = gCallbackCount;
    gCallbacks[slot] = callback;
    gCallbackParamWords[slot] = paramWords;
    gCallbackCount++;

    // Stub layout (10 bytes):
    //   53        push bx                  keep the caller's BX
    //   BB lo hi  mov  bx, slot            slot number travels in BX
    //   CD nn     int  CALLBACK_INT_NUM
    //   5B        pop  bx                  give the caller's BX back
    //   CA lo hi  retf paramBytes          Pascal convention: callee cleans up
    //
    // NOTE(review): only these 10 bytes are filled in. If CALLBACK_STUB_SIZE
    // is larger, the rest of the buffer is copied out uninitialized -
    // confirm the constant.
    uint16_t popBytes = paramWords * 2;

    uint8_t  code[CALLBACK_STUB_SIZE];
    uint8_t *out = code;

    *out++ = 0x53;                        // push bx
    *out++ = 0xBB;                        // mov bx, imm16
    *out++ = (uint8_t)(slot & 0xFF);
    *out++ = (uint8_t)(slot >> 8);
    *out++ = 0xCD;                        // int imm8
    *out++ = CALLBACK_INT_NUM;
    *out++ = 0x5B;                        // pop bx
    *out++ = 0xCA;                        // retf imm16
    *out++ = (uint8_t)(popBytes & 0xFF);
    *out   = (uint8_t)(popBytes >> 8);

    // Stubs are packed back to back starting at offset 0x0100 of the relay
    // code segment.
    uint32_t stubOffset = 0x0100 + slot * CALLBACK_STUB_SIZE;
    uint32_t linearAddr = ctx->relayCodeBase + stubOffset;
    dosmemput(code, CALLBACK_STUB_SIZE, linearAddr);

    // Hand back the 16-bit far pointer to the stub.
    result->segment = ctx->relayCodeSel;
    result->offset  = (uint16_t)stubOffset;

    return true;
}
|
|
|
|
|
|
// ============================================================================
|
|
// Internal helpers
|
|
// ============================================================================
|
|
|
|
// Copy the hand-assembled relay into the relay code segment and patch in the
// scratch selector it needs.
//
// The 16-bit code descriptor is read-only, so both writes go through
// dosmemput() at the segment's base address, ctx->relayCodeBase, which
// addresses the same underlying DOS memory.
//
// Always returns true.
static bool installRelayCode(ThunkContextT *ctx)
{
    // Relay machine code, RELAY_CODE_SIZE bytes, at the start of the segment.
    dosmemput(kRelayCode, RELAY_CODE_SIZE, ctx->relayCodeBase);

    // The relay loads this selector into ES to get writable access to its
    // own data area; store it at RELAY_SCRATCH_SEL.
    uint16_t writableSel = (uint16_t)ctx->dosMemSel;
    dosmemput(&writableSel, sizeof writableSel,
              ctx->relayCodeBase + RELAY_SCRATCH_SEL);

    return true;
}
|
|
|
|
|
|
// Allocate one LDT descriptor and set it up as a 16-bit segment.
//
// Parameters:
//   base   - linear base address of the segment.
//   limit  - segment limit, passed straight to __dpmi_set_segment_limit().
//   isCode - true for a readable, non-conforming code segment;
//            false for a writable data segment.
//
// Returns the new selector, or 0 if allocation or any setup step failed.
// On a setup failure the descriptor is freed before returning.
static uint16_t allocDescriptor16(uint32_t base, uint32_t limit, bool isCode)
{
    const int sel = __dpmi_allocate_ldt_descriptors(1);
    if (sel < 0) {
        return 0;
    }

    // Access rights word for a 16-bit segment:
    //   low byte  0xFA (code) / 0xF2 (data):
    //             P=1, DPL=3, S=1, Type=1010 (code) or 0010 (data)
    //   high byte 0x00:
    //             G=0, D=0 (16-bit), AVL=0, limit_hi=0
    const uint16_t rights = isCode ? 0x00FA : 0x00F2;

    // Base, limit, then access rights; short-circuit evaluation stops at the
    // first call that fails, and the descriptor is released once here.
    if (__dpmi_set_segment_base_address(sel, base) < 0 ||
        __dpmi_set_segment_limit(sel, limit) < 0 ||
        __dpmi_set_descriptor_access_rights(sel, rights) < 0) {
        __dpmi_free_ldt_descriptor(sel);
        return 0;
    }

    return (uint16_t)sel;
}
|