// s3Trio.c -- S3 Trio64/Vision864/Vision968 accelerated video driver // // Supports the S3 86C764 (Trio64), 86C765 (Trio64V+), 86C868 (Vision868), // 86C864 (Vision864), 86C964 (Vision964), 86C968 (Vision968), and // 86C732 (Trio32) chipsets. // // The S3 2D acceleration engine (sometimes called the "graphics engine" // or "BitBLT engine") provides hardware-accelerated: // - Solid rectangle fill // - 8x8 mono/color pattern fill // - Screen-to-screen BitBLT // - Mono color expansion (for text rendering) // - Bresenham line draw // - Hardware clipping rectangle // - 64x64 two-color hardware cursor // // Register access: // The S3 extended registers are accessed through CRTC index/data // ports (0x3D4/0x3D5) at indices 0x30-0x6D. These must be unlocked // by writing specific key values to CR38 and CR39. // // The 2D engine registers are at I/O ports 0x82E8-0xBEE8 (legacy) // or via MMIO at the linear framebuffer base + 0x1000000 on newer // chips (Trio64+). We use MMIO when available (Trio64, ViRGE) for // faster register access, falling back to I/O on older Vision chips. // // VESA mode setting: // We use VBE BIOS calls for mode setting rather than programming // CRTC timings directly. This is simpler and more reliable across // the S3 chip variants (which have subtly different timing register // layouts). After VESA sets the mode, we unlock the S3 extended // registers and enable the acceleration engine. 
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"

// NOTE(review): the five system include directives in the reviewed text had
// lost their header names (bare "#include"). The names below are
// reconstructed from what this file uses -- confirm against the build.
#include <pc.h>       // outportw / inportw
#include <stdbool.h>  // bool, true, false
#include <stddef.h>   // NULL
#include <stdint.h>   // fixed-width integer types
#include <string.h>   // memset

// ============================================================
// S3 vendor/device IDs
// ============================================================

#define S3_VENDOR_ID        0x5333

#define S3_TRIO32           0x8810
#define S3_TRIO64           0x8811
#define S3_TRIO64V_PLUS     0x8814
#define S3_VISION864        0x88C0
#define S3_VISION864P       0x88C1
#define S3_VISION868        0x8880
#define S3_VISION964        0x88D0
#define S3_VISION968        0x88F0
#define S3_VISION968_ALT    0x88F1
#define S3_VIRGE            0x5631
#define S3_VIRGE_VX         0x883D
#define S3_VIRGE_DX         0x8A01
#define S3_VIRGE_GX2        0x8A10
#define S3_VIRGE_MX         0x8C01
#define S3_VIRGE_MXP        0x8C03
#define S3_SAVAGE3D         0x8A20
#define S3_SAVAGE3D_MV      0x8A21
#define S3_SAVAGE4          0x8A22
#define S3_SAVAGE_MX        0x8C10
#define S3_SAVAGE_MX_MV     0x8C11
#define S3_SAVAGE_IX        0x8C12
#define S3_SAVAGE_IX_MV     0x8C13
#define S3_SAVAGE_2000      0x9102

// Flat list of (vendor, device) pairs handed to pciFindDeviceList().
// Terminated by a {0, 0} pair.
static const uint16_t sS3DeviceIds[] = {
    S3_VENDOR_ID, S3_TRIO32,
    S3_VENDOR_ID, S3_TRIO64,
    S3_VENDOR_ID, S3_TRIO64V_PLUS,
    S3_VENDOR_ID, S3_VIRGE,
    S3_VENDOR_ID, S3_VIRGE_VX,
    S3_VENDOR_ID, S3_VIRGE_DX,
    S3_VENDOR_ID, S3_VIRGE_GX2,
    S3_VENDOR_ID, S3_VIRGE_MX,
    S3_VENDOR_ID, S3_VIRGE_MXP,
    S3_VENDOR_ID, S3_SAVAGE3D,
    S3_VENDOR_ID, S3_SAVAGE3D_MV,
    S3_VENDOR_ID, S3_SAVAGE4,
    S3_VENDOR_ID, S3_SAVAGE_MX,
    S3_VENDOR_ID, S3_SAVAGE_MX_MV,
    S3_VENDOR_ID, S3_SAVAGE_IX,
    S3_VENDOR_ID, S3_SAVAGE_IX_MV,
    S3_VENDOR_ID, S3_SAVAGE_2000,
    S3_VENDOR_ID, S3_VISION864,
    S3_VENDOR_ID, S3_VISION864P,
    S3_VENDOR_ID, S3_VISION868,
    S3_VENDOR_ID, S3_VISION964,
    S3_VENDOR_ID, S3_VISION968,
    S3_VENDOR_ID, S3_VISION968_ALT,
    0, 0
};

// ============================================================
// S3 extended CRTC register indices
// ============================================================

#define S3_CR30_CHIP_ID             0x30
#define S3_CR31_MEM_CONFIG          0x31
#define S3_CR33_BACKWARD_COMPAT     0x33
#define S3_CR34_BACKWARD_COMPAT     0x34
#define S3_CR35_CRTC_LOCK           0x35
#define S3_CR38_LOCK_1              0x38  // unlock with 0x48
#define S3_CR39_LOCK_2              0x39  // unlock with 0xA5
#define S3_CR40_SYS_CONFIG          0x40
#define S3_CR40_ENGINE_ENABLE       0x01  // bit 0: enable graphics engine
#define S3_CR42_MODE_CONTROL        0x42
#define S3_CR43_EXT_MODE            0x43
#define S3_CR45_HW_CURSOR_MODE      0x45
#define S3_CR46_HW_CURSOR_XHI       0x46
#define S3_CR47_HW_CURSOR_XLO       0x47
#define S3_CR48_HW_CURSOR_YHI       0x48
#define S3_CR49_HW_CURSOR_YLO       0x49
// NOTE(review): S3 data books describe CR4A/CR4B as the cursor
// foreground/background colour stacks and CR4E/CR4F as the cursor pattern
// display start X/Y offsets. The _FG_LO / _BG_HI / _BG_LO names below do
// not match that description -- confirm before relying on the names.
#define S3_CR4A_HW_CURSOR_FG_HI     0x4A
#define S3_CR4B_HW_CURSOR_FG_LO     0x4B
#define S3_CR4C_HW_CURSOR_ADDR_HI   0x4C
#define S3_CR4D_HW_CURSOR_ADDR_LO   0x4D
#define S3_CR4E_HW_CURSOR_BG_HI     0x4E
#define S3_CR4F_HW_CURSOR_BG_LO     0x4F
#define S3_CR50_EXT_SYS_CTRL_1      0x50

// CR50 pixel length bits (bits 5:4)
#define S3_CR50_PIX_8BPP            0x00
#define S3_CR50_PIX_16BPP           0x10
#define S3_CR50_PIX_32BPP           0x30

#define S3_CR51_EXT_SYS_CTRL_2      0x51
#define S3_CR53_EXT_MEM_CTRL_1      0x53
#define S3_CR54_EXT_MEM_CTRL_2      0x54
#define S3_CR55_EXT_DAC_CTRL        0x55
#define S3_CR58_LFB_CTRL            0x58
#define S3_CR59_LFB_ADDR_HI         0x59
#define S3_CR5A_LFB_ADDR_LO         0x5A
#define S3_CR5D_EXT_HCNT            0x5D
#define S3_CR5E_EXT_VCNT            0x5E
#define S3_CR67_EXT_MISC_CTRL_2     0x67
#define S3_CR6A_EXT_MISC_CTRL_3     0x6A

// ============================================================
// S3 2D engine I/O ports (legacy access)
// ============================================================
//
// These are the standard S3 accelerator register ports. All S3
// chips from the 928 onward support this I/O port interface.
#define S3_CUR_Y            0x82E8
#define S3_CUR_X            0x86E8
#define S3_DESTY_AXSTP      0x8AE8  // destination Y / axial step
#define S3_DESTX_DIASTP     0x8EE8  // destination X / diagonal step
#define S3_ERR_TERM         0x92E8
#define S3_MAJ_AXIS_PCNT    0x96E8  // major axis pixel count
#define S3_GP_STAT          0x9AE8  // graphics processor status (read)
#define S3_CMD              0x9AE8  // command register (write; shares the GP_STAT port)
#define S3_SHORT_STROKE     0x9EE8
#define S3_BKGD_COLOR       0xA2E8
#define S3_FRGD_COLOR       0xA6E8
#define S3_WRT_MASK         0xAAE8
#define S3_RD_MASK          0xAEE8
#define S3_COLOR_CMP        0xB2E8
#define S3_BKGD_MIX         0xB6E8
#define S3_FRGD_MIX         0xBAE8
#define S3_MULTIFUNC_CTRL   0xBEE8  // multi-function control register
#define S3_PIX_TRANS        0xE2E8  // pixel data transfer

// ============================================================
// S3 MULTIFUNC_CTRL sub-register indices
// ============================================================
//
// The multi-function control register at 0xBEE8 is a multiplexed
// port: bits 15:12 select the sub-register, bits 11:0 are the value.
// Any value OR'd with an index must therefore be masked to 12 bits.

#define S3_MF_MIN_AXIS_PCNT 0x0000  // minor axis pixel count
#define S3_MF_SCISSORS_T    0x1000  // scissors top
#define S3_MF_SCISSORS_L    0x2000  // scissors left
#define S3_MF_SCISSORS_B    0x3000  // scissors bottom
#define S3_MF_SCISSORS_R    0x4000  // scissors right
#define S3_MF_PIX_CNTL      0xA000  // pixel control
#define S3_MF_MULT_MISC_2   0xD000  // multi misc 2
#define S3_MF_MULT_MISC     0xE000  // multi misc
// Fixed: READ_SEL is index 0xF; 0xE000 (the previous value) is MULT_MISC.
#define S3_MF_READ_SEL      0xF000  // read register select

// ============================================================
// S3 command register bits
// ============================================================

// Command type (bits 15:13 for Trio64)
#define S3_CMD_NOP          0x0000
#define S3_CMD_LINE         0x2000
#define S3_CMD_RECT         0x4000
#define S3_CMD_POLY_LINE    0x6000
#define S3_CMD_NOP2         0x8000
#define S3_CMD_BITBLT       0xC000

// Bit 4: draw (vs. move the current position only)
#define S3_CMD_DRAW         0x0010

// Drawing direction bits (bits 7:5).
// Fixed: in the 8514/A-style command word bit 6 selects the Y-major axis
// and bit 7 selects the positive Y direction; the two values were swapped.
#define S3_CMD_DIR_X_POS    0x0020  // bit 5: X direction positive
#define S3_CMD_DIR_Y_MAJOR  0x0040  // bit 6: Y is major axis
#define S3_CMD_DIR_Y_POS    0x0080  // bit 7: Y direction positive
#define S3_CMD_DIR_X_MAJOR  0x0000  // X is major axis (bit 6 clear)

// Additional command bits
#define S3_CMD_WRITE        0x0001  // bit 0: write operation (0 = read)
#define S3_CMD_PLANAR       0x0002  // planar mode
// Bit 2 has dual meaning depending on command type:
//   For RECT/BITBLT: across-plane (packed pixel) mode
//   For LINE: last-pixel control
// NOTE(review): 8514/A documentation describes bit 2 on line commands as
// "last pixel off" (1 = do NOT draw the final pixel) -- confirm polarity.
#define S3_CMD_ACROSS_PLANE 0x0004
#define S3_CMD_LAST_PIXEL   0x0004
#define S3_CMD_WAIT_CPU     0x0100  // bit 8: wait for CPU data via PIX_TRANS
#define S3_CMD_16BIT_IO     0x0200  // 16-bit pixel transfer
#define S3_CMD_32BIT_IO     0x0400  // 32-bit pixel transfer
#define S3_CMD_BYTE_SWAP    0x1000  // byte swap for pixel transfer

// The colour source for an operation is not selected in the command word;
// it comes from the MIX registers (bits 6:5) and from PIX_CNTL.

// ============================================================
// S3 MIX register values
// ============================================================
//
// The foreground and background MIX registers control what source
// is used and what ROP is applied.
//
// Bits 3:0 = ROP (raster operation / mix type); every value below fits
//            in four bits
// Bits 6:5 = source select:
//   00 = background color register
//   01 = foreground color register
//   10 = pixel data from CPU (via PIX_TRANS)
//   11 = display memory (screen source)

#define S3_MIX_SRC_BKGD         0x00
#define S3_MIX_SRC_FRGD         0x20
#define S3_MIX_SRC_CPU          0x40
#define S3_MIX_SRC_DISPLAY      0x60

// Raster operations, listed in numeric order of the 8514/A-style mix
// type encoding. Fixed: NOT_SRC_AND, NOT_SRC_OR and SRC_OR_DST previously
// carried the codes of other operations (0x06 is XNOR, 0x0B is OR,
// 0x0E is NOT-source AND dest).
#define S3_MIX_ROP_NOT_DST      0x00  // NOT dest
#define S3_MIX_ROP_ZERO         0x01  // 0
#define S3_MIX_ROP_ONE          0x02  // 1
#define S3_MIX_ROP_DST          0x03  // dest (nop)
#define S3_MIX_ROP_NOT_SRC      0x04  // NOT source
#define S3_MIX_ROP_SRC_XOR_DST  0x05  // source XOR dest
#define S3_MIX_ROP_SRC          0x07  // source (copy)
#define S3_MIX_ROP_NOT_SRC_OR   0x09  // NOT source OR dest
#define S3_MIX_ROP_SRC_OR_DST   0x0B  // source OR dest
#define S3_MIX_ROP_SRC_AND_DST  0x0C  // source AND dest
#define S3_MIX_ROP_NOT_SRC_AND  0x0E  // NOT source AND dest

// ============================================================
// S3 PIX_CNTL (pixel control) values
// ============================================================
//
// Written via MULTIFUNC_CTRL with index 0xA000.
// Controls the source of foreground/background mix selection.
//
// Bits 7:6 = mix select:
//   00 = foreground mix always
//   01 = reserved (fixed-pattern select on the original 8514/A)
//   10 = CPU data determines mix (color expansion via PIX_TRANS)
//   11 = display memory determines mix

// Fixed: CPU-data mix select is encoding 10b (0x0080) and display-memory
// mix select is 11b (0x00C0); the previous values were one step too low,
// which put color expansion on the reserved 01b encoding.
#define S3_PIXCNTL_MIX_FRGD     0x0000  // always use foreground mix
#define S3_PIXCNTL_MIX_CPU      0x0080  // CPU data selects mix (color expansion)
#define S3_PIXCNTL_MIX_DISPLAY  0x00C0  // display memory selects mix

// ============================================================
// S3 GP_STAT bits
// ============================================================

#define S3_GP_STAT_BUSY         0x0200  // graphics engine busy
#define S3_GP_STAT_FIFO_EMPTY   0x0400  // all FIFO slots empty

// FIFO occupancy bits, one mask per required slot count.
// NOTE(review): on 8514/A-style engines a slot is free when its bit reads
// 0, i.e. callers spin while (GP_STAT & mask) != 0 -- confirm against
// s3WaitFifo().
#define S3_GP_STAT_FIFO_1       0x0080  // at least 1 FIFO slot free
#define S3_GP_STAT_FIFO_2       0x0040  // at least 2 FIFO slots free
#define S3_GP_STAT_FIFO_3       0x0020  // at least 3 FIFO slots free
#define S3_GP_STAT_FIFO_4       0x0010  // at least 4 FIFO slots free
#define S3_GP_STAT_FIFO_5       0x0008  // at least 5 FIFO slots free
#define S3_GP_STAT_FIFO_6       0x0004  // at least 6 FIFO slots free
#define S3_GP_STAT_FIFO_7       0x0002  // at least 7 FIFO slots free
#define S3_GP_STAT_FIFO_8       0x0001  // at least 8 FIFO slots free

// Hardware cursor constants
#define S3_HW_CURSOR_SIZE       64      // 64x64 pixels
#define S3_HW_CURSOR_BYTES      1024    // 64*64/8 * 2 planes = 1024 bytes

// Maximum wait iterations to prevent infinite loops on broken hardware
#define S3_MAX_IDLE_WAIT        1000000

// MMIO region offset from LFB base (Trio64/ViRGE new-style MMIO)
#define S3_MMIO_OFFSET          0x1000000
#define S3_MMIO_SIZE            0x10000  // 64KB MMIO window

// ============================================================
// S3 MMIO register offset mapping
// ============================================================
//
// The S3 "new MMIO" maps the enhanced registers into a 64KB
// window at LFB + 0x1000000.
The I/O port addresses map to // MMIO offsets as follows: // I/O 0x82E8 -> MMIO 0x82E8 (same offset within 64KB window) // // For 16-bit register access: write to offset as uint16_t // For 32-bit register access: write to offset as uint32_t // ============================================================ // Private driver state // ============================================================ typedef struct { uint32_t lfbPhysAddr; // physical address of LFB uint32_t vramSize; // total VRAM in bytes uint32_t cursorOffset; // VRAM offset for cursor image int32_t bytesPerPixel; int32_t screenPitch; // bytes per scanline bool isTrio; // true for Trio32/64/V+/ViRGE bool useMMIO; // true if MMIO is available volatile uint8_t *mmio; // mapped MMIO base pointer (NULL if I/O mode) DpmiMappingT lfbMapping; DpmiMappingT mmioMapping; } S3PrivateT; // ============================================================ // Prototypes // ============================================================ static void s3BitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h); static void s3ColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg); static bool s3Detect(AccelDriverT *drv); static void s3HostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h); static bool s3Init(AccelDriverT *drv, const AccelModeRequestT *req); static void s3LineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color); static void s3MoveCursor(AccelDriverT *drv, int32_t x, int32_t y); static void s3RectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color); static void s3RectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg); static void s3SetClip(AccelDriverT *drv, int32_t x, int32_t y, 
int32_t w, int32_t h); static void s3SetCursor(AccelDriverT *drv, const HwCursorImageT *image); static void s3ShowCursor(AccelDriverT *drv, bool visible); static void s3Shutdown(AccelDriverT *drv); static void s3UnlockRegs(void); static void s3WaitFifo(S3PrivateT *priv, int32_t slots); static void s3WaitIdle(AccelDriverT *drv); // ============================================================ // Driver instance // ============================================================ static S3PrivateT sS3Private; static AccelDriverT sS3Driver = { .name = "S3 Trio64", .chipFamily = "s3", .caps = 0, .privData = &sS3Private, .detect = s3Detect, .init = s3Init, .shutdown = s3Shutdown, .waitIdle = s3WaitIdle, .setClip = s3SetClip, .rectFill = s3RectFill, .rectFillPat = s3RectFillPat, .bitBlt = s3BitBlt, .hostBlit = s3HostBlit, .colorExpand = s3ColorExpand, .lineDraw = s3LineDraw, .setCursor = s3SetCursor, .moveCursor = s3MoveCursor, .showCursor = s3ShowCursor, }; // ============================================================ // s3RegisterDriver // ============================================================ // // Called from main() to register the S3 driver with the manager. void s3RegisterDriver(void) { accelRegisterDriver(&sS3Driver); } // ============================================================ // S3 register access helpers // ============================================================ // // When MMIO is available (Trio64, ViRGE, Savage), register access // goes through the MMIO window at LFB + 0x1000000. The I/O port // addresses map directly to MMIO offsets within the 64KB window. // When MMIO is not available (Vision series), we fall back to // I/O port access. // // Using MMIO is faster because: (1) memory writes can be posted // and pipelined by the CPU, (2) no I/O port decode penalty, and // (3) on Pentium+, memory writes are faster than I/O instructions. 
static inline void s3WriteReg16(S3PrivateT *priv, uint16_t port, uint16_t val) { if (priv->useMMIO) { *(volatile uint16_t *)(priv->mmio + (port & 0xFFFF)) = val; } else { outportw(port, val); } } static inline uint16_t s3ReadReg16(S3PrivateT *priv, uint16_t port) { if (priv->useMMIO) { return *(volatile uint16_t *)(priv->mmio + (port & 0xFFFF)); } return inportw(port); } // ============================================================ // s3BitBlt // ============================================================ // // Screen-to-screen BitBLT using the S3 hardware engine. // Handles overlapping source and destination by adjusting the // blit direction. The S3 engine can blit in any of four // directions (positive/negative X/Y). static void s3BitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) { S3PrivateT *priv = (S3PrivateT *)drv->privData; if (w <= 0 || h <= 0) { return; } // Determine blit direction to handle overlapping regions uint16_t cmd = S3_CMD_BITBLT | S3_CMD_DRAW | S3_CMD_ACROSS_PLANE; int32_t sx = srcX; int32_t sy = srcY; int32_t dx = dstX; int32_t dy = dstY; if (dstX <= srcX) { cmd |= S3_CMD_DIR_X_POS; } else { sx += w - 1; dx += w - 1; } if (dstY <= srcY) { cmd |= S3_CMD_DIR_Y_POS; } else { sy += h - 1; dy += h - 1; } s3WaitFifo(priv, 7); // Foreground mix: source = display memory, ROP = copy s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_DISPLAY | S3_MIX_ROP_SRC); s3WriteReg16(priv, S3_WRT_MASK, 0xFFFF); // Pixel control: always foreground mix s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_FRGD); // Source position s3WriteReg16(priv, S3_CUR_X, sx); s3WriteReg16(priv, S3_CUR_Y, sy); // Destination position s3WriteReg16(priv, S3_DESTX_DIASTP, dx); s3WriteReg16(priv, S3_DESTY_AXSTP, dy); s3WaitFifo(priv, 3); // Dimensions (count is pixels - 1) s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, w - 1); s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_MIN_AXIS_PCNT | (h - 1)); // Fire s3WriteReg16(priv, 
S3_CMD, cmd); } // ============================================================ // s3ColorExpand // ============================================================ // // Monochrome-to-color expansion using CPU-driven pixel transfer. // This is used for text rendering: each byte of srcBuf contains // 8 monochrome pixels (MSB first), which the engine expands to // full-color using the foreground and background color registers. // // The S3 engine is set to CPU data mix mode: for each bit in // the transferred data, 1 = use foreground color, 0 = use // background color. static void s3ColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg) { S3PrivateT *priv = (S3PrivateT *)drv->privData; if (w <= 0 || h <= 0) { return; } s3WaitFifo(priv, 8); // Set colors s3WriteReg16(priv, S3_FRGD_COLOR, fg); s3WriteReg16(priv, S3_BKGD_COLOR, bg); // Foreground mix: source = foreground color, ROP = copy s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_FRGD | S3_MIX_ROP_SRC); // Background mix: source = background color, ROP = copy s3WriteReg16(priv, S3_BKGD_MIX, S3_MIX_SRC_BKGD | S3_MIX_ROP_SRC); // Pixel control: CPU data selects fg/bg mix s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_CPU); // Destination and dimensions s3WriteReg16(priv, S3_CUR_X, dstX); s3WriteReg16(priv, S3_CUR_Y, dstY); s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, w - 1); s3WaitFifo(priv, 2); s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_MIN_AXIS_PCNT | (h - 1)); // Command: rectangle, draw, left-to-right top-to-bottom, CPU data uint16_t cmd = S3_CMD_RECT | S3_CMD_DRAW | S3_CMD_ACROSS_PLANE | S3_CMD_DIR_X_POS | S3_CMD_DIR_Y_POS | S3_CMD_16BIT_IO; s3WriteReg16(priv, S3_CMD, cmd); // Transfer monochrome data to the engine one scanline at a time. // The engine expects MSB-first bit order, which matches our // convention. Data must be written to PIX_TRANS in 16-bit words. 
int32_t wordsPerRow = (w + 15) / 16; for (int32_t row = 0; row < h; row++) { const uint8_t *rowData = srcBuf + row * srcPitch; s3WaitFifo(priv, 1); for (int32_t word = 0; word < wordsPerRow; word++) { int32_t byteOff = word * 2; uint8_t hi = (byteOff < srcPitch) ? rowData[byteOff] : 0; uint8_t lo = (byteOff + 1 < srcPitch) ? rowData[byteOff + 1] : 0; s3WriteReg16(priv, S3_PIX_TRANS, (hi << 8) | lo); } } } // ============================================================ // s3Detect // ============================================================ // // Scans PCI for any S3 chip in our supported list. Does not // touch any hardware registers (detect must be side-effect-free). static bool s3Detect(AccelDriverT *drv) { int32_t matchIdx; if (!pciFindDeviceList(sS3DeviceIds, &drv->pciDev, &matchIdx)) { return false; } // Set the driver name based on the specific chip found switch (drv->pciDev.deviceId) { case S3_TRIO32: drv->name = "S3 Trio32"; break; case S3_TRIO64: drv->name = "S3 Trio64"; break; case S3_TRIO64V_PLUS: drv->name = "S3 Trio64V+"; break; case S3_VISION864: case S3_VISION864P: drv->name = "S3 Vision864"; break; case S3_VISION868: drv->name = "S3 Vision868"; break; case S3_VISION964: drv->name = "S3 Vision964"; break; case S3_VIRGE: drv->name = "S3 ViRGE"; break; case S3_VIRGE_VX: drv->name = "S3 ViRGE/VX"; break; case S3_VIRGE_DX: drv->name = "S3 ViRGE/DX"; break; case S3_VIRGE_GX2: drv->name = "S3 ViRGE/GX2"; break; case S3_VIRGE_MX: case S3_VIRGE_MXP: drv->name = "S3 ViRGE/MX"; break; case S3_SAVAGE3D: case S3_SAVAGE3D_MV: drv->name = "S3 Savage3D"; break; case S3_SAVAGE4: drv->name = "S3 Savage4"; break; case S3_SAVAGE_MX: case S3_SAVAGE_MX_MV: drv->name = "S3 Savage/MX"; break; case S3_SAVAGE_IX: case S3_SAVAGE_IX_MV: drv->name = "S3 Savage/IX"; break; case S3_SAVAGE_2000: drv->name = "S3 Savage 2000"; break; case S3_VISION968: case S3_VISION968_ALT: drv->name = "S3 Vision968"; break; default: drv->name = "S3 (unknown)"; break; } return true; } // 
============================================================ // s3HostBlit // ============================================================ // // CPU-to-screen blit via the PIX_TRANS port. Transfers packed // pixel data from system RAM to VRAM through the engine. The // engine handles the destination address calculation and pitch // alignment, so the CPU just streams data. static void s3HostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) { S3PrivateT *priv = (S3PrivateT *)drv->privData; if (w <= 0 || h <= 0) { return; } int32_t bpp = priv->bytesPerPixel; int32_t rowBytes = w * bpp; int32_t wordCount = (rowBytes + 1) / 2; s3WaitFifo(priv, 7); // Foreground mix: source = CPU data, ROP = copy s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_CPU | S3_MIX_ROP_SRC); s3WriteReg16(priv, S3_WRT_MASK, 0xFFFF); // Pixel control: always foreground mix s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_FRGD); // Destination position s3WriteReg16(priv, S3_CUR_X, dstX); s3WriteReg16(priv, S3_CUR_Y, dstY); // Dimensions s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, w - 1); s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_MIN_AXIS_PCNT | (h - 1)); s3WaitFifo(priv, 1); // Command: rectangle, draw, CPU data, left-to-right top-to-bottom s3WriteReg16(priv, S3_CMD, S3_CMD_RECT | S3_CMD_DRAW | S3_CMD_ACROSS_PLANE | S3_CMD_DIR_X_POS | S3_CMD_DIR_Y_POS | S3_CMD_16BIT_IO); // Transfer pixel data row by row through PIX_TRANS for (int32_t row = 0; row < h; row++) { const uint8_t *rowData = srcBuf + row * srcPitch; for (int32_t word = 0; word < wordCount; word++) { int32_t byteOff = word * 2; uint8_t lo = rowData[byteOff]; uint8_t hi = (byteOff + 1 < rowBytes) ? 
rowData[byteOff + 1] : 0; s3WriteReg16(priv, S3_PIX_TRANS, (hi << 8) | lo); } } } // ============================================================ // s3Init // ============================================================ // // Initializes the S3 chip: sets the requested video mode via // VESA, unlocks extended registers, enables the 2D engine, and // maps the linear framebuffer. // // Mode setting strategy: use VESA VBE to set the mode (with LFB // flag bit 14 set), then unlock S3 extended registers and // configure the acceleration engine. This avoids the complexity // of programming S3-specific CRTC timing registers while still // getting full hardware acceleration. static bool s3Init(AccelDriverT *drv, const AccelModeRequestT *req) { S3PrivateT *priv = (S3PrivateT *)drv->privData; memset(priv, 0, sizeof(*priv)); priv->isTrio = (drv->pciDev.deviceId == S3_TRIO32 || drv->pciDev.deviceId == S3_TRIO64 || drv->pciDev.deviceId == S3_TRIO64V_PLUS || drv->pciDev.deviceId == S3_VIRGE || drv->pciDev.deviceId == S3_VIRGE_VX || drv->pciDev.deviceId == S3_VIRGE_DX || drv->pciDev.deviceId == S3_VIRGE_GX2 || drv->pciDev.deviceId == S3_VIRGE_MX || drv->pciDev.deviceId == S3_VIRGE_MXP || drv->pciDev.deviceId == S3_SAVAGE3D || drv->pciDev.deviceId == S3_SAVAGE3D_MV || drv->pciDev.deviceId == S3_SAVAGE4 || drv->pciDev.deviceId == S3_SAVAGE_MX || drv->pciDev.deviceId == S3_SAVAGE_MX_MV || drv->pciDev.deviceId == S3_SAVAGE_IX || drv->pciDev.deviceId == S3_SAVAGE_IX_MV || drv->pciDev.deviceId == S3_SAVAGE_2000); // Determine VRAM size and LFB address from BAR0 uint32_t barSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0); uint32_t bar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0); priv->vramSize = barSize; priv->lfbPhysAddr = bar0 & 0xFFFFFFF0; // Unlock S3 extended registers s3UnlockRegs(); // Cross-check VRAM size from CR36 on Trio chips if (priv->isTrio) { uint8_t cr36 = vgaCrtcRead(0x36); uint32_t ramFromCr36; switch 
((cr36 >> 5) & 0x07) { case 0: ramFromCr36 = 4 * 1024 * 1024; break; case 2: ramFromCr36 = 3 * 1024 * 1024; break; case 4: ramFromCr36 = 2 * 1024 * 1024; break; case 6: ramFromCr36 = 1 * 1024 * 1024; break; default: ramFromCr36 = 1 * 1024 * 1024; break; } if (barSize < 512 * 1024 || barSize > 64 * 1024 * 1024) { priv->vramSize = ramFromCr36; } } // Find and set VESA mode VesaModeResultT vesa; if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) { return false; } // Map LFB via DPMI if (!dpmiMapFramebuffer(priv->lfbPhysAddr, priv->vramSize, &priv->lfbMapping)) { vgaRestoreTextMode(); return false; } // Fill in driver mode info priv->bytesPerPixel = (vesa.bpp + 7) / 8; priv->screenPitch = vesa.pitch; drv->mode.width = vesa.width; drv->mode.height = vesa.height; drv->mode.bpp = vesa.bpp; drv->mode.pitch = vesa.pitch; drv->mode.framebuffer = priv->lfbMapping.ptr; drv->mode.vramSize = priv->vramSize; drv->mode.offscreenBase = vesa.pitch * vesa.height; // Map MMIO region for Trio64/ViRGE (at LFB + 16MB) priv->useMMIO = false; priv->mmio = NULL; if (priv->isTrio) { if (dpmiMapFramebuffer(priv->lfbPhysAddr + S3_MMIO_OFFSET, S3_MMIO_SIZE, &priv->mmioMapping)) { priv->useMMIO = true; priv->mmio = (volatile uint8_t *)priv->mmioMapping.ptr; } } // Re-unlock after mode set (VESA may re-lock) s3UnlockRegs(); // Enable the graphics engine // CR40 bit 0 = enable graphics engine uint8_t cr40 = vgaCrtcRead(S3_CR40_SYS_CONFIG); vgaCrtcWrite(S3_CR40_SYS_CONFIG, cr40 | S3_CR40_ENGINE_ENABLE); // Set up pixel format in CR50 for the engine uint8_t cr50 = vgaCrtcRead(S3_CR50_EXT_SYS_CTRL_1); cr50 &= 0xC0; // clear pixel length bits switch (vesa.bpp) { case 8: cr50 |= S3_CR50_PIX_8BPP; break; case 15: case 16: cr50 |= S3_CR50_PIX_16BPP; break; case 32: cr50 |= S3_CR50_PIX_32BPP; break; } vgaCrtcWrite(S3_CR50_EXT_SYS_CTRL_1, cr50); // Set up hardware cursor location at end of VRAM // Cursor image is 1KB (64x64 2bpp), aligned to 1KB priv->cursorOffset = priv->vramSize - 
S3_HW_CURSOR_BYTES; priv->cursorOffset &= ~(S3_HW_CURSOR_BYTES - 1); // Set capability flags drv->caps = ACAP_RECT_FILL | ACAP_RECT_FILL_PAT | ACAP_BITBLT | ACAP_COLOR_EXPAND | ACAP_HOST_BLIT | ACAP_LINE_DRAW | ACAP_HW_CURSOR | ACAP_CLIP; // Set full-screen clip rectangle s3SetClip(drv, 0, 0, vesa.width, vesa.height); // Wait for engine to be ready s3WaitIdle(drv); return true; } // ============================================================ // s3LineDraw // ============================================================ // // Bresenham line drawing using the S3 hardware engine. // The engine implements the Bresenham algorithm natively -- // we provide the initial error term and step values. static void s3LineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color) { S3PrivateT *priv = (S3PrivateT *)drv->privData; int32_t dx = x2 - x1; int32_t dy = y2 - y1; // Determine octant and make dx/dy positive uint16_t cmd = S3_CMD_LINE | S3_CMD_DRAW | S3_CMD_LAST_PIXEL; if (dx >= 0) { cmd |= S3_CMD_DIR_X_POS; } else { dx = -dx; } if (dy >= 0) { cmd |= S3_CMD_DIR_Y_POS; } else { dy = -dy; } int32_t majAxis; int32_t minAxis; if (dx >= dy) { // X is major axis majAxis = dx; minAxis = dy; } else { // Y is major axis cmd |= S3_CMD_DIR_Y_MAJOR; majAxis = dy; minAxis = dx; } if (majAxis == 0) { return; } // Bresenham parameters: // axialStep = 2 * minAxis // diagonalStep = 2 * (minAxis - majAxis) // errorTerm = 2 * minAxis - majAxis int32_t axialStep = 2 * minAxis; int32_t diagStep = 2 * (minAxis - majAxis); int32_t errTerm = 2 * minAxis - majAxis; s3WaitFifo(priv, 7); s3WriteReg16(priv, S3_FRGD_COLOR, color); s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_FRGD | S3_MIX_ROP_SRC); s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_FRGD); s3WriteReg16(priv, S3_CUR_X, x1); s3WriteReg16(priv, S3_CUR_Y, y1); s3WriteReg16(priv, S3_DESTY_AXSTP, axialStep); s3WriteReg16(priv, S3_DESTX_DIASTP, diagStep); s3WaitFifo(priv, 3); s3WriteReg16(priv, 
S3_ERR_TERM, errTerm); s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, majAxis); s3WriteReg16(priv, S3_CMD, cmd); } // ============================================================ // s3MoveCursor // ============================================================ // // Moves the hardware cursor to the given screen position. // The S3 cursor registers are in CRTC extended registers CR46-CR49. static void s3MoveCursor(AccelDriverT *drv, int32_t x, int32_t y) { (void)drv; // Handle negative coordinates (cursor partially off-screen) // by setting the cursor origin offset in the image if (x < 0) { x = 0; } if (y < 0) { y = 0; } vgaCrtcWrite(S3_CR46_HW_CURSOR_XHI, (x >> 8) & 0x07); vgaCrtcWrite(S3_CR47_HW_CURSOR_XLO, x & 0xFF); vgaCrtcWrite(S3_CR48_HW_CURSOR_YHI, (y >> 8) & 0x07); vgaCrtcWrite(S3_CR49_HW_CURSOR_YLO, y & 0xFF); } // ============================================================ // s3RectFill // ============================================================ // // Solid rectangle fill using the S3 hardware engine. // Sets the foreground color, selects foreground-only mix mode // with copy ROP, then issues a rectangle command. 
// Fills the w x h rectangle whose top-left corner is (x, y) with `color`.
// Empty rectangles are ignored.
//
// NOTE(review): `color` is narrowed to 16 bits by s3WriteReg16(), so the
// upper half of a 32-bit color is dropped, and the command word does not
// set bit 0 (write) -- confirm both against the chip data book.
static void s3RectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    s3WaitFifo(priv, 7);

    // Set foreground color
    s3WriteReg16(priv, S3_FRGD_COLOR, color);

    // Foreground mix: source = foreground color, ROP = copy
    s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_FRGD | S3_MIX_ROP_SRC);

    // Write mask: all bits enabled
    s3WriteReg16(priv, S3_WRT_MASK, 0xFFFF);

    // Pixel control: always use foreground mix
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_FRGD);

    // Starting position
    s3WriteReg16(priv, S3_CUR_X, x);
    s3WriteReg16(priv, S3_CUR_Y, y);

    // Dimensions (count is pixels - 1)
    s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, w - 1);

    s3WaitFifo(priv, 2);

    // The minor-axis count occupies bits 11:0 of a multiplexed register
    // whose top nibble selects the sub-register. Mask it (as s3SetClip
    // does for its values) so an oversized height cannot redirect the
    // write to a different sub-register.
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_MIN_AXIS_PCNT | ((h - 1) & 0x0FFF));

    // Command: rectangle, draw, positive X and Y, packed mode
    s3WriteReg16(priv, S3_CMD,
                 S3_CMD_RECT | S3_CMD_DRAW | S3_CMD_ACROSS_PLANE |
                 S3_CMD_DIR_X_POS | S3_CMD_DIR_Y_POS);
}


// ============================================================
// s3RectFillPat
// ============================================================
//
// 8x8 mono pattern fill using CPU data mix mode. The pattern is
// 8 bytes (one per row, MSB-first), tiled across the rectangle.
// 1-bits use the foreground color, 0-bits use the background.
// Data is fed through PIX_TRANS as 16-bit words, repeating the
// 8-row pattern for the full height of the rectangle.

// Fills the w x h rectangle at (x, y) with an 8x8 monochrome pattern.
// `pattern` points to 8 bytes, one per pattern row; rows repeat every 8
// scanlines counted from the top of the rectangle. Empty rectangles and a
// NULL pattern are ignored.
//
// NOTE(review): fg/bg are narrowed to 16 bits by s3WriteReg16(), and the
// command word sets neither bit 0 (write) nor bit 8 (wait for CPU data),
// which 8514/A-style engines need for PIX_TRANS transfers -- confirm
// against the chip data book.
static void s3RectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    // Bail out before touching the engine: once the command is issued it
    // expects a full rectangle's worth of data.
    if (!pattern) {
        return;
    }

    s3WaitFifo(priv, 8);

    // Set colors
    s3WriteReg16(priv, S3_FRGD_COLOR, fg);
    s3WriteReg16(priv, S3_BKGD_COLOR, bg);

    // Foreground mix: source = foreground color, ROP = copy
    s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_FRGD | S3_MIX_ROP_SRC);

    // Background mix: source = background color, ROP = copy
    s3WriteReg16(priv, S3_BKGD_MIX, S3_MIX_SRC_BKGD | S3_MIX_ROP_SRC);

    // Pixel control: CPU data selects fg/bg mix
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_CPU);

    // Destination and dimensions
    s3WriteReg16(priv, S3_CUR_X, x);
    s3WriteReg16(priv, S3_CUR_Y, y);
    s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, w - 1);

    s3WaitFifo(priv, 2);

    // Masked to 12 bits so the count cannot spill into the sub-register
    // index held in bits 15:12 (s3SetClip masks its values the same way).
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_MIN_AXIS_PCNT | ((h - 1) & 0x0FFF));

    // Command: rectangle, draw, left-to-right top-to-bottom, CPU data
    s3WriteReg16(priv, S3_CMD,
                 S3_CMD_RECT | S3_CMD_DRAW | S3_CMD_ACROSS_PLANE |
                 S3_CMD_DIR_X_POS | S3_CMD_DIR_Y_POS | S3_CMD_16BIT_IO);

    // Feed tiled pattern data through PIX_TRANS.
    // Each row of the pattern is 1 byte (8 pixels), tiled across the width.
    int32_t wordsPerRow = (w + 15) / 16;

    for (int32_t row = 0; row < h; row++) {
        uint8_t patByte = pattern[row & 7];

        s3WaitFifo(priv, 1);

        // The same byte fills both halves of every word, so the pattern
        // repeats every 8 pixels across the row
        for (int32_t word = 0; word < wordsPerRow; word++) {
            s3WriteReg16(priv, S3_PIX_TRANS, (patByte << 8) | patByte);
        }
    }
}


// ============================================================
// s3SetClip
// ============================================================
//
// Programs the hardware scissor rectangle. All subsequent
// drawing operations are clipped to this region.

static void s3SetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    // Inclusive edges of the scissor rectangle, each trimmed to the
    // 12-bit data field that accompanies the selector in MULTIFUNC_CTRL.
    const int32_t left   = x & 0x0FFF;
    const int32_t top    = y & 0x0FFF;
    const int32_t right  = (x + w - 1) & 0x0FFF;
    const int32_t bottom = (y + h - 1) & 0x0FFF;

    // One FIFO slot per scissor register, written left/top/right/bottom.
    s3WaitFifo(priv, 4);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_SCISSORS_L | left);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_SCISSORS_T | top);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_SCISSORS_R | right);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_SCISSORS_B | bottom);
}


// ============================================================
// s3SetCursor
// ============================================================
//
// Uploads a cursor image to VRAM and configures the hardware
// cursor registers. The S3 hardware cursor is 64x64 pixels,
// stored as two bit planes (AND mask and XOR mask) at the
// cursor address in VRAM.
//
// S3 cursor VRAM format:
//   1024 bytes total = 512 bytes AND + 512 bytes XOR
//   Each row: 8 bytes AND mask, 8 bytes XOR mask (interleaved
//   by row on some chips, or plane-sequential on others).
//   For Trio64: rows are interleaved (AND row, XOR row, ...).

static void s3SetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    // No image supplied: hide the cursor and leave VRAM untouched.
    if (!image) {
        s3ShowCursor(drv, false);
        return;
    }

    // Let the engine go idle before the CPU writes into VRAM.
    s3WaitIdle(drv);

    // Destination of the cursor bitmap inside the framebuffer.
    // Each of the 64 rows occupies 16 bytes: 8 AND bytes, then 8 XOR bytes.
    uint8_t *cursorMem = drv->mode.framebuffer + priv->cursorOffset;

    for (int32_t row = 0; row < S3_HW_CURSOR_SIZE; row++) {
        uint8_t *andDst = cursorMem + row * 16;
        uint8_t *xorDst = andDst + 8;

        for (int32_t col = 0; col < 8; col++) {
            // Default to transparent (AND=0xFF, XOR=0x00) for any byte
            // that lies outside the supplied image.
            uint8_t andByte = 0xFF;
            uint8_t xorByte = 0x00;

            if (row < image->height && col < (image->width + 7) / 8) {
                // Source masks are indexed with an 8-byte row stride.
                int32_t srcIdx = row * 8 + col;

                andByte = image->andMask[srcIdx];
                xorByte = image->xorMask[srcIdx];
            }

            andDst[col] = andByte;
            xorDst[col] = xorByte;
        }
    }

    // Program the cursor start address, expressed in 1KB units:
    // upper nibble into CR4C, low byte into CR4D.
    uint16_t addrKb = priv->cursorOffset / 1024;

    vgaCrtcWrite(S3_CR4C_HW_CURSOR_ADDR_HI, (addrKb >> 8) & 0x0F);
    vgaCrtcWrite(S3_CR4D_HW_CURSOR_ADDR_LO, addrKb & 0xFF);
}


// ============================================================
// s3ShowCursor
// ============================================================
//
// Enables or disables the hardware cursor via CR45.

static void s3ShowCursor(AccelDriverT *drv, bool visible) {
    (void)drv;

    // Read-modify-write so only bit 0 (cursor enable) changes.
    const uint8_t current = vgaCrtcRead(S3_CR45_HW_CURSOR_MODE);
    const uint8_t updated = visible ? (uint8_t)(current | 0x01)
                                    : (uint8_t)(current & ~0x01);

    vgaCrtcWrite(S3_CR45_HW_CURSOR_MODE, updated);
}


// ============================================================
// s3Shutdown
// ============================================================
//
// Restores text mode and cleans up. The VESA/VGA BIOS text mode
// restore handles resetting all the S3-specific registers.

static void s3Shutdown(AccelDriverT *drv) { S3PrivateT *priv = (S3PrivateT *)drv->privData; s3ShowCursor(drv, false); dpmiUnmapFramebuffer(&priv->mmioMapping); dpmiUnmapFramebuffer(&priv->lfbMapping); vgaRestoreTextMode(); } // ============================================================ // s3UnlockRegs // ============================================================ // // Unlocks S3 extended CRTC registers. Three levels: // CR38 = 0x48 : unlock S3 VGA registers (CR30-CR3F) // CR39 = 0xA5 : unlock S3 system registers (CR40-CR5F) // Also unlock standard CRTC protection for timing regs. static void s3UnlockRegs(void) { vgaCrtcWrite(S3_CR38_LOCK_1, 0x48); vgaCrtcWrite(S3_CR39_LOCK_2, 0xA5); vgaCrtcUnlock(); } // ============================================================ // s3WaitFifo // ============================================================ // // Waits until the S3 command FIFO has at least 'slots' free // entries. The FIFO depth is 8 on Trio64. Reading GP_STAT // returns a bitmask where bits 7:0 indicate how many slots // are free (each bit = one more slot free, from MSB to LSB). static void s3WaitFifo(S3PrivateT *priv, int32_t slots) { // Build the required mask: if we need N slots free, we need // bit (8 - N) to be set in GP_STAT bits 7:0. // Bits: 0x80=1free, 0x40=2free, ..., 0x01=8free uint16_t mask = 0x0100 >> slots; for (int32_t i = 0; i < S3_MAX_IDLE_WAIT; i++) { if (s3ReadReg16(priv, S3_GP_STAT) & mask) { return; } } } // ============================================================ // s3WaitIdle // ============================================================ // // Waits until the S3 graphics engine is completely idle. // The engine is idle when the BUSY bit (bit 9) of GP_STAT is clear // AND the FIFO is empty (bit 10 is set). 
static void s3WaitIdle(AccelDriverT *drv) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    // Bounded poll: at most S3_MAX_IDLE_WAIT reads of GP_STAT, stopping
    // early as soon as the BUSY bit is observed clear. A timeout is not
    // reported to the caller.
    int32_t pollsLeft = S3_MAX_IDLE_WAIT;

    while (pollsLeft-- > 0) {
        const uint16_t status = s3ReadReg16(priv, S3_GP_STAT);

        if ((status & S3_GP_STAT_BUSY) == 0) {
            break;
        }
    }
}