DOS_Video/s3Trio.c
2026-04-13 19:40:45 -05:00

1216 lines
42 KiB
C

// s3Trio.c -- S3 Trio64/Vision864/Vision968 accelerated video driver
//
// Supports the S3 86C764 (Trio64), 86C765 (Trio64V+), 86C868 (Vision868),
// 86C864 (Vision864), 86C964 (Vision964), 86C968 (Vision968), and
// 86C732 (Trio32) chipsets.
//
// The S3 2D acceleration engine (sometimes called the "graphics engine"
// or "BitBLT engine") provides hardware-accelerated:
// - Solid rectangle fill
// - 8x8 mono/color pattern fill
// - Screen-to-screen BitBLT
// - Mono color expansion (for text rendering)
// - Bresenham line draw
// - Hardware clipping rectangle
// - 64x64 two-color hardware cursor
//
// Register access:
// The S3 extended registers are accessed through CRTC index/data
// ports (0x3D4/0x3D5) at indices 0x30-0x6D. These must be unlocked
// by writing specific key values to CR38 and CR39.
//
// The 2D engine registers are at I/O ports 0x82E8-0xBEE8 (legacy)
// or via MMIO at the linear framebuffer base + 0x1000000 on newer
// chips (Trio64+). We use MMIO when available (Trio64, ViRGE) for
// faster register access, falling back to I/O on older Vision chips.
//
// VESA mode setting:
// We use VBE BIOS calls for mode setting rather than programming
// CRTC timings directly. This is simpler and more reliable across
// the S3 chip variants (which have subtly different timing register
// layouts). After VESA sets the mode, we unlock the S3 extended
// registers and enable the acceleration engine.
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/nearptr.h>
// ============================================================
// S3 vendor/device IDs
// ============================================================
//
// PCI vendor ID for S3, followed by the PCI device IDs this driver
// knows about. Every device ID below is paired with S3_VENDOR_ID in
// sS3DeviceIds and mapped to a display name in s3Detect().
#define S3_VENDOR_ID 0x5333
// Trio family
#define S3_TRIO32 0x8810
#define S3_TRIO64 0x8811
#define S3_TRIO64V_PLUS 0x8814
// Vision family
#define S3_VISION864 0x88C0
#define S3_VISION864P 0x88C1
#define S3_VISION868 0x8880
#define S3_VISION964 0x88D0
#define S3_VISION968 0x88F0
#define S3_VISION968_ALT 0x88F1
// ViRGE family
#define S3_VIRGE 0x5631
#define S3_VIRGE_VX 0x883D
#define S3_VIRGE_DX 0x8A01
#define S3_VIRGE_GX2 0x8A10
#define S3_VIRGE_MX 0x8C01
#define S3_VIRGE_MXP 0x8C03
// Savage family
#define S3_SAVAGE3D 0x8A20
#define S3_SAVAGE3D_MV 0x8A21
#define S3_SAVAGE4 0x8A22
#define S3_SAVAGE_MX 0x8C10
#define S3_SAVAGE_MX_MV 0x8C11
#define S3_SAVAGE_IX 0x8C12
#define S3_SAVAGE_IX_MV 0x8C13
#define S3_SAVAGE_2000 0x9102
// PCI match table handed to pciFindDeviceList() by s3Detect().
// The array is a flat sequence of (vendor ID, device ID) pairs; the
// trailing 0, 0 pair marks the end of the list.
static const uint16_t sS3DeviceIds[] = {
    // Trio
    S3_VENDOR_ID, S3_TRIO32,
    S3_VENDOR_ID, S3_TRIO64,
    S3_VENDOR_ID, S3_TRIO64V_PLUS,

    // ViRGE
    S3_VENDOR_ID, S3_VIRGE,
    S3_VENDOR_ID, S3_VIRGE_VX,
    S3_VENDOR_ID, S3_VIRGE_DX,
    S3_VENDOR_ID, S3_VIRGE_GX2,
    S3_VENDOR_ID, S3_VIRGE_MX,
    S3_VENDOR_ID, S3_VIRGE_MXP,

    // Savage
    S3_VENDOR_ID, S3_SAVAGE3D,
    S3_VENDOR_ID, S3_SAVAGE3D_MV,
    S3_VENDOR_ID, S3_SAVAGE4,
    S3_VENDOR_ID, S3_SAVAGE_MX,
    S3_VENDOR_ID, S3_SAVAGE_MX_MV,
    S3_VENDOR_ID, S3_SAVAGE_IX,
    S3_VENDOR_ID, S3_SAVAGE_IX_MV,
    S3_VENDOR_ID, S3_SAVAGE_2000,

    // Vision
    S3_VENDOR_ID, S3_VISION864,
    S3_VENDOR_ID, S3_VISION864P,
    S3_VENDOR_ID, S3_VISION868,
    S3_VENDOR_ID, S3_VISION964,
    S3_VENDOR_ID, S3_VISION968,
    S3_VENDOR_ID, S3_VISION968_ALT,

    // End-of-list sentinel
    0, 0
};
// ============================================================
// S3 extended CRTC register indices
// ============================================================
//
// Index values for the S3 extended CRTC registers. In this file they
// are used through vgaCrtcRead()/vgaCrtcWrite().
#define S3_CR30_CHIP_ID 0x30
#define S3_CR31_MEM_CONFIG 0x31
#define S3_CR33_BACKWARD_COMPAT 0x33
#define S3_CR34_BACKWARD_COMPAT 0x34
#define S3_CR35_CRTC_LOCK 0x35
#define S3_CR38_LOCK_1 0x38 // unlock with 0x48
#define S3_CR39_LOCK_2 0x39 // unlock with 0xA5
#define S3_CR40_SYS_CONFIG 0x40
#define S3_CR40_ENGINE_ENABLE 0x01 // bit 0: enable graphics engine
#define S3_CR42_MODE_CONTROL 0x42
#define S3_CR43_EXT_MODE 0x43
// Hardware cursor registers. s3MoveCursor() writes CR46-CR49.
#define S3_CR45_HW_CURSOR_MODE 0x45
#define S3_CR46_HW_CURSOR_XHI 0x46
#define S3_CR47_HW_CURSOR_XLO 0x47
#define S3_CR48_HW_CURSOR_YHI 0x48
#define S3_CR49_HW_CURSOR_YLO 0x49
// NOTE(review): S3 documentation appears to describe CR4A/CR4B as the
// cursor foreground/background colour stacks and CR4E/CR4F as the cursor
// pattern X/Y origin, not as HI/LO colour halves -- confirm against the
// datasheet before relying on the four names below.
#define S3_CR4A_HW_CURSOR_FG_HI 0x4A
#define S3_CR4B_HW_CURSOR_FG_LO 0x4B
#define S3_CR4C_HW_CURSOR_ADDR_HI 0x4C
#define S3_CR4D_HW_CURSOR_ADDR_LO 0x4D
#define S3_CR4E_HW_CURSOR_BG_HI 0x4E
#define S3_CR4F_HW_CURSOR_BG_LO 0x4F
#define S3_CR50_EXT_SYS_CTRL_1 0x50
// CR50 pixel length bits (bits 5:4)
// These are field values to OR into CR50 after bits 5:4 have been cleared.
#define S3_CR50_PIX_8BPP 0x00
#define S3_CR50_PIX_16BPP 0x10
#define S3_CR50_PIX_32BPP 0x30
#define S3_CR51_EXT_SYS_CTRL_2 0x51
#define S3_CR53_EXT_MEM_CTRL_1 0x53
#define S3_CR54_EXT_MEM_CTRL_2 0x54
#define S3_CR55_EXT_DAC_CTRL 0x55
#define S3_CR58_LFB_CTRL 0x58
#define S3_CR59_LFB_ADDR_HI 0x59
#define S3_CR5A_LFB_ADDR_LO 0x5A
#define S3_CR5D_EXT_HCNT 0x5D
#define S3_CR5E_EXT_VCNT 0x5E
#define S3_CR67_EXT_MISC_CTRL_2 0x67
#define S3_CR6A_EXT_MISC_CTRL_3 0x6A
// ============================================================
// S3 2D engine I/O ports (legacy access)
// ============================================================
//
// These are the standard S3 accelerator register ports. All S3
// chips from the 928 onward support this I/O port interface.
//
// The same 16-bit values double as byte offsets into the MMIO window
// when s3WriteReg16()/s3ReadReg16() run in MMIO mode.
#define S3_CUR_Y 0x82E8
#define S3_CUR_X 0x86E8
#define S3_DESTY_AXSTP 0x8AE8 // destination Y / axial step
#define S3_DESTX_DIASTP 0x8EE8 // destination X / diagonal step
#define S3_ERR_TERM 0x92E8
#define S3_MAJ_AXIS_PCNT 0x96E8 // major axis pixel count
// S3_GP_STAT and S3_CMD intentionally share one address: reading it
// returns engine status, writing it issues a command.
#define S3_GP_STAT 0x9AE8 // graphics processor status
#define S3_CMD 0x9AE8 // command register (write)
#define S3_SHORT_STROKE 0x9EE8
#define S3_BKGD_COLOR 0xA2E8
#define S3_FRGD_COLOR 0xA6E8
#define S3_WRT_MASK 0xAAE8
#define S3_RD_MASK 0xAEE8
#define S3_COLOR_CMP 0xB2E8
#define S3_BKGD_MIX 0xB6E8
#define S3_FRGD_MIX 0xBAE8
#define S3_MULTIFUNC_CTRL 0xBEE8 // multi-function control register
#define S3_PIX_TRANS 0xE2E8 // pixel data transfer
// ============================================================
// S3 MULTIFUNC_CTRL sub-register indices
// ============================================================
//
// The multi-function control register at 0xBEE8 is a multiplexed
// port: bits 15:12 select the sub-register, bits 11:0 are the value.
// Callers must keep the value within 12 bits; anything wider spills
// into the selector and addresses a different sub-register.
#define S3_MF_MIN_AXIS_PCNT 0x0000 // minor axis pixel count
#define S3_MF_SCISSORS_T    0x1000 // scissors top
#define S3_MF_SCISSORS_L    0x2000 // scissors left
#define S3_MF_SCISSORS_B    0x3000 // scissors bottom
#define S3_MF_SCISSORS_R    0x4000 // scissors right
#define S3_MF_PIX_CNTL      0xA000 // pixel control
#define S3_MF_MULT_MISC_2   0xD000 // multi misc 2
#define S3_MF_MULT_MISC     0xE000 // multi misc
// Read register select is index 0xF; index 0xE is MULT_MISC above.
#define S3_MF_READ_SEL      0xF000 // read register select
// ============================================================
// S3 command register bits
// ============================================================
//
// Layout of the 16-bit word written to S3_CMD:
//   15:13  command type
//   12     byte order for CPU pixel transfers
//   10:9   CPU pixel transfer width
//   8      wait for CPU data on PIX_TRANS
//   7      Y direction positive
//   6      Y is the major axis
//   5      X direction positive
//   4      draw (vs. move)
//   2:1    pixel mode / last-pixel control (see note below)
//   0      write (1) or read (0) operation

// Command type (bits 15:13 for Trio64)
#define S3_CMD_NOP          0x0000
#define S3_CMD_LINE         0x2000
#define S3_CMD_RECT         0x4000
#define S3_CMD_POLY_LINE    0x6000
#define S3_CMD_NOP2         0x8000
#define S3_CMD_BITBLT       0xC000

// Draw enable (bit 4) and drawing direction (bits 7:5).
// Bit 7 is "Y positive" and bit 6 is "Y major"; having the two swapped
// makes every top-to-bottom command run as Y-major / Y-negative.
#define S3_CMD_DRAW         0x0010 // draw (vs. move)
#define S3_CMD_DIR_X_POS    0x0020 // X direction positive
#define S3_CMD_DIR_Y_MAJOR  0x0040 // Y is major axis
#define S3_CMD_DIR_X_MAJOR  0x0000 // X is major axis (bit 6 clear)
#define S3_CMD_DIR_Y_POS    0x0080 // Y direction positive

// Additional command bits
#define S3_CMD_WRITE        0x0001 // write operation (0 = read)
#define S3_CMD_PLANAR       0x0002 // planar mode
// Bit 2 has dual meaning depending on command type:
//   For RECT/BITBLT: across-plane (packed pixel) mode
//   For LINE: include last pixel
// NOTE(review): the S3 datasheet appears to assign across-the-plane mode
// to bit 1 and "last pixel OFF" to bit 2. Both names keep their existing
// value here because every command issuer ORs them in; changing the value
// needs an audit of those call sites in the same change.
#define S3_CMD_ACROSS_PLANE 0x0004
#define S3_CMD_LAST_PIXEL   0x0004
#define S3_CMD_WAIT_CPU     0x0100 // engine waits for CPU data on PIX_TRANS
#define S3_CMD_16BIT_IO     0x0200 // 16-bit pixel transfer
#define S3_CMD_32BIT_IO     0x0400 // 32-bit pixel transfer
#define S3_CMD_BYTE_SWAP    0x1000 // byte swap for pixel transfer
// NOTE(review): S3_CMD_WRITE and S3_CMD_WAIT_CPU are defined for
// completeness; confirm whether the command issuers need to OR them in
// (WRITE for all drawing commands, WAIT_CPU for PIX_TRANS transfers).

// The colour source for a command is not selected in the command word;
// it comes from the MIX registers and from PIX_CNTL.
// ============================================================
// S3 MIX register values
// ============================================================
//
// The foreground and background MIX registers control what source
// is used and what ROP is applied.
//
// Bits 4:0 = ROP (raster operation)
// Bits 6:5 = source select:
//   00 = background color register
//   01 = foreground color register
//   10 = pixel data from CPU (via PIX_TRANS)
//   11 = display memory (screen source)
#define S3_MIX_SRC_BKGD        0x00
#define S3_MIX_SRC_FRGD        0x20
#define S3_MIX_SRC_CPU         0x40
#define S3_MIX_SRC_DISPLAY     0x60

// Common raster operations (bits 4:0), listed in numeric order.
// The engine's 16 mix codes are:
//   0 ~D   1 0    2 1    3 D    4 ~S    5 S^D   6 ~(S^D)  7 S
//   8 ~S|~D  9 ~S|D  A S|~D  B S|D  C S&D  D S&~D  E ~S&D  F ~S&~D
#define S3_MIX_ROP_NOT_DST     0x00 // NOT dest
#define S3_MIX_ROP_ZERO        0x01 // 0
#define S3_MIX_ROP_ONE         0x02 // 1
#define S3_MIX_ROP_DST         0x03 // dest (nop)
#define S3_MIX_ROP_NOT_SRC     0x04 // NOT source
#define S3_MIX_ROP_SRC_XOR_DST 0x05 // source XOR dest
#define S3_MIX_ROP_SRC         0x07 // source (copy)
#define S3_MIX_ROP_NOT_SRC_OR  0x09 // NOT source OR dest
#define S3_MIX_ROP_SRC_OR_DST  0x0B // source OR dest
#define S3_MIX_ROP_SRC_AND_DST 0x0C // source AND dest
#define S3_MIX_ROP_NOT_SRC_AND 0x0E // NOT source AND dest
// ============================================================
// S3 PIX_CNTL (pixel control) values
// ============================================================
//
// Written via MULTIFUNC_CTRL with index 0xA000.
// Selects, per pixel, whether the foreground or the background mix
// register is applied.
//
// Bits 7:6 = mix select:
//   00 = foreground mix always
//   01 = reserved / fixed pattern
//   10 = CPU data determines mix (mono color expansion via PIX_TRANS)
//   11 = display memory determines mix
#define S3_PIXCNTL_MIX_FRGD    0x0000 // always use foreground mix
#define S3_PIXCNTL_MIX_CPU     0x0080 // CPU data selects mix (color expansion)
#define S3_PIXCNTL_MIX_DISPLAY 0x00C0 // display memory selects mix
// ============================================================
// S3 GP_STAT bits
// ============================================================
//
// Bit masks for the 16-bit status word read from S3_GP_STAT.
#define S3_GP_STAT_BUSY 0x0200 // graphics engine busy
#define S3_GP_STAT_FIFO_EMPTY 0x0400 // all FIFO slots empty
// NOTE(review): the FIFO_n comments below do not say whether "free" is
// signalled by the bit being set or clear -- confirm the polarity against
// s3WaitFifo() and the datasheet.
#define S3_GP_STAT_FIFO_1 0x0080 // at least 1 FIFO slot free
#define S3_GP_STAT_FIFO_2 0x0040 // at least 2 FIFO slots free
#define S3_GP_STAT_FIFO_3 0x0020 // at least 3 FIFO slots free
#define S3_GP_STAT_FIFO_4 0x0010 // at least 4 FIFO slots free
#define S3_GP_STAT_FIFO_5 0x0008 // at least 5 FIFO slots free
#define S3_GP_STAT_FIFO_6 0x0004 // at least 6 FIFO slots free
#define S3_GP_STAT_FIFO_7 0x0002 // at least 7 FIFO slots free
#define S3_GP_STAT_FIFO_8 0x0001 // at least 8 FIFO slots free
// Hardware cursor constants
#define S3_HW_CURSOR_SIZE 64 // 64x64 pixels
#define S3_HW_CURSOR_BYTES 1024 // 64*64/8 * 2 planes = 1024 bytes
// Maximum wait iterations to prevent infinite loops on broken hardware
#define S3_MAX_IDLE_WAIT 1000000
// MMIO region offset from LFB base (Trio64/ViRGE new-style MMIO)
// s3Init() maps S3_MMIO_SIZE bytes at lfbPhysAddr + S3_MMIO_OFFSET.
#define S3_MMIO_OFFSET 0x1000000
#define S3_MMIO_SIZE 0x10000 // 64KB MMIO window
// ============================================================
// S3 MMIO register offset mapping
// ============================================================
//
// The S3 "new MMIO" maps the enhanced registers into a 64KB
// window at LFB + 0x1000000. The I/O port addresses map to
// MMIO offsets as follows:
// I/O 0x82E8 -> MMIO 0x82E8 (same offset within 64KB window)
//
// For 16-bit register access: write to offset as uint16_t
// For 32-bit register access: write to offset as uint32_t
// ============================================================
// Private driver state
// ============================================================
//
// Per-driver state reached through AccelDriverT::privData. s3Init()
// zeroes the whole structure and then fills it in.
typedef struct {
uint32_t lfbPhysAddr; // physical address of LFB (BAR0 with the low 4 bits masked off)
uint32_t vramSize; // total VRAM in bytes (BAR0 size, or the CR36 value when the BAR size is implausible)
uint32_t cursorOffset; // VRAM offset for cursor image (last 1KB-aligned slot in VRAM)
int32_t bytesPerPixel; // (bpp + 7) / 8 of the active VESA mode
int32_t screenPitch; // bytes per scanline
bool isTrio; // true for the Trio, ViRGE and Savage device IDs (see s3Init); false for Vision parts
bool useMMIO; // true if MMIO is available (the MMIO window was mapped)
volatile uint8_t *mmio; // mapped MMIO base pointer (NULL if I/O mode)
DpmiMappingT lfbMapping; // DPMI mapping of the linear framebuffer
DpmiMappingT mmioMapping; // DPMI mapping of the MMIO window (only filled in when useMMIO is true)
} S3PrivateT;
// ============================================================
// Prototypes
// ============================================================
//
// Forward declarations for every file-local function, in alphabetical
// order. They are needed because sS3Driver (below) takes the addresses
// of the driver entry points before their definitions appear.
static void s3BitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static void s3ColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg);
static bool s3Detect(AccelDriverT *drv);
static void s3HostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool s3Init(AccelDriverT *drv, const AccelModeRequestT *req);
static void s3LineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color);
static void s3MoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void s3RectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void s3RectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg);
static void s3SetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void s3SetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void s3ShowCursor(AccelDriverT *drv, bool visible);
static void s3Shutdown(AccelDriverT *drv);
static void s3UnlockRegs(void);
static void s3WaitFifo(S3PrivateT *priv, int32_t slots);
static void s3WaitIdle(AccelDriverT *drv);
// ============================================================
// Driver instance
// ============================================================
//
// The single S3 driver object and its private state. sS3Driver is what
// s3RegisterDriver() hands to accelRegisterDriver().
static S3PrivateT sS3Private;
static AccelDriverT sS3Driver = {
.name = "S3 Trio64", // placeholder; s3Detect() replaces it with the detected chip's name
.chipFamily = "s3",
.caps = 0, // no capabilities until s3Init() sets the ACAP_* flags
.privData = &sS3Private,
.detect = s3Detect,
.init = s3Init,
.shutdown = s3Shutdown,
.waitIdle = s3WaitIdle,
.setClip = s3SetClip,
.rectFill = s3RectFill,
.rectFillPat = s3RectFillPat,
.bitBlt = s3BitBlt,
.hostBlit = s3HostBlit,
.colorExpand = s3ColorExpand,
.lineDraw = s3LineDraw,
.setCursor = s3SetCursor,
.moveCursor = s3MoveCursor,
.showCursor = s3ShowCursor,
};
// ============================================================
// s3RegisterDriver
// ============================================================
//
// Called from main() to register the S3 driver with the manager.
// This is the only non-static function in this part of the file; it
// passes the address of the file-local sS3Driver to accelRegisterDriver().
void s3RegisterDriver(void) {
accelRegisterDriver(&sS3Driver);
}
// ============================================================
// S3 register access helpers
// ============================================================
//
// All 2D engine register traffic goes through s3WriteReg16() and
// s3ReadReg16(). Each one checks priv->useMMIO:
//   - MMIO mode: the 16-bit port number is used unchanged as a byte
//     offset from priv->mmio (the 64KB window mapped by s3Init at
//     LFB + 0x1000000).
//   - I/O mode: the access is a plain 16-bit port read/write.
//
// MMIO is preferred where available because memory writes can be
// posted and pipelined by the CPU and avoid the I/O port decode
// penalty.

// Writes the 16-bit value `val` to engine register `port`.
static inline void s3WriteReg16(S3PrivateT *priv, uint16_t port, uint16_t val) {
    if (!priv->useMMIO) {
        outportw(port, val);
        return;
    }

    // A uint16_t port is already confined to the 64KB window.
    volatile uint16_t *reg = (volatile uint16_t *)(priv->mmio + port);
    *reg = val;
}
// Reads the 16-bit engine register `port`, through the MMIO window when
// priv->useMMIO is set and through the I/O port otherwise.
static inline uint16_t s3ReadReg16(S3PrivateT *priv, uint16_t port) {
    uint16_t value;

    if (priv->useMMIO) {
        // A uint16_t port is already confined to the 64KB window.
        const volatile uint16_t *reg = (const volatile uint16_t *)(priv->mmio + port);
        value = *reg;
    } else {
        value = inportw(port);
    }

    return value;
}
// ============================================================
// s3BitBlt
// ============================================================
//
// Screen-to-screen BitBLT using the S3 hardware engine.
//
//   drv          driver instance (privData must be an S3PrivateT)
//   srcX, srcY   top-left corner of the source rectangle
//   dstX, dstY   top-left corner of the destination rectangle
//   w, h         size in pixels; nothing is drawn if either is <= 0
//
// Overlapping source and destination are handled by choosing the blit
// direction per axis: when the destination lies to the right of (or
// below) the source, the copy runs right-to-left (or bottom-to-top) and
// the start coordinates move to the far edge of the rectangle.
//
// The function only queues the command; it does not wait for the
// engine to finish.
static void s3BitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    // The minor-axis count shares the MULTIFUNC_CTRL word with the
    // sub-register selector in bits 15:12, so (h - 1) must fit in 12
    // bits. Clamp instead of letting a tall blit rewrite the selector.
    if (h > 0x1000) {
        h = 0x1000;
    }

    // Determine blit direction to handle overlapping regions
    uint16_t cmd = S3_CMD_BITBLT | S3_CMD_DRAW | S3_CMD_ACROSS_PLANE;
    int32_t  sx  = srcX;
    int32_t  sy  = srcY;
    int32_t  dx  = dstX;
    int32_t  dy  = dstY;

    if (dstX <= srcX) {
        cmd |= S3_CMD_DIR_X_POS;
    } else {
        // Copy right-to-left: start at the right edge
        sx += w - 1;
        dx += w - 1;
    }

    if (dstY <= srcY) {
        cmd |= S3_CMD_DIR_Y_POS;
    } else {
        // Copy bottom-to-top: start at the bottom edge
        sy += h - 1;
        dy += h - 1;
    }

    s3WaitFifo(priv, 7);

    // Foreground mix: source = display memory, ROP = copy
    s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_DISPLAY | S3_MIX_ROP_SRC);
    s3WriteReg16(priv, S3_WRT_MASK, 0xFFFF);

    // Pixel control: always foreground mix
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_FRGD);

    // Source position
    s3WriteReg16(priv, S3_CUR_X, sx);
    s3WriteReg16(priv, S3_CUR_Y, sy);

    // Destination position
    s3WriteReg16(priv, S3_DESTX_DIASTP, dx);
    s3WriteReg16(priv, S3_DESTY_AXSTP, dy);

    s3WaitFifo(priv, 3);

    // Dimensions (count is pixels - 1)
    s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, w - 1);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_MIN_AXIS_PCNT | ((h - 1) & 0x0FFF));

    // Fire
    s3WriteReg16(priv, S3_CMD, cmd);
}
// ============================================================
// s3ColorExpand
// ============================================================
//
// Monochrome-to-color expansion using CPU-driven pixel transfer.
// This is used for text rendering: each byte of srcBuf contains
// 8 monochrome pixels (MSB first), which the engine expands to
// full color: 1 bits take the foreground color, 0 bits the
// background color.
//
//   drv          driver instance (privData must be an S3PrivateT)
//   srcBuf       monochrome bitmap; nothing is drawn if NULL
//   srcPitch     bytes between the starts of consecutive source rows
//   dstX, dstY   top-left corner of the destination rectangle
//   w, h         size in pixels; nothing is drawn if either is <= 0
//   fg, bg       colors for 1 bits and 0 bits
//
// Note: fg and bg go through 16-bit register writes, so only their
// low 16 bits reach the color registers.
static void s3ColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    if (srcBuf == NULL || w <= 0 || h <= 0) {
        return;
    }

    // The minor-axis count shares the MULTIFUNC_CTRL word with the
    // sub-register selector in bits 15:12, so (h - 1) must fit in 12
    // bits. Clamping here also keeps the number of rows streamed below
    // equal to the number of rows the engine was told to expect.
    if (h > 0x1000) {
        h = 0x1000;
    }

    s3WaitFifo(priv, 8);

    // Set colors
    s3WriteReg16(priv, S3_FRGD_COLOR, fg);
    s3WriteReg16(priv, S3_BKGD_COLOR, bg);

    // Foreground mix: source = foreground color, ROP = copy
    s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_FRGD | S3_MIX_ROP_SRC);

    // Background mix: source = background color, ROP = copy
    s3WriteReg16(priv, S3_BKGD_MIX, S3_MIX_SRC_BKGD | S3_MIX_ROP_SRC);

    // Pixel control: CPU data selects fg/bg mix
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_CPU);

    // Destination and dimensions (count is pixels - 1)
    s3WriteReg16(priv, S3_CUR_X, dstX);
    s3WriteReg16(priv, S3_CUR_Y, dstY);
    s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, w - 1);

    s3WaitFifo(priv, 2);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_MIN_AXIS_PCNT | ((h - 1) & 0x0FFF));

    // Command: rectangle, draw, left-to-right top-to-bottom, CPU data
    uint16_t cmd = S3_CMD_RECT | S3_CMD_DRAW | S3_CMD_ACROSS_PLANE
                 | S3_CMD_DIR_X_POS | S3_CMD_DIR_Y_POS
                 | S3_CMD_16BIT_IO;
    s3WriteReg16(priv, S3_CMD, cmd);

    // Transfer monochrome data to the engine one scanline at a time,
    // as 16-bit words. The earlier source byte goes into the high half
    // of each word; bytes at or beyond srcPitch are sent as zero.
    int32_t wordsPerRow = (w + 15) / 16;

    for (int32_t row = 0; row < h; row++) {
        const uint8_t *rowData = srcBuf + row * srcPitch;

        s3WaitFifo(priv, 1);

        for (int32_t word = 0; word < wordsPerRow; word++) {
            int32_t byteOff = word * 2;
            uint8_t hi = (byteOff < srcPitch) ? rowData[byteOff] : 0;
            uint8_t lo = (byteOff + 1 < srcPitch) ? rowData[byteOff + 1] : 0;

            s3WriteReg16(priv, S3_PIX_TRANS, (hi << 8) | lo);
        }
    }
}
// ============================================================
// s3Detect
// ============================================================
//
// Looks for a supported S3 chip on the PCI bus by passing
// sS3DeviceIds to pciFindDeviceList(), which fills in drv->pciDev.
// No S3 hardware registers are touched here (detect must be
// side-effect-free).
//
// Returns false when no listed device is present. Otherwise sets
// drv->name to the display name for the device ID that was found
// ("S3 (unknown)" when the ID has no entry) and returns true.
static bool s3Detect(AccelDriverT *drv) {
    // Device ID -> display name. IDs that share a name are revisions
    // or variants of the same chip.
    static const struct {
        uint16_t    deviceId;
        const char *label;
    } chipLabels[] = {
        { S3_TRIO32,        "S3 Trio32"      },
        { S3_TRIO64,        "S3 Trio64"      },
        { S3_TRIO64V_PLUS,  "S3 Trio64V+"    },
        { S3_VISION864,     "S3 Vision864"   },
        { S3_VISION864P,    "S3 Vision864"   },
        { S3_VISION868,     "S3 Vision868"   },
        { S3_VISION964,     "S3 Vision964"   },
        { S3_VIRGE,         "S3 ViRGE"       },
        { S3_VIRGE_VX,      "S3 ViRGE/VX"    },
        { S3_VIRGE_DX,      "S3 ViRGE/DX"    },
        { S3_VIRGE_GX2,     "S3 ViRGE/GX2"   },
        { S3_VIRGE_MX,      "S3 ViRGE/MX"    },
        { S3_VIRGE_MXP,     "S3 ViRGE/MX"    },
        { S3_SAVAGE3D,      "S3 Savage3D"    },
        { S3_SAVAGE3D_MV,   "S3 Savage3D"    },
        { S3_SAVAGE4,       "S3 Savage4"     },
        { S3_SAVAGE_MX,     "S3 Savage/MX"   },
        { S3_SAVAGE_MX_MV,  "S3 Savage/MX"   },
        { S3_SAVAGE_IX,     "S3 Savage/IX"   },
        { S3_SAVAGE_IX_MV,  "S3 Savage/IX"   },
        { S3_SAVAGE_2000,   "S3 Savage 2000" },
        { S3_VISION968,     "S3 Vision968"   },
        { S3_VISION968_ALT, "S3 Vision968"   },
    };
    const int32_t labelCount = (int32_t)(sizeof(chipLabels) / sizeof(chipLabels[0]));

    int32_t matchIdx;
    bool    found = pciFindDeviceList(sS3DeviceIds, &drv->pciDev, &matchIdx);

    if (!found) {
        return false;
    }

    // Pick the name for the specific chip; fall back to a generic one
    const char *chipName = "S3 (unknown)";

    for (int32_t i = 0; i < labelCount; i++) {
        if (drv->pciDev.deviceId == chipLabels[i].deviceId) {
            chipName = chipLabels[i].label;
            break;
        }
    }

    drv->name = chipName;
    return true;
}
// ============================================================
// s3HostBlit
// ============================================================
//
// CPU-to-screen blit via the PIX_TRANS port. Transfers packed
// pixel data from system RAM to VRAM through the engine, which
// computes the destination addresses; the CPU only streams data.
//
//   drv          driver instance (privData must be an S3PrivateT)
//   srcBuf       packed pixels in the current mode's format; nothing
//                is drawn if NULL
//   srcPitch     bytes between the starts of consecutive source rows
//   dstX, dstY   top-left corner of the destination rectangle
//   w, h         size in pixels; nothing is drawn if either is <= 0
//
// Each row is sent as (w * bytesPerPixel + 1) / 2 16-bit words, with
// the earlier byte in the low half of each word. An odd trailing byte
// is padded with zero.
static void s3HostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    if (srcBuf == NULL || w <= 0 || h <= 0) {
        return;
    }

    // The minor-axis count shares the MULTIFUNC_CTRL word with the
    // sub-register selector in bits 15:12, so (h - 1) must fit in 12
    // bits. Clamping here also keeps the number of rows streamed below
    // equal to the number of rows the engine was told to expect.
    if (h > 0x1000) {
        h = 0x1000;
    }

    int32_t bpp       = priv->bytesPerPixel;
    int32_t rowBytes  = w * bpp;
    int32_t wordCount = (rowBytes + 1) / 2;

    s3WaitFifo(priv, 7);

    // Foreground mix: source = CPU data, ROP = copy
    s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_CPU | S3_MIX_ROP_SRC);
    s3WriteReg16(priv, S3_WRT_MASK, 0xFFFF);

    // Pixel control: always foreground mix
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_FRGD);

    // Destination position
    s3WriteReg16(priv, S3_CUR_X, dstX);
    s3WriteReg16(priv, S3_CUR_Y, dstY);

    // Dimensions (count is pixels - 1)
    s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, w - 1);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_MIN_AXIS_PCNT | ((h - 1) & 0x0FFF));

    s3WaitFifo(priv, 1);

    // Command: rectangle, draw, CPU data, left-to-right top-to-bottom
    s3WriteReg16(priv, S3_CMD, S3_CMD_RECT | S3_CMD_DRAW | S3_CMD_ACROSS_PLANE
                               | S3_CMD_DIR_X_POS | S3_CMD_DIR_Y_POS
                               | S3_CMD_16BIT_IO);

    // Transfer pixel data row by row through PIX_TRANS
    for (int32_t row = 0; row < h; row++) {
        const uint8_t *rowData = srcBuf + row * srcPitch;

        for (int32_t word = 0; word < wordCount; word++) {
            int32_t byteOff = word * 2;
            uint8_t lo = rowData[byteOff];
            // Pad the last word of an odd-length row with zero
            uint8_t hi = (byteOff + 1 < rowBytes) ? rowData[byteOff + 1] : 0;

            s3WriteReg16(priv, S3_PIX_TRANS, (hi << 8) | lo);
        }
    }
}
// ============================================================
// s3Init
// ============================================================
//
// Initializes the S3 chip for the requested mode.
//
//   drv   driver instance; drv->pciDev must already be filled in by
//         s3Detect(), and drv->privData must point at an S3PrivateT
//   req   requested width, height and bits per pixel
//
// Returns true on success. Returns false if no matching VESA mode
// could be set, or if the framebuffer could not be mapped (in which
// case text mode is restored first).
//
// Steps, in order:
//   1. Read BAR0 for the framebuffer address and size.
//   2. Unlock the extended registers; on Trio-class parts read CR36
//      as a fallback VRAM size.
//   3. Set the mode through VESA and map the framebuffer via DPMI.
//   4. On Trio-class parts try to map the MMIO window at LFB + 16MB.
//   5. Unlock again, enable the engine (CR40), set the engine pixel
//      length (CR50).
//   6. Place the cursor image at the end of VRAM, publish the
//      capability flags, open the clip rectangle to the full screen
//      and wait for the engine to go idle.
//
// Using VESA for the mode set avoids programming the chip-specific
// CRTC timing registers while still allowing hardware acceleration.
static bool s3Init(AccelDriverT *drv, const AccelModeRequestT *req) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    memset(priv, 0, sizeof(*priv));

    // "Trio-class" here means every part that is not a Vision chip
    priv->isTrio = (drv->pciDev.deviceId == S3_TRIO32
                    || drv->pciDev.deviceId == S3_TRIO64
                    || drv->pciDev.deviceId == S3_TRIO64V_PLUS
                    || drv->pciDev.deviceId == S3_VIRGE
                    || drv->pciDev.deviceId == S3_VIRGE_VX
                    || drv->pciDev.deviceId == S3_VIRGE_DX
                    || drv->pciDev.deviceId == S3_VIRGE_GX2
                    || drv->pciDev.deviceId == S3_VIRGE_MX
                    || drv->pciDev.deviceId == S3_VIRGE_MXP
                    || drv->pciDev.deviceId == S3_SAVAGE3D
                    || drv->pciDev.deviceId == S3_SAVAGE3D_MV
                    || drv->pciDev.deviceId == S3_SAVAGE4
                    || drv->pciDev.deviceId == S3_SAVAGE_MX
                    || drv->pciDev.deviceId == S3_SAVAGE_MX_MV
                    || drv->pciDev.deviceId == S3_SAVAGE_IX
                    || drv->pciDev.deviceId == S3_SAVAGE_IX_MV
                    || drv->pciDev.deviceId == S3_SAVAGE_2000);

    // Determine VRAM size and LFB address from BAR0
    uint32_t barSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev,
                                  drv->pciDev.func, PCI_BAR0);
    uint32_t bar0    = pciRead32(drv->pciDev.bus, drv->pciDev.dev,
                                 drv->pciDev.func, PCI_BAR0);

    priv->vramSize    = barSize;
    priv->lfbPhysAddr = bar0 & 0xFFFFFFF0;   // strip the BAR flag bits

    // Unlock S3 extended registers
    s3UnlockRegs();

    // Cross-check VRAM size from CR36 on Trio chips.
    // NOTE(review): the CR36 value is only used when the BAR size is
    // implausible. A BAR describes the decoded aperture, which may be
    // larger than the installed VRAM -- confirm whether CR36 should
    // take precedence.
    if (priv->isTrio) {
        uint8_t  cr36 = vgaCrtcRead(0x36);
        uint32_t ramFromCr36;

        switch ((cr36 >> 5) & 0x07) {
            case 0:  ramFromCr36 = 4 * 1024 * 1024; break;
            case 2:  ramFromCr36 = 3 * 1024 * 1024; break;
            case 4:  ramFromCr36 = 2 * 1024 * 1024; break;
            case 6:  ramFromCr36 = 1 * 1024 * 1024; break;
            default: ramFromCr36 = 1 * 1024 * 1024; break;
        }

        if (barSize < 512 * 1024 || barSize > 64 * 1024 * 1024) {
            priv->vramSize = ramFromCr36;
        }
    }

    // Find and set VESA mode
    VesaModeResultT vesa;
    if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) {
        return false;
    }

    // Map LFB via DPMI
    if (!dpmiMapFramebuffer(priv->lfbPhysAddr, priv->vramSize, &priv->lfbMapping)) {
        vgaRestoreTextMode();
        return false;
    }

    // Fill in driver mode info
    priv->bytesPerPixel = (vesa.bpp + 7) / 8;
    priv->screenPitch   = vesa.pitch;

    drv->mode.width         = vesa.width;
    drv->mode.height        = vesa.height;
    drv->mode.bpp           = vesa.bpp;
    drv->mode.pitch         = vesa.pitch;
    drv->mode.framebuffer   = priv->lfbMapping.ptr;
    drv->mode.vramSize      = priv->vramSize;
    drv->mode.offscreenBase = vesa.pitch * vesa.height;

    // Map MMIO region for Trio64/ViRGE (at LFB + 16MB). If the mapping
    // fails the driver stays in I/O port mode.
    // NOTE(review): nothing here enables MMIO decoding on the chip
    // itself; confirm whether the VESA BIOS leaves it enabled.
    priv->useMMIO = false;
    priv->mmio    = NULL;

    if (priv->isTrio) {
        if (dpmiMapFramebuffer(priv->lfbPhysAddr + S3_MMIO_OFFSET, S3_MMIO_SIZE, &priv->mmioMapping)) {
            priv->useMMIO = true;
            priv->mmio    = (volatile uint8_t *)priv->mmioMapping.ptr;
        }
    }

    // Re-unlock after mode set (VESA may re-lock)
    s3UnlockRegs();

    // Enable the graphics engine
    // CR40 bit 0 = enable graphics engine
    uint8_t cr40 = vgaCrtcRead(S3_CR40_SYS_CONFIG);
    vgaCrtcWrite(S3_CR40_SYS_CONFIG, cr40 | S3_CR40_ENGINE_ENABLE);

    // Set up pixel format in CR50 for the engine.
    // Only bits 5:4 hold the pixel length; every other CR50 bit is
    // left exactly as the mode set programmed it.
    uint8_t cr50 = vgaCrtcRead(S3_CR50_EXT_SYS_CTRL_1);
    cr50 &= 0xCF;   // clear pixel length bits (5:4) only

    switch (vesa.bpp) {
        case 8:
            cr50 |= S3_CR50_PIX_8BPP;
            break;
        case 15:
        case 16:
            cr50 |= S3_CR50_PIX_16BPP;
            break;
        case 32:
            cr50 |= S3_CR50_PIX_32BPP;
            break;
        default:
            // No pixel length is defined here for other depths; the
            // field stays cleared, which is the 8bpp encoding.
            break;
    }

    vgaCrtcWrite(S3_CR50_EXT_SYS_CTRL_1, cr50);

    // Set up hardware cursor location at end of VRAM
    // Cursor image is 1KB (64x64 2bpp), aligned to 1KB
    priv->cursorOffset  = priv->vramSize - S3_HW_CURSOR_BYTES;
    priv->cursorOffset &= ~(S3_HW_CURSOR_BYTES - 1);

    // Set capability flags
    drv->caps = ACAP_RECT_FILL
              | ACAP_RECT_FILL_PAT
              | ACAP_BITBLT
              | ACAP_COLOR_EXPAND
              | ACAP_HOST_BLIT
              | ACAP_LINE_DRAW
              | ACAP_HW_CURSOR
              | ACAP_CLIP;

    // Set full-screen clip rectangle
    s3SetClip(drv, 0, 0, vesa.width, vesa.height);

    // Wait for engine to be ready
    s3WaitIdle(drv);

    return true;
}
// ============================================================
// s3LineDraw
// ============================================================
//
// Draws a line from (x1, y1) to (x2, y2) in `color` with the
// engine's Bresenham line command.
//
// The function works out the octant (sign of each delta and which
// axis is longer), encodes it in the command word, and hands the
// engine the three Bresenham constants:
//   axial step    = 2 * minor
//   diagonal step = 2 * (minor - major)
//   error term    = 2 * minor - major
// followed by the major-axis length.
//
// A line whose end points coincide (major length 0) draws nothing.
// `color` goes through a 16-bit register write, so only its low 16
// bits reach the color register.
static void s3LineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    uint16_t cmd   = S3_CMD_LINE | S3_CMD_DRAW | S3_CMD_LAST_PIXEL;
    int32_t  spanX = x2 - x1;
    int32_t  spanY = y2 - y1;

    // Record the direction of travel on each axis, then work with
    // magnitudes only.
    if (spanX < 0) {
        spanX = -spanX;
    } else {
        cmd |= S3_CMD_DIR_X_POS;
    }

    if (spanY < 0) {
        spanY = -spanY;
    } else {
        cmd |= S3_CMD_DIR_Y_POS;
    }

    // X is the major axis unless the line is strictly steeper than 45 degrees
    const bool    yIsMajor = (spanY > spanX);
    const int32_t longAxis  = yIsMajor ? spanY : spanX;
    const int32_t shortAxis = yIsMajor ? spanX : spanY;

    if (yIsMajor) {
        cmd |= S3_CMD_DIR_Y_MAJOR;
    }

    if (longAxis == 0) {
        return;
    }

    // Bresenham constants
    const int32_t stepAxial    = 2 * shortAxis;
    const int32_t stepDiagonal = 2 * (shortAxis - longAxis);
    const int32_t initialError = 2 * shortAxis - longAxis;

    s3WaitFifo(priv, 7);
    s3WriteReg16(priv, S3_FRGD_COLOR, color);
    s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_FRGD | S3_MIX_ROP_SRC);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_FRGD);
    s3WriteReg16(priv, S3_CUR_X, x1);
    s3WriteReg16(priv, S3_CUR_Y, y1);
    s3WriteReg16(priv, S3_DESTY_AXSTP, stepAxial);
    s3WriteReg16(priv, S3_DESTX_DIASTP, stepDiagonal);

    s3WaitFifo(priv, 3);
    s3WriteReg16(priv, S3_ERR_TERM, initialError);
    s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, longAxis);
    s3WriteReg16(priv, S3_CMD, cmd);
}
// ============================================================
// s3MoveCursor
// ============================================================
//
// Moves the hardware cursor to screen position (x, y) by writing
// the cursor position registers CR46-CR49.
//
// Negative coordinates are clamped to 0, so the cursor stops at the
// top/left screen edge rather than sliding partly off-screen. The
// high-byte registers receive bits 10:8 of each coordinate.
//
// `drv` is unused; the registers are reached through vgaCrtcWrite().
static void s3MoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    (void)drv;

    const int32_t posX = (x < 0) ? 0 : x;
    const int32_t posY = (y < 0) ? 0 : y;

    // Register write order is kept as X high, X low, Y high, Y low
    vgaCrtcWrite(S3_CR46_HW_CURSOR_XHI, (posX >> 8) & 0x07);
    vgaCrtcWrite(S3_CR47_HW_CURSOR_XLO, posX & 0xFF);
    vgaCrtcWrite(S3_CR48_HW_CURSOR_YHI, (posY >> 8) & 0x07);
    vgaCrtcWrite(S3_CR49_HW_CURSOR_YLO, posY & 0xFF);
}
// ============================================================
// s3RectFill
// ============================================================
//
// Solid rectangle fill using the S3 hardware engine.
//
//   drv     driver instance (privData must be an S3PrivateT)
//   x, y    top-left corner of the rectangle
//   w, h    size in pixels; nothing is drawn if either is <= 0
//   color   fill color
//
// Sets the foreground color, selects foreground-only mix mode with
// copy ROP, then issues a rectangle command. The function only
// queues the command; it does not wait for the engine to finish.
//
// Note: `color` goes through a 16-bit register write, so only its
// low 16 bits reach the color register.
static void s3RectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    // The minor-axis count shares the MULTIFUNC_CTRL word with the
    // sub-register selector in bits 15:12, so (h - 1) must fit in 12
    // bits. Clamp instead of letting a tall fill rewrite the selector.
    if (h > 0x1000) {
        h = 0x1000;
    }

    s3WaitFifo(priv, 7);

    // Set foreground color
    s3WriteReg16(priv, S3_FRGD_COLOR, color);

    // Foreground mix: source = foreground color, ROP = copy
    s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_FRGD | S3_MIX_ROP_SRC);

    // Write mask: all bits enabled
    s3WriteReg16(priv, S3_WRT_MASK, 0xFFFF);

    // Pixel control: always use foreground mix
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_FRGD);

    // Starting position
    s3WriteReg16(priv, S3_CUR_X, x);
    s3WriteReg16(priv, S3_CUR_Y, y);

    // Dimensions (count is pixels - 1)
    s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, w - 1);

    s3WaitFifo(priv, 2);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_MIN_AXIS_PCNT | ((h - 1) & 0x0FFF));

    // Command: rectangle, draw, positive X and Y, packed mode
    s3WriteReg16(priv, S3_CMD, S3_CMD_RECT | S3_CMD_DRAW | S3_CMD_ACROSS_PLANE
                               | S3_CMD_DIR_X_POS | S3_CMD_DIR_Y_POS);
}
// ============================================================
// s3RectFillPat
// ============================================================
//
// 8x8 mono pattern fill using CPU data mix mode.
//
//   drv       driver instance (privData must be an S3PrivateT)
//   x, y      top-left corner of the rectangle
//   w, h      size in pixels; nothing is drawn if either is <= 0
//   pattern   8 bytes, one per pattern row, MSB-first; nothing is
//             drawn if NULL
//   fg, bg    colors for 1 bits and 0 bits of the pattern
//
// The pattern is tiled by the CPU: for each destination row, the
// matching pattern byte (row modulo 8) is duplicated into both halves
// of a 16-bit word and that word is written to PIX_TRANS once per 16
// pixels of width. Tiling therefore starts at the rectangle's own
// top-left corner, not at a fixed screen origin.
//
// Note: fg and bg go through 16-bit register writes, so only their
// low 16 bits reach the color registers.
static void s3RectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    if (pattern == NULL || w <= 0 || h <= 0) {
        return;
    }

    // The minor-axis count shares the MULTIFUNC_CTRL word with the
    // sub-register selector in bits 15:12, so (h - 1) must fit in 12
    // bits. Clamping here also keeps the number of rows streamed below
    // equal to the number of rows the engine was told to expect.
    if (h > 0x1000) {
        h = 0x1000;
    }

    s3WaitFifo(priv, 8);

    // Set colors
    s3WriteReg16(priv, S3_FRGD_COLOR, fg);
    s3WriteReg16(priv, S3_BKGD_COLOR, bg);

    // Foreground mix: source = foreground color, ROP = copy
    s3WriteReg16(priv, S3_FRGD_MIX, S3_MIX_SRC_FRGD | S3_MIX_ROP_SRC);

    // Background mix: source = background color, ROP = copy
    s3WriteReg16(priv, S3_BKGD_MIX, S3_MIX_SRC_BKGD | S3_MIX_ROP_SRC);

    // Pixel control: CPU data selects fg/bg mix
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_PIX_CNTL | S3_PIXCNTL_MIX_CPU);

    // Destination and dimensions (count is pixels - 1)
    s3WriteReg16(priv, S3_CUR_X, x);
    s3WriteReg16(priv, S3_CUR_Y, y);
    s3WriteReg16(priv, S3_MAJ_AXIS_PCNT, w - 1);

    s3WaitFifo(priv, 2);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_MIN_AXIS_PCNT | ((h - 1) & 0x0FFF));

    // Command: rectangle, draw, left-to-right top-to-bottom, CPU data
    s3WriteReg16(priv, S3_CMD, S3_CMD_RECT | S3_CMD_DRAW | S3_CMD_ACROSS_PLANE
                               | S3_CMD_DIR_X_POS | S3_CMD_DIR_Y_POS
                               | S3_CMD_16BIT_IO);

    // Feed tiled pattern data through PIX_TRANS.
    // Each row of the pattern is 1 byte (8 pixels), tiled across the width.
    int32_t wordsPerRow = (w + 15) / 16;

    for (int32_t row = 0; row < h; row++) {
        uint8_t patByte = pattern[row & 7];

        s3WaitFifo(priv, 1);

        for (int32_t word = 0; word < wordsPerRow; word++) {
            s3WriteReg16(priv, S3_PIX_TRANS, (patByte << 8) | patByte);
        }
    }
}
// ============================================================
// s3SetClip
// ============================================================
//
// Loads the four scissor registers (left, top, right, bottom) from
// the rectangle (x, y, w, h). Right and bottom are the inclusive
// edges x + w - 1 and y + h - 1. Every coordinate is truncated to
// 12 bits before being merged into the multifunction register word.
static void s3SetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    const int32_t left   = x & 0x0FFF;
    const int32_t top    = y & 0x0FFF;
    const int32_t right  = (x + w - 1) & 0x0FFF;
    const int32_t bottom = (y + h - 1) & 0x0FFF;

    // Four writes follow, so reserve four FIFO slots up front.
    s3WaitFifo(priv, 4);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_SCISSORS_L | left);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_SCISSORS_T | top);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_SCISSORS_R | right);
    s3WriteReg16(priv, S3_MULTIFUNC_CTRL, S3_MF_SCISSORS_B | bottom);
}
// ============================================================
// s3SetCursor
// ============================================================
//
// Copies a cursor image into VRAM at priv->cursorOffset and points
// the hardware cursor address registers (CR4C/CR4D) at it. Passing
// a NULL image hides the cursor instead and touches nothing else.
//
// Layout written to VRAM (row-interleaved, 16 bytes per row):
//   bytes 0..7  : AND mask for the row
//   bytes 8..15 : XOR mask for the row
// for S3_HW_CURSOR_SIZE rows. The source masks are read with a
// fixed stride of 8 bytes per row. Any byte that lies outside the
// image's width/height is written as transparent (AND=0xFF,
// XOR=0x00).
//
// This routine does not change cursor visibility for a non-NULL
// image; use s3ShowCursor for that.
static void s3SetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    if (!image) {
        s3ShowCursor(drv, false);
        return;
    }

    // Let the engine drain before the CPU writes into VRAM.
    s3WaitIdle(drv);

    uint8_t *cursorMem = drv->mode.framebuffer + priv->cursorOffset;
    for (int32_t line = 0; line < S3_HW_CURSOR_SIZE; line++) {
        uint8_t   *andDst      = cursorMem + line * 16;
        uint8_t   *xorDst      = andDst + 8;
        const bool lineInImage = line < image->height;

        for (int32_t col = 0; col < 8; col++) {
            // Start from "transparent" and override when the image
            // actually covers this byte.
            uint8_t andBits = 0xFF;
            uint8_t xorBits = 0x00;

            if (lineInImage && col < (image->width + 7) / 8) {
                const int32_t src = line * 8 + col;
                andBits = image->andMask[src];
                xorBits = image->xorMask[src];
            }

            andDst[col] = andBits;
            xorDst[col] = xorBits;
        }
    }

    // The address registers take the VRAM offset in 1KB units:
    // upper 4 bits in CR4C, lower 8 bits in CR4D.
    uint16_t addrKb = priv->cursorOffset / 1024;
    vgaCrtcWrite(S3_CR4C_HW_CURSOR_ADDR_HI, (addrKb >> 8) & 0x0F);
    vgaCrtcWrite(S3_CR4D_HW_CURSOR_ADDR_LO, addrKb & 0xFF);
}
// ============================================================
// s3ShowCursor
// ============================================================
//
// Shows or hides the hardware cursor by setting or clearing bit 0
// of CR45. All other bits of CR45 are read back and preserved.
// 'drv' is unused.
static void s3ShowCursor(AccelDriverT *drv, bool visible) {
    (void)drv;

    const uint8_t current = vgaCrtcRead(S3_CR45_HW_CURSOR_MODE);
    const uint8_t updated = visible ? (uint8_t)(current | 0x01)
                                    : (uint8_t)(current & ~0x01);

    vgaCrtcWrite(S3_CR45_HW_CURSOR_MODE, updated);
}
// ============================================================
// s3Shutdown
// ============================================================
//
// Tears the driver down: hides the hardware cursor, releases the
// MMIO and linear framebuffer mappings, then switches back to text
// mode. No S3-specific registers are restored here; the driver
// relies on the text mode restore for that.
static void s3Shutdown(AccelDriverT *drv) {
    S3PrivateT *s3 = (S3PrivateT *)drv->privData;

    // Hide the cursor while the CRTC is still in graphics mode.
    s3ShowCursor(drv, false);

    // Release both mappings before leaving graphics mode.
    dpmiUnmapFramebuffer(&s3->mmioMapping);
    dpmiUnmapFramebuffer(&s3->lfbMapping);

    vgaRestoreTextMode();
}
// ============================================================
// s3UnlockRegs
// ============================================================
//
// Opens up the S3 extended CRTC registers by writing the two
// unlock keys, then removes the standard CRTC write protection:
//   CR38 <- 0x48 : S3 VGA register set (CR30-CR3F)
//   CR39 <- 0xA5 : S3 system register set (CR40-CR5F)
//   vgaCrtcUnlock() for the standard CRTC timing registers
static void s3UnlockRegs(void) {
    const uint8_t vgaRegsKey = 0x48;
    const uint8_t sysRegsKey = 0xA5;

    vgaCrtcWrite(S3_CR38_LOCK_1, vgaRegsKey);
    vgaCrtcWrite(S3_CR39_LOCK_2, sysRegsKey);
    vgaCrtcUnlock();
}
// ============================================================
// s3WaitFifo
// ============================================================
//
// Waits until the S3 command FIFO has at least 'slots' free
// entries, polling GP_STAT at most S3_MAX_IDLE_WAIT times. If the
// FIFO never drains far enough the function gives up silently and
// returns, so a wedged engine cannot hang the caller.
//
// GP_STAT bits 7:0 are the FIFO status. A SET bit marks a FIFO
// location that is still occupied; the bits fill from the LSB up:
//   0x00 = 8 free, 0x01 = 7 free, 0x03 = 6 free, ...,
//   0x7F = 1 free, 0xFF = 0 free
// So "at least N free" is true exactly when bit (8 - N), i.e.
// mask 0x0100 >> N, is CLEAR.
//
// Only these eight status bits are consulted, so requests for more
// than 8 slots are treated as 8. Requests for fewer than 1 slot
// return immediately (nothing to wait for).
static void s3WaitFifo(S3PrivateT *priv, int32_t slots) {
    if (slots < 1) {
        return;
    }
    if (slots > 8) {
        slots = 8;
    }

    // Bit that is still set while fewer than 'slots' entries are free.
    const uint16_t occupiedMask = (uint16_t)(0x0100 >> slots);

    for (int32_t i = 0; i < S3_MAX_IDLE_WAIT; i++) {
        if (!(s3ReadReg16(priv, S3_GP_STAT) & occupiedMask)) {
            return;
        }
    }
}
// ============================================================
// s3WaitIdle
// ============================================================
//
// Polls GP_STAT until the graphics engine reports not-busy
// (S3_GP_STAT_BUSY clear). Only the BUSY flag is tested; the
// FIFO status bits are not consulted. Polling stops after
// S3_MAX_IDLE_WAIT reads, in which case the function returns
// without signalling the timeout.
static void s3WaitIdle(AccelDriverT *drv) {
    S3PrivateT *priv = (S3PrivateT *)drv->privData;

    for (int32_t pollsLeft = S3_MAX_IDLE_WAIT; pollsLeft > 0; pollsLeft--) {
        const uint16_t status = s3ReadReg16(priv, S3_GP_STAT);
        if ((status & S3_GP_STAT_BUSY) == 0) {
            break;
        }
    }
}