DOS_Video/atiMach64.c
2026-04-13 19:40:45 -05:00

960 lines
33 KiB
C

// atiMach64.c -- ATI Mach64 / Rage accelerated video driver
//
// Supports the ATI Mach64 family: GX, CX, CT, ET, VT, GT (Rage II),
// and Rage Pro. These were among the most capable 2D accelerators
// of the mid-1990s, with features including:
// - Solid and pattern rectangle fill
// - Screen-to-screen BitBLT
// - Host-to-screen blit (CPU data transfer)
// - Monochrome color expansion
// - Bresenham line draw
// - Trapezoid fill
// - Hardware scissor rectangle
// - 64x64 two-color hardware cursor
//
// Register access:
// The Mach64 has two register access methods:
// 1. I/O port: registers at block I/O base + offset. The base
// is typically 0x02EC for Mach64, determined by CONFIG_CHIP_ID.
// 2. MMIO: register block at end of LFB (BAR0 + aperture_size - 1KB)
// or via a dedicated BAR.
//
// We use MMIO for speed. The register block is 1KB at the end
// of the aperture (LFB base + size - 0x400 on most variants,
// or LFB base + size - 0x800 for 8MB apertures).
//
// Some early Mach64 chips (GX/CX) may not support MMIO well;
// for those we fall back to I/O port access.
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// ============================================================
// ATI vendor/device IDs
// ============================================================
#define ATI_VENDOR_ID 0x1002
#define ATI_MACH64_GX 0x4758 // Mach64 GX
#define ATI_MACH64_CX 0x4358 // Mach64 CX
#define ATI_MACH64_CT 0x4354 // Mach64 CT
#define ATI_MACH64_ET 0x4554 // Mach64 ET
#define ATI_MACH64_VT 0x5654 // Mach64 VT
#define ATI_MACH64_VT_B 0x5655 // Mach64 VT-B
#define ATI_MACH64_GT 0x4754 // Mach64 GT (3D Rage II)
#define ATI_MACH64_GT_B 0x4755 // Mach64 GT-B (3D Rage II+)
#define ATI_RAGE_PRO 0x4750 // Rage Pro
#define ATI_RAGE_PRO_AGP 0x4752 // Rage Pro AGP
#define ATI_RAGE_XL_PCI 0x4752 // Rage XL PCI (shares ID with Pro AGP)

// Rage 128 family IDs. These are kept only as named constants (the
// detect routine's name switch still refers to them); they are
// deliberately NOT listed in sAtiDeviceIds below. The Rage 128 is a
// different architecture with its own register layout, so driving it
// through the Mach64 register offsets used by this file would program
// the wrong registers.
#define ATI_RAGE_128_RE 0x5245 // Rage 128 RE
#define ATI_RAGE_128_RF 0x5246 // Rage 128 RF
#define ATI_RAGE_128_RK 0x524B // Rage 128 RK
#define ATI_RAGE_128_RL 0x524C // Rage 128 RL
#define ATI_RAGE_128_PRO_PF 0x5046 // Rage 128 Pro PF
#define ATI_RAGE_128_PRO_PR 0x5052 // Rage 128 Pro PR
#define ATI_RAGE_FURY 0x5046 // Rage Fury (same as 128 Pro PF)

// PCI probe table: flat list of (vendor, device) pairs terminated by
// a (0, 0) pair. Only Mach64-family parts (the chips named in the
// file header) are listed, because every register access in this
// driver assumes the Mach64 register map.
static const uint16_t sAtiDeviceIds[] = {
    ATI_VENDOR_ID, ATI_MACH64_GX,
    ATI_VENDOR_ID, ATI_MACH64_CX,
    ATI_VENDOR_ID, ATI_MACH64_CT,
    ATI_VENDOR_ID, ATI_MACH64_ET,
    ATI_VENDOR_ID, ATI_MACH64_VT,
    ATI_VENDOR_ID, ATI_MACH64_VT_B,
    ATI_VENDOR_ID, ATI_MACH64_GT,
    ATI_VENDOR_ID, ATI_MACH64_GT_B,
    ATI_VENDOR_ID, ATI_RAGE_PRO,
    ATI_VENDOR_ID, ATI_RAGE_PRO_AGP,
    0, 0
};
// ============================================================
// Mach64 register offsets (from MMIO base)
// ============================================================
//
// The Mach64 has a flat register space. For I/O access, these
// offsets are added to the I/O base port. For MMIO, they're
// byte offsets from the MMIO base address (the MMIO accessor in
// this file indexes a uint32_t array with offset / 4, so every
// offset used through MMIO must be a multiple of 4).
//
// NOTE(review): the offsets below have not been cross-checked
// against the Mach64 register reference in this review -- confirm
// before relying on them for new code.
// Drawing engine source registers
#define ATI_SRC_OFF_PITCH 0x0000 // source offset and pitch
#define ATI_SRC_Y 0x0004 // source Y
#define ATI_SRC_X 0x0008 // source X (NOTE(review): was annotated as an alias of SRC_HEIGHT1, but this file defines ATI_SRC_HEIGHT1 at 0x0014 -- confirm)
#define ATI_SRC_Y_X 0x000C // source Y and X combined
#define ATI_SRC_WIDTH1 0x0010
#define ATI_SRC_HEIGHT1 0x0014
// Drawing engine destination registers
#define ATI_DST_OFF_PITCH 0x0040 // destination offset and pitch
#define ATI_DST_Y 0x0044
#define ATI_DST_X 0x0048
#define ATI_DST_Y_X 0x004C
#define ATI_DST_HEIGHT 0x0050
#define ATI_DST_WIDTH 0x0054
#define ATI_DST_HEIGHT_WIDTH 0x0058 // triggers blit
#define ATI_DST_X_WIDTH 0x005C
#define ATI_DST_BRES_ERR 0x0064
#define ATI_DST_BRES_INC 0x0068
#define ATI_DST_BRES_DEC 0x006C
#define ATI_DST_BRES_LNTH 0x0070 // written last by the line-draw routine in this file
#define ATI_DST_BRES_LNTH_END 0x0074 // triggers line draw (not written anywhere in the visible code)
// Host data (CPU-to-screen)
#define ATI_HOST_DATA0 0x0200
// Scissor registers
#define ATI_SC_LEFT 0x00A0
#define ATI_SC_RIGHT 0x00A4
#define ATI_SC_TOP 0x00A8
#define ATI_SC_BOTTOM 0x00AC
// Drawing processor registers
#define ATI_DP_BKGD_CLR 0x00B0
#define ATI_DP_FRGD_CLR 0x00B4
#define ATI_DP_WRITE_MASK 0x00B8
#define ATI_DP_CHAIN_MASK 0x00BC
#define ATI_DP_PIX_WIDTH 0x00D0
#define ATI_DP_MIX 0x00D4
#define ATI_DP_SRC 0x00D8
// Compare / trajectory control and engine status
#define ATI_CLR_CMP_CNTL 0x0100
#define ATI_GUI_TRAJ_CNTL 0x00CC
#define ATI_GUI_STAT 0x00CE // I/O only; for MMIO see below. Not 4-byte aligned and not referenced by the visible code.
// FIFO and status (MMIO addresses)
#define ATI_FIFO_STAT 0x0310
#define ATI_GUI_STAT_MMIO 0x0338
// Hardware cursor
#define ATI_CUR_CLR0 0x0260
#define ATI_CUR_CLR1 0x0264
#define ATI_CUR_OFFSET 0x0268
#define ATI_CUR_HORZ_VERT_POSN 0x026C
#define ATI_CUR_HORZ_VERT_OFF 0x0270
#define ATI_GEN_TEST_CNTL 0x0034 // general test/cursor control
// Memory config
#define ATI_MEM_CNTL 0x0140
// I/O and MMIO constants
#define ATI_IO_BASE_DEFAULT 0x02EC // default block I/O base port
#define ATI_MMIO_SIZE 0x0400 // MMIO block size (1KB at end of aperture)
#define ATI_CONFIG_CHIP_ID 0x00E0
// ============================================================
// Mach64 DP_MIX values
// ============================================================
//
// The drawing processor MIX register controls the raster operation
// for foreground (bits 20:16) and background (bits 4:0).
#define ATI_MIX_NOT_DST 0x00
#define ATI_MIX_ZERO 0x01
#define ATI_MIX_ONE 0x02
#define ATI_MIX_DST 0x03
#define ATI_MIX_NOT_SRC 0x04
#define ATI_MIX_XOR 0x05
#define ATI_MIX_XNOR 0x06
#define ATI_MIX_COPY 0x07 // dest = source (most common)
#define ATI_MIX_NOT_SRC_AND 0x08
#define ATI_MIX_SRC_AND_DST 0x0C
#define ATI_MIX_SRC_OR_DST 0x0E
// Foreground mix is in bits 20:16, background in bits 4:0
#define ATI_FRGD_MIX(rop) ((uint32_t)(rop) << 16)
#define ATI_BKGD_MIX(rop) ((uint32_t)(rop))
// ============================================================
// Mach64 DP_SRC values
// ============================================================
#define ATI_SRC_BKGD_CLR 0x00 // background color register
#define ATI_SRC_FRGD_CLR 0x01 // foreground color register
#define ATI_SRC_HOST 0x02 // CPU host data
#define ATI_SRC_BLIT 0x03 // video memory (blit)
#define ATI_SRC_PATTERN 0x04 // pattern register
// DP_SRC packs three source selects into one register. The layout
// implemented by the macros below is:
// bits 2:0 = background source
// bits 10:8 = foreground source
// bits 18:16 = mono source (for color expand)
#define ATI_DP_SRC_BKGD(s) ((uint32_t)(s))
#define ATI_DP_SRC_FRGD(s) ((uint32_t)(s) << 8)
#define ATI_DP_SRC_MONO(s) ((uint32_t)(s) << 16)
// ============================================================
// Mach64 DP_PIX_WIDTH values
// ============================================================
#define ATI_PIX_8BPP 0x02
#define ATI_PIX_15BPP 0x03
#define ATI_PIX_16BPP 0x04
#define ATI_PIX_32BPP 0x06
// HOST byte/word/dword order -- use native (little-endian)
#define ATI_HOST_BYTE_ORDER 0x00
// GUI_TRAJ_CNTL direction bits. The line-draw routine additionally
// ORs in the literal 0x04 to select a Y-major line; that bit has no
// named constant here.
#define ATI_DST_X_DIR_LEFT 0x00
#define ATI_DST_X_DIR_RIGHT 0x01
#define ATI_DST_Y_DIR_UP 0x00
#define ATI_DST_Y_DIR_DOWN 0x02
// GUI_STAT busy bit
#define ATI_GUI_STAT_BUSY 0x00000001
// Low 16 bits of FIFO_STAT carry the FIFO occupancy information
#define ATI_FIFO_STAT_MASK 0x0000FFFF
// Hardware cursor size (pixels per side) and image size in bytes
#define ATI_HW_CURSOR_SIZE 64
#define ATI_HW_CURSOR_BYTES 1024 // 64*64*2bpp/8
// Maximum number of status polls before a wait loop gives up
#define ATI_MAX_IDLE_WAIT 1000000
// ============================================================
// Private driver state
// ============================================================
//
// One instance exists (sAtiPrivate); it is zeroed and refilled by
// atiInit and reached by every entry point through drv->privData.
typedef struct {
uint32_t lfbPhysAddr; // physical base of the aperture (PCI BAR0 with the low 4 flag bits masked off)
uint32_t vramSize; // video memory size in bytes (decoded from MEM_CNTL, or estimated from the BAR size)
uint32_t cursorOffset; // byte offset from the framebuffer base where the HW cursor image is stored
int32_t bytesPerPixel; // (bpp + 7) / 8 of the active mode
int32_t screenPitch; // pitch reported by the VESA mode set (presumably bytes per scanline -- confirm)
volatile uint32_t *mmio; // mapped MMIO register base
uint32_t mmioPhysAddr; // physical address of the register block; computed in atiInit, not read elsewhere in the visible code
bool useIo; // fall back to I/O on old GX/CX
uint16_t ioBase; // I/O base port for register access
DpmiMappingT lfbMapping; // mapping of the whole BAR0 aperture (framebuffer plus register block)
} AtiPrivateT;
// ============================================================
// Prototypes
// ============================================================
//
// Forward declarations for every file-local function so the driver
// table below can reference them before their definitions. The
// functions taking AccelDriverT * are the AccelDriverT callbacks;
// the ones taking AtiPrivateT * are internal register/FIFO helpers.
static void atiBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static void atiColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg);
static bool atiDetect(AccelDriverT *drv);
static void atiHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool atiInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void atiLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color);
static void atiMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void atiRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void atiRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg);
static void atiSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void atiSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void atiShowCursor(AccelDriverT *drv, bool visible);
static void atiShutdown(AccelDriverT *drv);
static void atiWaitFifo(AtiPrivateT *priv, int32_t entries);
static void atiWaitIdle(AccelDriverT *drv);
static void atiWriteReg(AtiPrivateT *priv, uint32_t reg, uint32_t val);
static uint32_t atiReadReg(AtiPrivateT *priv, uint32_t reg);
// ============================================================
// Driver instance
// ============================================================
//
// Single static driver object handed to the accel framework by
// atiRegisterDriver(). Its private state lives in sAtiPrivate.
static AtiPrivateT sAtiPrivate;
static AccelDriverT sAtiDriver = {
.name = "ATI Mach64", // generic name; atiDetect replaces it with a chip-specific string
.chipFamily = "ati",
.caps = 0, // no capabilities advertised until atiInit fills this in
.privData = &sAtiPrivate,
.detect = atiDetect,
.init = atiInit,
.shutdown = atiShutdown,
.waitIdle = atiWaitIdle,
.setClip = atiSetClip,
.rectFill = atiRectFill,
.rectFillPat = atiRectFillPat,
.bitBlt = atiBitBlt,
.hostBlit = atiHostBlit,
.colorExpand = atiColorExpand,
.lineDraw = atiLineDraw,
.setCursor = atiSetCursor,
.moveCursor = atiMoveCursor,
.showCursor = atiShowCursor,
};
// ============================================================
// atiRegisterDriver
// ============================================================
void atiRegisterDriver(void) {
accelRegisterDriver(&sAtiDriver);
}
// ============================================================
// atiReadReg / atiWriteReg
// ============================================================
//
// Register access abstraction. Uses MMIO when available, falls
// back to I/O port access on older chips.

// Reads one 32-bit register.
//   priv - driver state (selects I/O vs. MMIO access)
//   reg  - register byte offset (one of the ATI_* offsets)
// Returns the value read from the I/O port (ioBase + reg) when
// useIo is set, otherwise from the MMIO block at index reg / 4.
static uint32_t atiReadReg(AtiPrivateT *priv, uint32_t reg) {
    uint32_t value;

    if (!priv->useIo) {
        // MMIO block is addressed as an array of dwords
        value = priv->mmio[reg / 4];
    } else {
        value = inportl(priv->ioBase + reg);
    }

    return value;
}
// Writes one 32-bit register.
//   priv - driver state (selects I/O vs. MMIO access)
//   reg  - register byte offset (one of the ATI_* offsets)
//   val  - value to store
// Goes to I/O port (ioBase + reg) when useIo is set, otherwise to
// the MMIO block at index reg / 4.
static void atiWriteReg(AtiPrivateT *priv, uint32_t reg, uint32_t val) {
    if (!priv->useIo) {
        priv->mmio[reg / 4] = val;
    } else {
        outportl(priv->ioBase + reg, val);
    }
}
// ============================================================
// atiBitBlt
// ============================================================
//
// Screen-to-screen BitBLT of a w x h rectangle from (srcX, srcY)
// to (dstX, dstY). Empty rectangles (w or h <= 0) are ignored.
//
// The copy direction is chosen here and programmed into the
// trajectory register: when the source lies left of / above the
// destination the copy runs right-to-left / bottom-to-top and the
// start coordinates are moved to the far edge of the rectangle.
static void atiBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    const bool rightToLeft = srcX < dstX;
    const bool bottomToTop = srcY < dstY;

    // Build the trajectory bits from scratch instead of clearing them
    uint32_t traj = 0;
    if (!rightToLeft) {
        traj |= ATI_DST_X_DIR_RIGHT;
    }
    if (!bottomToTop) {
        traj |= ATI_DST_Y_DIR_DOWN;
    }

    // Start at the last column/row when walking backwards
    const int32_t startSrcX = rightToLeft ? srcX + (w - 1) : srcX;
    const int32_t startDstX = rightToLeft ? dstX + (w - 1) : dstX;
    const int32_t startSrcY = bottomToTop ? srcY + (h - 1) : srcY;
    const int32_t startDstY = bottomToTop ? dstY + (h - 1) : dstY;

    const uint32_t srcYX  = ((uint32_t)startSrcX << 16) | (uint32_t)startSrcY;
    const uint32_t dstYX  = ((uint32_t)startDstX << 16) | (uint32_t)startDstY;
    const uint32_t extent = ((uint32_t)w << 16) | (uint32_t)h;

    atiWaitFifo(priv, 7);
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, traj);
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_BLIT));
    atiWriteReg(priv, ATI_SRC_Y_X, srcYX);
    atiWriteReg(priv, ATI_SRC_WIDTH1, w);
    atiWriteReg(priv, ATI_DST_Y_X, dstYX);
    // Writing the extent register starts the operation
    atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, extent);
}
// ============================================================
// atiColorExpand
// ============================================================
//
// Monochrome-to-color expansion through the host data register.
//   srcBuf   - 1bpp bitmap, srcPitch bytes between rows
//   dstX/Y   - destination top-left corner
//   w, h     - size in pixels; empty rectangles are ignored
//   fg, bg   - colors loaded into the foreground/background
//              color registers
// Each row is sent as (w + 31) / 32 dwords. Within a dword the
// first source byte lands in bits 31:24; offsets at or beyond
// srcPitch are sent as zero.
static void atiColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    const int32_t  dwordsPerRow = (w + 31) / 32;
    const uint32_t srcSelect    = ATI_DP_SRC_MONO(ATI_SRC_HOST)
                                | ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR)
                                | ATI_DP_SRC_BKGD(ATI_SRC_BKGD_CLR);

    // Program colors, source routing, direction and destination;
    // the extent write comes last.
    atiWaitFifo(priv, 7);
    atiWriteReg(priv, ATI_DP_FRGD_CLR, fg);
    atiWriteReg(priv, ATI_DP_BKGD_CLR, bg);
    atiWriteReg(priv, ATI_DP_SRC, srcSelect);
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN);
    atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)dstX << 16) | (uint32_t)dstY);
    atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, ((uint32_t)w << 16) | (uint32_t)h);

    // Stream the bitmap, one dword at a time
    for (int32_t line = 0; line < h; line++) {
        const uint8_t *lineData = srcBuf + line * srcPitch;

        for (int32_t word = 0; word < dwordsPerRow; word++) {
            const int32_t firstByte = word * 4;
            uint32_t packed = 0;
            int32_t  shift  = 24;

            // First byte goes into the most significant position
            for (int32_t idx = firstByte; idx < firstByte + 4; idx++) {
                if (idx < srcPitch) {
                    packed |= (uint32_t)lineData[idx] << shift;
                }
                shift -= 8;
            }

            atiWaitFifo(priv, 1);
            atiWriteReg(priv, ATI_HOST_DATA0, packed);
        }
    }
}
// ============================================================
// atiDetect
// ============================================================
//
// Probes the PCI bus for any device in sAtiDeviceIds. On a match
// the PCI location is stored in drv->pciDev, drv->name is set to a
// chip-specific string and true is returned; otherwise returns
// false and leaves drv->name untouched.
static bool atiDetect(AccelDriverT *drv) {
    // Device ID -> marketing name. IDs missing from this table
    // fall back to the generic family name.
    static const struct {
        uint16_t    deviceId;
        const char *label;
    } chipNames[] = {
        { ATI_MACH64_GX,       "ATI Mach64 GX" },
        { ATI_MACH64_CX,       "ATI Mach64 CX" },
        { ATI_MACH64_CT,       "ATI Mach64 CT" },
        { ATI_MACH64_ET,       "ATI Mach64 ET" },
        { ATI_MACH64_VT,       "ATI Mach64 VT" },
        { ATI_MACH64_VT_B,     "ATI Mach64 VT" },
        { ATI_MACH64_GT,       "ATI 3D Rage II" },
        { ATI_MACH64_GT_B,     "ATI 3D Rage II" },
        { ATI_RAGE_PRO,        "ATI Rage Pro" },
        { ATI_RAGE_PRO_AGP,    "ATI Rage Pro" },
        { ATI_RAGE_128_RE,     "ATI Rage 128" },
        { ATI_RAGE_128_RF,     "ATI Rage 128" },
        { ATI_RAGE_128_RK,     "ATI Rage 128" },
        { ATI_RAGE_128_RL,     "ATI Rage 128" },
        { ATI_RAGE_128_PRO_PF, "ATI Rage 128 Pro" },
        { ATI_RAGE_128_PRO_PR, "ATI Rage 128 Pro" },
    };
    const int32_t chipCount = (int32_t)(sizeof(chipNames) / sizeof(chipNames[0]));

    int32_t matchIdx;
    if (!pciFindDeviceList(sAtiDeviceIds, &drv->pciDev, &matchIdx)) {
        return false;
    }

    const char *label = "ATI Mach64";
    for (int32_t i = 0; i < chipCount; i++) {
        if (drv->pciDev.deviceId == chipNames[i].deviceId) {
            label = chipNames[i].label;
            break;
        }
    }

    drv->name = label;
    return true;
}
// ============================================================
// atiHostBlit
// ============================================================
//
// CPU-to-screen blit: pushes a w x h block of pixels from system
// memory to (dstX, dstY) through the host data register.
//   srcBuf   - source pixels in the current mode's pixel format
//   srcPitch - bytes between source rows
// Every row is sent as whole dwords; bytes past the end of the row
// (w * bytesPerPixel) are sent as zero. Empty rectangles are ignored.
static void atiHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    const int32_t rowBytes     = w * priv->bytesPerPixel;
    const int32_t dwordsPerRow = (rowBytes + 3) / 4;

    // Route host data to the destination and start the operation
    atiWaitFifo(priv, 5);
    atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_HOST));
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN);
    atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)dstX << 16) | (uint32_t)dstY);
    atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, ((uint32_t)w << 16) | (uint32_t)h);

    for (int32_t line = 0; line < h; line++) {
        const uint8_t *lineData = srcBuf + line * srcPitch;

        for (int32_t word = 0; word < dwordsPerRow; word++) {
            const int32_t firstByte = word * 4;
            const int32_t remaining = rowBytes - firstByte;
            const int32_t take      = (remaining < 4) ? remaining : 4;
            uint32_t packed = 0;

            // Little-endian packing: byte 0 occupies bits 7:0
            for (int32_t i = 0; i < take; i++) {
                packed |= (uint32_t)lineData[firstByte + i] << (i * 8);
            }

            atiWaitFifo(priv, 1);
            atiWriteReg(priv, ATI_HOST_DATA0, packed);
        }
    }
}
// ============================================================
// atiInit
// ============================================================
//
// Brings the adapter up for accelerated drawing in the requested
// mode.
//   drv - driver instance; drv->pciDev must already identify the
//         adapter (filled in by atiDetect)
//   req - requested width / height / bpp
// Returns true on success. Returns false if the BAR0 aperture is
// too small to hold the register block and a cursor image, if no
// matching VESA mode can be set, or if the aperture cannot be
// mapped (text mode is restored in that last case).
//
// Steps: pick I/O vs. MMIO register access, size the aperture,
// estimate VRAM, set the VESA mode, map the aperture, program the
// engine's pixel width and pitch, place the hardware cursor image,
// publish the capability flags and reset the clip rectangle.
static bool atiInit(AccelDriverT *drv, const AccelModeRequestT *req) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    memset(priv, 0, sizeof(*priv));

    // Determine if this is an old GX/CX (I/O only) or newer (MMIO)
    priv->useIo = (drv->pciDev.deviceId == ATI_MACH64_GX
                   || drv->pciDev.deviceId == ATI_MACH64_CX);
    priv->ioBase = ATI_IO_BASE_DEFAULT;

    // Get LFB address and size from PCI BAR0
    uint32_t bar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev,
                              drv->pciDev.func, PCI_BAR0);
    priv->lfbPhysAddr = bar0 & 0xFFFFFFF0;
    uint32_t barSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0);

    // Everything below subtracts ATI_MMIO_SIZE and the cursor size
    // from barSize; refuse apertures where that would underflow.
    // Checked before the mode switch so nothing needs undoing.
    if (barSize <= (uint32_t)(ATI_MMIO_SIZE + ATI_HW_CURSOR_BYTES)) {
        return false;
    }

    // Aperture size != VRAM size on Mach64 (aperture is typically 8MB)
    // Read actual VRAM from MEM_CNTL register
    uint32_t memCntl;
    if (priv->useIo) {
        memCntl = inportl(priv->ioBase + ATI_MEM_CNTL);
    } else {
        // The register block is not mapped yet, so MEM_CNTL cannot
        // be read here; record where the block lives and fall back
        // to an estimate based on the aperture size.
        priv->mmioPhysAddr = priv->lfbPhysAddr + barSize - ATI_MMIO_SIZE;
        memCntl = 0;
    }

    // Determine VRAM size
    if (memCntl != 0) {
        uint32_t memSize = memCntl & 0x07;
        switch (memSize) {
            case 0: priv->vramSize = 512 * 1024; break;
            case 1: priv->vramSize = 1024 * 1024; break;
            case 2: priv->vramSize = 2 * 1024 * 1024; break;
            case 3: priv->vramSize = 4 * 1024 * 1024; break;
            case 4: priv->vramSize = 6 * 1024 * 1024; break;
            case 5: priv->vramSize = 8 * 1024 * 1024; break;
            default: priv->vramSize = 2 * 1024 * 1024; break;
        }
    } else {
        // Fallback estimate from the aperture size
        priv->vramSize = (barSize > 8 * 1024 * 1024) ? 4 * 1024 * 1024 : barSize;
    }

    // Set VESA mode
    VesaModeResultT vesa;
    if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) {
        return false;
    }

    // Map LFB + MMIO region (map entire aperture; MMIO is at end)
    if (!dpmiMapFramebuffer(priv->lfbPhysAddr, barSize, &priv->lfbMapping)) {
        vgaRestoreTextMode();
        return false;
    }

    priv->bytesPerPixel = (vesa.bpp + 7) / 8;
    priv->screenPitch = vesa.pitch;

    drv->mode.width = vesa.width;
    drv->mode.height = vesa.height;
    drv->mode.bpp = vesa.bpp;
    drv->mode.pitch = vesa.pitch;
    drv->mode.framebuffer = priv->lfbMapping.ptr;
    drv->mode.vramSize = priv->vramSize;
    drv->mode.offscreenBase = vesa.pitch * vesa.height;

    // Set up MMIO pointer at end of aperture
    if (!priv->useIo) {
        priv->mmio = (volatile uint32_t *)(priv->lfbMapping.ptr + barSize - ATI_MMIO_SIZE);
    }

    // Configure the drawing engine pixel width
    uint32_t pixWidth;
    switch (vesa.bpp) {
        case 8: pixWidth = ATI_PIX_8BPP; break;
        case 15: pixWidth = ATI_PIX_15BPP; break;
        case 16: pixWidth = ATI_PIX_16BPP; break;
        case 32: pixWidth = ATI_PIX_32BPP; break;
        default: pixWidth = ATI_PIX_16BPP; break;
    }

    // DP_PIX_WIDTH: set all fields to the same depth
    uint32_t dpPixWidth = pixWidth
        | (pixWidth << 4) // host data
        | (pixWidth << 8) // source
        | (pixWidth << 16) // destination
        | (pixWidth << 28); // default

    // DST_OFF_PITCH / SRC_OFF_PITCH: offset = 0, pitch in units of 8 pixels
    uint32_t pitch8 = vesa.pitch / priv->bytesPerPixel / 8;

    // Four register writes follow, so reserve four FIFO slots
    // (the previous code reserved only two).
    atiWaitFifo(priv, 4);
    atiWriteReg(priv, ATI_DP_PIX_WIDTH, dpPixWidth);
    atiWriteReg(priv, ATI_DP_WRITE_MASK, 0xFFFFFFFF);
    atiWriteReg(priv, ATI_DST_OFF_PITCH, pitch8 << 22);
    atiWriteReg(priv, ATI_SRC_OFF_PITCH, pitch8 << 22);

    // Place the cursor image in the last usable 1KB-aligned slot.
    // "Usable" is bounded by the mapping (barSize) and, in MMIO mode,
    // must also stay below the register block in the last
    // ATI_MMIO_SIZE bytes of the aperture: the cursor image is
    // written through the framebuffer pointer, so an image placed at
    // vramSize - 1KB with vramSize == barSize would be written
    // straight into the registers.
    uint32_t cursorLimit = priv->vramSize;
    if (cursorLimit > barSize) {
        cursorLimit = barSize;
    }
    if (!priv->useIo && cursorLimit > barSize - ATI_MMIO_SIZE) {
        cursorLimit = barSize - ATI_MMIO_SIZE;
    }
    priv->cursorOffset = cursorLimit - ATI_HW_CURSOR_BYTES;
    priv->cursorOffset &= ~(uint32_t)(ATI_HW_CURSOR_BYTES - 1);

    drv->caps = ACAP_RECT_FILL
        | ACAP_RECT_FILL_PAT
        | ACAP_BITBLT
        | ACAP_HOST_BLIT
        | ACAP_COLOR_EXPAND
        | ACAP_LINE_DRAW
        | ACAP_HW_CURSOR
        | ACAP_CLIP;

    // Full screen clip
    atiSetClip(drv, 0, 0, vesa.width, vesa.height);
    atiWaitIdle(drv);
    return true;
}
// ============================================================
// atiLineDraw
// ============================================================
//
// Solid line from (x1, y1) to (x2, y2) in 'color', drawn with the
// engine's Bresenham registers. The major axis is the one with the
// larger absolute delta (X wins ties). A zero-length line (both
// deltas 0) is not drawn at all. The length register, written
// last, receives major-axis delta + 1.
static void atiLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    const int32_t deltaX = x2 - x1;
    const int32_t deltaY = y2 - y1;
    const int32_t spanX  = (deltaX < 0) ? -deltaX : deltaX;
    const int32_t spanY  = (deltaY < 0) ? -deltaY : deltaY;

    // Direction bits: set when moving toward increasing X / Y
    uint32_t traj = 0;
    if (deltaX >= 0) {
        traj |= ATI_DST_X_DIR_RIGHT;
    }
    if (deltaY >= 0) {
        traj |= ATI_DST_Y_DIR_DOWN;
    }

    const bool yMajor = spanX < spanY;
    if (yMajor) {
        traj |= 0x04; // Y major axis select
    }

    const int32_t longSpan  = yMajor ? spanY : spanX;
    const int32_t shortSpan = yMajor ? spanX : spanY;

    if (longSpan == 0) {
        return;
    }

    // Standard Bresenham terms
    const int32_t errStart = 2 * shortSpan - longSpan;
    const int32_t errStep  = 2 * shortSpan;
    const int32_t errWrap  = 2 * (shortSpan - longSpan);

    atiWaitFifo(priv, 8);
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, traj);
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR));
    atiWriteReg(priv, ATI_DP_FRGD_CLR, color);
    atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)x1 << 16) | (uint32_t)y1);
    atiWriteReg(priv, ATI_DST_BRES_ERR, errStart);
    atiWriteReg(priv, ATI_DST_BRES_INC, errStep);
    atiWriteReg(priv, ATI_DST_BRES_DEC, errWrap);

    atiWaitFifo(priv, 1);
    atiWriteReg(priv, ATI_DST_BRES_LNTH, longSpan + 1);
}
// ============================================================
// atiMoveCursor
// ============================================================
//
// Positions the hardware cursor at (x, y).
//
// The position register only receives non-negative coordinates. A
// cursor partly off the left/top edge is expressed by pinning the
// position to 0 and writing how many cursor columns/rows to skip
// into the offset register (6-bit fields, so 0..63).
//
// Coordinates at or beyond -64 are clamped to an offset of 63.
// Previously the offset was computed as (-x) & 0x3F, which wraps to
// 0 at x == -64 and made the whole cursor reappear at the screen
// edge; negating INT32_MIN was also undefined behavior. With the
// clamp, a cursor dragged far off-screen leaves at most its last
// column/row at the edge.
static void atiMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;
    const int32_t maxSkip = ATI_HW_CURSOR_SIZE - 1;
    uint32_t offset = 0;

    if (x < 0) {
        // Compare before negating so INT32_MIN is never negated
        uint32_t skipX = (x <= -maxSkip) ? (uint32_t)maxSkip : (uint32_t)(-x);
        offset |= skipX << 16;
        x = 0;
    }

    if (y < 0) {
        uint32_t skipY = (y <= -maxSkip) ? (uint32_t)maxSkip : (uint32_t)(-y);
        offset |= skipY;
        y = 0;
    }

    atiWriteReg(priv, ATI_CUR_HORZ_VERT_OFF, offset);
    atiWriteReg(priv, ATI_CUR_HORZ_VERT_POSN,
                ((uint32_t)x << 16) | (uint32_t)y);
}
// ============================================================
// atiRectFill
// ============================================================
//
// Fills the w x h rectangle at (x, y) with a solid color, using
// the foreground color register as the source and a plain copy
// mix. Empty rectangles (w or h <= 0) are ignored.
static void atiRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    const uint32_t origin = ((uint32_t)x << 16) | (uint32_t)y;
    const uint32_t extent = ((uint32_t)w << 16) | (uint32_t)h;

    // Setup: direction, mix, source select, color, origin
    atiWaitFifo(priv, 5);
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN);
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR));
    atiWriteReg(priv, ATI_DP_FRGD_CLR, color);
    atiWriteReg(priv, ATI_DST_Y_X, origin);

    // The extent write kicks off the fill
    atiWaitFifo(priv, 1);
    atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, extent);
}
// ============================================================
// atiRectFillPat
// ============================================================
//
// Fills the w x h rectangle at (x, y) with an 8x8 two-color
// pattern, sent through the host data path as monochrome data.
//   pattern - 8 bytes, one per pattern row; row r of the rectangle
//             uses pattern[r & 7]
//   fg, bg  - colors loaded into the foreground/background color
//             registers
// Every destination row is sent as (w + 31) / 32 dwords, each dword
// being the row's pattern byte repeated four times. Empty
// rectangles are ignored.
static void atiRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    const int32_t  dwordsPerRow = (w + 31) / 32;
    const uint32_t srcSelect    = ATI_DP_SRC_MONO(ATI_SRC_HOST)
                                | ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR)
                                | ATI_DP_SRC_BKGD(ATI_SRC_BKGD_CLR);

    // Same engine setup as a host-fed color expand
    atiWaitFifo(priv, 7);
    atiWriteReg(priv, ATI_DP_FRGD_CLR, fg);
    atiWriteReg(priv, ATI_DP_BKGD_CLR, bg);
    atiWriteReg(priv, ATI_DP_SRC, srcSelect);
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN);
    atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)x << 16) | (uint32_t)y);
    atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, ((uint32_t)w << 16) | (uint32_t)h);

    for (int32_t line = 0; line < h; line++) {
        // Multiplying by 0x01010101 copies the byte into all four
        // byte lanes; the value is the same for the whole row.
        const uint32_t tiled = (uint32_t)pattern[line & 7] * 0x01010101u;

        for (int32_t word = 0; word < dwordsPerRow; word++) {
            atiWaitFifo(priv, 1);
            atiWriteReg(priv, ATI_HOST_DATA0, tiled);
        }
    }
}
// ============================================================
// atiSetClip
// ============================================================
//
// Programs the hardware scissor to the w x h rectangle at (x, y).
// The right and bottom registers receive inclusive coordinates
// (x + w - 1, y + h - 1). No validation is done on w or h.
static void atiSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    const int32_t lastCol = x + w - 1;
    const int32_t lastRow = y + h - 1;

    atiWaitFifo(priv, 4);
    atiWriteReg(priv, ATI_SC_LEFT, x);
    atiWriteReg(priv, ATI_SC_TOP, y);
    atiWriteReg(priv, ATI_SC_RIGHT, lastCol);
    atiWriteReg(priv, ATI_SC_BOTTOM, lastRow);
}
// ============================================================
// atiSetCursor
// ============================================================
//
// Uploads a new hardware cursor shape. A NULL image hides the
// cursor instead.
//
// The image is converted from AND/XOR masks into the 64x64, 2
// bits-per-pixel layout stored at cursorOffset in video memory
// (16 bytes per row, 4 pixels per byte, first pixel in bits 7:6).
// Each pixel's code is (AND bit << 1) | XOR bit:
//   00 = cursor color 0, 01 = cursor color 1,
//   10 = transparent,    11 = inverted
// Bytes outside the image's width/height are filled with 0xAA
// (four transparent pixels). The source masks are read with a
// stride of 8 bytes per row. Afterwards the cursor offset and the
// two cursor colors (black, white) are programmed.
static void atiSetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    if (!image) {
        atiShowCursor(drv, false);
        return;
    }

    // Let the engine finish before touching video memory directly
    atiWaitIdle(drv);

    uint8_t *cursorMem = drv->mode.framebuffer + priv->cursorOffset;

    for (int32_t row = 0; row < ATI_HW_CURSOR_SIZE; row++) {
        uint8_t *outRow = cursorMem + row * 16;

        for (int32_t col = 0; col < 16; col++) {
            uint8_t packed = 0xAA; // all transparent (10 pattern)

            if (row < image->height && col < (image->width + 3) / 4) {
                const int32_t firstPx   = col * 4;
                const int32_t maskIndex = firstPx / 8;
                uint8_t andBits = 0;
                uint8_t xorBits = 0;

                if (maskIndex < (image->width + 7) / 8) {
                    andBits = image->andMask[row * 8 + maskIndex];
                    xorBits = image->xorMask[row * 8 + maskIndex];
                }

                // Four source pixels -> one destination byte
                packed = 0;
                for (int32_t px = 0; px < 4; px++) {
                    const int32_t bitPos = 7 - ((firstPx + px) % 8);
                    const uint8_t andBit = (andBits >> bitPos) & 1;
                    const uint8_t xorBit = (xorBits >> bitPos) & 1;
                    const uint8_t code   = (uint8_t)((andBit << 1) | xorBit);

                    packed |= code << (6 - px * 2);
                }
            }

            outRow[col] = packed;
        }
    }

    // Set cursor offset (in units of 8 bytes)
    atiWriteReg(priv, ATI_CUR_OFFSET, priv->cursorOffset / 8);

    // Cursor color 0 = black, cursor color 1 = white
    atiWriteReg(priv, ATI_CUR_CLR0, 0x00000000);
    atiWriteReg(priv, ATI_CUR_CLR1, 0x00FFFFFF);
}
// ============================================================
// atiShowCursor
// ============================================================
//
// Shows or hides the hardware cursor by setting or clearing bit 7
// (0x80) of GEN_TEST_CNTL with a read-modify-write, leaving the
// register's other bits as they were.
static void atiShowCursor(AccelDriverT *drv, bool visible) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;
    const uint32_t cursorEnable = 0x80;

    uint32_t ctl = atiReadReg(priv, ATI_GEN_TEST_CNTL);

    ctl = visible ? (ctl | cursorEnable) : (ctl & ~cursorEnable);

    atiWriteReg(priv, ATI_GEN_TEST_CNTL, ctl);
}
// ============================================================
// atiShutdown
// ============================================================
//
// Tears the driver down: lets the engine finish, hides the cursor,
// unmaps the aperture and restores text mode.
//
// Registers are only touched when they are reachable (I/O mode, or
// an MMIO pointer that has actually been set up), so calling this
// on a driver whose init never mapped the aperture does not
// dereference a NULL register pointer. The MMIO pointer is cleared
// after unmapping so later stray calls cannot use a stale mapping.
static void atiShutdown(AccelDriverT *drv) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;
    const bool regsReachable = priv->useIo || priv->mmio != NULL;

    if (regsReachable) {
        // Do not pull the mapping out from under queued commands
        atiWaitIdle(drv);
        atiShowCursor(drv, false);
    }

    dpmiUnmapFramebuffer(&priv->lfbMapping);
    priv->mmio = NULL;

    vgaRestoreTextMode();
}
// ============================================================
// atiWaitFifo
// ============================================================
//
// Wait until the Mach64 FIFO has at least 'entries' free slots.
//   priv    - driver state
//   entries - number of register writes about to be queued; values
//             <= 0 return immediately, values above 16 are treated
//             as 16 (wait for a completely empty FIFO)
//
// The low 16 bits of FIFO_STAT describe occupancy: 0x8000 means 0
// free slots, lower values mean more free, 0 means all 16 free.
// 'entries' slots are therefore free exactly when none of the top
// 'entries' bits of that 16-bit field are set.
//
// The previous mask (0xFFFF >> entries) tested the low bits
// instead: it accepted 0x8000 (FIFO full) as "enough room" and
// degenerated to a no-op for entries == 16.
//
// The wait is bounded by ATI_MAX_IDLE_WAIT polls; on timeout the
// function returns silently (best effort, as before).
static void atiWaitFifo(AtiPrivateT *priv, int32_t entries) {
    if (entries <= 0) {
        return;
    }
    if (entries > 16) {
        entries = 16;
    }

    // Top 'entries' bits of the 16-bit status field
    const uint32_t occupiedMask =
        ((uint32_t)ATI_FIFO_STAT_MASK << (16 - entries)) & (uint32_t)ATI_FIFO_STAT_MASK;

    for (int32_t i = 0; i < ATI_MAX_IDLE_WAIT; i++) {
        if ((atiReadReg(priv, ATI_FIFO_STAT) & occupiedMask) == 0) {
            return;
        }
    }
}
// ============================================================
// atiWaitIdle
// ============================================================
//
// Blocks until the drawing engine reports idle: first asks
// atiWaitFifo for 16 free slots, then polls the GUI status
// register until its busy bit clears. Polling stops after
// ATI_MAX_IDLE_WAIT reads even if the engine is still busy.
static void atiWaitIdle(AccelDriverT *drv) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;
    int32_t polls = 0;

    atiWaitFifo(priv, 16);

    while (polls < ATI_MAX_IDLE_WAIT) {
        const uint32_t status = atiReadReg(priv, ATI_GUI_STAT_MMIO);

        if ((status & ATI_GUI_STAT_BUSY) == 0) {
            break;
        }
        polls++;
    }
}