// atiMach64.c -- ATI Mach64 / Rage accelerated video driver
//
// Supports the ATI Mach64 family: GX, CX, CT, ET, VT, GT (Rage II),
// and Rage Pro. These were among the most capable 2D accelerators
// of the mid-1990s, with features including:
//   - Solid and pattern rectangle fill
//   - Screen-to-screen BitBLT
//   - Host-to-screen blit (CPU data transfer)
//   - Monochrome color expansion
//   - Bresenham line draw
//   - Trapezoid fill
//   - Hardware scissor rectangle
//   - 64x64 two-color hardware cursor
//
// Register access:
//   The Mach64 has two register access methods:
//   1. I/O port: registers at block I/O base + offset. The base
//      is typically 0x02EC for Mach64, determined by CONFIG_CHIP_ID.
//   2. MMIO: register block at end of LFB (BAR0 + aperture_size - 1KB)
//      or via a dedicated BAR.
//
// We use MMIO for speed. The register block is 1KB at the end
// of the aperture (LFB base + size - 0x400 on most variants,
// or LFB base + size - 0x800 for 8MB apertures).
//
// Some early Mach64 chips (GX/CX) may not support MMIO well;
// for those we fall back to I/O port access.
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"

#include <pc.h>       // inportl / outportl
#include <stdbool.h>  // bool
#include <stdint.h>   // fixed-width integer types
#include <string.h>   // memset

// ============================================================
// ATI vendor/device IDs
// ============================================================

#define ATI_VENDOR_ID            0x1002

#define ATI_MACH64_GX            0x4758  // Mach64 GX
#define ATI_MACH64_CX            0x4358  // Mach64 CX
#define ATI_MACH64_CT            0x4354  // Mach64 CT
#define ATI_MACH64_ET            0x4554  // Mach64 ET
#define ATI_MACH64_VT            0x5654  // Mach64 VT
#define ATI_MACH64_VT_B          0x5655  // Mach64 VT-B
#define ATI_MACH64_GT            0x4754  // Mach64 GT (3D Rage II)
#define ATI_MACH64_GT_B          0x4755  // Mach64 GT-B (3D Rage II+)
#define ATI_RAGE_PRO             0x4750  // Rage Pro
#define ATI_RAGE_PRO_AGP         0x4752  // Rage Pro AGP
#define ATI_RAGE_XL_PCI          0x4752  // Rage XL PCI (shares ID with Pro AGP)
#define ATI_RAGE_128_RE          0x5245  // Rage 128 RE
#define ATI_RAGE_128_RF          0x5246  // Rage 128 RF
#define ATI_RAGE_128_RK          0x524B  // Rage 128 RK
#define ATI_RAGE_128_RL          0x524C  // Rage 128 RL
#define ATI_RAGE_128_PRO_PF      0x5046  // Rage 128 Pro PF
#define ATI_RAGE_128_PRO_PR      0x5052  // Rage 128 Pro PR
#define ATI_RAGE_FURY            0x5046  // Rage Fury (same as 128 Pro PF)

// PCI match table: (vendor, device) pairs terminated by a 0,0 pair.
//
// Only Mach64-family parts are listed. The Rage 128 IDs above are kept
// as names but are deliberately NOT matched: the Rage 128 is a different
// chip generation that does not implement the Mach64 register set this
// driver programs, so claiming it would drive the card with the wrong
// register map.
static const uint16_t sAtiDeviceIds[] = {
    ATI_VENDOR_ID, ATI_MACH64_GX,
    ATI_VENDOR_ID, ATI_MACH64_CX,
    ATI_VENDOR_ID, ATI_MACH64_CT,
    ATI_VENDOR_ID, ATI_MACH64_ET,
    ATI_VENDOR_ID, ATI_MACH64_VT,
    ATI_VENDOR_ID, ATI_MACH64_VT_B,
    ATI_VENDOR_ID, ATI_MACH64_GT,
    ATI_VENDOR_ID, ATI_MACH64_GT_B,
    ATI_VENDOR_ID, ATI_RAGE_PRO,
    ATI_VENDOR_ID, ATI_RAGE_PRO_AGP,
    0, 0
};

// ============================================================
// Mach64 register offsets (from MMIO base)
// ============================================================
//
// The Mach64 has a flat register space. For I/O access, these
// offsets are added to the I/O base port. For MMIO, they're
// byte offsets from the MMIO base address.
//
// NOTE(review): several offsets below (e.g. the DST_* block, the
// cursor block, MEM_CNTL) should be checked against the published
// Mach64 memory-mapped register map before this driver is trusted
// on real hardware.

// Drawing engine source registers
#define ATI_SRC_OFF_PITCH        0x0000  // source offset and pitch
#define ATI_SRC_Y                0x0004  // source Y
#define ATI_SRC_X                0x0008  // source X
#define ATI_SRC_Y_X              0x000C  // source Y and X combined
#define ATI_SRC_WIDTH1           0x0010
#define ATI_SRC_HEIGHT1          0x0014

// Drawing engine destination registers
#define ATI_DST_OFF_PITCH        0x0040  // destination offset and pitch
#define ATI_DST_Y                0x0044
#define ATI_DST_X                0x0048
#define ATI_DST_Y_X              0x004C
#define ATI_DST_HEIGHT           0x0050
#define ATI_DST_WIDTH            0x0054
#define ATI_DST_HEIGHT_WIDTH     0x0058  // triggers blit
#define ATI_DST_X_WIDTH          0x005C
#define ATI_DST_BRES_ERR         0x0064
#define ATI_DST_BRES_INC         0x0068
#define ATI_DST_BRES_DEC         0x006C
#define ATI_DST_BRES_LNTH        0x0070
#define ATI_DST_BRES_LNTH_END    0x0074  // triggers line draw

// Host data (CPU-to-screen)
#define ATI_HOST_DATA0           0x0200

// Scissor registers
#define ATI_SC_LEFT              0x00A0
#define ATI_SC_RIGHT             0x00A4
#define ATI_SC_TOP               0x00A8
#define ATI_SC_BOTTOM            0x00AC

// Drawing processor registers
#define ATI_DP_BKGD_CLR          0x00B0
#define ATI_DP_FRGD_CLR          0x00B4
#define ATI_DP_WRITE_MASK        0x00B8
#define ATI_DP_CHAIN_MASK        0x00BC
#define ATI_DP_PIX_WIDTH         0x00D0
#define ATI_DP_MIX               0x00D4
#define ATI_DP_SRC               0x00D8

// Clock/config
#define ATI_CLR_CMP_CNTL         0x0100
#define ATI_GUI_TRAJ_CNTL        0x00CC
#define ATI_GUI_STAT             0x00CE  // I/O only; for MMIO see below

// FIFO and status (MMIO addresses)
#define ATI_FIFO_STAT            0x0310
#define ATI_GUI_STAT_MMIO        0x0338

// Hardware cursor
#define ATI_CUR_CLR0             0x0260
#define ATI_CUR_CLR1             0x0264
#define ATI_CUR_OFFSET           0x0268
#define ATI_CUR_HORZ_VERT_POSN   0x026C
#define ATI_CUR_HORZ_VERT_OFF    0x0270
#define ATI_GEN_TEST_CNTL        0x0034  // general test/cursor control

// Memory config
#define ATI_MEM_CNTL             0x0140

// I/O and MMIO constants
#define ATI_IO_BASE_DEFAULT      0x02EC  // default block I/O base port
#define ATI_MMIO_SIZE            0x0400  // MMIO block size (1KB at end of aperture)
#define ATI_CONFIG_CHIP_ID       0x00E0

// ============================================================
// Mach64 DP_MIX values
// ============================================================
//
// The drawing processor MIX register controls the raster operation
// for foreground (bits 20:16) and background (bits 4:0).

#define ATI_MIX_NOT_DST          0x00
#define ATI_MIX_ZERO             0x01
#define ATI_MIX_ONE              0x02
#define ATI_MIX_DST              0x03
#define ATI_MIX_NOT_SRC          0x04
#define ATI_MIX_XOR              0x05
#define ATI_MIX_XNOR             0x06
#define ATI_MIX_COPY             0x07  // dest = source (most common)
#define ATI_MIX_NOT_SRC_AND      0x08
#define ATI_MIX_SRC_AND_DST      0x0C
#define ATI_MIX_SRC_OR_DST       0x0E

// Foreground mix is in bits 20:16, background in bits 4:0
#define ATI_FRGD_MIX(rop)        ((uint32_t)(rop) << 16)
#define ATI_BKGD_MIX(rop)        ((uint32_t)(rop))

// ============================================================
// Mach64 DP_SRC values
// ============================================================

#define ATI_SRC_BKGD_CLR         0x00  // background color register
#define ATI_SRC_FRGD_CLR         0x01  // foreground color register
#define ATI_SRC_HOST             0x02  // CPU host data
#define ATI_SRC_BLIT             0x03  // video memory (blit)
#define ATI_SRC_PATTERN          0x04  // pattern register

// DP_SRC packs three source selects. The layout used by the macros
// below is:
//   bits  2:0  = background source
//   bits 10:8  = foreground source
//   bits 18:16 = mono source (for color expand)
#define ATI_DP_SRC_BKGD(s)       ((uint32_t)(s))
#define ATI_DP_SRC_FRGD(s)       ((uint32_t)(s) << 8)
#define ATI_DP_SRC_MONO(s)       ((uint32_t)(s) << 16)

// ============================================================
// Mach64 DP_PIX_WIDTH values
// ============================================================

#define ATI_PIX_8BPP             0x02
#define ATI_PIX_15BPP            0x03
#define ATI_PIX_16BPP            0x04
#define ATI_PIX_32BPP            0x06

// HOST byte/word/dword order
// -- use native (little-endian)
#define ATI_HOST_BYTE_ORDER      0x00

// GUI_TRAJ_CNTL direction bits. The "LEFT"/"UP" values are zero, so a
// direction word is built by OR-ing in RIGHT and/or DOWN and clearing
// them to reverse an axis.
#define ATI_DST_X_DIR_LEFT       0x00
#define ATI_DST_X_DIR_RIGHT      0x01
#define ATI_DST_Y_DIR_UP         0x00
#define ATI_DST_Y_DIR_DOWN       0x02

// GUI_STAT busy bit
#define ATI_GUI_STAT_BUSY        0x00000001

// Low 16 bits of FIFO_STAT carry the FIFO occupancy status
#define ATI_FIFO_STAT_MASK       0x0000FFFF

// Hardware cursor size
#define ATI_HW_CURSOR_SIZE       64
#define ATI_HW_CURSOR_BYTES      1024  // 64*64*2bpp/8

// Maximum wait iterations (upper bound on the FIFO/idle polling loops)
#define ATI_MAX_IDLE_WAIT        1000000

// ============================================================
// Private driver state
// ============================================================

// Per-driver state, reachable through AccelDriverT.privData.
// Zeroed and then filled in by atiInit().
typedef struct {
    uint32_t           lfbPhysAddr;   // physical address of the aperture (BAR0, flag bits masked off)
    uint32_t           vramSize;      // video memory size in bytes
    uint32_t           cursorOffset;  // byte offset of the cursor image from the framebuffer base
    int32_t            bytesPerPixel; // derived from the VESA mode's bpp, rounded up
    int32_t            screenPitch;   // scanline pitch in bytes, as reported by the VESA mode
    volatile uint32_t *mmio;          // mapped MMIO register base
    uint32_t           mmioPhysAddr;  // physical address of the MMIO block (MMIO mode only)
    bool               useIo;         // fall back to I/O on old GX/CX
    uint16_t           ioBase;        // I/O base port for register access
    DpmiMappingT       lfbMapping;    // mapping handle for the whole aperture
} AtiPrivateT;

// ============================================================
// Prototypes
// ============================================================

static void atiBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static void atiColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg);
static bool atiDetect(AccelDriverT *drv);
static void atiHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool atiInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void atiLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color);
static void atiMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void atiRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void atiRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg);
static void atiSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void atiSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void atiShowCursor(AccelDriverT *drv, bool visible);
static void atiShutdown(AccelDriverT *drv);
static void atiWaitFifo(AtiPrivateT *priv, int32_t entries);
static void atiWaitIdle(AccelDriverT *drv);
static void atiWriteReg(AtiPrivateT *priv, uint32_t reg, uint32_t val);
static uint32_t atiReadReg(AtiPrivateT *priv, uint32_t reg);

// ============================================================
// Driver instance
// ============================================================

// Single static instance of the private state; the driver is not
// designed for more than one adapter at a time.
static AtiPrivateT sAtiPrivate;

// Driver descriptor handed to the accel framework. 'caps' starts at 0
// and is filled in by atiInit(); 'name' is refined by atiDetect().
static AccelDriverT sAtiDriver = {
    .name        = "ATI Mach64",
    .chipFamily  = "ati",
    .caps        = 0,
    .privData    = &sAtiPrivate,
    .detect      = atiDetect,
    .init        = atiInit,
    .shutdown    = atiShutdown,
    .waitIdle    = atiWaitIdle,
    .setClip     = atiSetClip,
    .rectFill    = atiRectFill,
    .rectFillPat = atiRectFillPat,
    .bitBlt      = atiBitBlt,
    .hostBlit    = atiHostBlit,
    .colorExpand = atiColorExpand,
    .lineDraw    = atiLineDraw,
    .setCursor   = atiSetCursor,
    .moveCursor  = atiMoveCursor,
    .showCursor  = atiShowCursor,
};

// ============================================================
// atiRegisterDriver
// ============================================================

// Public entry point: registers this driver with the accel framework.
void atiRegisterDriver(void) {
    accelRegisterDriver(&sAtiDriver);
}

// ============================================================
// atiReadReg / atiWriteReg
// ============================================================
//
// Register access abstraction. Uses MMIO when available, falls
// back to I/O port access on older chips.
static uint32_t atiReadReg(AtiPrivateT *priv, uint32_t reg) { if (priv->useIo) { return inportl(priv->ioBase + reg); } return priv->mmio[reg / 4]; } static void atiWriteReg(AtiPrivateT *priv, uint32_t reg, uint32_t val) { if (priv->useIo) { outportl(priv->ioBase + reg, val); return; } priv->mmio[reg / 4] = val; } // ============================================================ // atiBitBlt // ============================================================ // // Screen-to-screen BitBLT. The Mach64 engine handles overlapping // regions automatically based on the trajectory control register. static void atiBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; if (w <= 0 || h <= 0) { return; } // Determine blit direction uint32_t direction = ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN; int32_t sx = srcX; int32_t sy = srcY; int32_t dx = dstX; int32_t dy = dstY; if (srcX < dstX) { direction &= ~ATI_DST_X_DIR_RIGHT; sx += w - 1; dx += w - 1; } if (srcY < dstY) { direction &= ~ATI_DST_Y_DIR_DOWN; sy += h - 1; dy += h - 1; } atiWaitFifo(priv, 7); atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, direction); atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY)); atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_BLIT)); atiWriteReg(priv, ATI_SRC_Y_X, ((uint32_t)sx << 16) | (uint32_t)sy); atiWriteReg(priv, ATI_SRC_WIDTH1, w); atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)dx << 16) | (uint32_t)dy); atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, ((uint32_t)w << 16) | (uint32_t)h); } // ============================================================ // atiColorExpand // ============================================================ // // Monochrome-to-color expansion via the host data path. // Converts 1bpp source bitmap to full-color pixels using the // Mach64 engine. Source data is packed MSB-first, padded to // dword boundaries per scanline. 
static void atiColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; if (w <= 0 || h <= 0) { return; } // Number of dwords per scanline of monochrome data int32_t dwordsPerRow = (w + 31) / 32; // Set up color expand: mono source from host, fg/bg from color regs atiWaitFifo(priv, 7); atiWriteReg(priv, ATI_DP_FRGD_CLR, fg); atiWriteReg(priv, ATI_DP_BKGD_CLR, bg); atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_MONO(ATI_SRC_HOST) | ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR) | ATI_DP_SRC_BKGD(ATI_SRC_BKGD_CLR)); atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY)); atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN); atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)dstX << 16) | (uint32_t)dstY); atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, ((uint32_t)w << 16) | (uint32_t)h); // Feed monochrome data row by row through HOST_DATA0 for (int32_t row = 0; row < h; row++) { const uint8_t *rowPtr = srcBuf + row * srcPitch; for (int32_t dw = 0; dw < dwordsPerRow; dw++) { // Pack bytes into a dword (MSB-first bit order) int32_t byteOff = dw * 4; uint32_t data = 0; for (int32_t b = 0; b < 4; b++) { uint8_t srcByte = 0; if (byteOff + b < srcPitch) { srcByte = rowPtr[byteOff + b]; } data |= (uint32_t)srcByte << (24 - b * 8); } atiWaitFifo(priv, 1); atiWriteReg(priv, ATI_HOST_DATA0, data); } } } // ============================================================ // atiDetect // ============================================================ static bool atiDetect(AccelDriverT *drv) { int32_t matchIdx; if (!pciFindDeviceList(sAtiDeviceIds, &drv->pciDev, &matchIdx)) { return false; } switch (drv->pciDev.deviceId) { case ATI_MACH64_GX: drv->name = "ATI Mach64 GX"; break; case ATI_MACH64_CX: drv->name = "ATI Mach64 CX"; break; case ATI_MACH64_CT: drv->name = "ATI Mach64 CT"; break; case ATI_MACH64_ET: drv->name = "ATI 
Mach64 ET"; break; case ATI_MACH64_VT: case ATI_MACH64_VT_B: drv->name = "ATI Mach64 VT"; break; case ATI_MACH64_GT: case ATI_MACH64_GT_B: drv->name = "ATI 3D Rage II"; break; case ATI_RAGE_PRO: case ATI_RAGE_PRO_AGP: drv->name = "ATI Rage Pro"; break; case ATI_RAGE_128_RE: case ATI_RAGE_128_RF: case ATI_RAGE_128_RK: case ATI_RAGE_128_RL: drv->name = "ATI Rage 128"; break; case ATI_RAGE_128_PRO_PF: case ATI_RAGE_128_PRO_PR: drv->name = "ATI Rage 128 Pro"; break; default: drv->name = "ATI Mach64"; break; } return true; } // ============================================================ // atiHostBlit // ============================================================ // // CPU-to-screen blit. Transfers pixel data from system memory // to VRAM through the Mach64 host data registers. static void atiHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; if (w <= 0 || h <= 0) { return; } int32_t bytesPerPixel = priv->bytesPerPixel; int32_t rowBytes = w * bytesPerPixel; int32_t dwordsPerRow = (rowBytes + 3) / 4; // Set up host-to-screen blit atiWaitFifo(priv, 5); atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_HOST)); atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY)); atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN); atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)dstX << 16) | (uint32_t)dstY); atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, ((uint32_t)w << 16) | (uint32_t)h); // Write pixel data row by row through HOST_DATA0 for (int32_t row = 0; row < h; row++) { const uint8_t *rowPtr = srcBuf + row * srcPitch; for (int32_t dw = 0; dw < dwordsPerRow; dw++) { int32_t byteOff = dw * 4; uint32_t data = 0; // Pack bytes into a dword (little-endian native order) for (int32_t b = 0; b < 4; b++) { if (byteOff + b < rowBytes) { data |= (uint32_t)rowPtr[byteOff + b] << (b * 8); } } atiWaitFifo(priv, 1); 
atiWriteReg(priv, ATI_HOST_DATA0, data); } } } // ============================================================ // atiInit // ============================================================ static bool atiInit(AccelDriverT *drv, const AccelModeRequestT *req) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; memset(priv, 0, sizeof(*priv)); // Determine if this is an old GX/CX (I/O only) or newer (MMIO) priv->useIo = (drv->pciDev.deviceId == ATI_MACH64_GX || drv->pciDev.deviceId == ATI_MACH64_CX); priv->ioBase = ATI_IO_BASE_DEFAULT; // Get LFB address and size from PCI BAR0 uint32_t bar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0); priv->lfbPhysAddr = bar0 & 0xFFFFFFF0; uint32_t barSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0); // Aperture size != VRAM size on Mach64 (aperture is typically 8MB) // Read actual VRAM from MEM_CNTL register uint32_t memCntl; if (priv->useIo) { memCntl = inportl(priv->ioBase + ATI_MEM_CNTL); } else { // Need a temporary MMIO mapping to read MEM_CNTL // MMIO is at the end of the aperture priv->mmioPhysAddr = priv->lfbPhysAddr + barSize - ATI_MMIO_SIZE; memCntl = 0; // will determine from aperture size } // Determine VRAM size if (memCntl != 0) { uint32_t memSize = memCntl & 0x07; switch (memSize) { case 0: priv->vramSize = 512 * 1024; break; case 1: priv->vramSize = 1024 * 1024; break; case 2: priv->vramSize = 2 * 1024 * 1024; break; case 3: priv->vramSize = 4 * 1024 * 1024; break; case 4: priv->vramSize = 6 * 1024 * 1024; break; case 5: priv->vramSize = 8 * 1024 * 1024; break; default: priv->vramSize = 2 * 1024 * 1024; break; } } else { // Conservative fallback priv->vramSize = (barSize > 8 * 1024 * 1024) ? 
4 * 1024 * 1024 : barSize; } // Set VESA mode VesaModeResultT vesa; if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) { return false; } // Map LFB + MMIO region (map entire aperture; MMIO is at end) if (!dpmiMapFramebuffer(priv->lfbPhysAddr, barSize, &priv->lfbMapping)) { vgaRestoreTextMode(); return false; } priv->bytesPerPixel = (vesa.bpp + 7) / 8; priv->screenPitch = vesa.pitch; drv->mode.width = vesa.width; drv->mode.height = vesa.height; drv->mode.bpp = vesa.bpp; drv->mode.pitch = vesa.pitch; drv->mode.framebuffer = priv->lfbMapping.ptr; drv->mode.vramSize = priv->vramSize; drv->mode.offscreenBase = vesa.pitch * vesa.height; // Set up MMIO pointer at end of aperture if (!priv->useIo) { priv->mmio = (volatile uint32_t *)(priv->lfbMapping.ptr + barSize - ATI_MMIO_SIZE); } // Configure the drawing engine pixel width uint32_t pixWidth; switch (vesa.bpp) { case 8: pixWidth = ATI_PIX_8BPP; break; case 15: pixWidth = ATI_PIX_15BPP; break; case 16: pixWidth = ATI_PIX_16BPP; break; case 32: pixWidth = ATI_PIX_32BPP; break; default: pixWidth = ATI_PIX_16BPP; break; } // DP_PIX_WIDTH: set all fields to the same depth uint32_t dpPixWidth = pixWidth | (pixWidth << 4) // host data | (pixWidth << 8) // source | (pixWidth << 16) // destination | (pixWidth << 28); // default atiWaitFifo(priv, 2); atiWriteReg(priv, ATI_DP_PIX_WIDTH, dpPixWidth); atiWriteReg(priv, ATI_DP_WRITE_MASK, 0xFFFFFFFF); // Set DST_OFF_PITCH: offset = 0, pitch in units of 8 pixels uint32_t pitch8 = vesa.pitch / priv->bytesPerPixel / 8; atiWriteReg(priv, ATI_DST_OFF_PITCH, pitch8 << 22); atiWriteReg(priv, ATI_SRC_OFF_PITCH, pitch8 << 22); // Set up cursor at end of VRAM priv->cursorOffset = priv->vramSize - ATI_HW_CURSOR_BYTES; priv->cursorOffset &= ~(ATI_HW_CURSOR_BYTES - 1); drv->caps = ACAP_RECT_FILL | ACAP_RECT_FILL_PAT | ACAP_BITBLT | ACAP_HOST_BLIT | ACAP_COLOR_EXPAND | ACAP_LINE_DRAW | ACAP_HW_CURSOR | ACAP_CLIP; // Full screen clip atiSetClip(drv, 0, 0, vesa.width, vesa.height); 
atiWaitIdle(drv); return true; } // ============================================================ // atiLineDraw // ============================================================ // // Bresenham line draw using the Mach64 DST_BRES registers. static void atiLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; int32_t dx = x2 - x1; int32_t dy = y2 - y1; uint32_t direction = ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN; if (dx < 0) { dx = -dx; direction &= ~ATI_DST_X_DIR_RIGHT; } if (dy < 0) { dy = -dy; direction &= ~ATI_DST_Y_DIR_DOWN; } int32_t majAxis; int32_t minAxis; if (dx >= dy) { majAxis = dx; minAxis = dy; } else { majAxis = dy; minAxis = dx; // Swap X/Y major direction |= 0x04; // Y major axis select } if (majAxis == 0) { return; } int32_t errTerm = 2 * minAxis - majAxis; int32_t errInc = 2 * minAxis; int32_t errDec = 2 * (minAxis - majAxis); atiWaitFifo(priv, 8); atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, direction); atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY)); atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR)); atiWriteReg(priv, ATI_DP_FRGD_CLR, color); atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)x1 << 16) | (uint32_t)y1); atiWriteReg(priv, ATI_DST_BRES_ERR, errTerm); atiWriteReg(priv, ATI_DST_BRES_INC, errInc); atiWriteReg(priv, ATI_DST_BRES_DEC, errDec); atiWaitFifo(priv, 1); atiWriteReg(priv, ATI_DST_BRES_LNTH, majAxis + 1); } // ============================================================ // atiMoveCursor // ============================================================ static void atiMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; uint32_t offset = 0; if (x < 0) { offset |= ((-x) & 0x3F) << 16; x = 0; } if (y < 0) { offset |= (-y) & 0x3F; y = 0; } atiWriteReg(priv, ATI_CUR_HORZ_VERT_OFF, offset); atiWriteReg(priv, ATI_CUR_HORZ_VERT_POSN, ((uint32_t)x << 16) | (uint32_t)y); } // 
============================================================ // atiRectFill // ============================================================ static void atiRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; if (w <= 0 || h <= 0) { return; } atiWaitFifo(priv, 5); atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN); atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY)); atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR)); atiWriteReg(priv, ATI_DP_FRGD_CLR, color); atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)x << 16) | (uint32_t)y); atiWaitFifo(priv, 1); atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, ((uint32_t)w << 16) | (uint32_t)h); } // ============================================================ // atiRectFillPat // ============================================================ // // 8x8 mono pattern fill using the host data path. The pattern is // 8 bytes (one per row, MSB-first), tiled across the rectangle. // 1-bits use the foreground color, 0-bits use the background. // Data is fed through HOST_DATA0, repeating the 8-row pattern // for the full height, with each row padded to a dword boundary. 
static void atiRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; if (w <= 0 || h <= 0) { return; } // Number of dwords per scanline of monochrome data int32_t dwordsPerRow = (w + 31) / 32; // Set up color expand: mono source from host, fg/bg from color regs atiWaitFifo(priv, 7); atiWriteReg(priv, ATI_DP_FRGD_CLR, fg); atiWriteReg(priv, ATI_DP_BKGD_CLR, bg); atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_MONO(ATI_SRC_HOST) | ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR) | ATI_DP_SRC_BKGD(ATI_SRC_BKGD_CLR)); atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY)); atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN); atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)x << 16) | (uint32_t)y); atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, ((uint32_t)w << 16) | (uint32_t)h); // Feed tiled pattern data through HOST_DATA0 for (int32_t row = 0; row < h; row++) { uint8_t patByte = pattern[row & 7]; for (int32_t dw = 0; dw < dwordsPerRow; dw++) { // Replicate the pattern byte across all 4 bytes of the dword. // MSB-first bit order: place the pattern byte in the high byte. 
uint32_t data = ((uint32_t)patByte << 24) | ((uint32_t)patByte << 16) | ((uint32_t)patByte << 8) | (uint32_t)patByte; atiWaitFifo(priv, 1); atiWriteReg(priv, ATI_HOST_DATA0, data); } } } // ============================================================ // atiSetClip // ============================================================ static void atiSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; atiWaitFifo(priv, 4); atiWriteReg(priv, ATI_SC_LEFT, x); atiWriteReg(priv, ATI_SC_TOP, y); atiWriteReg(priv, ATI_SC_RIGHT, x + w - 1); atiWriteReg(priv, ATI_SC_BOTTOM, y + h - 1); } // ============================================================ // atiSetCursor // ============================================================ static void atiSetCursor(AccelDriverT *drv, const HwCursorImageT *image) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; if (!image) { atiShowCursor(drv, false); return; } atiWaitIdle(drv); // Write cursor image to VRAM // Mach64 cursor format: 64x64, 2bpp, rows of 16 bytes // Bit encoding: 00=cursor color 0, 01=cursor color 1, // 10=transparent, 11=inverted uint8_t *cursorMem = drv->mode.framebuffer + priv->cursorOffset; for (int32_t row = 0; row < ATI_HW_CURSOR_SIZE; row++) { for (int32_t byte = 0; byte < 16; byte++) { uint8_t val = 0xAA; // all transparent (10 pattern) if (row < image->height && byte < (image->width + 3) / 4) { // Convert AND/XOR to Mach64 2bpp encoding int32_t bitOff = byte * 4; uint8_t andBits = 0; uint8_t xorBits = 0; if (bitOff / 8 < (image->width + 7) / 8) { andBits = image->andMask[row * 8 + bitOff / 8]; xorBits = image->xorMask[row * 8 + bitOff / 8]; } // Pack 4 pixels into one byte (2 bits each) val = 0; for (int32_t px = 0; px < 4; px++) { int32_t srcBit = (bitOff + px) % 8; uint8_t andBit = (andBits >> (7 - srcBit)) & 1; uint8_t xorBit = (xorBits >> (7 - srcBit)) & 1; uint8_t pixel; if (andBit && !xorBit) { pixel = 0x02; // transparent } else if (andBit 
&& xorBit) { pixel = 0x03; // inverted } else if (!andBit && xorBit) { pixel = 0x01; // cursor color 1 } else { pixel = 0x00; // cursor color 0 } val |= pixel << (6 - px * 2); } } cursorMem[row * 16 + byte] = val; } } // Set cursor offset (in units of 8 bytes) atiWriteReg(priv, ATI_CUR_OFFSET, priv->cursorOffset / 8); // Set cursor colors (white foreground, black background) atiWriteReg(priv, ATI_CUR_CLR0, 0x00000000); atiWriteReg(priv, ATI_CUR_CLR1, 0x00FFFFFF); } // ============================================================ // atiShowCursor // ============================================================ static void atiShowCursor(AccelDriverT *drv, bool visible) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; uint32_t val = atiReadReg(priv, ATI_GEN_TEST_CNTL); if (visible) { val |= 0x80; // enable cursor } else { val &= ~0x80; } atiWriteReg(priv, ATI_GEN_TEST_CNTL, val); } // ============================================================ // atiShutdown // ============================================================ static void atiShutdown(AccelDriverT *drv) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; atiShowCursor(drv, false); dpmiUnmapFramebuffer(&priv->lfbMapping); vgaRestoreTextMode(); } // ============================================================ // atiWaitFifo // ============================================================ // // Wait until the Mach64 FIFO has at least 'entries' free slots. // The FIFO_STAT register indicates free entries (bits 15:0, // value = 0x8000 means 0 free, lower values mean more free). 
static void atiWaitFifo(AtiPrivateT *priv, int32_t entries) { uint32_t mask = ATI_FIFO_STAT_MASK >> entries; for (int32_t i = 0; i < ATI_MAX_IDLE_WAIT; i++) { if (!(atiReadReg(priv, ATI_FIFO_STAT) & mask)) { return; } } } // ============================================================ // atiWaitIdle // ============================================================ static void atiWaitIdle(AccelDriverT *drv) { AtiPrivateT *priv = (AtiPrivateT *)drv->privData; // First wait for FIFO to drain atiWaitFifo(priv, 16); // Then wait for engine idle for (int32_t i = 0; i < ATI_MAX_IDLE_WAIT; i++) { if (!(atiReadReg(priv, ATI_GUI_STAT_MMIO) & ATI_GUI_STAT_BUSY)) { return; } } }