// nvidia.c -- Nvidia RIVA 128/TNT/TNT2 accelerated video driver
//
// Supports the Nvidia RIVA family: RIVA 128, RIVA 128 ZX, TNT,
// TNT2, TNT2 Ultra, TNT2 M64, and Vanta. These were high-
// performance 2D/3D accelerators of the late 1990s featuring:
//   - Solid rectangle fill
//   - Screen-to-screen BitBLT
//   - Host-to-screen blit (CPU data transfer)
//   - Hardware clip rectangle
//   - 64x64 two-color hardware cursor via PRAMDAC
//
// Register access:
//   The NV architecture uses memory-mapped I/O via BAR0 (16MB
//   MMIO register space) and BAR1 (framebuffer). The 2D engine
//   is accessed through the FIFO user space at BAR0 + 0x800000,
//   which provides subchannel-based access to graphics objects.
//
// Subchannel layout:
//   Sub 0 (0x0000): ROP
//   Sub 1 (0x2000): Clip
//   Sub 2 (0x4000): Pattern
//   Sub 3 (0x6000): GdiRectangle (solid fill)
//   Sub 4 (0x8000): ScreenScreenBlt
//   Sub 5 (0xA000): ImageFromCpu
//
// Each subchannel has methods starting at +0x0100 within
// its range. The PGRAPH_STATUS register at 0x400700 indicates
// engine busy status (0 = idle).
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"

// NOTE(review): this file listed five system #include directives whose
// header names were lost. The four below are the standard headers the code
// in this file visibly relies on (bool, NULL, fixed-width integers, memset).
// Confirm against the project whether a fifth header is still required.
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

// ============================================================
// Nvidia vendor/device IDs
// ============================================================

#define NV_VENDOR_ID            0x10DE

#define NV_RIVA_128             0x0018  // RIVA 128
#define NV_RIVA_128_ZX          0x0019  // RIVA 128 ZX
#define NV_TNT                  0x0020  // RIVA TNT
#define NV_TNT2                 0x0028  // RIVA TNT2
#define NV_TNT2_ULTRA           0x0029  // RIVA TNT2 Ultra
#define NV_TNT2_M64             0x002D  // RIVA TNT2 M64
#define NV_VANTA                0x002C  // Vanta

// Vendor/device ID pairs handed to pciFindDeviceList(); the list is
// terminated by a 0, 0 pair.
static const uint16_t sNvDeviceIds[] = {
    NV_VENDOR_ID, NV_RIVA_128,
    NV_VENDOR_ID, NV_RIVA_128_ZX,
    NV_VENDOR_ID, NV_TNT,
    NV_VENDOR_ID, NV_TNT2,
    NV_VENDOR_ID, NV_TNT2_ULTRA,
    NV_VENDOR_ID, NV_TNT2_M64,
    NV_VENDOR_ID, NV_VANTA,
    0, 0
};

// ============================================================
// MMIO register offsets (from BAR0)
// ============================================================

// PGRAPH status
#define NV_PGRAPH_STATUS        0x400700  // 0 = idle

// PRAMDAC hardware cursor
#define NV_PRAMDAC_CURSOR_CFG   0x680300  // bit 0 = enable, bits 2:1 = color mode
#define NV_PRAMDAC_CURSOR_POS   0x680320  // cursor X/Y position

// Cursor image storage offset in VRAM.
// The cursor image lives at the top of VRAM. This driver stores a 64x64
// image at 2 bits per pixel, i.e. 1 KB (see NV_HW_CURSOR_BYTES).
// PRAMDAC fetches it from the address configured in NV_PRAMDAC_CURSOR_START.
#define NV_PRAMDAC_CURSOR_START 0x680324  // cursor image VRAM offset

// PFB -- framebuffer config (for reading VRAM size)
#define NV_PFB_BOOT_0           0x100000  // boot config (NV3)
#define NV_PFB_CFG_0            0x100200  // framebuffer config (NV4/NV5)

// ============================================================
// FIFO user space offsets (from BAR0 + 0x800000)
// ============================================================
//
// Subchannel base addresses within the user FIFO area.
#define NV_FIFO_BASE 0x800000 // Subchannel 0: ROP #define NV_ROP_SUBCHAN 0x0000 #define NV_ROP_ROP 0x0300 // raster operation // Subchannel 1: Clip #define NV_CLIP_SUBCHAN 0x2000 #define NV_CLIP_POINT 0x2300 // x | y<<16 #define NV_CLIP_SIZE 0x2304 // w | h<<16 // Subchannel 3: GdiRectangle (solid fill) #define NV_RECT_SUBCHAN 0x6000 #define NV_RECT_COLOR 0x62FC // fill color #define NV_RECT_POINT 0x6300 // x | y<<16 #define NV_RECT_SIZE 0x6304 // w | h<<16 (triggers fill) // Subchannel 4: ScreenScreenBlt #define NV_BLIT_SUBCHAN 0x8000 #define NV_BLIT_POINT_IN 0x8300 // srcX | srcY<<16 #define NV_BLIT_POINT_OUT 0x8304 // dstX | dstY<<16 #define NV_BLIT_SIZE 0x8308 // w | h<<16 // Subchannel 5: ImageFromCpu #define NV_IMAGE_SUBCHAN 0xA000 #define NV_IMAGE_POINT 0xA300 // dstX | dstY<<16 #define NV_IMAGE_SIZE_OUT 0xA304 // w | h<<16 #define NV_IMAGE_SIZE_IN 0xA308 // srcW | srcH<<16 #define NV_IMAGE_DATA 0xA400 // color data (dwords) // ============================================================ // Constants // ============================================================ #define NV_ROP_COPY 0xCC // dest = src #define NV_MMIO_SIZE 0x1000000 // 16MB MMIO region #define NV_MAX_IDLE_WAIT 1000000 #define NV_HW_CURSOR_SIZE 64 #define NV_HW_CURSOR_BYTES (NV_HW_CURSOR_SIZE * NV_HW_CURSOR_SIZE * 2 / 8) // Cursor config bits #define NV_CURSOR_ENABLE 0x01 #define NV_CURSOR_MODE_2COLOR 0x00 // 2-color mode (bits 2:1 = 0) // RIVA 128 (NV3) vs TNT (NV4/NV5) detection #define NV_ARCH_NV3 3 #define NV_ARCH_NV4 4 // ============================================================ // Private driver state // ============================================================ typedef struct { volatile uint32_t *mmio; // mapped MMIO base (BAR0) volatile uint32_t *fifo; // FIFO user space (BAR0 + 0x800000) uint32_t mmioPhysAddr; uint32_t lfbPhysAddr; uint32_t vramSize; uint32_t cursorOffset; // cursor image offset in VRAM int32_t bytesPerPixel; int32_t screenPitch; int32_t arch; // NV_ARCH_NV3 or 
NV_ARCH_NV4 DpmiMappingT mmioMapping; DpmiMappingT lfbMapping; } NvPrivateT; // ============================================================ // Prototypes // ============================================================ static void nvBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h); static bool nvDetect(AccelDriverT *drv); static uint32_t nvDetectVram(NvPrivateT *priv); static void nvHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h); static bool nvInit(AccelDriverT *drv, const AccelModeRequestT *req); static void nvMoveCursor(AccelDriverT *drv, int32_t x, int32_t y); static void nvRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color); static void nvSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h); static void nvSetCursor(AccelDriverT *drv, const HwCursorImageT *image); static void nvSetupEngine(NvPrivateT *priv); static void nvShowCursor(AccelDriverT *drv, bool visible); static void nvShutdown(AccelDriverT *drv); static void nvWaitIdle(AccelDriverT *drv); static void nvWriteFifo(NvPrivateT *priv, uint32_t offset, uint32_t val); static uint32_t nvReadMmio(NvPrivateT *priv, uint32_t offset); static void nvWriteMmio(NvPrivateT *priv, uint32_t offset, uint32_t val); // ============================================================ // Driver instance // ============================================================ static NvPrivateT sNvPrivate; static AccelDriverT sNvDriver = { .name = "Nvidia RIVA", .chipFamily = "nvidia", .caps = 0, .privData = &sNvPrivate, .detect = nvDetect, .init = nvInit, .shutdown = nvShutdown, .waitIdle = nvWaitIdle, .setClip = nvSetClip, .rectFill = nvRectFill, .rectFillPat = NULL, .bitBlt = nvBitBlt, .hostBlit = nvHostBlit, .colorExpand = NULL, .lineDraw = NULL, .setCursor = nvSetCursor, .moveCursor = nvMoveCursor, .showCursor = nvShowCursor, }; // 
============================================================ // nvRegisterDriver // ============================================================ void nvRegisterDriver(void) { accelRegisterDriver(&sNvDriver); } // ============================================================ // nvBitBlt // ============================================================ // // Screen-to-screen blit via the ScreenScreenBlt subchannel. // The NV engine handles overlapping source/destination regions // internally when the blit direction is set appropriately. static void nvBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) { NvPrivateT *priv = (NvPrivateT *)drv->privData; if (w <= 0 || h <= 0) { return; } nvWaitIdle(drv); nvWriteFifo(priv, NV_BLIT_POINT_IN, (uint32_t)srcX | ((uint32_t)srcY << 16)); nvWriteFifo(priv, NV_BLIT_POINT_OUT, (uint32_t)dstX | ((uint32_t)dstY << 16)); nvWriteFifo(priv, NV_BLIT_SIZE, (uint32_t)w | ((uint32_t)h << 16)); } // ============================================================ // nvDetect // ============================================================ static bool nvDetect(AccelDriverT *drv) { int32_t matchIdx; if (!pciFindDeviceList(sNvDeviceIds, &drv->pciDev, &matchIdx)) { return false; } switch (drv->pciDev.deviceId) { case NV_RIVA_128: drv->name = "Nvidia RIVA 128"; break; case NV_RIVA_128_ZX: drv->name = "Nvidia RIVA 128 ZX"; break; case NV_TNT: drv->name = "Nvidia RIVA TNT"; break; case NV_TNT2: drv->name = "Nvidia RIVA TNT2"; break; case NV_TNT2_ULTRA: drv->name = "Nvidia RIVA TNT2 Ultra"; break; case NV_TNT2_M64: drv->name = "Nvidia RIVA TNT2 M64"; break; case NV_VANTA: drv->name = "Nvidia Vanta"; break; default: drv->name = "Nvidia RIVA"; break; } return true; } // ============================================================ // nvDetectVram // ============================================================ // // Read VRAM size from the PFB registers. 
NV3 (RIVA 128) uses // PFB_BOOT_0, while NV4/NV5 (TNT/TNT2) use PFB_CFG_0. static uint32_t nvDetectVram(NvPrivateT *priv) { if (priv->arch == NV_ARCH_NV3) { // NV3: PFB_BOOT_0 bits 1:0 encode VRAM size uint32_t boot0 = nvReadMmio(priv, NV_PFB_BOOT_0); uint32_t sizeIdx = boot0 & 0x03; switch (sizeIdx) { case 0: return 8 * 1024 * 1024; case 1: return 2 * 1024 * 1024; case 2: return 4 * 1024 * 1024; default: return 4 * 1024 * 1024; } } // NV4/NV5: PFB_CFG_0 bits 1:0 encode VRAM size uint32_t cfg0 = nvReadMmio(priv, NV_PFB_CFG_0); uint32_t sizeIdx = cfg0 & 0x03; switch (sizeIdx) { case 0: return 32 * 1024 * 1024; case 1: return 4 * 1024 * 1024; case 2: return 8 * 1024 * 1024; case 3: return 16 * 1024 * 1024; default: return 4 * 1024 * 1024; } } // ============================================================ // nvHostBlit // ============================================================ // // CPU-to-screen blit via the ImageFromCpu subchannel. Transfers // pixel data from system memory to VRAM through the FIFO. 
static void nvHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) { NvPrivateT *priv = (NvPrivateT *)drv->privData; if (w <= 0 || h <= 0) { return; } int32_t rowBytes = w * priv->bytesPerPixel; int32_t dwordsPerRow = (rowBytes + 3) / 4; nvWaitIdle(drv); // Set up the image transfer nvWriteFifo(priv, NV_IMAGE_POINT, (uint32_t)dstX | ((uint32_t)dstY << 16)); nvWriteFifo(priv, NV_IMAGE_SIZE_OUT, (uint32_t)w | ((uint32_t)h << 16)); nvWriteFifo(priv, NV_IMAGE_SIZE_IN, (uint32_t)w | ((uint32_t)h << 16)); // Write pixel data row by row for (int32_t row = 0; row < h; row++) { const uint8_t *rowPtr = srcBuf + row * srcPitch; for (int32_t dw = 0; dw < dwordsPerRow; dw++) { int32_t byteOff = dw * 4; uint32_t data = 0; // Pack bytes into a dword (little-endian native order) for (int32_t b = 0; b < 4; b++) { if (byteOff + b < rowBytes) { data |= (uint32_t)rowPtr[byteOff + b] << (b * 8); } } // Write to the color data area; each dword goes to the // next sequential offset starting at NV_IMAGE_DATA. 
nvWriteFifo(priv, NV_IMAGE_DATA + (uint32_t)(dw * 4), data); } // Wait for engine between rows to avoid FIFO overflow nvWaitIdle(drv); } } // ============================================================ // nvInit // ============================================================ static bool nvInit(AccelDriverT *drv, const AccelModeRequestT *req) { NvPrivateT *priv = (NvPrivateT *)drv->privData; memset(priv, 0, sizeof(*priv)); // Determine architecture (NV3 vs NV4/NV5) if (drv->pciDev.deviceId == NV_RIVA_128 || drv->pciDev.deviceId == NV_RIVA_128_ZX) { priv->arch = NV_ARCH_NV3; } else { priv->arch = NV_ARCH_NV4; } // Get BAR0 (MMIO) and BAR1 (framebuffer) addresses uint32_t bar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0); uint32_t bar1 = pciRead32(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR1); priv->mmioPhysAddr = bar0 & 0xFFFFFFF0; priv->lfbPhysAddr = bar1 & 0xFFFFFFF0; // Size the framebuffer BAR uint32_t lfbBarSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR1); // Enable bus mastering and memory space access uint16_t pciCmd = pciRead16(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_COMMAND); pciCmd |= PCI_CMD_MEM_ENABLE | PCI_CMD_BUS_MASTER; pciWrite16(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_COMMAND, pciCmd); // Map MMIO region (BAR0, 16MB) if (!dpmiMapFramebuffer(priv->mmioPhysAddr, NV_MMIO_SIZE, &priv->mmioMapping)) { return false; } priv->mmio = (volatile uint32_t *)priv->mmioMapping.ptr; priv->fifo = (volatile uint32_t *)(priv->mmioMapping.ptr + NV_FIFO_BASE); // Detect VRAM size priv->vramSize = nvDetectVram(priv); // Use whichever is smaller: the BAR size or detected VRAM if (lfbBarSize < priv->vramSize) { priv->vramSize = lfbBarSize; } // Set VESA mode VesaModeResultT vesa; if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) { dpmiUnmapFramebuffer(&priv->mmioMapping); return false; } // Map framebuffer (BAR1) if 
(!dpmiMapFramebuffer(priv->lfbPhysAddr, priv->vramSize, &priv->lfbMapping)) { vgaRestoreTextMode(); dpmiUnmapFramebuffer(&priv->mmioMapping); return false; } priv->bytesPerPixel = (vesa.bpp + 7) / 8; priv->screenPitch = vesa.pitch; drv->mode.width = vesa.width; drv->mode.height = vesa.height; drv->mode.bpp = vesa.bpp; drv->mode.pitch = vesa.pitch; drv->mode.framebuffer = priv->lfbMapping.ptr; drv->mode.vramSize = priv->vramSize; drv->mode.offscreenBase = vesa.pitch * vesa.height; // Reserve space for hardware cursor at end of VRAM priv->cursorOffset = priv->vramSize - NV_HW_CURSOR_BYTES; priv->cursorOffset &= ~(uint32_t)(NV_HW_CURSOR_BYTES - 1); // Initialize the 2D engine nvSetupEngine(priv); drv->caps = ACAP_RECT_FILL | ACAP_BITBLT | ACAP_HOST_BLIT | ACAP_HW_CURSOR | ACAP_CLIP; // Set full-screen clip nvSetClip(drv, 0, 0, vesa.width, vesa.height); nvWaitIdle(drv); return true; } // ============================================================ // nvMoveCursor // ============================================================ static void nvMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) { NvPrivateT *priv = (NvPrivateT *)drv->privData; // PRAMDAC cursor position: bits 15:0 = X, bits 31:16 = Y // Negative values are handled by clamping to 0; the cursor // offset register could be used for sub-pixel adjustment but // that is not needed for typical use. if (x < 0) { x = 0; } if (y < 0) { y = 0; } nvWriteMmio(priv, NV_PRAMDAC_CURSOR_POS, (uint32_t)x | ((uint32_t)y << 16)); } // ============================================================ // nvReadMmio / nvWriteMmio // ============================================================ // // Direct MMIO register access via BAR0. 
static uint32_t nvReadMmio(NvPrivateT *priv, uint32_t offset) { return priv->mmio[offset / 4]; } static void nvWriteMmio(NvPrivateT *priv, uint32_t offset, uint32_t val) { priv->mmio[offset / 4] = val; } // ============================================================ // nvRectFill // ============================================================ // // Solid rectangle fill via the GdiRectangle subchannel. static void nvRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) { NvPrivateT *priv = (NvPrivateT *)drv->privData; if (w <= 0 || h <= 0) { return; } nvWaitIdle(drv); nvWriteFifo(priv, NV_RECT_COLOR, color); nvWriteFifo(priv, NV_RECT_POINT, (uint32_t)x | ((uint32_t)y << 16)); nvWriteFifo(priv, NV_RECT_SIZE, (uint32_t)w | ((uint32_t)h << 16)); } // ============================================================ // nvSetClip // ============================================================ // // Set the hardware clip rectangle via the Clip subchannel. static void nvSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) { NvPrivateT *priv = (NvPrivateT *)drv->privData; nvWaitIdle(drv); nvWriteFifo(priv, NV_CLIP_POINT, (uint32_t)x | ((uint32_t)y << 16)); nvWriteFifo(priv, NV_CLIP_SIZE, (uint32_t)w | ((uint32_t)h << 16)); } // ============================================================ // nvSetCursor // ============================================================ // // Upload a cursor image to VRAM and configure the PRAMDAC // to display it. The NV hardware cursor is 64x64, 2 bits per // pixel, stored in VRAM at the offset configured in // NV_PRAMDAC_CURSOR_START. 
// // 2bpp encoding: // 00 = cursor color 0 (background) // 01 = cursor color 1 (foreground) // 10 = transparent // 11 = inverted static void nvSetCursor(AccelDriverT *drv, const HwCursorImageT *image) { NvPrivateT *priv = (NvPrivateT *)drv->privData; if (!image) { nvShowCursor(drv, false); return; } nvWaitIdle(drv); // Write cursor image to VRAM at the reserved offset uint8_t *cursorMem = drv->mode.framebuffer + priv->cursorOffset; for (int32_t row = 0; row < NV_HW_CURSOR_SIZE; row++) { for (int32_t byteIdx = 0; byteIdx < 16; byteIdx++) { uint8_t val = 0xAA; // all transparent (10 pattern) if (row < image->height && byteIdx < (image->width + 3) / 4) { int32_t bitOff = byteIdx * 4; uint8_t andBits = 0; uint8_t xorBits = 0; if (bitOff / 8 < (image->width + 7) / 8) { andBits = image->andMask[row * 8 + bitOff / 8]; xorBits = image->xorMask[row * 8 + bitOff / 8]; } // Pack 4 pixels into one byte (2 bits each) val = 0; for (int32_t px = 0; px < 4; px++) { int32_t srcBit = (bitOff + px) % 8; uint8_t andBit = (andBits >> (7 - srcBit)) & 1; uint8_t xorBit = (xorBits >> (7 - srcBit)) & 1; uint8_t pixel; if (andBit && !xorBit) { pixel = 0x02; // transparent } else if (andBit && xorBit) { pixel = 0x03; // inverted } else if (!andBit && xorBit) { pixel = 0x01; // cursor color 1 } else { pixel = 0x00; // cursor color 0 } val |= pixel << (6 - px * 2); } } cursorMem[row * 16 + byteIdx] = val; } } // Point the PRAMDAC at the cursor image in VRAM nvWriteMmio(priv, NV_PRAMDAC_CURSOR_START, priv->cursorOffset); } // ============================================================ // nvSetupEngine // ============================================================ // // Initialize the 2D acceleration engine. Sets the ROP to copy // mode and prepares the FIFO subchannels for use. 
static void nvSetupEngine(NvPrivateT *priv) { // Set ROP to copy nvWriteFifo(priv, NV_ROP_ROP, NV_ROP_COPY); } // ============================================================ // nvShowCursor // ============================================================ static void nvShowCursor(AccelDriverT *drv, bool visible) { NvPrivateT *priv = (NvPrivateT *)drv->privData; uint32_t cfg = nvReadMmio(priv, NV_PRAMDAC_CURSOR_CFG); if (visible) { cfg |= NV_CURSOR_ENABLE; } else { cfg &= ~(uint32_t)NV_CURSOR_ENABLE; } nvWriteMmio(priv, NV_PRAMDAC_CURSOR_CFG, cfg); } // ============================================================ // nvShutdown // ============================================================ static void nvShutdown(AccelDriverT *drv) { NvPrivateT *priv = (NvPrivateT *)drv->privData; nvShowCursor(drv, false); vgaRestoreTextMode(); dpmiUnmapFramebuffer(&priv->lfbMapping); dpmiUnmapFramebuffer(&priv->mmioMapping); } // ============================================================ // nvWaitIdle // ============================================================ // // Wait for the PGRAPH engine to become idle by polling the // PGRAPH_STATUS register. static void nvWaitIdle(AccelDriverT *drv) { NvPrivateT *priv = (NvPrivateT *)drv->privData; for (int32_t i = 0; i < NV_MAX_IDLE_WAIT; i++) { if (nvReadMmio(priv, NV_PGRAPH_STATUS) == 0) { return; } } } // ============================================================ // nvWriteFifo // ============================================================ // // Write a value to the FIFO user space. The offset is relative // to the FIFO base (BAR0 + 0x800000). static void nvWriteFifo(NvPrivateT *priv, uint32_t offset, uint32_t val) { priv->fifo[offset / 4] = val; }