DOS_Video/nvidia.c
2026-04-13 19:40:45 -05:00

677 lines
22 KiB
C

// nvidia.c -- Nvidia RIVA 128/TNT/TNT2 accelerated video driver
//
// Supports the Nvidia RIVA family: RIVA 128, RIVA 128 ZX, TNT,
// TNT2, TNT2 Ultra, TNT2 M64, and Vanta. These were high-
// performance 2D/3D accelerators of the late 1990s featuring:
// - Solid rectangle fill
// - Screen-to-screen BitBLT
// - Host-to-screen blit (CPU data transfer)
// - Hardware clip rectangle
// - 64x64 two-color hardware cursor via PRAMDAC
//
// Register access:
// The NV architecture uses memory-mapped I/O via BAR0 (16MB
// MMIO register space) and BAR1 (framebuffer). The 2D engine
// is accessed through the FIFO user space at BAR0 + 0x800000,
// which provides subchannel-based access to graphics objects.
//
// Subchannel layout:
// Sub 0 (0x0000): ROP
// Sub 1 (0x2000): Clip
// Sub 2 (0x4000): Pattern
// Sub 3 (0x6000): GdiRectangle (solid fill)
// Sub 4 (0x8000): ScreenScreenBlt
// Sub 5 (0xA000): ImageFromCpu
//
// Each subchannel has methods starting at +0x0100 within
// its range. The PGRAPH_STATUS register at 0x400700 indicates
// engine busy status (0 = idle).
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/nearptr.h>
// ============================================================
// Nvidia vendor/device IDs
// ============================================================
// PCI vendor ID shared by every device listed below.
#define NV_VENDOR_ID 0x10DE
// PCI device IDs of the supported chips. nvDetect() switches on these to
// pick a display name, and nvInit() uses the two RIVA 128 IDs to select
// the NV3 architecture.
#define NV_RIVA_128 0x0018 // RIVA 128
#define NV_RIVA_128_ZX 0x0019 // RIVA 128 ZX
#define NV_TNT 0x0020 // RIVA TNT
#define NV_TNT2 0x0028 // RIVA TNT2
#define NV_TNT2_ULTRA 0x0029 // RIVA TNT2 Ultra
#define NV_TNT2_M64 0x002D // RIVA TNT2 M64
#define NV_VANTA 0x002C // Vanta
// Flat list of (vendor ID, device ID) pairs passed to pciFindDeviceList()
// by nvDetect(). The trailing 0, 0 pair looks like the list terminator --
// confirm against pciFindDeviceList() in pci.h.
static const uint16_t sNvDeviceIds[] = {
NV_VENDOR_ID, NV_RIVA_128,
NV_VENDOR_ID, NV_RIVA_128_ZX,
NV_VENDOR_ID, NV_TNT,
NV_VENDOR_ID, NV_TNT2,
NV_VENDOR_ID, NV_TNT2_ULTRA,
NV_VENDOR_ID, NV_TNT2_M64,
NV_VENDOR_ID, NV_VANTA,
0, 0
};
// ============================================================
// MMIO register offsets (from BAR0)
// ============================================================
// PGRAPH status
// Polled by nvWaitIdle(); all offsets in this section are byte offsets
// from the start of BAR0 and are used with nvReadMmio()/nvWriteMmio().
#define NV_PGRAPH_STATUS 0x400700 // 0 = idle
// PRAMDAC hardware cursor
#define NV_PRAMDAC_CURSOR_CFG 0x680300 // bit 0 = enable, bits 2:1 = color mode
#define NV_PRAMDAC_CURSOR_POS 0x680320 // cursor X/Y position
// Cursor image location in VRAM
// nvInit() reserves the last NV_HW_CURSOR_BYTES of VRAM for the cursor
// image (64x64 pixels at 2 bits per pixel = 1 KB) and nvSetCursor() writes
// that offset to NV_PRAMDAC_CURSOR_START after uploading the image.
#define NV_PRAMDAC_CURSOR_START 0x680324 // cursor image VRAM offset
// PFB -- framebuffer config (for reading VRAM size)
#define NV_PFB_BOOT_0 0x100000 // boot config; bits 1:0 hold the VRAM size code
#define NV_PFB_CFG_0 0x100200 // framebuffer config (NV4/NV5)
// ============================================================
// FIFO user space offsets (from BAR0 + 0x800000)
// ============================================================
//
// Subchannel base addresses within the user FIFO area.
// Byte offset of the FIFO user area from BAR0; nvInit() adds it to the
// MMIO mapping to form priv->fifo. Every register offset below is a byte
// offset from that base and already includes its subchannel's base, so it
// can be passed directly to nvWriteFifo().
#define NV_FIFO_BASE 0x800000
// Subchannel 0: ROP
#define NV_ROP_SUBCHAN 0x0000
#define NV_ROP_ROP 0x0300 // raster operation
// Subchannel 1: Clip
#define NV_CLIP_SUBCHAN 0x2000
#define NV_CLIP_POINT 0x2300 // x | y<<16
#define NV_CLIP_SIZE 0x2304 // w | h<<16
// Subchannel 3: GdiRectangle (solid fill)
#define NV_RECT_SUBCHAN 0x6000
#define NV_RECT_COLOR 0x62FC // fill color
#define NV_RECT_POINT 0x6300 // x | y<<16
#define NV_RECT_SIZE 0x6304 // w | h<<16 (triggers fill)
// Subchannel 4: ScreenScreenBlt
#define NV_BLIT_SUBCHAN 0x8000
#define NV_BLIT_POINT_IN 0x8300 // srcX | srcY<<16
#define NV_BLIT_POINT_OUT 0x8304 // dstX | dstY<<16
#define NV_BLIT_SIZE 0x8308 // w | h<<16
// Subchannel 5: ImageFromCpu
#define NV_IMAGE_SUBCHAN 0xA000
#define NV_IMAGE_POINT 0xA300 // dstX | dstY<<16
#define NV_IMAGE_SIZE_OUT 0xA304 // w | h<<16
#define NV_IMAGE_SIZE_IN 0xA308 // srcW | srcH<<16
// nvHostBlit() writes dword N of a row at NV_IMAGE_DATA + N * 4.
#define NV_IMAGE_DATA 0xA400 // color data (dwords)
// ============================================================
// Constants
// ============================================================
// Written to NV_ROP_ROP once by nvSetupEngine().
#define NV_ROP_COPY 0xCC // dest = src
// Size passed to dpmiMapFramebuffer() when nvInit() maps BAR0.
#define NV_MMIO_SIZE 0x1000000 // 16MB MMIO region
// Upper bound on NV_PGRAPH_STATUS polls in nvWaitIdle(); once exhausted,
// nvWaitIdle() returns without reporting the timeout.
#define NV_MAX_IDLE_WAIT 1000000
// Cursor is NV_HW_CURSOR_SIZE pixels on each side at 2 bits per pixel,
// so NV_HW_CURSOR_BYTES evaluates to 64 * 64 * 2 / 8 = 1024 bytes.
#define NV_HW_CURSOR_SIZE 64
#define NV_HW_CURSOR_BYTES (NV_HW_CURSOR_SIZE * NV_HW_CURSOR_SIZE * 2 / 8)
// Cursor config bits
#define NV_CURSOR_ENABLE 0x01
#define NV_CURSOR_MODE_2COLOR 0x00 // 2-color mode (bits 2:1 = 0)
// RIVA 128 (NV3) vs TNT (NV4/NV5) detection
// nvInit() assigns NV_ARCH_NV3 to the RIVA 128 / 128 ZX device IDs and
// NV_ARCH_NV4 to every other supported device.
#define NV_ARCH_NV3 3
#define NV_ARCH_NV4 4
// ============================================================
// Private driver state
// ============================================================
// Driver-private state. A single static instance (sNvPrivate) is attached
// to the driver through AccelDriverT.privData; nvInit() zeroes it and then
// fills in every field.
typedef struct {
volatile uint32_t *mmio; // mapped MMIO base (BAR0)
volatile uint32_t *fifo; // FIFO user space (BAR0 + 0x800000)
uint32_t mmioPhysAddr; // PCI BAR0 value with the low 4 bits masked off
uint32_t lfbPhysAddr; // PCI BAR1 value with the low 4 bits masked off
uint32_t vramSize; // detected VRAM size in bytes, capped at the BAR1 size
uint32_t cursorOffset; // cursor image offset in VRAM
int32_t bytesPerPixel; // (bpp + 7) / 8 of the mode that was set
int32_t screenPitch; // pitch reported by the VESA mode set (presumably bytes per scanline)
int32_t arch; // NV_ARCH_NV3 or NV_ARCH_NV4
DpmiMappingT mmioMapping; // BAR0 mapping, released by nvShutdown()
DpmiMappingT lfbMapping; // BAR1 mapping, released by nvShutdown()
} NvPrivateT;
// ============================================================
// Prototypes
// ============================================================
// Forward declarations for the file's static functions, so that the
// sNvDriver table below can reference the hooks before their definitions.
// Functions taking AccelDriverT * are installed as driver hooks or called
// by them; functions taking NvPrivateT * are internal register helpers.
static void nvBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool nvDetect(AccelDriverT *drv);
static uint32_t nvDetectVram(NvPrivateT *priv);
static void nvHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool nvInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void nvMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void nvRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void nvSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void nvSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void nvSetupEngine(NvPrivateT *priv);
static void nvShowCursor(AccelDriverT *drv, bool visible);
static void nvShutdown(AccelDriverT *drv);
static void nvWaitIdle(AccelDriverT *drv);
static void nvWriteFifo(NvPrivateT *priv, uint32_t offset, uint32_t val);
static uint32_t nvReadMmio(NvPrivateT *priv, uint32_t offset);
static void nvWriteMmio(NvPrivateT *priv, uint32_t offset, uint32_t val);
// ============================================================
// Driver instance
// ============================================================
// Backing storage for the driver's private state (see .privData below).
static NvPrivateT sNvPrivate;
// Driver descriptor handed to accelRegisterDriver() by nvRegisterDriver().
// .name is a generic default that nvDetect() replaces with the specific
// chip name, and .caps stays 0 until nvInit() sets the capability bits.
// The hooks left NULL (rectFillPat, colorExpand, lineDraw) have no
// implementation in this driver.
static AccelDriverT sNvDriver = {
.name = "Nvidia RIVA",
.chipFamily = "nvidia",
.caps = 0,
.privData = &sNvPrivate,
.detect = nvDetect,
.init = nvInit,
.shutdown = nvShutdown,
.waitIdle = nvWaitIdle,
.setClip = nvSetClip,
.rectFill = nvRectFill,
.rectFillPat = NULL,
.bitBlt = nvBitBlt,
.hostBlit = nvHostBlit,
.colorExpand = NULL,
.lineDraw = NULL,
.setCursor = nvSetCursor,
.moveCursor = nvMoveCursor,
.showCursor = nvShowCursor,
};
// ============================================================
// nvRegisterDriver
// ============================================================
void nvRegisterDriver(void) {
accelRegisterDriver(&sNvDriver);
}
// ============================================================
// nvBitBlt
// ============================================================
//
// Screen-to-screen blit via the ScreenScreenBlt subchannel.
// This routine programs only the source point, destination point,
// and size; it does not select a blit direction, so the result for
// overlapping source/destination regions is left to the engine.
//
// Parameters:
//   srcX/srcY - top-left of the source rectangle, in pixels
//   dstX/dstY - top-left of the destination rectangle, in pixels
//   w/h       - rectangle size in pixels; the call is a no-op unless
//               both are positive
//
// Each register takes two 16-bit fields packed into one dword (low half
// = X or width, high half = Y or height). The low-half operand is masked
// to 16 bits so that a negative or oversized value cannot spill into the
// high-half field.
static void nvBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    const uint32_t srcPoint = ((uint32_t)srcX & 0xFFFFu) | ((uint32_t)srcY << 16);
    const uint32_t dstPoint = ((uint32_t)dstX & 0xFFFFu) | ((uint32_t)dstY << 16);
    const uint32_t extent   = ((uint32_t)w & 0xFFFFu) | ((uint32_t)h << 16);

    // Let any previous operation finish before queueing the blit.
    nvWaitIdle(drv);

    nvWriteFifo(priv, NV_BLIT_POINT_IN, srcPoint);
    nvWriteFifo(priv, NV_BLIT_POINT_OUT, dstPoint);
    nvWriteFifo(priv, NV_BLIT_SIZE, extent);
}
// ============================================================
// nvDetect
// ============================================================
//
// Look for a supported device by passing sNvDeviceIds to
// pciFindDeviceList(). On success, drv->name is set to a chip-specific
// string chosen from drv->pciDev.deviceId (a generic "Nvidia RIVA" if the
// ID is not one of the named chips).
//
// Returns true when pciFindDeviceList() reports a match, false otherwise.
static bool nvDetect(AccelDriverT *drv) {
    int32_t listPos; // match index reported by pciFindDeviceList(); not used here

    if (!pciFindDeviceList(sNvDeviceIds, &drv->pciDev, &listPos)) {
        return false;
    }

    const uint32_t chipId = drv->pciDev.deviceId;

    if (chipId == NV_RIVA_128) {
        drv->name = "Nvidia RIVA 128";
    } else if (chipId == NV_RIVA_128_ZX) {
        drv->name = "Nvidia RIVA 128 ZX";
    } else if (chipId == NV_TNT) {
        drv->name = "Nvidia RIVA TNT";
    } else if (chipId == NV_TNT2) {
        drv->name = "Nvidia RIVA TNT2";
    } else if (chipId == NV_TNT2_ULTRA) {
        drv->name = "Nvidia RIVA TNT2 Ultra";
    } else if (chipId == NV_TNT2_M64) {
        drv->name = "Nvidia RIVA TNT2 M64";
    } else if (chipId == NV_VANTA) {
        drv->name = "Nvidia Vanta";
    } else {
        drv->name = "Nvidia RIVA";
    }

    return true;
}
// ============================================================
// nvDetectVram
// ============================================================
//
// Read VRAM size from the PFB registers. NV3 (RIVA 128) uses
// PFB_BOOT_0, while NV4/NV5 (TNT/TNT2) use PFB_CFG_0.
//
// Requires priv->mmio to be mapped and priv->arch to be set.
// Returns the VRAM size in bytes.
//
// The RAM-amount code is taken from bits 1:0 of PFB_BOOT_0 (0x100000) on
// both generations; only the meaning of the code differs. The open-source
// nv and nouveau drivers decode the NV4/NV5 size from PFB_BOOT_0 as well
// (0 = 32 MB, 1 = 4 MB, 2 = 8 MB, 3 = 16 MB), whereas 0x100200
// (PFB_CFG_0) is a configuration register whose low bits are not a
// RAM-size field, so it is not read here.
static uint32_t nvDetectVram(NvPrivateT *priv) {
    const uint32_t mib     = 1024u * 1024u;
    const uint32_t sizeIdx = nvReadMmio(priv, NV_PFB_BOOT_0) & 0x03;

    if (priv->arch == NV_ARCH_NV3) {
        // NV3 (RIVA 128): code 3 has no dedicated entry and falls back
        // to 4 MB.
        switch (sizeIdx) {
            case 0:  return 8 * mib;
            case 1:  return 2 * mib;
            case 2:  return 4 * mib;
            default: return 4 * mib;
        }
    }

    // NV4/NV5 (TNT/TNT2 family): all four codes are defined.
    switch (sizeIdx) {
        case 0:  return 32 * mib;
        case 1:  return 4 * mib;
        case 2:  return 8 * mib;
        default: return 16 * mib; // sizeIdx == 3
    }
}
// ============================================================
// nvHostBlit
// ============================================================
//
// CPU-to-screen blit via the ImageFromCpu subchannel. Transfers
// pixel data from system memory to VRAM through the FIFO.
//
// Parameters:
//   srcBuf    - first source row in system memory
//   srcPitch  - byte distance between consecutive source rows
//   dstX/dstY - destination top-left, in pixels
//   w/h       - size in pixels; the call is a no-op unless both are
//               positive
//
// Each row contributes w * bytesPerPixel bytes, sent as whole dwords with
// the final dword zero-padded. The point/size registers hold two 16-bit
// fields (low half = X or width, high half = Y or height), so the
// low-half operand is masked to 16 bits to stop a negative or oversized
// value from spilling into the high-half field.
static void nvHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    const int32_t  rowBytes     = w * priv->bytesPerPixel;
    const int32_t  dwordsPerRow = (rowBytes + 3) / 4;
    const uint32_t dstPoint     = ((uint32_t)dstX & 0xFFFFu) | ((uint32_t)dstY << 16);
    const uint32_t extent       = ((uint32_t)w & 0xFFFFu) | ((uint32_t)h << 16);

    nvWaitIdle(drv);

    // Set up the image transfer; source and destination sizes are equal.
    nvWriteFifo(priv, NV_IMAGE_POINT, dstPoint);
    nvWriteFifo(priv, NV_IMAGE_SIZE_OUT, extent);
    nvWriteFifo(priv, NV_IMAGE_SIZE_IN, extent);

    // NOTE(review): the data offset grows with the dword index, so a row
    // longer than 0x1C00 bytes would be written beyond subchannel 5's
    // 0x2000-byte range (0xA000-0xBFFF) -- confirm the widest row callers
    // can pass.
    for (int32_t row = 0; row < h; row++) {
        const uint8_t *rowPtr = srcBuf + row * srcPitch;

        for (int32_t dw = 0; dw < dwordsPerRow; dw++) {
            const int32_t byteOff = dw * 4;
            uint32_t      data    = 0;

            // Assemble the dword byte by byte (lowest address in the
            // lowest byte) so nothing past the end of the row is read.
            for (int32_t b = 0; b < 4; b++) {
                if (byteOff + b < rowBytes) {
                    data |= (uint32_t)rowPtr[byteOff + b] << (b * 8);
                }
            }

            // Each dword goes to the next sequential offset starting at
            // NV_IMAGE_DATA.
            nvWriteFifo(priv, NV_IMAGE_DATA + (uint32_t)(dw * 4), data);
        }

        // Wait for engine between rows to avoid FIFO overflow
        nvWaitIdle(drv);
    }
}
// ============================================================
// nvInit
// ============================================================
//
// Bring the adapter up for the mode described by req:
//   1. reset the private state and classify the chip as NV3 or NV4,
//   2. read BAR0/BAR1, size BAR1, and enable PCI memory access and bus
//      mastering,
//   3. map the MMIO window and derive the FIFO pointer,
//   4. detect VRAM (capped at the BAR1 size), set the VESA mode, and map
//      the framebuffer,
//   5. publish the mode in drv->mode, reserve the cursor area at the top
//      of VRAM, initialize the engine, and advertise capabilities.
//
// Returns true on success. Returns false if the MMIO mapping, the mode
// set, or the framebuffer mapping fails; in the latter two cases the MMIO
// mapping is released again, and text mode is restored if the mode had
// already been switched.
static bool nvInit(AccelDriverT *drv, const AccelModeRequestT *req) {
    NvPrivateT      *state = (NvPrivateT *)drv->privData;
    VesaModeResultT  modeInfo;
    uint32_t         barBytes;
    uint16_t         command;

    memset(state, 0, sizeof(*state));

    // RIVA 128 and RIVA 128 ZX are NV3; every other device is handled
    // as NV4/NV5.
    switch (drv->pciDev.deviceId) {
        case NV_RIVA_128:
        case NV_RIVA_128_ZX:
            state->arch = NV_ARCH_NV3;
            break;
        default:
            state->arch = NV_ARCH_NV4;
            break;
    }

    // BAR0 = MMIO registers, BAR1 = framebuffer; drop the low flag bits.
    state->mmioPhysAddr = pciRead32(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0) & 0xFFFFFFF0;
    state->lfbPhysAddr  = pciRead32(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR1) & 0xFFFFFFF0;

    barBytes = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR1);

    // Turn on memory-space decoding and bus mastering.
    command = pciRead16(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_COMMAND);
    command = (uint16_t)(command | PCI_CMD_MEM_ENABLE | PCI_CMD_BUS_MASTER);
    pciWrite16(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_COMMAND, command);

    // Map the 16MB register window; nothing to undo if this fails.
    if (!dpmiMapFramebuffer(state->mmioPhysAddr, NV_MMIO_SIZE, &state->mmioMapping)) {
        return false;
    }

    state->mmio = (volatile uint32_t *)state->mmioMapping.ptr;
    state->fifo = (volatile uint32_t *)(state->mmioMapping.ptr + NV_FIFO_BASE);

    // Never map more framebuffer than the BAR exposes.
    state->vramSize = nvDetectVram(state);
    if (state->vramSize > barBytes) {
        state->vramSize = barBytes;
    }

    if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &modeInfo)) {
        goto releaseMmio;
    }

    if (!dpmiMapFramebuffer(state->lfbPhysAddr, state->vramSize, &state->lfbMapping)) {
        vgaRestoreTextMode();
        goto releaseMmio;
    }

    state->bytesPerPixel = (modeInfo.bpp + 7) / 8;
    state->screenPitch   = modeInfo.pitch;

    drv->mode.width         = modeInfo.width;
    drv->mode.height        = modeInfo.height;
    drv->mode.bpp           = modeInfo.bpp;
    drv->mode.pitch         = modeInfo.pitch;
    drv->mode.framebuffer   = state->lfbMapping.ptr;
    drv->mode.vramSize      = state->vramSize;
    drv->mode.offscreenBase = modeInfo.pitch * modeInfo.height;

    // The cursor image occupies the last NV_HW_CURSOR_BYTES of VRAM,
    // aligned down to a multiple of its own size.
    state->cursorOffset = (state->vramSize - NV_HW_CURSOR_BYTES) & ~(uint32_t)(NV_HW_CURSOR_BYTES - 1);

    nvSetupEngine(state);

    drv->caps = ACAP_RECT_FILL | ACAP_BITBLT | ACAP_HOST_BLIT | ACAP_HW_CURSOR | ACAP_CLIP;

    // Start with the clip rectangle covering the whole screen.
    nvSetClip(drv, 0, 0, modeInfo.width, modeInfo.height);
    nvWaitIdle(drv);
    return true;

releaseMmio:
    dpmiUnmapFramebuffer(&state->mmioMapping);
    return false;
}
// ============================================================
// nvMoveCursor
// ============================================================
//
// Move the hardware cursor to (x, y). Negative coordinates are clamped
// to 0 before being written. The position register takes X in bits 15:0
// and Y in bits 31:16.
static void nvMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    NvPrivateT    *priv   = (NvPrivateT *)drv->privData;
    const uint32_t column = (x < 0) ? 0u : (uint32_t)x;
    const uint32_t line   = (y < 0) ? 0u : (uint32_t)y;

    nvWriteMmio(priv, NV_PRAMDAC_CURSOR_POS, column | (line << 16));
}
// ============================================================
// nvReadMmio / nvWriteMmio
// ============================================================
//
// Direct MMIO register access via BAR0.
// Read the 32-bit register at byte offset `offset` from the mapped BAR0
// base. The offset is converted to a dword index, so it is expected to
// be a multiple of 4.
static uint32_t nvReadMmio(NvPrivateT *priv, uint32_t offset) {
    volatile uint32_t *reg = priv->mmio + (offset >> 2);

    return *reg;
}
// Store `val` in the 32-bit register at byte offset `offset` from the
// mapped BAR0 base. The offset is converted to a dword index, so it is
// expected to be a multiple of 4.
static void nvWriteMmio(NvPrivateT *priv, uint32_t offset, uint32_t val) {
    volatile uint32_t *reg = priv->mmio + (offset >> 2);

    *reg = val;
}
// ============================================================
// nvRectFill
// ============================================================
//
// Solid rectangle fill via the GdiRectangle subchannel.
//
// Parameters:
//   x/y   - top-left corner of the rectangle, in pixels
//   w/h   - size in pixels; the call is a no-op unless both are positive
//   color - value written unchanged to the fill-color register
//
// The point and size registers each hold two 16-bit fields (low half =
// X or width, high half = Y or height). The low-half operand is masked
// to 16 bits so that a negative or oversized value cannot spill into the
// high-half field. Writing the size register starts the fill.
static void nvRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    const uint32_t origin = ((uint32_t)x & 0xFFFFu) | ((uint32_t)y << 16);
    const uint32_t extent = ((uint32_t)w & 0xFFFFu) | ((uint32_t)h << 16);

    nvWaitIdle(drv);

    nvWriteFifo(priv, NV_RECT_COLOR, color);
    nvWriteFifo(priv, NV_RECT_POINT, origin);
    nvWriteFifo(priv, NV_RECT_SIZE, extent);
}
// ============================================================
// nvSetClip
// ============================================================
//
// Set the hardware clip rectangle via the Clip subchannel.
//
// Parameters:
//   x/y - top-left corner of the clip rectangle, in pixels
//   w/h - clip rectangle size in pixels (not validated here)
//
// The point and size registers each hold two 16-bit fields (low half =
// X or width, high half = Y or height). The low-half operand is masked
// to 16 bits so that a negative or oversized value cannot spill into the
// high-half field.
static void nvSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
    NvPrivateT    *priv   = (NvPrivateT *)drv->privData;
    const uint32_t origin = ((uint32_t)x & 0xFFFFu) | ((uint32_t)y << 16);
    const uint32_t extent = ((uint32_t)w & 0xFFFFu) | ((uint32_t)h << 16);

    nvWaitIdle(drv);

    nvWriteFifo(priv, NV_CLIP_POINT, origin);
    nvWriteFifo(priv, NV_CLIP_SIZE, extent);
}
// ============================================================
// nvSetCursor
// ============================================================
//
// Upload a cursor image to VRAM and configure the PRAMDAC
// to display it. The NV hardware cursor is 64x64, 2 bits per
// pixel, stored in VRAM at the offset configured in
// NV_PRAMDAC_CURSOR_START.
//
// 2bpp encoding:
// 00 = cursor color 0 (background)
// 01 = cursor color 1 (foreground)
// 10 = transparent
// 11 = inverted
//
// Parameters:
//   image - cursor shape as AND/XOR bit masks plus width/height; a NULL
//           image hides the cursor and uploads nothing.
//
// The masks are read most-significant bit first with a fixed stride of
// 8 bytes per row. Each AND/XOR bit pair maps to one 2bpp pixel:
//   AND=0 XOR=0 -> 00 (cursor color 0)
//   AND=0 XOR=1 -> 01 (cursor color 1)
//   AND=1 XOR=0 -> 10 (transparent)
//   AND=1 XOR=1 -> 11 (inverted)
// Four pixels are packed per byte, leftmost pixel in the top two bits.
// Everything outside the image's width/height is written as transparent,
// including the trailing pixels of a byte that straddles the right edge
// when the width is not a multiple of 4.
//
// This only uploads the image and programs its VRAM offset; the enable
// bit is not touched here.
static void nvSetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    if (!image) {
        nvShowCursor(drv, false);
        return;
    }

    nvWaitIdle(drv);

    // Write cursor image to VRAM at the reserved offset
    uint8_t *cursorMem = drv->mode.framebuffer + priv->cursorOffset;

    for (int32_t row = 0; row < NV_HW_CURSOR_SIZE; row++) {
        for (int32_t byteIdx = 0; byteIdx < 16; byteIdx++) {
            const int32_t firstPx = byteIdx * 4;
            uint8_t       val     = 0xAA; // all transparent (10 pattern)

            if (row < image->height && firstPx < image->width) {
                // firstPx < width guarantees the mask byte is in range.
                const uint8_t andBits = image->andMask[row * 8 + firstPx / 8];
                const uint8_t xorBits = image->xorMask[row * 8 + firstPx / 8];

                // Pack 4 pixels into one byte (2 bits each)
                val = 0;
                for (int32_t px = 0; px < 4; px++) {
                    const int32_t column = firstPx + px;
                    uint8_t       pixel  = 0x02; // transparent

                    // Columns past the image width stay transparent rather
                    // than being decoded from mask padding bits.
                    if (column < image->width) {
                        const int32_t srcBit = column % 8;
                        const uint8_t andBit = (andBits >> (7 - srcBit)) & 1;
                        const uint8_t xorBit = (xorBits >> (7 - srcBit)) & 1;

                        if (andBit) {
                            pixel = xorBit ? 0x03 : 0x02; // inverted / transparent
                        } else {
                            pixel = xorBit ? 0x01 : 0x00; // cursor color 1 / 0
                        }
                    }

                    val |= (uint8_t)(pixel << (6 - px * 2));
                }
            }

            cursorMem[row * 16 + byteIdx] = val;
        }
    }

    // Point the PRAMDAC at the cursor image in VRAM
    nvWriteMmio(priv, NV_PRAMDAC_CURSOR_START, priv->cursorOffset);
}
// ============================================================
// nvSetupEngine
// ============================================================
//
// Initialize the 2D acceleration engine. Sets the ROP to copy
// mode and prepares the FIFO subchannels for use.
//
// One-time engine setup called from nvInit(): the only state programmed
// here is the raster operation, which is set to copy (dest = src).
static void nvSetupEngine(NvPrivateT *priv) {
    const uint32_t copyRop = NV_ROP_COPY;

    nvWriteFifo(priv, NV_ROP_ROP, copyRop);
}
// ============================================================
// nvShowCursor
// ============================================================
//
// Show or hide the hardware cursor with a read-modify-write of the
// cursor config register: only the enable bit changes, every other bit
// is written back as read.
static void nvShowCursor(AccelDriverT *drv, bool visible) {
    NvPrivateT    *priv    = (NvPrivateT *)drv->privData;
    const uint32_t current = nvReadMmio(priv, NV_PRAMDAC_CURSOR_CFG);
    const uint32_t updated = visible
        ? (current | NV_CURSOR_ENABLE)
        : (current & ~(uint32_t)NV_CURSOR_ENABLE);

    nvWriteMmio(priv, NV_PRAMDAC_CURSOR_CFG, updated);
}
// ============================================================
// nvShutdown
// ============================================================
//
// Tear the driver down: hide the hardware cursor while the registers are
// still mapped, return to text mode, then release the framebuffer mapping
// followed by the MMIO mapping.
static void nvShutdown(AccelDriverT *drv) {
    NvPrivateT *state = (NvPrivateT *)drv->privData;

    // Needs MMIO access, so it must run before the mappings are released.
    nvShowCursor(drv, false);

    vgaRestoreTextMode();

    dpmiUnmapFramebuffer(&state->lfbMapping);
    dpmiUnmapFramebuffer(&state->mmioMapping);
}
// ============================================================
// nvWaitIdle
// ============================================================
//
// Wait for the PGRAPH engine to become idle by polling the
// PGRAPH_STATUS register.
//
// Poll NV_PGRAPH_STATUS until it reads 0 (idle) or NV_MAX_IDLE_WAIT polls
// have been made. A timeout is not reported: the function simply returns
// and the caller proceeds.
static void nvWaitIdle(AccelDriverT *drv) {
    NvPrivateT *priv      = (NvPrivateT *)drv->privData;
    int32_t     pollsLeft = NV_MAX_IDLE_WAIT;

    while (pollsLeft > 0 && nvReadMmio(priv, NV_PGRAPH_STATUS) != 0) {
        pollsLeft--;
    }
}
// ============================================================
// nvWriteFifo
// ============================================================
//
// Write a value to the FIFO user space. The offset is relative
// to the FIFO base (BAR0 + 0x800000).
//
// `offset` is a byte offset (subchannel base plus method offset) and is
// converted to a dword index, so it is expected to be a multiple of 4.
static void nvWriteFifo(NvPrivateT *priv, uint32_t offset, uint32_t val) {
    volatile uint32_t *slot = priv->fifo + (offset >> 2);

    *slot = val;
}