Initial commit.

This commit is contained in:
Scott Duensing 2026-04-13 19:40:45 -05:00
commit a20c488959
23 changed files with 11066 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
.claude/
obj/
bin/
PLAN.md

52
Makefile Normal file
View file

@ -0,0 +1,52 @@
# Makefile -- DOS Accelerated Video Driver Framework
#
# DJGPP cross-compilation build matching DVX conventions.
# Produces demo.exe as the test application.
#
# The toolchain location can be overridden on the command line:
#   make DJGPP_PREFIX=/path/to/djgpp

# Delete a target whose recipe failed so a truncated object or
# executable is never treated as up to date on the next run.
.DELETE_ON_ERROR:

DJGPP_PREFIX = $(HOME)/djgpp/djgpp
CC = $(DJGPP_PREFIX)/bin/i586-pc-msdosdjgpp-gcc
CFLAGS = -O2 -Wall -Wextra -Werror -Wno-type-limits -Wno-sign-compare -Wno-format-truncation -march=i486 -mtune=i586
# -MMD writes a .d dependency file beside each object; -MP adds a
# phony target per header so deleting a header does not break the
# build. Kept out of CFLAGS so a command-line CFLAGS override does
# not disable dependency tracking.
DEPFLAGS = -MMD -MP
OBJDIR = obj
BINDIR = bin

# Source files
SRCS = pci.c vgaCommon.c accelVid.c s3Trio.c cirrusGd54.c cirrusLaguna.c atiMach64.c tsengW32.c matroxMga.c banshee.c nvidia.c trident.c sis.c demo.c
OBJS = $(patsubst %.c,$(OBJDIR)/%.o,$(SRCS))
DEPS = $(OBJS:.o=.d)
TARGET = $(BINDIR)/demo.exe

.PHONY: all clean

all: $(TARGET)

# The output directories are order-only prerequisites (after the |):
# they must exist, but their timestamps never trigger a rebuild.
$(TARGET): $(OBJS) | $(BINDIR)
	$(CC) $(CFLAGS) -o $@ $(OBJS)

$(OBJDIR)/%.o: %.c | $(OBJDIR)
	$(CC) $(CFLAGS) $(DEPFLAGS) -c -o $@ $<

$(OBJDIR) $(BINDIR):
	mkdir -p $@

# Header dependencies are generated by the compiler (see DEPFLAGS)
# instead of being listed by hand, so they cannot drift out of date
# when an #include is added or removed. The leading '-' silences the
# "file not found" error on a fresh tree. When switching an existing
# build tree to this Makefile, run "make clean" once so every object
# gets a matching .d file.
-include $(DEPS)

clean:
	rm -rf $(OBJDIR) $(BINDIR)

399
README.md Normal file
View file

@ -0,0 +1,399 @@
# DOS Accelerated Video Driver Framework
Hardware-accelerated 2D video drivers for DOS/DJGPP. Programs the
acceleration engines on PCI video cards directly -- no VESA, no BIOS
calls for rendering. A common API lets applications use acceleration
without knowing which chip is present.
## Supported Video Cards
### S3 (s3Trio.c)
| Chip | Device ID | Notes |
|------|-----------|-------|
| Trio32 | 0x8810 | |
| Trio64 | 0x8811 | MMIO at LFB+16MB |
| Trio64V+ | 0x8814 | MMIO at LFB+16MB |
| ViRGE | 0x5631 | MMIO, 3D engine ignored |
| ViRGE/VX | 0x883D | |
| ViRGE/DX/GX | 0x8A01 | |
| ViRGE/GX2 | 0x8A10 | |
| ViRGE/MX | 0x8C01, 0x8C03 | |
| Savage3D | 0x8A20, 0x8A21 | |
| Savage4 | 0x8A22 | |
| Savage/MX | 0x8C10, 0x8C11 | |
| Savage/IX | 0x8C12, 0x8C13 | |
| Savage 2000 | 0x9102 | |
| Vision864 | 0x88C0, 0x88C1 | I/O only (no MMIO) |
| Vision868 | 0x8880 | I/O only |
| Vision964 | 0x88D0 | I/O only |
| Vision968 | 0x88F0, 0x88F1 | I/O only |
Hardware ops: RectFill, PatFill, BitBlt, HostBlit, ColorExpand,
LineDraw, HwCursor, Clip
### ATI Mach64 / Rage (atiMach64.c)
| Chip | Device ID | Notes |
|------|-----------|-------|
| Mach64 GX | 0x4758 | I/O only |
| Mach64 CX | 0x4358 | I/O only |
| Mach64 CT | 0x4354 | MMIO at end of aperture |
| Mach64 ET | 0x4554 | |
| Mach64 VT | 0x5654, 0x5655 | |
| 3D Rage II | 0x4754, 0x4755 | |
| Rage Pro | 0x4750, 0x4752 | |
| Rage 128 | 0x5245, 0x5246, 0x524B, 0x524C | |
| Rage 128 Pro | 0x5046, 0x5052 | |
Hardware ops: RectFill, PatFill, BitBlt, HostBlit, ColorExpand,
LineDraw, HwCursor, Clip
### Matrox MGA (matroxMga.c)
| Chip | Device ID | Notes |
|------|-----------|-------|
| Millennium (MGA2064W) | 0x0519 | Separate MMIO BAR |
| Mystique (MGA1064SG) | 0x051A | |
| G100 | 0x1000, 0x1001 | |
| G200 | 0x0520, 0x0521 | |
| G400 | 0x0525 | |
| G450 | 0x2527 | |
Hardware ops: RectFill, PatFill, BitBlt, HostBlit, ColorExpand,
LineDraw, HwCursor, Clip
### 3dfx (banshee.c)
| Chip | Device ID | Notes |
|------|-----------|-------|
| Banshee | 0x0003 | MMIO + launch area for data |
| Voodoo3 | 0x0005 | |
Hardware ops: RectFill, PatFill, BitBlt, HostBlit, ColorExpand,
LineDraw, HwCursor, Clip
### Cirrus Logic GD54xx (cirrusGd54.c)
| Chip | Device ID | Notes |
|------|-----------|-------|
| GD5434 | 0x00A0, 0x00A8 | BLT via GR registers |
| GD5436 | 0x00AC | |
| GD5446 | 0x00B8 | |
| GD5480 | 0x00BC | |
Hardware ops: RectFill, BitBlt, HostBlit, ColorExpand, HwCursor
### Cirrus Logic Laguna (cirrusLaguna.c)
| Chip | Device ID | Notes |
|------|-----------|-------|
| GD5462 | 0x00D0 | MMIO, different engine from GD54xx |
| GD5464 | 0x00D4 | |
| GD5465 | 0x00D6 | |
Hardware ops: RectFill, BitBlt, HostBlit, ColorExpand, HwCursor, Clip
### Nvidia RIVA / TNT (nvidia.c)
| Chip | Device ID | Notes |
|------|-----------|-------|
| RIVA 128 | 0x0018 | PGRAPH subchannel interface |
| RIVA 128 ZX | 0x0019 | |
| TNT | 0x0020 | |
| TNT2 | 0x0028 | |
| TNT2 Ultra | 0x0029 | |
| TNT2 M64 | 0x002D | |
| Vanta | 0x002C | |
Hardware ops: RectFill, BitBlt, HostBlit, HwCursor, Clip
### Tseng ET4000/W32 (tsengW32.c)
| Chip | Device ID | Notes |
|------|-----------|-------|
| W32 | 0x3202 | ACL engine via I/O ports |
| W32i | 0x3205 | |
| W32p rev A | 0x3206 | HwCursor on W32p only |
| W32p rev B | 0x3207 | |
| W32p rev C | 0x3208 | |
| W32p rev D | 0x4702 | |
Hardware ops: RectFill, BitBlt, HostBlit, HwCursor (W32p only)
### Trident TGUI (trident.c)
| Chip | Device ID | Notes |
|------|-----------|-------|
| TGUI9440 | 0x9440 | GER engine via I/O ports |
| TGUI9660 | 0x9660 | |
| TGUI9680 | 0x9680 | |
| ProVidia 9685 | 0x9685 | |
| Blade3D | 0x9880 | |
| CyberBlade | 0x9910 | |
Hardware ops: RectFill, BitBlt, HostBlit, HwCursor
### SiS (sis.c)
| Chip | Device ID | Notes |
|------|-----------|-------|
| 6326 | 0x6326 | MMIO queue-based engine |
| 300 | 0x0300 | |
| 305 | 0x0305 | |
| 315 | 0x0315 | |
| 330 | 0x0330 | |
Hardware ops: RectFill, BitBlt, HostBlit, HwCursor, Clip
## Capability Matrix
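Drawing operations not implemented in hardware get automatic software
fallbacks. Every drawing function pointer is always callable -- callers
never need to check for NULL. The driver manager installs no fallbacks
for the hardware cursor functions, so check `ACAP_HW_CURSOR` before
using them.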
| Operation | S3 | ATI | Matrox | 3dfx | CL 54xx | CL Laguna | Nvidia | Tseng | Trident | SiS |
|-----------|:--:|:---:|:------:|:----:|:-------:|:---------:|:------:|:-----:|:-------:|:---:|
| RectFill | HW | HW | HW | HW | HW | HW | HW | HW | HW | HW |
| PatFill | HW | HW | HW | HW | sw | sw | sw | sw | sw | sw |
| BitBlt | HW | HW | HW | HW | HW | HW | HW | HW | HW | HW |
| HostBlit | HW | HW | HW | HW | HW | HW | HW | HW | HW | HW |
| ColorExpand | HW | HW | HW | HW | HW | HW | sw | sw | sw | sw |
| LineDraw | HW | HW | HW | HW | sw | sw | sw | sw | sw | sw |
| HwCursor | HW | HW | HW | HW | HW | HW | HW | W32p | HW | HW |
| Clip | HW | HW | HW | HW | sw | HW | HW | sw | sw | HW |
HW = hardware accelerated, sw = software fallback
## API Usage
### Basic Lifecycle
```c
#include "accelVid.h"
// Declare registration functions for the drivers you want
extern void s3RegisterDriver(void);
extern void atiRegisterDriver(void);
// ... etc
int main(void) {
// 1. Register drivers (order = detection priority)
s3RegisterDriver();
atiRegisterDriver();
// 2. Detect hardware
AccelDriverT *drv = accelDetect();
if (!drv) {
printf("No supported video card found\n");
return 1;
}
// 3. Initialize with a video mode
AccelModeRequestT req;
req.width = 640;
req.height = 480;
req.bpp = 16;
if (!accelInit(drv, &req)) {
printf("Failed to set video mode\n");
return 1;
}
// Mode info is now available
printf("Mode: %dx%dx%d pitch=%d\n",
drv->mode.width, drv->mode.height,
drv->mode.bpp, drv->mode.pitch);
// 4. Draw
drv->rectFill(drv, 0, 0, 640, 480, 0x001F); // blue
drv->waitIdle(drv);
// 5. Shut down
accelShutdown(drv);
return 0;
}
```
### Drawing Operations
All drawing functions take the driver pointer as the first argument.
Colors are packed in the display's native pixel format.
```c
// Solid rectangle fill
drv->rectFill(drv, x, y, w, h, color);
// 8x8 mono pattern fill (1=fg, 0=bg, MSB first, 8 bytes)
uint8_t checkerboard[8] = {
0xAA, 0x55, 0xAA, 0x55,
0xAA, 0x55, 0xAA, 0x55
};
drv->rectFillPat(drv, x, y, w, h, checkerboard, fgColor, bgColor);
// Screen-to-screen blit (handles overlapping regions)
drv->bitBlt(drv, srcX, srcY, dstX, dstY, w, h);
// CPU-to-screen blit (transfer RAM buffer to VRAM)
// srcBuf = packed pixels in display format, srcPitch = byte stride
drv->hostBlit(drv, buffer, pitch, dstX, dstY, w, h);
// Monochrome color expansion (1bpp -> full color)
// Each 1-bit becomes fg, each 0-bit becomes bg
// srcBuf = packed MSB-first mono bitmap, srcPitch = byte stride
drv->colorExpand(drv, glyphData, 1, dstX, dstY, 8, 16, fg, bg);
// Bresenham line draw (inclusive endpoints)
drv->lineDraw(drv, x1, y1, x2, y2, color);
// Hardware clip rectangle
drv->setClip(drv, clipX, clipY, clipW, clipH);
```
### Hardware Cursor
```c
// Define a cursor image (64x64 max, AND/XOR masks)
HwCursorImageT cursor;
cursor.width = 16;
cursor.height = 16;
cursor.hotX = 0;
cursor.hotY = 0;
memset(cursor.andMask, 0xFF, sizeof(cursor.andMask)); // transparent
memset(cursor.xorMask, 0x00, sizeof(cursor.xorMask));
// ... fill in actual cursor shape ...
// Upload and enable
drv->setCursor(drv, &cursor);
drv->showCursor(drv, true);
// Move (call on every mouse poll)
drv->moveCursor(drv, mouseX, mouseY);
// Hide
drv->showCursor(drv, false);
```
### Checking Capabilities
The `caps` field indicates which operations are hardware-accelerated.
Software fallbacks are always installed, so you can call any operation
regardless of caps. Use caps to make optimization decisions:
```c
if (drv->caps & ACAP_COLOR_EXPAND) {
// Use color expansion for text -- 16x less bus traffic
drv->colorExpand(drv, glyph, 1, x, y, 8, 16, fg, bg);
} else {
// Software fallback is installed but may be slow --
// consider pre-rendering text to a RAM buffer instead
drv->colorExpand(drv, glyph, 1, x, y, 8, 16, fg, bg);
}
if (drv->caps & ACAP_HW_CURSOR) {
// Hardware cursor eliminates cursor dirty rectangles
drv->setCursor(drv, &cursorImage);
drv->showCursor(drv, true);
}
```
### Synchronization
The acceleration engine runs asynchronously. Drawing functions return
immediately after queuing the command. Use `waitIdle` before reading
from VRAM or when you need all pending operations to complete:
```c
drv->rectFill(drv, 0, 0, 100, 100, color1);
drv->rectFill(drv, 50, 50, 100, 100, color2);
drv->bitBlt(drv, 0, 0, 200, 0, 150, 150);
// Wait for everything to finish before reading VRAM
drv->waitIdle(drv);
uint16_t pixel = *(uint16_t *)(drv->mode.framebuffer + offset);
```
### Mode Information
After `accelInit` succeeds, `drv->mode` contains:
| Field | Description |
|-------|-------------|
| `width` | Horizontal resolution in pixels |
| `height` | Vertical resolution in pixels |
| `bpp` | Bits per pixel (8, 15, 16, or 32) |
| `pitch` | Bytes per scanline (may exceed width * bpp/8) |
| `framebuffer` | Direct pointer to the linear framebuffer |
| `vramSize` | Total video RAM in bytes |
| `offscreenBase` | Byte offset where offscreen VRAM begins |
The framebuffer pointer can be used for direct pixel access when
the acceleration engine doesn't offer a suitable operation.
## Adding a New Driver
1. Create a new source file (e.g., `newchip.c`)
2. Include `accelVid.h`, `vgaCommon.h`, and `pci.h`
3. Define a static `AccelDriverT` with your function pointers
4. Use shared helpers for boilerplate:
- `vesaFindAndSetMode()` for VESA mode enumeration and setting
- `dpmiMapFramebuffer()` for DPMI physical address mapping
- `pciSizeBar()` for PCI BAR size detection
5. Leave unsupported operations as NULL -- the driver manager
installs software fallbacks automatically
6. Add a registration function: `void newchipRegisterDriver(void)`
7. Add the source file to the Makefile and call the registration
function from `main()`
See `trident.c` (simplest driver) or `matroxMga.c` (most complete)
as reference implementations.
## Building
Requires a DJGPP cross-compiler targeting i586-pc-msdosdjgpp.
```
make # build bin/demo.exe
make clean # remove build artifacts
```
The Makefile expects the DJGPP toolchain at `$HOME/djgpp/djgpp`.
Override with `make DJGPP_PREFIX=/path/to/djgpp`.
Compiler flags: `-O2 -Wall -Wextra -Werror -Wno-type-limits -Wno-sign-compare -Wno-format-truncation -march=i486 -mtune=i586`
## Testing
The `test/` directory contains an 86Box configuration for testing
with an emulated S3 Trio64. See `test/README.txt` for setup
instructions.
```
demo.exe [width height bpp]
```
Default mode: 640x480x16. Controls: SPACE cycles demos, B runs
benchmarks, ESC exits.
## Project Structure
```
accelVid.h Driver abstraction and manager API
accelVid.c Driver manager, software fallbacks
pci.h / pci.c PCI configuration space access
vgaCommon.h / .c Shared VGA registers, VESA, DPMI helpers
s3Trio.c S3 Trio/ViRGE/Savage/Vision driver
atiMach64.c ATI Mach64 / Rage driver
matroxMga.c Matrox Millennium / Mystique / G-series driver
banshee.c 3dfx Banshee / Voodoo3 driver
cirrusGd54.c Cirrus Logic GD5434/36/46/80 driver
cirrusLaguna.c Cirrus Logic Laguna GD5462/64/65 driver
nvidia.c Nvidia RIVA 128 / TNT family driver
tsengW32.c Tseng ET4000/W32 family driver
trident.c Trident TGUI / Blade / CyberBlade driver
sis.c SiS 6326/300/305/315/330 driver
demo.c Test/demo application
Makefile DJGPP cross-compilation build
PLAN.md Architecture plan and chipset reference (local only; excluded by .gitignore)
test/ 86Box test configuration and setup guide
```

574
accelVid.c Normal file
View file

@ -0,0 +1,574 @@
// accelVid.c -- Accelerated video driver manager
//
// Manages registration, detection, and lifecycle of hardware-specific
// video drivers. Drivers register themselves at startup, then the
// manager probes each in order to find matching hardware.
//
// After a chip driver's init() succeeds, the manager fills in
// software fallback implementations for any drawing operations
// the driver left as NULL. This means callers never need to
// check function pointers -- every operation is always callable.
// The fallbacks draw directly to the LFB using simple loops.
#include "accelVid.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Maximum number of registered drivers. This is more than enough
// for all chip families we'll ever support.
#define MAX_DRIVERS 32
// ============================================================
// Prototypes -- public API
// ============================================================
//
// Entry points with external linkage, defined later in this file.
// NOTE(review): these are presumably also declared in accelVid.h --
// confirm, since only the first part of that header was reviewed.
AccelDriverT *accelDetect(void);
uint32_t accelGetCaps(const AccelDriverT *drv);
const char *accelGetName(const AccelDriverT *drv);
bool accelInit(AccelDriverT *drv, const AccelModeRequestT *req);
void accelRegisterDriver(AccelDriverT *drv);
void accelShutdown(AccelDriverT *drv);
// ============================================================
// Prototypes -- software fallbacks
// ============================================================
//
// File-local (static) routines that draw through the linear
// framebuffer. swInstallFallbacks() stores them into any driver
// function pointer that is still NULL after the driver's init().
static void swBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static void swColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg);
static void swHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static void swLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color);
static void swRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void swRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg);
static void swSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void swWaitIdle(AccelDriverT *drv);
static void swInstallFallbacks(AccelDriverT *drv);
// ============================================================
// Inline helpers for software fallbacks
// ============================================================
// Store one pixel at (x, y) straight into the linear framebuffer.
// No clipping or bounds checking is performed here; the caller
// must have validated the coordinates. Pixel sizes other than
// 1, 2 or 4 bytes are silently ignored.
static inline void swPutPixel(AccelDriverT *drv, int32_t x, int32_t y, uint32_t color) {
    // Round bits-per-pixel up to whole bytes (so 15 bpp uses 2 bytes).
    int32_t  bytesPerPixel = (drv->mode.bpp + 7) / 8;
    uint8_t *pixel         = drv->mode.framebuffer + y * drv->mode.pitch + x * bytesPerPixel;

    if (bytesPerPixel == 1) {
        *pixel = (uint8_t)color;
    } else if (bytesPerPixel == 2) {
        *(uint16_t *)pixel = (uint16_t)color;
    } else if (bytesPerPixel == 4) {
        *(uint32_t *)pixel = color;
    }
}
// ============================================================
// Module state
// ============================================================
// Registered drivers in registration order; accelDetect() probes
// them front to back. Only the first sDriverCount slots are valid.
static AccelDriverT *sDrivers[MAX_DRIVERS];
static int32_t sDriverCount = 0;
// Software clip rectangle (used by fallbacks when no hardware clip).
// Stored as origin plus extent, in pixels. Written by swSetClip()
// and reset to the full screen by swInstallFallbacks(); read by
// swLineDraw(). There is one rectangle for the whole module, not
// one per driver.
static int32_t sClipX = 0;
static int32_t sClipY = 0;
static int32_t sClipW = 0;
static int32_t sClipH = 0;
// ============================================================
// accelDetect
// ============================================================
//
// Iterates all registered drivers and calls detect() on each.
// Returns the first driver that claims the hardware, or NULL
// if no supported hardware is found.
//
// Detection order matters: drivers registered first are tried
// first. This allows callers to prioritize specific drivers
// (e.g. prefer S3 over generic VESA).
AccelDriverT *accelDetect(void) {
if (!pciDetect()) {
fprintf(stderr, "accelVid: PCI bus not detected\n");
return NULL;
}
for (int32_t i = 0; i < sDriverCount; i++) {
if (sDrivers[i]->detect(sDrivers[i])) {
printf("accelVid: Detected %s (PCI %02X:%02X.%X, "
"vendor=%04X device=%04X)\n",
sDrivers[i]->name,
sDrivers[i]->pciDev.bus,
sDrivers[i]->pciDev.dev,
sDrivers[i]->pciDev.func,
sDrivers[i]->pciDev.vendorId,
sDrivers[i]->pciDev.deviceId);
return sDrivers[i];
}
}
fprintf(stderr, "accelVid: No supported video hardware found\n");
return NULL;
}
// ============================================================
// accelGetCaps
// ============================================================
//
// Returns the driver's ACAP_xxx capability bits, or 0 when drv
// is NULL.
uint32_t accelGetCaps(const AccelDriverT *drv) {
    return (drv != NULL) ? drv->caps : 0;
}
// ============================================================
// accelGetName
// ============================================================
//
// Returns the driver's name string, or the literal "none" when
// drv is NULL.
const char *accelGetName(const AccelDriverT *drv) {
    return (drv != NULL) ? drv->name : "none";
}
// ============================================================
// accelInit
// ============================================================
//
// Brings up the given driver in the requested mode.
//
// Clears drv->mode, calls the driver's init(), prints the resulting
// mode and the hardware capability list to stdout, then installs
// software fallbacks for every operation the driver left NULL.
//
// Returns false when drv or drv->init is NULL, or when init() fails
// (a message is printed to stderr in the latter case).
bool accelInit(AccelDriverT *drv, const AccelModeRequestT *req) {
    // Capability bits paired with the text printed for each, listed
    // in report order. Each label carries its own leading space.
    static const struct {
        uint32_t    flag;
        const char *label;
    } capLabels[] = {
        { ACAP_RECT_FILL,     " RectFill"     },
        { ACAP_RECT_FILL_PAT, " PatFill"      },
        { ACAP_BITBLT,        " BitBlt"       },
        { ACAP_COLOR_EXPAND,  " ColorExpand"  },
        { ACAP_LINE_DRAW,     " LineDraw"     },
        { ACAP_HW_CURSOR,     " HwCursor"     },
        { ACAP_HOST_BLIT,     " HostBlit"     },
        { ACAP_CLIP,          " Clip"         },
        { ACAP_TRANSPARENCY,  " Transparency" },
    };

    if (drv == NULL || drv->init == NULL) {
        return false;
    }

    // Start from a clean slate so a failed init() never leaves stale
    // mode data behind.
    memset(&drv->mode, 0, sizeof(drv->mode));

    if (!drv->init(drv, req)) {
        fprintf(stderr, "accelVid: Failed to initialize %s\n", drv->name);
        return false;
    }

    printf("accelVid: Initialized %s at %ldx%ldx%ld (pitch=%ld, vram=%luKB)\n",
           drv->name,
           (long)drv->mode.width,
           (long)drv->mode.height,
           (long)drv->mode.bpp,
           (long)drv->mode.pitch,
           (unsigned long)(drv->mode.vramSize / 1024));

    // Report capabilities
    printf("accelVid: Capabilities:");
    for (size_t i = 0; i < sizeof(capLabels) / sizeof(capLabels[0]); i++) {
        if (drv->caps & capLabels[i].flag) {
            fputs(capLabels[i].label, stdout);
        }
    }
    printf("\n");

    // Anything the driver did not implement in hardware is routed
    // to the software implementations from here on.
    swInstallFallbacks(drv);
    return true;
}
// ============================================================
// accelRegisterDriver
// ============================================================
//
// Appends a driver to the detection list. Registration order is
// the order in which accelDetect() probes the drivers.
//
// A NULL driver, or a registration beyond MAX_DRIVERS, is rejected
// with a message on stderr and otherwise ignored.
void accelRegisterDriver(AccelDriverT *drv) {
    // Storing NULL would make accelDetect() dereference it later.
    if (!drv) {
        fprintf(stderr, "accelVid: Ignoring NULL driver registration\n");
        return;
    }

    if (sDriverCount >= MAX_DRIVERS) {
        fprintf(stderr, "accelVid: Too many drivers registered (max %d)\n",
                MAX_DRIVERS);
        return;
    }

    sDrivers[sDriverCount++] = drv;
}
// ============================================================
// accelShutdown
// ============================================================
//
// Tears a driver down. In order: waits for the engine to go idle,
// hides the cursor, calls the driver's shutdown(), then clears
// drv->mode. Each hook is skipped when its pointer is NULL, and a
// NULL drv is a no-op.
void accelShutdown(AccelDriverT *drv) {
    if (drv == NULL) {
        return;
    }

    if (drv->waitIdle != NULL) {
        drv->waitIdle(drv);
    }

    if (drv->showCursor != NULL) {
        drv->showCursor(drv, false);
    }

    if (drv->shutdown != NULL) {
        drv->shutdown(drv);
    }

    // The mode description (including the framebuffer pointer) is no
    // longer valid once the driver has shut down.
    memset(&drv->mode, 0, sizeof(drv->mode));
}
// ============================================================
// Software fallback implementations
// ============================================================
//
// These draw directly to the LFB. They're correct but slow
// (uncached PCI writes). The point isn't performance -- it's
// ensuring every operation is always callable so the caller
// never needs to check for NULL function pointers.
// ============================================================
// swBitBlt
// ============================================================
//
// Screen-to-screen copy through the LFB. Rows are visited top-down
// when the destination lies above (or on the same row and not to
// the right of) the source, bottom-up otherwise, so overlapping
// regions copy correctly. Each row uses memmove(), which copes with
// horizontal overlap. Empty rectangles are ignored; no clipping is
// applied.
static void swBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    if (w <= 0 || h <= 0) {
        return;
    }

    uint8_t *base          = drv->mode.framebuffer;
    int32_t  stride        = drv->mode.pitch;
    int32_t  bytesPerPixel = (drv->mode.bpp + 7) / 8;
    int32_t  spanBytes     = w * bytesPerPixel;

    // Pick the row traversal direction once, then share one loop.
    bool    topDown = (dstY < srcY) || (dstY == srcY && dstX <= srcX);
    int32_t row     = topDown ? 0 : h - 1;
    int32_t step    = topDown ? 1 : -1;

    for (int32_t done = 0; done < h; done++, row += step) {
        uint8_t *from = base + (srcY + row) * stride + srcX * bytesPerPixel;
        uint8_t *to   = base + (dstY + row) * stride + dstX * bytesPerPixel;
        memmove(to, from, spanBytes);
    }
}
// ============================================================
// swColorExpand
// ============================================================
//
// Expands a 1-bpp bitmap into full-color pixels in the LFB. Bits
// are read MSB-first within each source byte; a set bit is written
// as fg and a clear bit as bg. srcPitch is the byte stride between
// source rows. Empty rectangles are ignored; no clipping is applied.
static void swColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg) {
    if (w <= 0 || h <= 0) {
        return;
    }

    uint8_t *base          = drv->mode.framebuffer;
    int32_t  stride        = drv->mode.pitch;
    int32_t  bytesPerPixel = (drv->mode.bpp + 7) / 8;

    for (int32_t row = 0; row < h; row++) {
        const uint8_t *bits = srcBuf + row * srcPitch;
        uint8_t       *line = base + (dstY + row) * stride + dstX * bytesPerPixel;

        for (int32_t col = 0; col < w; col++) {
            // col is never negative, so >>3 and &7 match /8 and %8.
            uint8_t  mask  = (uint8_t)(0x80 >> (col & 7));
            uint32_t pixel = (bits[col >> 3] & mask) ? fg : bg;

            if (bytesPerPixel == 1) {
                line[col] = (uint8_t)pixel;
            } else if (bytesPerPixel == 2) {
                ((uint16_t *)line)[col] = (uint16_t)pixel;
            } else if (bytesPerPixel == 4) {
                ((uint32_t *)line)[col] = pixel;
            }
        }
    }
}
// ============================================================
// swHostBlit
// ============================================================
//
// Copies a rectangle of already-packed pixels from system RAM into
// the LFB, one memcpy() per scanline. srcPitch is the byte stride
// between source rows. Empty rectangles are ignored; no clipping
// is applied.
static void swHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    if (w <= 0 || h <= 0) {
        return;
    }

    uint8_t *base          = drv->mode.framebuffer;
    int32_t  stride        = drv->mode.pitch;
    int32_t  bytesPerPixel = (drv->mode.bpp + 7) / 8;
    int32_t  spanBytes     = w * bytesPerPixel;
    int32_t  row           = 0;

    while (row < h) {
        const uint8_t *from = srcBuf + row * srcPitch;
        uint8_t       *to   = base + (dstY + row) * stride + dstX * bytesPerPixel;
        memcpy(to, from, spanBytes);
        row++;
    }
}
// ============================================================
// swLineDraw
// ============================================================
//
// Bresenham line draw via the LFB.
//
// Plots every pixel from (x1, y1) to (x2, y2) inclusive, in any
// direction, using integer arithmetic only. Each pixel is tested
// against the module's software clip rectangle before it is
// written; pixels outside it are skipped while stepping continues.
//
// NOTE(review): sClipX/Y/W/H are only changed by swSetClip() and
// swInstallFallbacks(), and swSetClip() is installed only when the
// driver provides no setClip of its own. On a driver with hardware
// clipping but no hardware line draw, a clip rectangle set through
// drv->setClip presumably never reaches these variables, leaving
// lines clipped to the full screen only -- confirm.
static void swLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color) {
// Absolute extents along each axis.
int32_t dx = abs(x2 - x1);
int32_t dy = abs(y2 - y1);
// Step direction along each axis (+1 or -1).
int32_t sx = (x1 < x2) ? 1 : -1;
int32_t sy = (y1 < y2) ? 1 : -1;
// Running error term that decides which axis steps next.
int32_t err = dx - dy;
int32_t x = x1;
int32_t y = y1;
for (;;) {
// Per-pixel clip test against [sClipX, sClipX + sClipW) x
// [sClipY, sClipY + sClipH).
if (x >= sClipX && x < sClipX + sClipW &&
y >= sClipY && y < sClipY + sClipH) {
swPutPixel(drv, x, y, color);
}
// The end point has just been handled, so the line is done.
if (x == x2 && y == y2) {
break;
}
// Both tests use the error value from before either update, so
// a single iteration may step in x, in y, or in both.
int32_t e2 = 2 * err;
if (e2 > -dy) {
err -= dy;
x += sx;
}
if (e2 < dx) {
err += dx;
y += sy;
}
}
}
// ============================================================
// swRectFill
// ============================================================
//
// Fills a w x h rectangle at (x, y) with a single color by writing
// directly to the LFB. 8-bit modes use memset() per row; 16- and
// 32-bit modes store one pixel at a time. Empty rectangles are
// ignored; no clipping is applied.
static void swRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    if (w <= 0 || h <= 0) {
        return;
    }

    uint8_t *base          = drv->mode.framebuffer;
    int32_t  stride        = drv->mode.pitch;
    int32_t  bytesPerPixel = (drv->mode.bpp + 7) / 8;

    for (int32_t row = 0; row < h; row++) {
        uint8_t *line = base + (y + row) * stride + x * bytesPerPixel;

        if (bytesPerPixel == 1) {
            memset(line, (uint8_t)color, w);
        } else if (bytesPerPixel == 2) {
            uint16_t *out   = (uint16_t *)line;
            uint16_t  value = (uint16_t)color;
            for (int32_t left = w; left > 0; left--) {
                *out++ = value;
            }
        } else if (bytesPerPixel == 4) {
            uint32_t *out = (uint32_t *)line;
            for (int32_t left = w; left > 0; left--) {
                *out++ = color;
            }
        }
    }
}
// ============================================================
// swRectFillPat
// ============================================================
//
// Fills a rectangle with an 8x8 monochrome pattern through the LFB.
// pattern holds 8 bytes, one per pattern row, MSB = leftmost pixel.
// A set bit is drawn in fg and a clear bit in bg. The pattern row
// and column are selected from the absolute screen coordinate
// (low 3 bits), so adjacent fills tile seamlessly. Empty rectangles
// are ignored; no clipping is applied.
static void swRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg) {
    if (w <= 0 || h <= 0) {
        return;
    }

    uint8_t *base          = drv->mode.framebuffer;
    int32_t  stride        = drv->mode.pitch;
    int32_t  bytesPerPixel = (drv->mode.bpp + 7) / 8;

    for (int32_t row = 0; row < h; row++) {
        int32_t  screenY = y + row;
        uint8_t  rowBits = pattern[screenY & 7];
        uint8_t *line    = base + screenY * stride + x * bytesPerPixel;

        for (int32_t col = 0; col < w; col++) {
            // 0x80 is the leftmost pattern column; shift right to
            // reach the column for this screen X.
            uint8_t  mask  = (uint8_t)(0x80 >> ((x + col) & 7));
            uint32_t pixel = (rowBits & mask) ? fg : bg;

            if (bytesPerPixel == 1) {
                line[col] = (uint8_t)pixel;
            } else if (bytesPerPixel == 2) {
                ((uint16_t *)line)[col] = (uint16_t)pixel;
            } else if (bytesPerPixel == 4) {
                ((uint32_t *)line)[col] = pixel;
            }
        }
    }
}
// ============================================================
// swSetClip
// ============================================================
//
// Records the clip rectangle that swLineDraw() tests pixels
// against. The rectangle is stored in module-level variables, so
// the driver handle is not used.
static void swSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
    // Horizontal span, then vertical span.
    sClipX = x;
    sClipW = w;
    sClipY = y;
    sClipH = h;

    (void)drv;
}
// ============================================================
// swWaitIdle
// ============================================================
//
// No-op -- software operations complete synchronously.
// Installed by swInstallFallbacks() when a driver supplies no
// waitIdle of its own, so drv->waitIdle(drv) is always callable.
static void swWaitIdle(AccelDriverT *drv) {
// Silences the unused-parameter warning (the build uses -Wextra -Werror).
(void)drv;
}
// ============================================================
// swInstallFallbacks
// ============================================================
//
// Points every still-NULL operation in the driver struct at its
// software implementation. accelInit() calls this once the chip
// driver's init() has succeeded, so each of these operations is
// callable afterwards. Also resets the software clip rectangle to
// the full screen and reports how many operations were replaced.
static void swInstallFallbacks(AccelDriverT *drv) {
    int32_t installed = 0;

    // The waitIdle fallback is not included in the reported count.
    if (drv->waitIdle == NULL) {
        drv->waitIdle = swWaitIdle;
    }

// Install impl into drv->slot when the driver left it NULL, and
// count the replacement.
#define SW_FALLBACK(slot, impl)      \
    do {                             \
        if (drv->slot == NULL) {     \
            drv->slot = (impl);      \
            installed++;             \
        }                            \
    } while (0)

    SW_FALLBACK(setClip,     swSetClip);
    SW_FALLBACK(rectFill,    swRectFill);
    SW_FALLBACK(bitBlt,      swBitBlt);
    SW_FALLBACK(hostBlit,    swHostBlit);
    SW_FALLBACK(colorExpand, swColorExpand);
    SW_FALLBACK(rectFillPat, swRectFillPat);
    SW_FALLBACK(lineDraw,    swLineDraw);

#undef SW_FALLBACK

    // Software clipping starts out covering the whole visible mode.
    sClipX = 0;
    sClipY = 0;
    sClipW = drv->mode.width;
    sClipH = drv->mode.height;

    if (installed > 0) {
        printf("accelVid: %ld operation(s) using software fallback\n",
               (long)installed);
    }
}

257
accelVid.h Normal file
View file

@ -0,0 +1,257 @@
// accelVid.h -- Accelerated video driver abstraction for DOS
//
// Defines the common interface that all hardware-specific video
// drivers implement. Each driver fills in an AccelDriverT struct
// with function pointers for its accelerated operations and sets
// capability flags indicating which operations are hardware-backed.
//
// The driver manager (accelVid.c) iterates registered drivers,
// calls detect() on each, and returns the first match. The caller
// then uses the function pointers directly -- no dispatch overhead
// beyond the initial detection.
//
// Operations that aren't hardware-accelerated on a given chip
// should be left as NULL. After a driver's init() succeeds,
// accelInit() replaces NULL drawing operations with software
// fallbacks, so callers can invoke them unconditionally. Capability
// flags in AccelDriverT.caps indicate which operations are
// hardware-backed, so callers can check without testing each pointer.
//
// All coordinates and dimensions are in pixels. Colors are packed
// in the display's native pixel format (same as DVX's packColor).
#ifndef ACCEL_VID_H
#define ACCEL_VID_H
#include <stdint.h>
#include <stdbool.h>
#include "pci.h"
// ============================================================
// Capability flags
// ============================================================
//
// Bit flags indicating which operations are hardware-accelerated.
// A driver sets these in its caps field during detect/init. The
// caller can test (drv->caps & ACAP_xxx) to decide whether to
// use hardware or fall back to software.
// Each flag occupies its own bit, so flags can be combined with |
// and tested with &. accelInit() prints one label per set flag.
#define ACAP_RECT_FILL 0x00000001 // solid rectangle fill
#define ACAP_RECT_FILL_PAT 0x00000002 // pattern rectangle fill (8x8)
#define ACAP_BITBLT 0x00000004 // screen-to-screen blit
#define ACAP_COLOR_EXPAND 0x00000008 // mono-to-color expansion (text/glyphs)
#define ACAP_LINE_DRAW 0x00000010 // Bresenham line drawing
#define ACAP_HW_CURSOR 0x00000020 // hardware sprite cursor
#define ACAP_HOST_BLIT 0x00000040 // CPU-to-screen blit (image upload)
#define ACAP_CLIP 0x00000080 // hardware clip rectangle
#define ACAP_TRANSPARENCY 0x00000100 // transparent blit (color key)
// ============================================================
// Raster operation codes
// ============================================================
//
// Standard Microsoft/GDI ROP codes used by Windows drivers.
// These map to the 256 possible ternary raster operations, but
// we only define the commonly used ones. The hardware engines
// typically support these natively.
// Each value is an 8-bit ROP code.
// NOTE(review): the driver manager (accelVid.c) does not reference
// these codes; presumably the chip drivers do -- confirm.
#define ROP_COPY 0xCC // dest = src
#define ROP_PAT_COPY 0xF0 // dest = pattern
#define ROP_ZERO 0x00 // dest = 0 (black)
#define ROP_ONE 0xFF // dest = 1 (white)
#define ROP_SRC_AND 0x88 // dest = src AND dest
#define ROP_SRC_OR 0xEE // dest = src OR dest
#define ROP_SRC_XOR 0x66 // dest = src XOR dest
#define ROP_NOT 0x55 // dest = NOT dest
#define ROP_PAT_AND 0xA0 // dest = pat AND dest
#define ROP_PAT_OR 0xFA // dest = pat OR dest
#define ROP_PAT_XOR 0x5A // dest = pat XOR dest
// ============================================================
// Hardware cursor image format
// ============================================================
//
// Hardware cursors use a 2-bit-per-pixel AND/XOR format:
// AND=0, XOR=0 -> cursor color 0 (background)
// AND=0, XOR=1 -> cursor color 1 (foreground)
// AND=1, XOR=0 -> transparent (screen shows through)
// AND=1, XOR=1 -> inverted (screen pixel is inverted)
//
// Most chips support 64x64 cursors (S3, Matrox, ATI, Tseng W32p).
// Older Cirrus (GD5426/28) support only 32x32.
// Largest cursor edge length, in pixels, that the image struct can hold.
#define HW_CURSOR_MAX_SIZE 64
// Cursor image passed to a driver's setCursor(). Each mask is sized
// for a full HW_CURSOR_MAX_SIZE x HW_CURSOR_MAX_SIZE cursor at one
// bit per pixel (512 bytes), regardless of width/height.
typedef struct {
int32_t width; // cursor width in pixels
int32_t height; // cursor height in pixels
// NOTE(review): presumably the hotspot offset within the image,
// in pixels from its top-left corner -- confirm against the drivers.
int32_t hotX;
int32_t hotY;
uint8_t andMask[HW_CURSOR_MAX_SIZE * HW_CURSOR_MAX_SIZE / 8]; // AND plane, 1 bit per pixel
uint8_t xorMask[HW_CURSOR_MAX_SIZE * HW_CURSOR_MAX_SIZE / 8]; // XOR plane, 1 bit per pixel
} HwCursorImageT;
// ============================================================
// Video mode request / result
// ============================================================
// Mode requested by the caller; passed to a driver's init().
typedef struct {
int32_t width; // requested horizontal resolution in pixels
int32_t height; // requested vertical resolution in pixels
int32_t bpp; // requested bits per pixel (8, 15, 16, 32)
} AccelModeRequestT;
// Mode actually in effect; filled in by a driver's init() and stored
// in AccelDriverT.mode.
typedef struct {
int32_t width; // horizontal resolution in pixels
int32_t height; // vertical resolution in pixels
int32_t bpp; // bits per pixel of the mode that was set
int32_t pitch; // bytes per scanline (may be > width * bytesPerPixel)
uint8_t *framebuffer; // mapped linear framebuffer pointer
uint32_t vramSize; // total video RAM in bytes
uint32_t offscreenBase; // offset to start of offscreen VRAM (for allocations)
} AccelModeResultT;
// ============================================================
// Driver structure
// ============================================================
//
// Each chip driver provides a statically-allocated AccelDriverT
// and registers it with accelRegisterDriver(). The driver manager
// calls detect() on each registered driver during accelDetect().
//
// The init() function receives a mode request and returns detailed
// mode info. It is responsible for:
// - Programming the CRTC/sequencer for the requested mode
// - Enabling the linear framebuffer
// - Unlocking the acceleration engine
// - Setting up MMIO mappings if needed
//
// All accelerated drawing functions must call waitIdle() internally
// before returning if the operation is asynchronous. The explicit
// waitIdle() in the API is for synchronization points where the
// caller needs to read back from VRAM after a series of operations.
// Per-chip driver object: identification, current mode, and a table
// of function pointers implementing the accelerated operations.
// Every callback receives the driver object itself as its first
// argument so it can reach pciDev, mode and privData.
typedef struct AccelDriverT {
// Driver identification
const char *name; // human-readable name (e.g. "S3 Trio64"); detect() may replace it with a more specific string
const char *chipFamily; // family identifier (e.g. "s3", "cirrus")
uint32_t caps; // ACAP_xxx capability flags; a driver may leave this 0 until init() succeeds
// PCI device info (filled by detect)
PciDeviceT pciDev;
// Current mode info (filled by init)
AccelModeResultT mode;
// --------------------------------------------------------
// Lifecycle
// --------------------------------------------------------
// Probe for this chip. Returns true if this driver's hardware
// is present. Must not change any hardware state.
bool (*detect)(struct AccelDriverT *drv);
// Initialize the chip: set the requested video mode, enable
// acceleration, map the framebuffer. Returns true on success.
bool (*init)(struct AccelDriverT *drv, const AccelModeRequestT *req);
// Shut down: restore text mode, disable acceleration, unmap
// memory. Safe to call even if init() was never called.
void (*shutdown)(struct AccelDriverT *drv);
// --------------------------------------------------------
// Synchronization
// --------------------------------------------------------
// Wait until the acceleration engine is idle. All pending
// drawing commands must complete before this returns.
void (*waitIdle)(struct AccelDriverT *drv);
// --------------------------------------------------------
// Hardware clip rectangle
// --------------------------------------------------------
// Set the hardware clip rectangle. All subsequent drawing
// operations are clipped to this region. Pass full-screen
// dimensions to disable clipping.
// (x, y) is the top-left corner; w and h are the extent.
void (*setClip)(struct AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
// --------------------------------------------------------
// Accelerated drawing operations
// --------------------------------------------------------
// NOTE(review): coordinates and sizes below are presumably in
// pixels of the current mode -- confirm against callers.
// Solid rectangle fill.
void (*rectFill)(struct AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
// Pattern rectangle fill (8x8 pattern, one color + transparent
// or two-color). Pattern data is 8 bytes, one bit per pixel,
// MSB-first, top row first.
void (*rectFillPat)(struct AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg);
// Screen-to-screen blit.
void (*bitBlt)(struct AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
// CPU-to-screen blit: transfer pixels from system RAM to VRAM.
// srcBuf points to packed pixel data in display format.
// srcPitch is the byte stride of the source buffer.
void (*hostBlit)(struct AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
// Monochrome color expansion: convert 1bpp bitmap data to
// full-color pixels. Used for fast text/glyph rendering.
// srcBuf is packed MSB-first, one bit per pixel.
// srcPitch is the byte stride between rows.
// fg is used for 1-bits and bg for 0-bits.
void (*colorExpand)(struct AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg);
// Bresenham line draw (inclusive endpoints).
void (*lineDraw)(struct AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color);
// --------------------------------------------------------
// Hardware cursor
// --------------------------------------------------------
// Set the cursor image. Called when the cursor shape changes.
void (*setCursor)(struct AccelDriverT *drv, const HwCursorImageT *image);
// Move the cursor to a screen position. Called every mouse poll.
void (*moveCursor)(struct AccelDriverT *drv, int32_t x, int32_t y);
// Show or hide the hardware cursor.
void (*showCursor)(struct AccelDriverT *drv, bool visible);
// --------------------------------------------------------
// Private driver data
// --------------------------------------------------------
// Opaque pointer for chip-specific state (MMIO base address,
// current engine state, etc.). Each driver allocates and manages
// its own private data.
void *privData;
} AccelDriverT;
// ============================================================
// Driver manager API
// ============================================================
// Register a driver with the manager. Call once per driver at
// startup (typically from main before accelInit). Drivers are
// probed in registration order.
void accelRegisterDriver(AccelDriverT *drv);
// Probe all registered drivers and return the first one whose
// detect() succeeds. Returns NULL if no supported hardware is found.
AccelDriverT *accelDetect(void);
// Initialize the detected driver with the given mode.
// Returns true on success. On failure the driver is not usable.
bool accelInit(AccelDriverT *drv, const AccelModeRequestT *req);
// Shut down the active driver and restore text mode.
void accelShutdown(AccelDriverT *drv);
// Return the driver name string for display.
const char *accelGetName(const AccelDriverT *drv);
// Return the capability flags for the active driver.
uint32_t accelGetCaps(const AccelDriverT *drv);
#endif // ACCEL_VID_H

960
atiMach64.c Normal file
View file

@ -0,0 +1,960 @@
// atiMach64.c -- ATI Mach64 / Rage accelerated video driver
//
// Supports the ATI Mach64 family: GX, CX, CT, ET, VT, GT (Rage II),
// and Rage Pro. These were among the most capable 2D accelerators
// of the mid-1990s, with features including:
// - Solid and pattern rectangle fill
// - Screen-to-screen BitBLT
// - Host-to-screen blit (CPU data transfer)
// - Monochrome color expansion
// - Bresenham line draw
// - Trapezoid fill
// - Hardware scissor rectangle
// - 64x64 two-color hardware cursor
//
// Register access:
// The Mach64 has two register access methods:
// 1. I/O port: registers at block I/O base + offset. The base
// is typically 0x02EC for Mach64, determined by CONFIG_CHIP_ID.
// 2. MMIO: register block at end of LFB (BAR0 + aperture_size - 1KB)
// or via a dedicated BAR.
//
// We use MMIO for speed. The register block is 1KB at the end
// of the aperture (LFB base + size - 0x400 on most variants,
// or LFB base + size - 0x800 for 8MB apertures).
//
// Some early Mach64 chips (GX/CX) may not support MMIO well;
// for those we fall back to I/O port access.
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// ============================================================
// ATI vendor/device IDs
// ============================================================
// PCI vendor ID shared by every device below.
#define ATI_VENDOR_ID 0x1002
#define ATI_MACH64_GX 0x4758 // Mach64 GX
#define ATI_MACH64_CX 0x4358 // Mach64 CX
#define ATI_MACH64_CT 0x4354 // Mach64 CT
#define ATI_MACH64_ET 0x4554 // Mach64 ET
#define ATI_MACH64_VT 0x5654 // Mach64 VT
#define ATI_MACH64_VT_B 0x5655 // Mach64 VT-B
#define ATI_MACH64_GT 0x4754 // Mach64 GT (3D Rage II)
#define ATI_MACH64_GT_B 0x4755 // Mach64 GT-B (3D Rage II+)
#define ATI_RAGE_PRO 0x4750 // Rage Pro
#define ATI_RAGE_PRO_AGP 0x4752 // Rage Pro AGP
// Same numeric value as ATI_RAGE_PRO_AGP, so it cannot be told apart
// from it by device ID; not listed separately in the table below.
#define ATI_RAGE_XL_PCI 0x4752 // Rage XL PCI (shares ID with Pro AGP)
#define ATI_RAGE_128_RE 0x5245 // Rage 128 RE
#define ATI_RAGE_128_RF 0x5246 // Rage 128 RF
#define ATI_RAGE_128_RK 0x524B // Rage 128 RK
#define ATI_RAGE_128_RL 0x524C // Rage 128 RL
#define ATI_RAGE_128_PRO_PF 0x5046 // Rage 128 Pro PF
#define ATI_RAGE_128_PRO_PR 0x5052 // Rage 128 Pro PR
// Same numeric value as ATI_RAGE_128_PRO_PF; not listed separately
// in the table below.
#define ATI_RAGE_FURY 0x5046 // Rage Fury (same as 128 Pro PF)
// Flat list of (vendor ID, device ID) pairs handed to
// pciFindDeviceList() by atiDetect(); the list ends with a 0, 0 pair.
// NOTE(review): the Rage 128 entries are driven through the same
// Mach64 register code as everything else in this file -- confirm
// that Rage 128 parts are actually register-compatible before
// relying on them being claimed here.
static const uint16_t sAtiDeviceIds[] = {
ATI_VENDOR_ID, ATI_MACH64_GX,
ATI_VENDOR_ID, ATI_MACH64_CX,
ATI_VENDOR_ID, ATI_MACH64_CT,
ATI_VENDOR_ID, ATI_MACH64_ET,
ATI_VENDOR_ID, ATI_MACH64_VT,
ATI_VENDOR_ID, ATI_MACH64_VT_B,
ATI_VENDOR_ID, ATI_MACH64_GT,
ATI_VENDOR_ID, ATI_MACH64_GT_B,
ATI_VENDOR_ID, ATI_RAGE_PRO,
ATI_VENDOR_ID, ATI_RAGE_PRO_AGP,
ATI_VENDOR_ID, ATI_RAGE_128_RE,
ATI_VENDOR_ID, ATI_RAGE_128_RF,
ATI_VENDOR_ID, ATI_RAGE_128_RK,
ATI_VENDOR_ID, ATI_RAGE_128_RL,
ATI_VENDOR_ID, ATI_RAGE_128_PRO_PF,
ATI_VENDOR_ID, ATI_RAGE_128_PRO_PR,
0, 0
};
// ============================================================
// Mach64 register offsets (from MMIO base)
// ============================================================
//
// The Mach64 has a flat register space. For I/O access, these
// offsets are added to the I/O base port. For MMIO, they're
// byte offsets from the MMIO base address.
// Drawing engine source registers
// NOTE(review): these offsets are used as byte offsets by
// atiReadReg()/atiWriteReg(); verify them against the Mach64
// register reference before trusting them on real hardware.
#define ATI_SRC_OFF_PITCH 0x0000 // source offset and pitch
#define ATI_SRC_Y 0x0004 // source Y
#define ATI_SRC_X 0x0008 // source X
#define ATI_SRC_Y_X 0x000C // source Y and X combined
#define ATI_SRC_WIDTH1 0x0010 // source width
#define ATI_SRC_HEIGHT1 0x0014 // source height
// Drawing engine destination registers
#define ATI_DST_OFF_PITCH 0x0040 // destination offset and pitch
#define ATI_DST_Y 0x0044
#define ATI_DST_X 0x0048
#define ATI_DST_Y_X 0x004C
#define ATI_DST_HEIGHT 0x0050
#define ATI_DST_WIDTH 0x0054
#define ATI_DST_HEIGHT_WIDTH 0x0058 // triggers blit
#define ATI_DST_X_WIDTH 0x005C
#define ATI_DST_BRES_ERR 0x0064
#define ATI_DST_BRES_INC 0x0068
#define ATI_DST_BRES_DEC 0x006C
#define ATI_DST_BRES_LNTH 0x0070
#define ATI_DST_BRES_LNTH_END 0x0074 // triggers line draw
// Host data (CPU-to-screen)
#define ATI_HOST_DATA0 0x0200
// Scissor registers
#define ATI_SC_LEFT 0x00A0
#define ATI_SC_RIGHT 0x00A4
#define ATI_SC_TOP 0x00A8
#define ATI_SC_BOTTOM 0x00AC
// Drawing processor registers
#define ATI_DP_BKGD_CLR 0x00B0
#define ATI_DP_FRGD_CLR 0x00B4
#define ATI_DP_WRITE_MASK 0x00B8
#define ATI_DP_CHAIN_MASK 0x00BC
#define ATI_DP_PIX_WIDTH 0x00D0
#define ATI_DP_MIX 0x00D4
#define ATI_DP_SRC 0x00D8
// Clock/config
#define ATI_CLR_CMP_CNTL 0x0100
#define ATI_GUI_TRAJ_CNTL 0x00CC
// NOTE(review): 0x00CE is not a multiple of 4. The MMIO path in
// atiReadReg()/atiWriteReg() indexes a uint32_t array with reg / 4,
// so this offset would alias ATI_GUI_TRAJ_CNTL there; only use it
// with port I/O.
#define ATI_GUI_STAT 0x00CE // I/O only; for MMIO see below
// FIFO and status (MMIO addresses)
#define ATI_FIFO_STAT 0x0310
#define ATI_GUI_STAT_MMIO 0x0338
// Hardware cursor
#define ATI_CUR_CLR0 0x0260
#define ATI_CUR_CLR1 0x0264
#define ATI_CUR_OFFSET 0x0268
#define ATI_CUR_HORZ_VERT_POSN 0x026C
#define ATI_CUR_HORZ_VERT_OFF 0x0270
#define ATI_GEN_TEST_CNTL 0x0034 // general test/cursor control
// Memory config
#define ATI_MEM_CNTL 0x0140
// I/O and MMIO constants
#define ATI_IO_BASE_DEFAULT 0x02EC // default block I/O base port
#define ATI_MMIO_SIZE 0x0400 // MMIO block size (1KB at end of aperture)
#define ATI_CONFIG_CHIP_ID 0x00E0
// ============================================================
// Mach64 DP_MIX values
// ============================================================
//
// The drawing processor MIX register controls the raster operation
// for foreground (bits 20:16) and background (bits 4:0).
#define ATI_MIX_NOT_DST 0x00
#define ATI_MIX_ZERO 0x01
#define ATI_MIX_ONE 0x02
#define ATI_MIX_DST 0x03
#define ATI_MIX_NOT_SRC 0x04
#define ATI_MIX_XOR 0x05
#define ATI_MIX_XNOR 0x06
#define ATI_MIX_COPY 0x07 // dest = source (most common)
#define ATI_MIX_NOT_SRC_AND 0x08
#define ATI_MIX_SRC_AND_DST 0x0C
#define ATI_MIX_SRC_OR_DST 0x0E
// Foreground mix is in bits 20:16, background in bits 4:0
#define ATI_FRGD_MIX(rop) ((uint32_t)(rop) << 16)
#define ATI_BKGD_MIX(rop) ((uint32_t)(rop))
// ============================================================
// Mach64 DP_SRC values
// ============================================================
#define ATI_SRC_BKGD_CLR 0x00 // background color register
#define ATI_SRC_FRGD_CLR 0x01 // foreground color register
#define ATI_SRC_HOST 0x02 // CPU host data
#define ATI_SRC_BLIT 0x03 // video memory (blit)
#define ATI_SRC_PATTERN 0x04 // pattern register
// DP_SRC packs three source selects into one register. The field
// positions, as encoded by the macros below, are:
// bits 2:0 = background source
// bits 10:8 = foreground source
// bits 18:16 = mono source (for color expand)
#define ATI_DP_SRC_BKGD(s) ((uint32_t)(s))
#define ATI_DP_SRC_FRGD(s) ((uint32_t)(s) << 8)
#define ATI_DP_SRC_MONO(s) ((uint32_t)(s) << 16)
// ============================================================
// Mach64 DP_PIX_WIDTH values
// ============================================================
#define ATI_PIX_8BPP 0x02
#define ATI_PIX_15BPP 0x03
#define ATI_PIX_16BPP 0x04
#define ATI_PIX_32BPP 0x06
// HOST byte/word/dword order -- use native (little-endian)
#define ATI_HOST_BYTE_ORDER 0x00
// GUI_TRAJ_CNTL direction bits
#define ATI_DST_X_DIR_LEFT 0x00
#define ATI_DST_X_DIR_RIGHT 0x01
#define ATI_DST_Y_DIR_UP 0x00
#define ATI_DST_Y_DIR_DOWN 0x02
// GUI_STAT busy bit
#define ATI_GUI_STAT_BUSY 0x00000001
#define ATI_FIFO_STAT_MASK 0x0000FFFF
// Hardware cursor size
#define ATI_HW_CURSOR_SIZE 64
#define ATI_HW_CURSOR_BYTES 1024 // 64*64*2bpp/8
// Maximum wait iterations
#define ATI_MAX_IDLE_WAIT 1000000
// ============================================================
// Private driver state
// ============================================================
// Chip-specific state for the Mach64 driver. A single static
// instance is reached through AccelDriverT.privData and is zeroed
// at the start of atiInit().
typedef struct {
uint32_t lfbPhysAddr; // physical base of the aperture (BAR0 with the low 4 bits masked off)
uint32_t vramSize; // video memory size in bytes as determined by atiInit()
uint32_t cursorOffset; // byte offset in VRAM of the hardware cursor image
int32_t bytesPerPixel; // (bpp + 7) / 8 for the current mode
int32_t screenPitch; // bytes per scanline of the current mode
volatile uint32_t *mmio; // mapped MMIO register base
uint32_t mmioPhysAddr; // physical address of the MMIO block (only computed when MMIO is used)
bool useIo; // fall back to I/O on old GX/CX
uint16_t ioBase; // I/O base port for register access
DpmiMappingT lfbMapping; // mapping of the whole aperture; its ptr is the framebuffer base
} AtiPrivateT;
// ============================================================
// Prototypes
// ============================================================
static void atiBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static void atiColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg);
static bool atiDetect(AccelDriverT *drv);
static void atiHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool atiInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void atiLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color);
static void atiMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void atiRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void atiRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg);
static void atiSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void atiSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void atiShowCursor(AccelDriverT *drv, bool visible);
static void atiShutdown(AccelDriverT *drv);
static void atiWaitFifo(AtiPrivateT *priv, int32_t entries);
static void atiWaitIdle(AccelDriverT *drv);
static void atiWriteReg(AtiPrivateT *priv, uint32_t reg, uint32_t val);
static uint32_t atiReadReg(AtiPrivateT *priv, uint32_t reg);
// ============================================================
// Driver instance
// ============================================================
// Backing storage for the driver's private state (see AtiPrivateT).
static AtiPrivateT sAtiPrivate;
// The one Mach64 driver object handed to the driver manager.
// name is a generic default that atiDetect() replaces with a
// chip-specific string; caps starts at 0 and is filled in by atiInit().
static AccelDriverT sAtiDriver = {
.name = "ATI Mach64",
.chipFamily = "ati",
.caps = 0,
.privData = &sAtiPrivate,
.detect = atiDetect,
.init = atiInit,
.shutdown = atiShutdown,
.waitIdle = atiWaitIdle,
.setClip = atiSetClip,
.rectFill = atiRectFill,
.rectFillPat = atiRectFillPat,
.bitBlt = atiBitBlt,
.hostBlit = atiHostBlit,
.colorExpand = atiColorExpand,
.lineDraw = atiLineDraw,
.setCursor = atiSetCursor,
.moveCursor = atiMoveCursor,
.showCursor = atiShowCursor,
};
// ============================================================
// atiRegisterDriver
// ============================================================
void atiRegisterDriver(void) {
accelRegisterDriver(&sAtiDriver);
}
// ============================================================
// atiReadReg / atiWriteReg
// ============================================================
//
// Register access abstraction. Uses MMIO when available, falls
// back to I/O port access on older chips.
// Read one 32-bit engine register.
//   priv - driver state selecting the access method
//   reg  - register byte offset
// Returns the register contents, fetched from the MMIO block unless
// the driver is in port-I/O mode, in which case the port at
// ioBase + reg is read instead.
static uint32_t atiReadReg(AtiPrivateT *priv, uint32_t reg) {
    uint32_t value;

    if (!priv->useIo) {
        // mmio is a dword array, so convert the byte offset to an index
        value = priv->mmio[reg >> 2];
    } else {
        value = inportl(priv->ioBase + reg);
    }

    return value;
}
// Write one 32-bit engine register.
//   priv - driver state selecting the access method
//   reg  - register byte offset
//   val  - value to store
// Goes through the MMIO block unless the driver is in port-I/O mode,
// in which case the port at ioBase + reg is written instead.
static void atiWriteReg(AtiPrivateT *priv, uint32_t reg, uint32_t val) {
    if (!priv->useIo) {
        // mmio is a dword array, so convert the byte offset to an index
        priv->mmio[reg >> 2] = val;
    } else {
        outportl(priv->ioBase + reg, val);
    }
}
// ============================================================
// atiBitBlt
// ============================================================
//
// Screen-to-screen BitBLT. Overlapping regions are handled by picking
// the copy direction (programmed through GUI_TRAJ_CNTL) so that source
// pixels are read before the blit overwrites them.
// Copy a w x h rectangle from (srcX, srcY) to (dstX, dstY) within
// video memory. Rectangles with a non-positive width or height are
// ignored.
//
// The copy runs right-to-left when the source lies left of the
// destination and bottom-to-top when it lies above it; in those cases
// the start coordinates are moved to the far edge of the rectangle.
static void atiBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    bool     rightToLeft = (srcX < dstX);
    bool     bottomToTop = (srcY < dstY);
    int32_t  xAdjust     = rightToLeft ? (w - 1) : 0;
    int32_t  yAdjust     = bottomToTop ? (h - 1) : 0;
    uint32_t trajectory  = 0;

    if (!rightToLeft) {
        trajectory |= ATI_DST_X_DIR_RIGHT;
    }
    if (!bottomToTop) {
        trajectory |= ATI_DST_Y_DIR_DOWN;
    }

    uint32_t srcStart = ((uint32_t)(srcX + xAdjust) << 16) | (uint32_t)(srcY + yAdjust);
    uint32_t dstStart = ((uint32_t)(dstX + xAdjust) << 16) | (uint32_t)(dstY + yAdjust);
    uint32_t extent   = ((uint32_t)w << 16) | (uint32_t)h;

    // Seven register writes follow; the last one starts the blit.
    atiWaitFifo(priv, 7);
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, trajectory);
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_BLIT));
    atiWriteReg(priv, ATI_SRC_Y_X, srcStart);
    atiWriteReg(priv, ATI_SRC_WIDTH1, w);
    atiWriteReg(priv, ATI_DST_Y_X, dstStart);
    atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, extent);
}
// ============================================================
// atiColorExpand
// ============================================================
//
// Monochrome-to-color expansion via the host data path.
// Converts 1bpp source bitmap to full-color pixels using the
// Mach64 engine. Source data is packed MSB-first, padded to
// dword boundaries per scanline.
// Expand a 1bpp bitmap into a w x h rectangle at (dstX, dstY).
//   srcBuf   - first row of the bitmap, MSB-first within each byte
//   srcPitch - byte distance between consecutive source rows
//   fg / bg  - colors loaded into the foreground / background registers
// Rectangles with a non-positive width or height are ignored.
//
// Each row is sent as ceil(w / 32) dwords; bytes that would fall at or
// beyond srcPitch are sent as zero instead of being read.
static void atiColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t bytesSentPerRow = ((w + 31) / 32) * 4;

    // Mono data comes from the host; fg/bg come from the color registers.
    atiWaitFifo(priv, 7);
    atiWriteReg(priv, ATI_DP_FRGD_CLR, fg);
    atiWriteReg(priv, ATI_DP_BKGD_CLR, bg);
    atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_MONO(ATI_SRC_HOST) | ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR) | ATI_DP_SRC_BKGD(ATI_SRC_BKGD_CLR));
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN);
    atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)dstX << 16) | (uint32_t)dstY);
    atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, ((uint32_t)w << 16) | (uint32_t)h);

    for (int32_t row = 0; row < h; row++) {
        const uint8_t *line = srcBuf + row * srcPitch;

        for (int32_t base = 0; base < bytesSentPerRow; base += 4) {
            // Shift bytes in from the right so the first source byte
            // ends up in the most significant byte of the dword.
            uint32_t packed = 0;

            for (int32_t i = 0; i < 4; i++) {
                packed <<= 8;
                if (base + i < srcPitch) {
                    packed |= line[base + i];
                }
            }

            atiWaitFifo(priv, 1);
            atiWriteReg(priv, ATI_HOST_DATA0, packed);
        }
    }
}
// ============================================================
// atiDetect
// ============================================================
// Look for a supported ATI device on the PCI bus.
// On a match, drv->pciDev is filled in by pciFindDeviceList() and
// drv->name is set to a chip-specific string ("ATI Mach64" when the
// device ID has no dedicated entry). Returns true if a device was
// found, false otherwise.
static bool atiDetect(AccelDriverT *drv) {
    // Device ID -> display name. IDs that share a name are listed
    // individually.
    static const struct {
        uint16_t    deviceId;
        const char *label;
    } sChipNames[] = {
        { ATI_MACH64_GX,       "ATI Mach64 GX"    },
        { ATI_MACH64_CX,       "ATI Mach64 CX"    },
        { ATI_MACH64_CT,       "ATI Mach64 CT"    },
        { ATI_MACH64_ET,       "ATI Mach64 ET"    },
        { ATI_MACH64_VT,       "ATI Mach64 VT"    },
        { ATI_MACH64_VT_B,     "ATI Mach64 VT"    },
        { ATI_MACH64_GT,       "ATI 3D Rage II"   },
        { ATI_MACH64_GT_B,     "ATI 3D Rage II"   },
        { ATI_RAGE_PRO,        "ATI Rage Pro"     },
        { ATI_RAGE_PRO_AGP,    "ATI Rage Pro"     },
        { ATI_RAGE_128_RE,     "ATI Rage 128"     },
        { ATI_RAGE_128_RF,     "ATI Rage 128"     },
        { ATI_RAGE_128_RK,     "ATI Rage 128"     },
        { ATI_RAGE_128_RL,     "ATI Rage 128"     },
        { ATI_RAGE_128_PRO_PF, "ATI Rage 128 Pro" },
        { ATI_RAGE_128_PRO_PR, "ATI Rage 128 Pro" },
    };
    int32_t matchIdx;

    if (!pciFindDeviceList(sAtiDeviceIds, &drv->pciDev, &matchIdx)) {
        return false;
    }

    const char *label = "ATI Mach64";

    for (size_t i = 0; i < sizeof(sChipNames) / sizeof(sChipNames[0]); i++) {
        if (sChipNames[i].deviceId == drv->pciDev.deviceId) {
            label = sChipNames[i].label;
            break;
        }
    }

    drv->name = label;
    return true;
}
// ============================================================
// atiHostBlit
// ============================================================
//
// CPU-to-screen blit. Transfers pixel data from system memory
// to VRAM through the Mach64 host data registers.
// Upload a w x h block of pixels from system memory to (dstX, dstY).
//   srcBuf   - first source row, pixels already in display format
//   srcPitch - byte distance between consecutive source rows
// Rectangles with a non-positive width or height are ignored.
//
// Each row is sent as whole dwords through HOST_DATA0; the final dword
// of a row is zero-filled past the last pixel byte, so nothing beyond
// w * bytesPerPixel bytes is read from a source row.
static void atiHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t pixelBytes = w * priv->bytesPerPixel;
    int32_t dwordCount = (pixelBytes + 3) / 4;

    // Host data is the foreground source; plain copy, left-to-right,
    // top-to-bottom.
    atiWaitFifo(priv, 5);
    atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_HOST));
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN);
    atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)dstX << 16) | (uint32_t)dstY);
    atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, ((uint32_t)w << 16) | (uint32_t)h);

    for (int32_t row = 0; row < h; row++) {
        const uint8_t *line = srcBuf + row * srcPitch;

        for (int32_t dw = 0; dw < dwordCount; dw++) {
            int32_t  start  = dw * 4;
            int32_t  remain = pixelBytes - start;
            int32_t  count  = (remain < 4) ? remain : 4;
            uint32_t packed = 0;

            // Walk the bytes last-to-first so the first byte lands in
            // the low 8 bits (little-endian order).
            for (int32_t i = count - 1; i >= 0; i--) {
                packed = (packed << 8) | line[start + i];
            }

            atiWaitFifo(priv, 1);
            atiWriteReg(priv, ATI_HOST_DATA0, packed);
        }
    }
}
// ============================================================
// atiInit
// ============================================================
// Bring the chip up in the requested mode and prepare the 2D engine.
//
// Steps, in order:
//   1. Reset the private state and choose port I/O (GX/CX) or MMIO.
//   2. Read BAR0 for the aperture base and size; derive the VRAM size.
//   3. Set the video mode through VESA and map the whole aperture.
//   4. Publish the mode in drv->mode and program engine defaults
//      (pixel width, write mask, pitches, full-screen clip).
//
//   drv - driver object; drv->pciDev must already be filled by detect()
//   req - requested width / height / bpp
//
// Returns true on success. Returns false when the aperture is smaller
// than the register block, when no matching VESA mode can be set, or
// when the aperture cannot be mapped (vgaRestoreTextMode() is called
// in that last case because the mode was already switched).
static bool atiInit(AccelDriverT *drv, const AccelModeRequestT *req) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    memset(priv, 0, sizeof(*priv));

    // Determine if this is an old GX/CX (I/O only) or newer (MMIO)
    priv->useIo = (drv->pciDev.deviceId == ATI_MACH64_GX
        || drv->pciDev.deviceId == ATI_MACH64_CX);
    priv->ioBase = ATI_IO_BASE_DEFAULT;

    // Get LFB address and size from PCI BAR0
    uint32_t bar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev,
        drv->pciDev.func, PCI_BAR0);
    priv->lfbPhysAddr = bar0 & 0xFFFFFFF0;
    uint32_t barSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0);

    // The register block is addressed as (aperture end - ATI_MMIO_SIZE).
    // A BAR smaller than that (e.g. sizing returned 0) would place the
    // MMIO pointer in front of the mapping, so refuse it before any
    // hardware state has been changed.
    if (barSize < ATI_MMIO_SIZE) {
        return false;
    }

    // Aperture size != VRAM size on Mach64 (aperture is typically 8MB)
    // Read actual VRAM from MEM_CNTL register
    uint32_t memCntl;
    if (priv->useIo) {
        memCntl = inportl(priv->ioBase + ATI_MEM_CNTL);
    } else {
        // MMIO is at the end of the aperture, which is not mapped yet,
        // so MEM_CNTL is not read on this path; the VRAM size is
        // estimated from the aperture size below instead.
        priv->mmioPhysAddr = priv->lfbPhysAddr + barSize - ATI_MMIO_SIZE;
        memCntl = 0;
    }

    // Determine VRAM size. memCntl == 0 doubles as "no register value
    // available" and selects the aperture-based fallback.
    if (memCntl != 0) {
        uint32_t memSize = memCntl & 0x07;
        switch (memSize) {
            case 0: priv->vramSize = 512 * 1024; break;
            case 1: priv->vramSize = 1024 * 1024; break;
            case 2: priv->vramSize = 2 * 1024 * 1024; break;
            case 3: priv->vramSize = 4 * 1024 * 1024; break;
            case 4: priv->vramSize = 6 * 1024 * 1024; break;
            case 5: priv->vramSize = 8 * 1024 * 1024; break;
            default: priv->vramSize = 2 * 1024 * 1024; break;
        }
    } else {
        // Conservative fallback
        priv->vramSize = (barSize > 8 * 1024 * 1024) ? 4 * 1024 * 1024 : barSize;
    }

    // Set VESA mode
    VesaModeResultT vesa;
    if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) {
        return false;
    }

    // Map LFB + MMIO region (map entire aperture; MMIO is at end)
    if (!dpmiMapFramebuffer(priv->lfbPhysAddr, barSize, &priv->lfbMapping)) {
        vgaRestoreTextMode();
        return false;
    }

    priv->bytesPerPixel = (vesa.bpp + 7) / 8;
    priv->screenPitch = vesa.pitch;

    drv->mode.width = vesa.width;
    drv->mode.height = vesa.height;
    drv->mode.bpp = vesa.bpp;
    drv->mode.pitch = vesa.pitch;
    drv->mode.framebuffer = priv->lfbMapping.ptr;
    drv->mode.vramSize = priv->vramSize;
    // Everything past the visible screen is available for allocations.
    drv->mode.offscreenBase = vesa.pitch * vesa.height;

    // Set up MMIO pointer at end of aperture
    if (!priv->useIo) {
        priv->mmio = (volatile uint32_t *)(priv->lfbMapping.ptr + barSize - ATI_MMIO_SIZE);
    }

    // Configure the drawing engine pixel width
    uint32_t pixWidth;
    switch (vesa.bpp) {
        case 8: pixWidth = ATI_PIX_8BPP; break;
        case 15: pixWidth = ATI_PIX_15BPP; break;
        case 16: pixWidth = ATI_PIX_16BPP; break;
        case 32: pixWidth = ATI_PIX_32BPP; break;
        default: pixWidth = ATI_PIX_16BPP; break;
    }

    // DP_PIX_WIDTH: set all fields to the same depth
    uint32_t dpPixWidth = pixWidth
        | (pixWidth << 4) // host data
        | (pixWidth << 8) // source
        | (pixWidth << 16) // destination
        | (pixWidth << 28); // default

    // DST_OFF_PITCH / SRC_OFF_PITCH: offset = 0, pitch in units of 8 pixels
    uint32_t pitch8 = vesa.pitch / priv->bytesPerPixel / 8;

    // Four register writes follow, so reserve four FIFO entries
    // (reserving fewer lets the later writes overrun the FIFO).
    atiWaitFifo(priv, 4);
    atiWriteReg(priv, ATI_DP_PIX_WIDTH, dpPixWidth);
    atiWriteReg(priv, ATI_DP_WRITE_MASK, 0xFFFFFFFF);
    atiWriteReg(priv, ATI_DST_OFF_PITCH, pitch8 << 22);
    atiWriteReg(priv, ATI_SRC_OFF_PITCH, pitch8 << 22);

    // Set up cursor at end of VRAM, aligned down to the image size
    priv->cursorOffset = priv->vramSize - ATI_HW_CURSOR_BYTES;
    priv->cursorOffset &= ~(ATI_HW_CURSOR_BYTES - 1);

    drv->caps = ACAP_RECT_FILL
        | ACAP_RECT_FILL_PAT
        | ACAP_BITBLT
        | ACAP_HOST_BLIT
        | ACAP_COLOR_EXPAND
        | ACAP_LINE_DRAW
        | ACAP_HW_CURSOR
        | ACAP_CLIP;

    // Full screen clip
    atiSetClip(drv, 0, 0, vesa.width, vesa.height);
    atiWaitIdle(drv);

    return true;
}
// ============================================================
// atiLineDraw
// ============================================================
//
// Bresenham line draw using the Mach64 DST_BRES registers.
// Draw a solid line from (x1, y1) to (x2, y2) in the given color
// using the DST_BRES registers.
//
// The deltas are made positive and their signs folded into the
// trajectory bits; the larger delta becomes the major axis (ties go
// to X). The Bresenham terms are then
//   err = 2*minor - major,  inc = 2*minor,  dec = 2*(minor - major)
// and the length written is major + 1.
//
// NOTE(review): when both endpoints coincide nothing is drawn, even
// though the API describes the endpoints as inclusive -- confirm that
// skipping the single pixel is intended.
static void atiLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;
    int32_t  spanX = x2 - x1;
    int32_t  spanY = y2 - y1;
    uint32_t trajectory = 0;

    if (spanX >= 0) {
        trajectory |= ATI_DST_X_DIR_RIGHT;
    } else {
        spanX = -spanX;
    }

    if (spanY >= 0) {
        trajectory |= ATI_DST_Y_DIR_DOWN;
    } else {
        spanY = -spanY;
    }

    bool    yMajor    = (spanX < spanY);
    int32_t longSpan  = yMajor ? spanY : spanX;
    int32_t shortSpan = yMajor ? spanX : spanY;

    if (yMajor) {
        trajectory |= 0x04; // Y major axis select
    }

    if (longSpan == 0) {
        return;
    }

    int32_t bresErr = 2 * shortSpan - longSpan;
    int32_t bresInc = 2 * shortSpan;
    int32_t bresDec = 2 * (shortSpan - longSpan);

    atiWaitFifo(priv, 8);
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, trajectory);
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR));
    atiWriteReg(priv, ATI_DP_FRGD_CLR, color);
    atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)x1 << 16) | (uint32_t)y1);
    atiWriteReg(priv, ATI_DST_BRES_ERR, bresErr);
    atiWriteReg(priv, ATI_DST_BRES_INC, bresInc);
    atiWriteReg(priv, ATI_DST_BRES_DEC, bresDec);

    // The length goes last, after a separate FIFO wait.
    atiWaitFifo(priv, 1);
    atiWriteReg(priv, ATI_DST_BRES_LNTH, longSpan + 1);
}
// ============================================================
// atiMoveCursor
// ============================================================
// Move the hardware cursor so the image's top-left corner is at (x, y).
//
// The position register only takes non-negative coordinates, so a
// cursor partly off the left or top edge is expressed as position 0
// plus an offset into the cursor image (how many columns / rows to
// skip).
//
// The offset is clamped to the last column / row of the image. The
// previous code masked it with 0x3F, which wrapped for coordinates at
// or below -64 and made a cursor that should be off-screen reappear
// (x = -64 produced offset 0, i.e. a fully visible cursor at x = 0).
// For coordinates from -63 to -1 the result is unchanged.
static void atiMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;
    const int32_t maxOffset = ATI_HW_CURSOR_SIZE - 1;
    uint32_t offset = 0;

    if (x < 0) {
        // Compare before negating so the most negative int32_t cannot overflow.
        int32_t skipCols = (x <= -maxOffset) ? maxOffset : -x;
        offset |= (uint32_t)skipCols << 16;
        x = 0;
    }

    if (y < 0) {
        int32_t skipRows = (y <= -maxOffset) ? maxOffset : -y;
        offset |= (uint32_t)skipRows;
        y = 0;
    }

    // NOTE(review): X is packed into the upper 16 bits of both
    // registers here -- confirm the field order against the register
    // reference.
    atiWriteReg(priv, ATI_CUR_HORZ_VERT_OFF, offset);
    atiWriteReg(priv, ATI_CUR_HORZ_VERT_POSN,
        ((uint32_t)x << 16) | (uint32_t)y);
}
// ============================================================
// atiRectFill
// ============================================================
// Fill the w x h rectangle at (x, y) with a single color.
// Rectangles with a non-positive width or height are ignored.
// Five setup registers are written first; the height/width write that
// starts the fill follows after its own FIFO wait.
static void atiRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    const uint32_t origin = ((uint32_t)x << 16) | (uint32_t)y;
    const uint32_t extent = ((uint32_t)w << 16) | (uint32_t)h;

    atiWaitFifo(priv, 5);
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN);
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR));
    atiWriteReg(priv, ATI_DP_FRGD_CLR, color);
    atiWriteReg(priv, ATI_DST_Y_X, origin);

    atiWaitFifo(priv, 1);
    atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, extent);
}
// ============================================================
// atiRectFillPat
// ============================================================
//
// 8x8 mono pattern fill using the host data path. The pattern is
// 8 bytes (one per row, MSB-first), tiled across the rectangle.
// 1-bits use the foreground color, 0-bits use the background.
// Data is fed through HOST_DATA0, repeating the 8-row pattern
// for the full height, with each row padded to a dword boundary.
// Fill the w x h rectangle at (x, y) with an 8x8 monochrome pattern.
//   pattern - 8 bytes, one per pattern row, MSB-first
//   fg / bg - colors loaded into the foreground / background registers
// Rectangles with a non-positive width or height are ignored.
//
// The pattern is streamed through HOST_DATA0 as mono data: row r of
// the rectangle uses pattern[r & 7], and that byte is repeated in all
// four bytes of every dword so it tiles across the width. Each row is
// sent as ceil(w / 32) dwords.
static void atiRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t dwordCount = (w + 31) / 32;

    // Mono data comes from the host; fg/bg come from the color registers.
    atiWaitFifo(priv, 7);
    atiWriteReg(priv, ATI_DP_FRGD_CLR, fg);
    atiWriteReg(priv, ATI_DP_BKGD_CLR, bg);
    atiWriteReg(priv, ATI_DP_SRC, ATI_DP_SRC_MONO(ATI_SRC_HOST) | ATI_DP_SRC_FRGD(ATI_SRC_FRGD_CLR) | ATI_DP_SRC_BKGD(ATI_SRC_BKGD_CLR));
    atiWriteReg(priv, ATI_DP_MIX, ATI_FRGD_MIX(ATI_MIX_COPY) | ATI_BKGD_MIX(ATI_MIX_COPY));
    atiWriteReg(priv, ATI_GUI_TRAJ_CNTL, ATI_DST_X_DIR_RIGHT | ATI_DST_Y_DIR_DOWN);
    atiWriteReg(priv, ATI_DST_Y_X, ((uint32_t)x << 16) | (uint32_t)y);
    atiWriteReg(priv, ATI_DST_HEIGHT_WIDTH, ((uint32_t)w << 16) | (uint32_t)h);

    for (int32_t row = 0; row < h; row++) {
        // Multiplying by 0x01010101 copies the byte into all four
        // byte lanes of the dword.
        uint32_t tiled = (uint32_t)pattern[row & 7] * 0x01010101u;
        int32_t  left  = dwordCount;

        while (left-- > 0) {
            atiWaitFifo(priv, 1);
            atiWriteReg(priv, ATI_HOST_DATA0, tiled);
        }
    }
}
// ============================================================
// atiSetClip
// ============================================================
// Program the scissor rectangle.
// (x, y) is the top-left corner and w / h the size; the right and
// bottom registers receive the last included column and row.
static void atiSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
    AtiPrivateT *priv = (AtiPrivateT *)drv->privData;
    int32_t lastCol = x + w - 1;
    int32_t lastRow = y + h - 1;

    atiWaitFifo(priv, 4);
    atiWriteReg(priv, ATI_SC_LEFT, x);
    atiWriteReg(priv, ATI_SC_TOP, y);
    atiWriteReg(priv, ATI_SC_RIGHT, lastCol);
    atiWriteReg(priv, ATI_SC_BOTTOM, lastRow);
}
// ============================================================
// atiSetCursor
// ============================================================
static void atiSetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    // Converts an AND/XOR mask cursor into the 2-bits-per-pixel layout this
    // driver writes for the Mach64 (16 bytes per row, 4 pixels per byte,
    // leftmost pixel in the top two bits) and stores it in VRAM at
    // priv->cursorOffset. A NULL image simply hides the cursor.
    AtiPrivateT *state = (AtiPrivateT *)drv->privData;

    if (!image) {
        atiShowCursor(drv, false);
        return;
    }

    // Let the engine finish before the CPU writes into VRAM.
    atiWaitIdle(drv);

    uint8_t *dst = drv->mode.framebuffer + state->cursorOffset;

    for (int32_t line = 0; line < ATI_HW_CURSOR_SIZE; line++) {
        for (int32_t col = 0; col < 16; col++) {
            // 0xAA = four "10" codes, i.e. four transparent pixels.
            uint8_t packed = 0xAA;

            if (line < image->height && col < (image->width + 3) / 4) {
                // The source masks are read with an 8-byte row pitch. The
                // four pixels of this output byte all come from one source
                // byte (either its upper or its lower nibble).
                int32_t firstPx = col * 4;
                int32_t srcIdx = line * 8 + firstPx / 8;
                uint8_t andByte = image->andMask[srcIdx];
                uint8_t xorByte = image->xorMask[srcIdx];

                packed = 0;
                for (int32_t k = 0; k < 4; k++) {
                    int32_t shift = 7 - ((firstPx + k) % 8);
                    uint8_t andBit = (andByte >> shift) & 1;
                    uint8_t xorBit = (xorByte >> shift) & 1;
                    // and=0,xor=0 -> 00 colour 0   and=0,xor=1 -> 01 colour 1
                    // and=1,xor=0 -> 10 transparent and=1,xor=1 -> 11 inverted
                    uint8_t code = (uint8_t)((andBit << 1) | xorBit);
                    packed |= code << (6 - k * 2);
                }
            }

            dst[line * 16 + col] = packed;
        }
    }

    // The offset register is written in units of 8 bytes.
    atiWriteReg(state, ATI_CUR_OFFSET, state->cursorOffset / 8);

    // Colour 0 = black, colour 1 = white.
    atiWriteReg(state, ATI_CUR_CLR0, 0x00000000);
    atiWriteReg(state, ATI_CUR_CLR1, 0x00FFFFFF);
}
// ============================================================
// atiShowCursor
// ============================================================
static void atiShowCursor(AccelDriverT *drv, bool visible) {
    // Read-modify-write of GEN_TEST_CNTL: bit 7 (0x80) is set to show the
    // hardware cursor and cleared to hide it; other bits are preserved.
    AtiPrivateT *state = (AtiPrivateT *)drv->privData;
    uint32_t ctl = atiReadReg(state, ATI_GEN_TEST_CNTL);

    ctl = visible ? (ctl | 0x80) : (ctl & ~0x80);

    atiWriteReg(state, ATI_GEN_TEST_CNTL, ctl);
}
// ============================================================
// atiShutdown
// ============================================================
static void atiShutdown(AccelDriverT *drv) {
    // Tears the driver down in three steps: hide the hardware cursor,
    // release the linear framebuffer mapping, then restore VGA text mode.
    AtiPrivateT *state = (AtiPrivateT *)drv->privData;

    atiShowCursor(drv, false);

    dpmiUnmapFramebuffer(&state->lfbMapping);

    vgaRestoreTextMode();
}
// ============================================================
// atiWaitFifo
// ============================================================
//
// Poll FIFO_STAT until every bit selected by (ATI_FIFO_STAT_MASK >> entries)
// reads as zero, which this driver treats as "at least 'entries' free
// slots". The wait is bounded: after ATI_MAX_IDLE_WAIT reads the function
// returns anyway, and the caller is not told that it timed out.
//
// NOTE(review): if ATI_FIFO_STAT_MASK is 16 bits wide, entries == 16 (the
// value atiWaitIdle passes) shifts the mask to zero, so the first read
// returns immediately -- confirm against the Mach64 FIFO_STAT definition.
static void atiWaitFifo(AtiPrivateT *priv, int32_t entries) {
    const uint32_t busyBits = ATI_FIFO_STAT_MASK >> entries;
    int32_t readsLeft = ATI_MAX_IDLE_WAIT;

    while (readsLeft-- > 0) {
        uint32_t stat = atiReadReg(priv, ATI_FIFO_STAT);

        if ((stat & busyBits) == 0) {
            return;
        }
    }
}
// ============================================================
// atiWaitIdle
// ============================================================
static void atiWaitIdle(AccelDriverT *drv) {
    // Two-stage wait: first ask for 16 free FIFO entries, then poll the
    // GUI status register until its busy bit clears. The second stage is
    // bounded by ATI_MAX_IDLE_WAIT reads and gives up silently.
    AtiPrivateT *state = (AtiPrivateT *)drv->privData;
    int32_t readsLeft = ATI_MAX_IDLE_WAIT;

    atiWaitFifo(state, 16);

    while (readsLeft-- > 0) {
        uint32_t guiStat = atiReadReg(state, ATI_GUI_STAT_MMIO);

        if ((guiStat & ATI_GUI_STAT_BUSY) == 0) {
            return;
        }
    }
}

715
banshee.c Normal file
View file

@ -0,0 +1,715 @@
// banshee.c -- 3dfx Banshee/Voodoo3 accelerated video driver
//
// Supports the 3dfx Banshee and Voodoo3 2D/3D accelerators.
// The Banshee was 3dfx's first 2D/3D combo chip, and the Voodoo3
// improved on it with higher clock speeds. Both share the same
// 2D register interface:
// - Hardware rectangle fill
// - Screen-to-screen BitBLT
// - CPU-to-screen blit (host blit via launch area)
// - Monochrome color expansion (host blit with mono source)
// - Bresenham line draw
// - Hardware clip rectangle
// - 64x64 hardware cursor
//
// Register access:
// BAR0 maps the 32KB MMIO register block. The 2D engine
// registers live at offsets 0x200-0x270 within this block.
// The status register at 0x100 provides engine busy state.
//
// For host-to-screen operations, pixel data is fed through the
// "launch area" -- a write-combining window at MMIO physical
// address + 0x80000. Data is written as 32-bit dwords.
//
// BAR1 maps the linear framebuffer.
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/nearptr.h>
// ============================================================
// 3dfx vendor/device IDs
// ============================================================
// PCI vendor ID shared by both supported chips, and their device IDs.
#define TDFX_VENDOR_ID 0x121A
#define TDFX_BANSHEE 0x0003
#define TDFX_VOODOO3 0x0005
// Flat list of (vendor, device) pairs passed to pciFindDeviceList() by
// bansheeDetect(); the final 0, 0 pair ends the list.
static const uint16_t sBansheeDeviceIds[] = {
TDFX_VENDOR_ID, TDFX_BANSHEE,
TDFX_VENDOR_ID, TDFX_VOODOO3,
0, 0
};
// ============================================================
// 2D engine register offsets (from MMIO base)
// ============================================================
// Byte offsets from the start of the MMIO mapping. bansheeWrite() and
// bansheeRead() divide them by 4 to index the 32-bit register array, so
// every offset must be a multiple of 4.
// NOTE(review): these offsets were not cross-checked here against the 3dfx
// Banshee 2D databook. Linux's tdfx.h places the 2D block at 0x100000
// (clip0Min at +0x08 ... command at +0x70) -- confirm the base and the
// per-register offsets before relying on them.
#define BAN_STATUS 0x100 // bits 0-10: busy when any set
#define BAN_INTRCTRL 0x108 // interrupt control
#define BAN_CLIP0MIN 0x200 // clip rect 0 min (X | Y<<16)
#define BAN_CLIP0MAX 0x204 // clip rect 0 max (X | Y<<16)
#define BAN_DSTBASEADDR 0x208 // destination base address
#define BAN_DSTFORMAT 0x20C // pitch<<16 | bpp encoding
#define BAN_SRCCKMIN 0x210 // source color key min
#define BAN_SRCCKMAX 0x214 // source color key max
#define BAN_DSTCKMIN 0x218 // dest color key min
#define BAN_DSTCKMAX 0x21C // dest color key max
#define BAN_BRESERROR0 0x220 // Bresenham error 0
#define BAN_BRESERROR1 0x224 // Bresenham error 1
#define BAN_ROP 0x230 // raster operation (bits 7:0)
#define BAN_SRCBASEADDR 0x234 // source base address
#define BAN_COMMANDEXTRA 0x238 // command extra
#define BAN_LINESTIPPLE 0x23C // line stipple
#define BAN_LINESTYLE 0x240 // line style
#define BAN_PATTERN0 0x244 // pattern alias 0
#define BAN_PATTERN1 0x248 // pattern alias 1
#define BAN_CLIP1MIN 0x24C // clip rect 1 min
#define BAN_CLIP1MAX 0x250 // clip rect 1 max
#define BAN_SRCFORMAT 0x254 // pitch<<16 | bpp encoding
#define BAN_SRCSIZE 0x258 // width | height<<16
#define BAN_SRCXY 0x25C // X | Y<<16
#define BAN_COLORBACK 0x260 // background color
#define BAN_COLORFORE 0x264 // foreground color
#define BAN_DSTSIZE 0x268 // width | height<<16
#define BAN_DSTXY 0x26C // X | Y<<16
#define BAN_COMMAND 0x270 // command (triggers operation)
// ============================================================
// Command register encoding
// ============================================================
// Command types (bits 3:0). One of these is OR-ed with BAN_CMD_INITIATE
// and any flags below, then written to BAN_COMMAND.
#define BAN_CMD_NOP 0x00
#define BAN_CMD_S2S_BLIT 0x01 // screen-to-screen blit
#define BAN_CMD_S2S_STRETCH 0x02 // screen-to-screen stretch blit
#define BAN_CMD_H2S_BLIT 0x03 // host-to-screen blit
#define BAN_CMD_RECTFILL 0x05 // rectangle fill
#define BAN_CMD_LINEDRAW 0x06 // line draw
#define BAN_CMD_POLYLINE 0x07 // polyline
// Command flags
// NOTE(review): the flag bit positions below were not verified against the
// Banshee command register layout (initiate, direction, ROP and pattern
// bits) -- confirm before relying on them.
#define BAN_CMD_INITIATE (1 << 4) // must be set to start operation
#define BAN_CMD_STIPPLE (1 << 8) // stipple line
#define BAN_CMD_CLIPSEL1 (1 << 9) // use clip1 instead of clip0
#define BAN_CMD_SRCCKENA (1 << 12) // source color key enable
#define BAN_CMD_DSTCKENA (1 << 13) // dest color key enable
#define BAN_CMD_MONOPAT (1 << 14) // mono pattern
#define BAN_CMD_SRCMONO (1 << 15) // source is monochrome
// ============================================================
// BPP format encodings (for srcFormat/dstFormat low bits)
// ============================================================
// Pixel-format codes OR-ed into the low bits of BAN_SRCFORMAT and
// BAN_DSTFORMAT; bppToFormat() maps a bits-per-pixel value to one of them.
#define BAN_FMT_8BPP 1
#define BAN_FMT_16BPP 3
#define BAN_FMT_32BPP 5
// ============================================================
// Status register
// ============================================================
// bansheeWaitIdle() treats the engine as idle once (status & mask) == 0.
#define BAN_STATUS_BUSY_MASK 0x7FF // bits 0-10: engine busy
// ============================================================
// Hardware cursor registers
// ============================================================
// Byte offsets from the MMIO base, used through bansheeRead()/bansheeWrite().
// hwCurPatAddr (0x60) and hwCurLoc (0x64) are adjacent registers: the cursor
// position belongs in hwCurLoc, while hwCurPatAddr holds the VRAM address of
// the cursor image.
#define BAN_VIDPROCCFG 0x5C // video processor config; bit 27 = cursor enable
#define BAN_CURSORPATADDR 0x60 // hwCurPatAddr: VRAM address of cursor image
#define BAN_CURSORLOC 0x64 // hwCurLoc: X | Y<<16
#define BAN_CURSOR_ENABLE (1 << 27)
// ============================================================
// Launch area
// ============================================================
// bansheeInit() maps BAN_LAUNCH_MAP_SIZE bytes at (MMIO physical base +
// BAN_LAUNCH_OFFSET); host blits then write every dword to the first word
// of that mapping.
#define BAN_LAUNCH_OFFSET 0x80000 // offset from MMIO phys base
#define BAN_LAUNCH_MAP_SIZE 4096 // map 4KB of launch area
// ============================================================
// Misc constants
// ============================================================
#define BAN_MMIO_SIZE 32768 // BAR0: 32KB MMIO
// Upper bound on status-register reads in bansheeWaitIdle() before it
// gives up without reporting an error.
#define BAN_MAX_IDLE_WAIT 1000000
// Value written to BAN_ROP by every drawing function in this driver.
#define BAN_ROP_COPY 0xCC
// Cursor height in rows as written by bansheeSetCursor(); each row holds
// 8 bytes (64 pixels) per mask.
#define BAN_HW_CURSOR_SIZE 64
// ============================================================
// Private driver state
// ============================================================
// Per-driver state for the Banshee/Voodoo3 driver. There is a single
// static instance (sBansheePrivate), reached through AccelDriverT.privData.
// All fields are filled in by bansheeInit().
typedef struct {
uint32_t lfbPhysAddr; // BAR1 with the low 4 flag bits masked off
uint32_t mmioPhysAddr; // BAR0 with the low 4 flag bits masked off
uint32_t vramSize; // size reported by pciSizeBar() for BAR1
int32_t bytesPerPixel; // (bpp + 7) / 8 of the active VESA mode
int32_t screenPitch; // pitch reported for the active VESA mode
uint32_t bppFormat; // BAN_FMT_* code for the active mode
volatile uint32_t *mmio; // register block, indexed by byte offset / 4
volatile uint32_t *launch; // launch-area window for host blit data
DpmiMappingT mmioMap; // mapping that backs 'mmio'
DpmiMappingT lfbMap; // mapping that backs the linear framebuffer
DpmiMappingT launchMap; // mapping that backs 'launch'
} BansheePrivateT;
// ============================================================
// Prototypes
// ============================================================
// Forward declarations for the file-local functions, so the driver table
// below and the functions themselves can reference one another regardless
// of definition order.
static void bansheeBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static void bansheeColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg);
static bool bansheeDetect(AccelDriverT *drv);
static void bansheeHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool bansheeInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void bansheeLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color);
static void bansheeMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void bansheeRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void bansheeRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg);
static void bansheeSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void bansheeSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void bansheeShowCursor(AccelDriverT *drv, bool visible);
static void bansheeShutdown(AccelDriverT *drv);
static void bansheeWaitIdle(AccelDriverT *drv);
static uint32_t bppToFormat(int32_t bpp);
// Store 'val' into the 32-bit MMIO register at byte offset 'reg'.
static inline void bansheeWrite(BansheePrivateT *priv, uint32_t reg, uint32_t val) {
    volatile uint32_t *slot = priv->mmio + (reg >> 2);

    *slot = val;
}
// Load the 32-bit MMIO register at byte offset 'reg'.
static inline uint32_t bansheeRead(BansheePrivateT *priv, uint32_t reg) {
    volatile uint32_t *slot = priv->mmio + (reg >> 2);

    return *slot;
}
// ============================================================
// Driver instance
// ============================================================
static BansheePrivateT sBansheePrivate;
static AccelDriverT sBansheeDriver = {
.name = "3dfx Banshee",
.chipFamily = "3dfx",
.caps = 0,
.privData = &sBansheePrivate,
.detect = bansheeDetect,
.init = bansheeInit,
.shutdown = bansheeShutdown,
.waitIdle = bansheeWaitIdle,
.setClip = bansheeSetClip,
.rectFill = bansheeRectFill,
.rectFillPat = bansheeRectFillPat,
.bitBlt = bansheeBitBlt,
.hostBlit = bansheeHostBlit,
.colorExpand = bansheeColorExpand,
.lineDraw = bansheeLineDraw,
.setCursor = bansheeSetCursor,
.moveCursor = bansheeMoveCursor,
.showCursor = bansheeShowCursor,
};
// ============================================================
// bansheeRegisterDriver
// ============================================================
void bansheeRegisterDriver(void) {
accelRegisterDriver(&sBansheeDriver);
}
// ============================================================
// bansheeBitBlt
// ============================================================
//
// Screen-to-screen BitBLT of a w x h pixel rectangle from (srcX, srcY) to
// (dstX, dstY). Source and destination both use the screen pitch and pixel
// format, and the ROP is a straight copy. The function returns as soon as
// the command is written; it does not wait for the blit to finish.
//
// Non-positive sizes are ignored. Each X (and width) value is masked to its
// 16-bit field before being packed, so a negative X cannot sign-extend over
// the Y value that shares the register.
//
// NOTE(review): no direction information is written with the command, so
// overlapping rectangles rely on the engine choosing a safe copy direction
// on its own -- confirm this against the Banshee 2D databook.
static void bansheeBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    BansheePrivateT *priv = (BansheePrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    uint32_t screenFmt = ((uint32_t)priv->screenPitch << 16) | priv->bppFormat;
    uint32_t size = ((uint32_t)w & 0xFFFF) | ((uint32_t)h << 16);
    uint32_t srcXY = ((uint32_t)srcX & 0xFFFF) | ((uint32_t)srcY << 16);
    uint32_t dstXY = ((uint32_t)dstX & 0xFFFF) | ((uint32_t)dstY << 16);

    // The engine must be idle before its registers are reprogrammed.
    bansheeWaitIdle(drv);

    bansheeWrite(priv, BAN_SRCBASEADDR, 0);
    bansheeWrite(priv, BAN_DSTBASEADDR, 0);
    bansheeWrite(priv, BAN_SRCFORMAT, screenFmt);
    bansheeWrite(priv, BAN_DSTFORMAT, screenFmt);
    bansheeWrite(priv, BAN_ROP, BAN_ROP_COPY);
    bansheeWrite(priv, BAN_SRCSIZE, size);
    bansheeWrite(priv, BAN_SRCXY, srcXY);
    bansheeWrite(priv, BAN_DSTSIZE, size);
    bansheeWrite(priv, BAN_DSTXY, dstXY);

    // Writing the command register starts the operation.
    bansheeWrite(priv, BAN_COMMAND, BAN_CMD_S2S_BLIT | BAN_CMD_INITIATE);
}
// ============================================================
// bansheeColorExpand
// ============================================================
//
// Monochrome-to-colour expansion: issues a host-to-screen blit with the
// SRCMONO flag and then streams the bitmap through the launch area, with
// fg/bg loaded into the foreground/background colour registers.
//
// srcBuf   - monochrome bitmap, (w + 7) / 8 bytes used per row.
// srcPitch - byte distance between rows in srcBuf.
// dstX/Y   - destination origin; w/h - size in pixels.
//
// Each row is sent as whole dwords; bytes past the end of the row are sent
// as zero rather than read from srcBuf. Non-positive sizes and a NULL
// srcBuf are ignored. X and width are masked to their 16-bit fields so a
// negative X cannot overwrite the Y value packed above it.
static void bansheeColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg) {
    BansheePrivateT *priv = (BansheePrivateT *)drv->privData;

    if (w <= 0 || h <= 0 || !srcBuf) {
        return;
    }

    int32_t bytesPerRow = (w + 7) / 8;
    int32_t dwordsPerRow = (bytesPerRow + 3) / 4;
    uint32_t size = ((uint32_t)w & 0xFFFF) | ((uint32_t)h << 16);
    uint32_t dstXY = ((uint32_t)dstX & 0xFFFF) | ((uint32_t)dstY << 16);

    bansheeWaitIdle(drv);

    bansheeWrite(priv, BAN_DSTBASEADDR, 0);
    bansheeWrite(priv, BAN_DSTFORMAT, ((uint32_t)priv->screenPitch << 16) | priv->bppFormat);
    bansheeWrite(priv, BAN_SRCFORMAT, ((uint32_t)bytesPerRow << 16) | BAN_FMT_8BPP);
    bansheeWrite(priv, BAN_ROP, BAN_ROP_COPY);
    bansheeWrite(priv, BAN_COLORFORE, fg);
    bansheeWrite(priv, BAN_COLORBACK, bg);
    bansheeWrite(priv, BAN_SRCSIZE, size);
    bansheeWrite(priv, BAN_DSTSIZE, size);
    bansheeWrite(priv, BAN_DSTXY, dstXY);
    bansheeWrite(priv, BAN_COMMAND, BAN_CMD_H2S_BLIT | BAN_CMD_INITIATE | BAN_CMD_SRCMONO);

    // Feed the bitmap row by row. Dwords are assembled a byte at a time
    // (lowest address in the low byte) so the final partial dword of a row
    // never reads beyond bytesPerRow.
    for (int32_t row = 0; row < h; row++) {
        const uint8_t *rowPtr = srcBuf + row * srcPitch;

        for (int32_t dw = 0; dw < dwordsPerRow; dw++) {
            uint32_t val = 0;
            int32_t offset = dw * 4;

            for (int32_t b = 0; b < 4; b++) {
                if (offset + b < bytesPerRow) {
                    val |= (uint32_t)rowPtr[offset + b] << (b * 8);
                }
            }

            priv->launch[0] = val;
        }
    }
}
// ============================================================
// bansheeDetect
// ============================================================
static bool bansheeDetect(AccelDriverT *drv) {
    // Looks for a supported 3dfx device on the PCI bus. On success the
    // device location is left in drv->pciDev and drv->name is set to the
    // matching chip name; on failure drv->name is not touched.
    int32_t listIndex;

    if (!pciFindDeviceList(sBansheeDeviceIds, &drv->pciDev, &listIndex)) {
        return false;
    }

    if (drv->pciDev.deviceId == TDFX_BANSHEE) {
        drv->name = "3dfx Banshee";
    } else if (drv->pciDev.deviceId == TDFX_VOODOO3) {
        drv->name = "3dfx Voodoo3";
    } else {
        // Fallback for a device ID that is neither of the two above.
        drv->name = "3dfx Banshee/Voodoo3";
    }

    return true;
}
// ============================================================
// bansheeHostBlit
// ============================================================
//
// CPU-to-screen blit: issues a host-to-screen command and then streams the
// pixels through the launch area as 32-bit dwords.
//
// srcBuf   - source pixels in the screen's pixel format,
//            w * bytesPerPixel bytes used per row.
// srcPitch - byte distance between rows in srcBuf.
// dstX/Y   - destination origin; w/h - size in pixels.
//
// Each row is sent as whole dwords; bytes past the end of the row are sent
// as zero rather than read from srcBuf. Non-positive sizes and a NULL
// srcBuf are ignored. X and width are masked to their 16-bit fields so a
// negative X cannot overwrite the Y value packed above it.
static void bansheeHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    BansheePrivateT *priv = (BansheePrivateT *)drv->privData;

    if (w <= 0 || h <= 0 || !srcBuf) {
        return;
    }

    int32_t bytesPerRow = w * priv->bytesPerPixel;
    int32_t dwordsPerRow = (bytesPerRow + 3) / 4;
    uint32_t size = ((uint32_t)w & 0xFFFF) | ((uint32_t)h << 16);
    uint32_t dstXY = ((uint32_t)dstX & 0xFFFF) | ((uint32_t)dstY << 16);

    bansheeWaitIdle(drv);

    bansheeWrite(priv, BAN_DSTBASEADDR, 0);
    bansheeWrite(priv, BAN_SRCBASEADDR, 0);
    // The source pitch is the tight row size of the host data.
    bansheeWrite(priv, BAN_SRCFORMAT, ((uint32_t)bytesPerRow << 16) | priv->bppFormat);
    bansheeWrite(priv, BAN_DSTFORMAT, ((uint32_t)priv->screenPitch << 16) | priv->bppFormat);
    bansheeWrite(priv, BAN_ROP, BAN_ROP_COPY);
    bansheeWrite(priv, BAN_SRCSIZE, size);
    bansheeWrite(priv, BAN_DSTSIZE, size);
    bansheeWrite(priv, BAN_DSTXY, dstXY);
    bansheeWrite(priv, BAN_COMMAND, BAN_CMD_H2S_BLIT | BAN_CMD_INITIATE);

    // Feed the pixels row by row. Dwords are assembled a byte at a time
    // (lowest address in the low byte) so the final partial dword of a row
    // never reads beyond bytesPerRow.
    for (int32_t row = 0; row < h; row++) {
        const uint8_t *rowPtr = srcBuf + row * srcPitch;

        for (int32_t dw = 0; dw < dwordsPerRow; dw++) {
            uint32_t val = 0;
            int32_t offset = dw * 4;

            for (int32_t b = 0; b < 4; b++) {
                if (offset + b < bytesPerRow) {
                    val |= (uint32_t)rowPtr[offset + b] << (b * 8);
                }
            }

            priv->launch[0] = val;
        }
    }
}
// ============================================================
// bansheeInit
// ============================================================
static bool bansheeInit(AccelDriverT *drv, const AccelModeRequestT *req) {
    // Brings the card up for the requested mode: reads the BARs, maps the
    // MMIO block and launch area, sets the VESA mode, maps the framebuffer,
    // loads default engine state and publishes the mode and capabilities in
    // *drv. Returns false (with everything acquired so far released) if any
    // mapping or the mode set fails.
    BansheePrivateT *state = (BansheePrivateT *)drv->privData;
    VesaModeResultT modeInfo;
    uint32_t screenFmt;

    // BAR0 = MMIO registers, BAR1 = linear framebuffer; the low four bits
    // of a BAR are flags, not address bits.
    state->mmioPhysAddr = pciRead32(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0) & 0xFFFFFFF0;
    state->lfbPhysAddr = pciRead32(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR1) & 0xFFFFFFF0;
    state->vramSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR1);

    if (!dpmiMapFramebuffer(state->mmioPhysAddr, BAN_MMIO_SIZE, &state->mmioMap)) {
        return false;
    }
    state->mmio = (volatile uint32_t *)state->mmioMap.ptr;

    if (!dpmiMapFramebuffer(state->mmioPhysAddr + BAN_LAUNCH_OFFSET, BAN_LAUNCH_MAP_SIZE, &state->launchMap)) {
        goto undoMmio;
    }
    state->launch = (volatile uint32_t *)state->launchMap.ptr;

    if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &modeInfo)) {
        goto undoLaunch;
    }

    if (!dpmiMapFramebuffer(state->lfbPhysAddr, state->vramSize, &state->lfbMap)) {
        goto undoMode;
    }

    // Record what the VESA layer actually gave us.
    state->bytesPerPixel = (modeInfo.bpp + 7) / 8;
    state->screenPitch = modeInfo.pitch;
    state->bppFormat = bppToFormat(modeInfo.bpp);

    drv->mode.width = modeInfo.width;
    drv->mode.height = modeInfo.height;
    drv->mode.bpp = modeInfo.bpp;
    drv->mode.pitch = modeInfo.pitch;
    drv->mode.framebuffer = state->lfbMap.ptr;
    drv->mode.vramSize = state->vramSize;
    // Off-screen memory starts right after the visible scanlines.
    drv->mode.offscreenBase = modeInfo.pitch * modeInfo.height;

    // The engine must be idle before its default state is loaded.
    bansheeWaitIdle(drv);

    screenFmt = ((uint32_t)state->screenPitch << 16) | state->bppFormat;
    bansheeWrite(state, BAN_SRCBASEADDR, 0);
    bansheeWrite(state, BAN_DSTBASEADDR, 0);
    bansheeWrite(state, BAN_DSTFORMAT, screenFmt);
    bansheeWrite(state, BAN_SRCFORMAT, screenFmt);
    bansheeWrite(state, BAN_ROP, BAN_ROP_COPY);
    bansheeWrite(state, BAN_COMMANDEXTRA, 0);

    drv->caps = ACAP_RECT_FILL
              | ACAP_RECT_FILL_PAT
              | ACAP_BITBLT
              | ACAP_HOST_BLIT
              | ACAP_COLOR_EXPAND
              | ACAP_LINE_DRAW
              | ACAP_HW_CURSOR
              | ACAP_CLIP;

    // Start with the clip rectangle covering the whole screen.
    bansheeSetClip(drv, 0, 0, modeInfo.width, modeInfo.height);
    return true;

    // Failure unwinding: each label releases one more resource, in the
    // reverse order of acquisition.
undoMode:
    vgaRestoreTextMode();
undoLaunch:
    dpmiUnmapFramebuffer(&state->launchMap);
undoMmio:
    dpmiUnmapFramebuffer(&state->mmioMap);
    return false;
}
// ============================================================
// bansheeLineDraw
// ============================================================
//
// Asks the engine to draw a line from (x1, y1) to (x2, y2) in 'color'.
// The start point goes into srcXY, the end point into dstXY, and the
// LINEDRAW command starts the operation; the call does not wait for it to
// finish.
//
// Each X coordinate is masked to its 16-bit field before packing, so an
// endpoint with a negative X no longer sign-extends over the Y coordinate
// that shares the register.
//
// NOTE(review): whether the end point itself is drawn (inclusive endpoints)
// is a property of the hardware command -- confirm against the databook.
static void bansheeLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color) {
    BansheePrivateT *priv = (BansheePrivateT *)drv->privData;
    uint32_t startXY = ((uint32_t)x1 & 0xFFFF) | ((uint32_t)y1 << 16);
    uint32_t endXY = ((uint32_t)x2 & 0xFFFF) | ((uint32_t)y2 << 16);

    bansheeWaitIdle(drv);

    bansheeWrite(priv, BAN_DSTBASEADDR, 0);
    bansheeWrite(priv, BAN_DSTFORMAT, ((uint32_t)priv->screenPitch << 16) | priv->bppFormat);
    bansheeWrite(priv, BAN_ROP, BAN_ROP_COPY);
    bansheeWrite(priv, BAN_COLORFORE, color);
    bansheeWrite(priv, BAN_SRCXY, startXY);
    bansheeWrite(priv, BAN_DSTXY, endXY);
    bansheeWrite(priv, BAN_COMMAND, BAN_CMD_LINEDRAW | BAN_CMD_INITIATE);
}
// ============================================================
// bansheeMoveCursor
// ============================================================
static void bansheeMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    // Writes the cursor position register as X | Y<<16. Negative
    // coordinates are clamped to 0 first.
    BansheePrivateT *state = (BansheePrivateT *)drv->privData;
    uint32_t posX = (x < 0) ? 0u : (uint32_t)x;
    uint32_t posY = (y < 0) ? 0u : (uint32_t)y;

    bansheeWrite(state, BAN_CURSORLOC, posX | (posY << 16));
}
// ============================================================
// bansheeRectFill
// ============================================================
//
// Solid rectangle fill: loads 'color' as the foreground colour, programs
// the destination size and origin, and starts a RECTFILL command. The call
// does not wait for the fill to finish.
//
// Non-positive sizes are ignored. X and width are masked to their 16-bit
// fields before packing, so a negative X cannot overwrite the Y value that
// shares the register.
static void bansheeRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    BansheePrivateT *priv = (BansheePrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    uint32_t size = ((uint32_t)w & 0xFFFF) | ((uint32_t)h << 16);
    uint32_t dstXY = ((uint32_t)x & 0xFFFF) | ((uint32_t)y << 16);

    bansheeWaitIdle(drv);

    bansheeWrite(priv, BAN_DSTBASEADDR, 0);
    bansheeWrite(priv, BAN_DSTFORMAT, ((uint32_t)priv->screenPitch << 16) | priv->bppFormat);
    bansheeWrite(priv, BAN_ROP, BAN_ROP_COPY);
    bansheeWrite(priv, BAN_COLORFORE, color);
    bansheeWrite(priv, BAN_DSTSIZE, size);
    bansheeWrite(priv, BAN_DSTXY, dstXY);
    bansheeWrite(priv, BAN_COMMAND, BAN_CMD_RECTFILL | BAN_CMD_INITIATE);
}
// ============================================================
// bansheeRectFillPat
// ============================================================
//
// 8x8 monochrome pattern fill: a RECTFILL command with BAN_CMD_MONOPAT.
// 'pattern' is 8 bytes, one per row. Rows 0-3 are packed into PATTERN0 and
// rows 4-7 into PATTERN1, lowest row in the low byte. fg and bg are loaded
// into the foreground/background colour registers.
//
// Non-positive sizes and a NULL pattern are ignored. X and width are masked
// to their 16-bit fields before packing, so a negative X cannot overwrite
// the Y value that shares the register.
static void bansheeRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg) {
    BansheePrivateT *priv = (BansheePrivateT *)drv->privData;

    if (w <= 0 || h <= 0 || !pattern) {
        return;
    }

    uint32_t pat0 = (uint32_t)pattern[0]
                  | ((uint32_t)pattern[1] << 8)
                  | ((uint32_t)pattern[2] << 16)
                  | ((uint32_t)pattern[3] << 24);
    uint32_t pat1 = (uint32_t)pattern[4]
                  | ((uint32_t)pattern[5] << 8)
                  | ((uint32_t)pattern[6] << 16)
                  | ((uint32_t)pattern[7] << 24);
    uint32_t size = ((uint32_t)w & 0xFFFF) | ((uint32_t)h << 16);
    uint32_t dstXY = ((uint32_t)x & 0xFFFF) | ((uint32_t)y << 16);

    bansheeWaitIdle(drv);

    bansheeWrite(priv, BAN_DSTBASEADDR, 0);
    bansheeWrite(priv, BAN_DSTFORMAT, ((uint32_t)priv->screenPitch << 16) | priv->bppFormat);
    bansheeWrite(priv, BAN_ROP, BAN_ROP_COPY);
    bansheeWrite(priv, BAN_COLORFORE, fg);
    bansheeWrite(priv, BAN_COLORBACK, bg);
    bansheeWrite(priv, BAN_PATTERN0, pat0);
    bansheeWrite(priv, BAN_PATTERN1, pat1);
    bansheeWrite(priv, BAN_DSTSIZE, size);
    bansheeWrite(priv, BAN_DSTXY, dstXY);
    bansheeWrite(priv, BAN_COMMAND, BAN_CMD_RECTFILL | BAN_CMD_INITIATE | BAN_CMD_MONOPAT);
}
// ============================================================
// bansheeSetClip
// ============================================================
static void bansheeSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
    // Programs clip rectangle 0 with min = (x, y) and max = (x + w, y + h),
    // each packed as X | Y<<16. The X values are masked to their 16-bit
    // field so a negative X cannot overwrite the Y value packed above it.
    BansheePrivateT *priv = (BansheePrivateT *)drv->privData;
    uint32_t minXY = ((uint32_t)x & 0xFFFF) | ((uint32_t)y << 16);
    uint32_t maxXY = ((uint32_t)(x + w) & 0xFFFF) | ((uint32_t)(y + h) << 16);

    bansheeWrite(priv, BAN_CLIP0MIN, minXY);
    bansheeWrite(priv, BAN_CLIP0MAX, maxXY);
}
// ============================================================
// bansheeSetCursor
// ============================================================
//
// Uploads a 64x64 two-plane cursor image into VRAM. Every cursor row takes
// 16 bytes: 8 bytes of AND mask followed by 8 bytes of XOR mask, so the
// whole image is 64 * 16 = 1024 bytes (512 bytes per plane). Rows and
// columns outside the supplied image are filled with AND = 0xFF, XOR = 0x00.
// The source masks are read with an 8-byte row pitch.
//
// The image is placed in the last 2KB-aligned 2KB block that lies wholly
// inside VRAM. A NULL image hides the cursor instead. Nothing is written
// when VRAM is smaller than 2KB or the framebuffer is not mapped.
//
// NOTE(review): nothing here (or in bansheeInit) tells the hardware where
// the image was stored, and no cursor colours are programmed -- confirm
// whether the cursor pattern address and colour registers must be written
// for the image to be displayed.
static void bansheeSetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    BansheePrivateT *priv = (BansheePrivateT *)drv->privData;

    if (!image) {
        bansheeShowCursor(drv, false);
        return;
    }

    // Without this check 'vramSize - 2048' wraps around for a missing or
    // tiny VRAM size and the writes below land far outside the mapping.
    if (priv->vramSize < 2048 || !drv->mode.framebuffer) {
        return;
    }

    // Let the engine finish before the CPU writes into VRAM.
    bansheeWaitIdle(drv);

    uint32_t cursorOffset = priv->vramSize - 2048;
    cursorOffset &= ~0x7FF; // align down to 2KB
    uint8_t *cursorMem = drv->mode.framebuffer + cursorOffset;

    for (int32_t row = 0; row < BAN_HW_CURSOR_SIZE; row++) {
        for (int32_t byteIdx = 0; byteIdx < 8; byteIdx++) {
            int32_t srcIdx = row * 8 + byteIdx;
            uint8_t andByte;
            uint8_t xorByte;

            if (row < image->height && byteIdx < (image->width + 7) / 8) {
                andByte = image->andMask[srcIdx];
                xorByte = image->xorMask[srcIdx];
            } else {
                andByte = 0xFF; // transparent
                xorByte = 0x00;
            }

            cursorMem[row * 16 + byteIdx] = andByte;
            cursorMem[row * 16 + byteIdx + 8] = xorByte;
        }
    }
}
// ============================================================
// bansheeShowCursor
// ============================================================
static void bansheeShowCursor(AccelDriverT *drv, bool visible) {
    // Read-modify-write of vidProcCfg: the cursor-enable bit is set to show
    // the hardware cursor and cleared to hide it; other bits are preserved.
    BansheePrivateT *state = (BansheePrivateT *)drv->privData;
    uint32_t cfg = bansheeRead(state, BAN_VIDPROCCFG);

    cfg = visible ? (cfg | BAN_CURSOR_ENABLE) : (cfg & ~BAN_CURSOR_ENABLE);

    bansheeWrite(state, BAN_VIDPROCCFG, cfg);
}
// ============================================================
// bansheeShutdown
// ============================================================
static void bansheeShutdown(AccelDriverT *drv) {
    // Tears the driver down: hide the cursor while the registers are still
    // mapped, return to text mode, release the three mappings, and clear
    // the register pointers so they cannot be used afterwards.
    BansheePrivateT *state = (BansheePrivateT *)drv->privData;

    bansheeShowCursor(drv, false);
    vgaRestoreTextMode();

    dpmiUnmapFramebuffer(&state->launchMap);
    dpmiUnmapFramebuffer(&state->lfbMap);
    dpmiUnmapFramebuffer(&state->mmioMap);

    state->mmio = NULL;
    state->launch = NULL;
}
// ============================================================
// bansheeWaitIdle
// ============================================================
//
// Poll the status register until none of the bits in BAN_STATUS_BUSY_MASK
// is set. The wait is bounded: after BAN_MAX_IDLE_WAIT reads the function
// returns anyway, and the caller is not told that it timed out.
static void bansheeWaitIdle(AccelDriverT *drv) {
    BansheePrivateT *state = (BansheePrivateT *)drv->privData;
    int32_t readsLeft = BAN_MAX_IDLE_WAIT;

    while (readsLeft-- > 0) {
        if ((bansheeRead(state, BAN_STATUS) & BAN_STATUS_BUSY_MASK) == 0) {
            return;
        }
    }
}
// ============================================================
// bppToFormat
// ============================================================
//
// Convert bits-per-pixel to the Banshee srcFormat/dstFormat
// encoding for the low bits of those registers.
static uint32_t bppToFormat(int32_t bpp) {
switch (bpp) {
case 8:
return BAN_FMT_8BPP;
case 15:
case 16:
return BAN_FMT_16BPP;
case 32:
return BAN_FMT_32BPP;
default:
return BAN_FMT_16BPP;
}
}

732
cirrusGd54.c Normal file
View file

@ -0,0 +1,732 @@
// cirrusGd54.c -- Cirrus Logic GD5426/28/34/36/46/80 accelerated video driver
//
// Supports the Cirrus Logic GD54xx family of VGA controllers. These
// chips were extremely common in the early-to-mid 1990s, found in
// everything from budget desktops to laptops.
//
// The GD54xx BitBLT engine is accessed entirely through extended
// Graphics Controller (GR) registers at I/O ports 0x3CE/0x3CF.
// There is no MMIO option on the GD54xx series (unlike the later
// Laguna chips). The engine supports:
// - Screen-to-screen BitBLT
// - Solid rectangle fill
// - Color expansion (monochrome-to-color, for text)
// - 8x8 pattern fill
// - Transparent blit (color key)
// - Hardware cursor (32x32 on GD5426/28, 64x64 on GD5434+)
//
// Register unlock:
// Write 0x12 to SR6 (sequencer register 6) to unlock the Cirrus
// extended registers. Write 0x00 to re-lock.
//
// BLT engine registers (GR extended, indices 0x20-0x3F):
// All BLT parameters are set through the graphics controller
// index/data ports (0x3CE/0x3CF). Addresses are linear byte
// offsets into VRAM.
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/nearptr.h>
// ============================================================
// Cirrus vendor/device IDs
// ============================================================
// PCI vendor ID and per-chip device IDs. The two ISA/VLB-only parts have no
// PCI ID and are defined as 0 here.
// NOTE(review): CL_GD5429 and CL_GD5434 share the value 0x00A0, which PCI ID
// listings commonly attribute to the GD5430/5440 -- confirm which chips this
// driver really means to claim.
#define CL_VENDOR_ID 0x1013
#define CL_GD5426 0x0000 // ISA/VLB only, no PCI ID -- detected via probe
#define CL_GD5428 0x0000 // ISA/VLB only
#define CL_GD5429 0x00A0 // shared with 5434 on some boards
#define CL_GD5434 0x00A0
#define CL_GD5434_ALT 0x00A8
#define CL_GD5436 0x00AC
#define CL_GD5446 0x00B8
#define CL_GD5480 0x00BC
// Flat list of (vendor, device) pairs; the final 0, 0 pair ends the list.
static const uint16_t sCirrusDeviceIds[] = {
CL_VENDOR_ID, CL_GD5434,
CL_VENDOR_ID, CL_GD5434_ALT,
CL_VENDOR_ID, CL_GD5436,
CL_VENDOR_ID, CL_GD5446,
CL_VENDOR_ID, CL_GD5480,
0, 0
};
// ============================================================
// Cirrus extended GR register indices for BLT engine
// ============================================================
// Index values for the extended graphics-controller registers that hold the
// BLT parameters. Multi-byte quantities are split across consecutive
// indices (LO / MID / HI).
// NOTE(review): confirm that GR34/GR35 hold the foreground colour on the
// targeted chips rather than the transparency key colour.
#define CL_GR20_BLT_WIDTH_LO 0x20
#define CL_GR21_BLT_WIDTH_HI 0x21
#define CL_GR22_BLT_HEIGHT_LO 0x22
#define CL_GR23_BLT_HEIGHT_HI 0x23
#define CL_GR24_BLT_DST_PITCH_LO 0x24
#define CL_GR25_BLT_DST_PITCH_HI 0x25
#define CL_GR26_BLT_SRC_PITCH_LO 0x26
#define CL_GR27_BLT_SRC_PITCH_HI 0x27
#define CL_GR28_BLT_DST_ADDR_LO 0x28
#define CL_GR29_BLT_DST_ADDR_MID 0x29
#define CL_GR2A_BLT_DST_ADDR_HI 0x2A
#define CL_GR2C_BLT_SRC_ADDR_LO 0x2C
#define CL_GR2D_BLT_SRC_ADDR_MID 0x2D
#define CL_GR2E_BLT_SRC_ADDR_HI 0x2E
#define CL_GR30_BLT_MODE 0x30
#define CL_GR31_BLT_STATUS 0x31
#define CL_GR32_BLT_ROP 0x32
#define CL_GR33_BLT_MODE_EXT 0x33
#define CL_GR34_BLT_FGCOLOR_LO 0x34
#define CL_GR35_BLT_FGCOLOR_HI 0x35
#define CL_GR38_BLT_TRANS_COLOR_LO 0x38
#define CL_GR39_BLT_TRANS_COLOR_HI 0x39
#define CL_GR3A_BLT_TRANS_MASK_LO 0x3A
#define CL_GR3B_BLT_TRANS_MASK_HI 0x3B
// ============================================================
// Cirrus BLT mode bits (GR30)
// ============================================================
// Bit assignments follow the GD54xx BLT mode register (GR30). Bits 4-5
// (0x30) select the pixel width for colour expansion and are therefore not
// available as source/destination flags.
#define CL_BLT_DIR_BACKWARD 0x01 // blit direction backward
#define CL_BLT_DST_SYSTEM 0x02 // destination is system memory
#define CL_BLT_SRC_SYSTEM 0x04 // source is system memory (CPU)
#define CL_BLT_TRANSPARENT 0x08 // transparent background
#define CL_BLT_SRC_PATTERN 0x40 // source is 8x8 pattern
#define CL_BLT_COLOR_EXPAND 0x80 // monochrome color expansion
// ============================================================
// Cirrus BLT status bits (GR31)
// ============================================================
// START and RESET are written to GR31; BUSY is the bit polled when reading
// it back.
#define CL_BLT_START 0x02 // start BLT operation
#define CL_BLT_RESET 0x04 // reset BLT engine
#define CL_BLT_BUSY 0x01 // BLT engine busy (read)
// ============================================================
// Cirrus BLT ROP values (GR32)
// ============================================================
//
// The Cirrus ROP encoding is different from the S3/Windows ROP
// codes. These are the Cirrus-specific values written to GR32.
// Note that 0x0B is "NOT dest" in this encoding; the all-ones ROP is 0x0E.
#define CL_ROP_COPY 0x0D // dest = source
#define CL_ROP_PAT_COPY 0x0D // dest = pattern (same as copy in fill mode)
#define CL_ROP_XOR 0x59 // dest = src XOR dest
#define CL_ROP_AND 0x05 // dest = src AND dest
#define CL_ROP_OR 0x6D // dest = src OR dest
#define CL_ROP_ZERO 0x00 // dest = 0
#define CL_ROP_ONE 0x0E // dest = 1
// Cirrus sequencer unlock key: values written to sequencer register SR6 to
// unlock / re-lock the extended registers.
#define CL_SR6_UNLOCK 0x12
#define CL_SR6_LOCK 0x00
// Hardware cursor constants
#define CL_HW_CURSOR_SIZE 64 // 64x64 on GD5434+
#define CL_HW_CURSOR_BYTES 1024 // 64*64*2bpp / 8 = 1024
// Maximum wait iterations (upper bound for busy-wait polling loops)
#define CL_MAX_IDLE_WAIT 1000000
// ============================================================
// Private driver state
// ============================================================
//
// Per-driver state for the GD54xx driver. Zeroed and filled in by
// clInit; reached from the generic driver through drv->privData.
typedef struct {
uint32_t lfbPhysAddr; // physical address of the linear framebuffer (PCI BAR0)
uint32_t vramSize; // VRAM size in bytes (BAR sizing, or SR0F fallback)
uint32_t cursorOffset; // byte offset of the cursor image within VRAM
int32_t bytesPerPixel; // bytes per pixel of the current mode
int32_t screenPitch; // bytes per scanline of the current mode
DpmiMappingT lfbMapping; // DPMI mapping of the framebuffer
bool is5434Plus; // true for GD5434 and later (64x64 cursor)
} CirrusPrivateT;
// ============================================================
// Prototypes
// ============================================================
//
// Forward declarations for the file-local driver entry points so they
// can be referenced by the sCirrusDriver table before their definitions.
static void clBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static void clColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg);
static bool clDetect(AccelDriverT *drv);
static void clHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool clInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void clMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void clRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void clSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void clSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void clShowCursor(AccelDriverT *drv, bool visible);
static void clShutdown(AccelDriverT *drv);
static void clUnlockRegs(void);
static void clWaitIdle(AccelDriverT *drv);
// ============================================================
// Driver instance
// ============================================================
//
// Single static instance of the GD54xx driver and its private state.
// .name is a placeholder that clDetect overwrites with the detected
// chip name; .caps starts at 0 and is filled in by clInit.
static CirrusPrivateT sCirrusPrivate;
static AccelDriverT sCirrusDriver = {
.name = "Cirrus Logic GD5434",
.chipFamily = "cirrus",
.caps = 0,
.privData = &sCirrusPrivate,
.detect = clDetect,
.init = clInit,
.shutdown = clShutdown,
.waitIdle = clWaitIdle,
.setClip = clSetClip,
.rectFill = clRectFill,
.rectFillPat = NULL, // patterned fill not implemented
.bitBlt = clBitBlt,
.hostBlit = clHostBlit,
.colorExpand = clColorExpand,
.lineDraw = NULL, // GD54xx has no hardware line draw
.setCursor = clSetCursor,
.moveCursor = clMoveCursor,
.showCursor = clShowCursor,
};
// ============================================================
// clRegisterDriver
// ============================================================
//
// Public entry point: hands the static GD54xx driver instance to the
// framework via accelRegisterDriver.
void clRegisterDriver(void) {
accelRegisterDriver(&sCirrusDriver);
}
// ============================================================
// clBitBlt
// ============================================================
//
// Screen-to-screen BitBLT. The Cirrus engine uses linear VRAM
// addresses for source and destination. Direction is controlled
// by the backward bit in GR30 -- for overlapping regions where
// dst > src, we must blit backward.
//
// Parameters:
//   srcX, srcY  top-left of the source rectangle, in pixels
//   dstX, dstY  top-left of the destination rectangle, in pixels
//   w, h        rectangle size in pixels; nothing is done if either <= 0
//
// The call returns as soon as the BLT has been started; use
// clWaitIdle to wait for completion.
static void clBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    CirrusPrivateT *priv = (CirrusPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t bpp   = priv->bytesPerPixel;
    int32_t pitch = priv->screenPitch;

    // Calculate linear addresses
    uint32_t srcAddr = srcY * pitch + srcX * bpp;
    uint32_t dstAddr = dstY * pitch + dstX * bpp;

    // Width in bytes minus 1 (the value the width registers expect)
    int32_t widthBytes = w * bpp - 1;

    // Determine direction for overlapping blits
    uint8_t mode = 0;
    if (dstAddr > srcAddr) {
        mode |= CL_BLT_DIR_BACKWARD;
        // A backward BLT decrements byte addresses, so both pointers
        // must start on the LAST BYTE of the rectangle: last row plus
        // (w * bpp - 1). Using (w - 1) * bpp would point at the first
        // byte of the last pixel and corrupt modes with bpp > 1.
        uint32_t lastByteOffset = (uint32_t)((h - 1) * pitch + widthBytes);
        srcAddr += lastByteOffset;
        dstAddr += lastByteOffset;
    }

    clWaitIdle(drv);

    // Set up BLT parameters
    vgaGfxWrite(CL_GR20_BLT_WIDTH_LO, widthBytes & 0xFF);
    vgaGfxWrite(CL_GR21_BLT_WIDTH_HI, (widthBytes >> 8) & 0x1F);
    vgaGfxWrite(CL_GR22_BLT_HEIGHT_LO, (h - 1) & 0xFF);
    vgaGfxWrite(CL_GR23_BLT_HEIGHT_HI, ((h - 1) >> 8) & 0x07);
    vgaGfxWrite(CL_GR24_BLT_DST_PITCH_LO, pitch & 0xFF);
    vgaGfxWrite(CL_GR25_BLT_DST_PITCH_HI, (pitch >> 8) & 0x1F);
    vgaGfxWrite(CL_GR26_BLT_SRC_PITCH_LO, pitch & 0xFF);
    vgaGfxWrite(CL_GR27_BLT_SRC_PITCH_HI, (pitch >> 8) & 0x1F);
    vgaGfxWrite(CL_GR28_BLT_DST_ADDR_LO, dstAddr & 0xFF);
    vgaGfxWrite(CL_GR29_BLT_DST_ADDR_MID, (dstAddr >> 8) & 0xFF);
    vgaGfxWrite(CL_GR2A_BLT_DST_ADDR_HI, (dstAddr >> 16) & 0x3F);
    vgaGfxWrite(CL_GR2C_BLT_SRC_ADDR_LO, srcAddr & 0xFF);
    vgaGfxWrite(CL_GR2D_BLT_SRC_ADDR_MID, (srcAddr >> 8) & 0xFF);
    vgaGfxWrite(CL_GR2E_BLT_SRC_ADDR_HI, (srcAddr >> 16) & 0x3F);
    vgaGfxWrite(CL_GR32_BLT_ROP, CL_ROP_COPY);
    vgaGfxWrite(CL_GR30_BLT_MODE, mode);

    // Start BLT
    vgaGfxWrite(CL_GR31_BLT_STATUS, CL_BLT_START);
}
// ============================================================
// clColorExpand
// ============================================================
//
// Monochrome-to-color expansion. The source data is 1bpp bitmap
// in system memory, which gets transferred through the BLT engine
// with color expansion enabled. Each 1-bit becomes the foreground
// color; 0-bits are left untouched (transparent mode) so that the
// background fill issued first shows through.
//
// Parameters:
//   srcBuf      1bpp bitmap, row 0 first
//   srcPitch    bytes between successive rows of srcBuf
//   dstX, dstY  top-left of the destination rectangle, in pixels
//   w, h        size in pixels; nothing is done if either <= 0
//   fg, bg      foreground / background colors
//
// Host data path: while a system-memory-source BLT is active, the
// engine takes its source data from CPU writes to display memory (the
// address within the aperture is ignored). The data must NOT go to
// port 0x3CF: the graphics index still selects GR31 after the start
// command, so every byte written there would overwrite the BLT
// control register instead of feeding the engine.
static void clColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg) {
    CirrusPrivateT *priv = (CirrusPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t bpp   = priv->bytesPerPixel;
    int32_t pitch = priv->screenPitch;

    // First fill the destination with background color
    clRectFill(drv, dstX, dstY, w, h, bg);
    clWaitIdle(drv);

    // Now do a transparent color expand for the foreground
    uint32_t dstAddr    = dstY * pitch + dstX * bpp;
    int32_t  widthBytes = w * bpp - 1;

    // Set foreground color
    vgaGfxWrite(CL_GR34_BLT_FGCOLOR_LO, fg & 0xFF);
    vgaGfxWrite(CL_GR35_BLT_FGCOLOR_HI, (fg >> 8) & 0xFF);

    // Set up BLT parameters
    vgaGfxWrite(CL_GR20_BLT_WIDTH_LO, widthBytes & 0xFF);
    vgaGfxWrite(CL_GR21_BLT_WIDTH_HI, (widthBytes >> 8) & 0x1F);
    vgaGfxWrite(CL_GR22_BLT_HEIGHT_LO, (h - 1) & 0xFF);
    vgaGfxWrite(CL_GR23_BLT_HEIGHT_HI, ((h - 1) >> 8) & 0x07);
    vgaGfxWrite(CL_GR24_BLT_DST_PITCH_LO, pitch & 0xFF);
    vgaGfxWrite(CL_GR25_BLT_DST_PITCH_HI, (pitch >> 8) & 0x1F);

    // Source pitch for monochrome data
    vgaGfxWrite(CL_GR26_BLT_SRC_PITCH_LO, srcPitch & 0xFF);
    vgaGfxWrite(CL_GR27_BLT_SRC_PITCH_HI, (srcPitch >> 8) & 0x1F);
    vgaGfxWrite(CL_GR28_BLT_DST_ADDR_LO, dstAddr & 0xFF);
    vgaGfxWrite(CL_GR29_BLT_DST_ADDR_MID, (dstAddr >> 8) & 0xFF);
    vgaGfxWrite(CL_GR2A_BLT_DST_ADDR_HI, (dstAddr >> 16) & 0x3F);
    vgaGfxWrite(CL_GR32_BLT_ROP, CL_ROP_COPY);
    vgaGfxWrite(CL_GR30_BLT_MODE, CL_BLT_COLOR_EXPAND | CL_BLT_SRC_SYSTEM | CL_BLT_TRANSPARENT);

    // Start BLT
    vgaGfxWrite(CL_GR31_BLT_STATUS, CL_BLT_START);

    // Feed the monochrome rows to the engine as dwords written to the
    // framebuffer aperture. Each row is zero-padded to a dword
    // boundary, matching the padding the original byte loop produced.
    // NOTE(review): per-row dword padding for color-expand sources
    // depends on the chip's granularity setting -- confirm on hardware.
    volatile uint32_t *bltWindow = (volatile uint32_t *)drv->mode.framebuffer;
    int32_t srcBytesPerRow = (w + 7) / 8;
    int32_t dwordsPerRow   = (srcBytesPerRow + 3) / 4;

    for (int32_t row = 0; row < h; row++) {
        const uint8_t *rowData = srcBuf + row * srcPitch;

        for (int32_t dw = 0; dw < dwordsPerRow; dw++) {
            uint32_t val    = 0;
            int32_t  offset = dw * 4;

            // Little-endian pack so memory order equals source order
            for (int32_t b = 0; b < 4; b++) {
                if (offset + b < srcBytesPerRow) {
                    val |= (uint32_t)rowData[offset + b] << (b * 8);
                }
            }
            bltWindow[0] = val;
        }
    }
}
// ============================================================
// clDetect
// ============================================================
//
// Probes the PCI bus for any device in sCirrusDeviceIds. On a match
// the PCI location is left in drv->pciDev and drv->name is set to a
// chip-specific string. Returns true if a supported chip was found.
static bool clDetect(AccelDriverT *drv) {
    int32_t matchIdx;
    bool    found = pciFindDeviceList(sCirrusDeviceIds, &drv->pciDev, &matchIdx);

    if (!found) {
        return false;
    }

    // Pick the display name from the device ID that was matched
    if (drv->pciDev.deviceId == CL_GD5434 || drv->pciDev.deviceId == CL_GD5434_ALT) {
        drv->name = "Cirrus Logic GD5434";
    } else if (drv->pciDev.deviceId == CL_GD5436) {
        drv->name = "Cirrus Logic GD5436";
    } else if (drv->pciDev.deviceId == CL_GD5446) {
        drv->name = "Cirrus Logic GD5446";
    } else if (drv->pciDev.deviceId == CL_GD5480) {
        drv->name = "Cirrus Logic GD5480";
    } else {
        drv->name = "Cirrus Logic GD54xx";
    }

    return true;
}
// ============================================================
// clHostBlit
// ============================================================
//
// CPU-to-screen blit. Transfers pixel data from system memory to
// the framebuffer via the BLT engine with CL_BLT_SRC_SYSTEM mode.
//
// Parameters:
//   srcBuf      source pixels in the current mode's pixel format
//   srcPitch    bytes between successive rows of srcBuf
//   dstX, dstY  top-left of the destination rectangle, in pixels
//   w, h        size in pixels; nothing is done if either <= 0
//
// Host data path: while a system-memory-source BLT is active, the
// engine takes its source data from CPU writes to display memory (the
// address within the aperture is ignored). The data must NOT go to
// port 0x3CF: the graphics index still selects the last GR register
// written, so bytes sent there would overwrite a BLT register instead
// of feeding the engine. Each row is zero-padded to a dword boundary.
static void clHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    CirrusPrivateT *priv = (CirrusPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t  bpp          = priv->bytesPerPixel;
    int32_t  pitch        = priv->screenPitch;
    uint32_t dstAddr      = dstY * pitch + dstX * bpp;
    int32_t  rowBytes     = w * bpp;
    int32_t  widthBytes   = rowBytes - 1;
    int32_t  dwordsPerRow = (rowBytes + 3) / 4;

    clWaitIdle(drv);

    // Set up BLT parameters
    vgaGfxWrite(CL_GR20_BLT_WIDTH_LO, widthBytes & 0xFF);
    vgaGfxWrite(CL_GR21_BLT_WIDTH_HI, (widthBytes >> 8) & 0x1F);
    vgaGfxWrite(CL_GR22_BLT_HEIGHT_LO, (h - 1) & 0xFF);
    vgaGfxWrite(CL_GR23_BLT_HEIGHT_HI, ((h - 1) >> 8) & 0x07);
    vgaGfxWrite(CL_GR24_BLT_DST_PITCH_LO, pitch & 0xFF);
    vgaGfxWrite(CL_GR25_BLT_DST_PITCH_HI, (pitch >> 8) & 0x1F);
    vgaGfxWrite(CL_GR28_BLT_DST_ADDR_LO, dstAddr & 0xFF);
    vgaGfxWrite(CL_GR29_BLT_DST_ADDR_MID, (dstAddr >> 8) & 0xFF);
    vgaGfxWrite(CL_GR2A_BLT_DST_ADDR_HI, (dstAddr >> 16) & 0x3F);

    // BLT mode: source from CPU
    vgaGfxWrite(CL_GR30_BLT_MODE, CL_BLT_SRC_SYSTEM);
    vgaGfxWrite(CL_GR32_BLT_ROP, CL_ROP_COPY);

    // Start BLT
    vgaGfxWrite(CL_GR31_BLT_STATUS, CL_BLT_START);

    // Feed pixel data row by row as dwords through the framebuffer
    // aperture, padded to a dword boundary.
    volatile uint32_t *bltWindow = (volatile uint32_t *)drv->mode.framebuffer;

    for (int32_t row = 0; row < h; row++) {
        const uint8_t *rowData = srcBuf + row * srcPitch;

        for (int32_t dw = 0; dw < dwordsPerRow; dw++) {
            uint32_t val    = 0;
            int32_t  offset = dw * 4;

            // Little-endian pack so memory order equals source order
            for (int32_t b = 0; b < 4; b++) {
                if (offset + b < rowBytes) {
                    val |= (uint32_t)rowData[offset + b] << (b * 8);
                }
            }
            bltWindow[0] = val;
        }
    }
}
// ============================================================
// clInit
// ============================================================
//
// Brings the GD54xx up in the requested mode: determines VRAM size,
// sets the VESA mode, maps the linear framebuffer, resets the BLT
// engine, reserves cursor storage and publishes the capability flags.
//
// Parameters:
//   drv  driver instance whose pciDev was filled in by clDetect
//   req  requested width / height / bpp
//
// Returns true on success. Returns false if no matching VESA mode
// could be set or the framebuffer could not be mapped (text mode is
// restored in the latter case).
static bool clInit(AccelDriverT *drv, const AccelModeRequestT *req) {
CirrusPrivateT *priv = (CirrusPrivateT *)drv->privData;
// Start from a clean slate so a re-init never sees stale state
memset(priv, 0, sizeof(*priv));
priv->is5434Plus = (drv->pciDev.deviceId != CL_GD5429);
// Get VRAM size and LFB address from PCI BAR0
uint32_t bar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0);
// Low 4 bits of a memory BAR are flag bits, not address bits
priv->lfbPhysAddr = bar0 & 0xFFFFFFF0;
priv->vramSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0);
// Unlock Cirrus extended registers
clUnlockRegs();
// Detect VRAM size from SR0F if BAR sizing was unreasonable
uint8_t sr0f = vgaSeqRead(0x0F);
uint32_t ramFromSr = 0;
// Bits 4:3 of SR0F select 256KB / 512KB / 1MB / 2MB
switch ((sr0f >> 3) & 0x03) {
case 0: ramFromSr = 256 * 1024; break;
case 1: ramFromSr = 512 * 1024; break;
case 2: ramFromSr = 1024 * 1024; break;
case 3: ramFromSr = 2048 * 1024; break;
}
// GD5434+ can have 4MB
if (priv->is5434Plus && (sr0f & 0x80)) {
ramFromSr = 4096 * 1024;
}
// Only trust the BAR-derived size when it lies in 256KB..64MB
if (priv->vramSize < 256 * 1024 || priv->vramSize > 64 * 1024 * 1024) {
priv->vramSize = ramFromSr;
}
// Find and set VESA mode
VesaModeResultT vesa;
if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) {
return false;
}
// Map LFB via DPMI
if (!dpmiMapFramebuffer(priv->lfbPhysAddr, priv->vramSize, &priv->lfbMapping)) {
vgaRestoreTextMode();
return false;
}
// Round bits-per-pixel up to whole bytes (e.g. 15 -> 2)
priv->bytesPerPixel = (vesa.bpp + 7) / 8;
priv->screenPitch = vesa.pitch;
drv->mode.width = vesa.width;
drv->mode.height = vesa.height;
drv->mode.bpp = vesa.bpp;
drv->mode.pitch = vesa.pitch;
drv->mode.framebuffer = priv->lfbMapping.ptr;
drv->mode.vramSize = priv->vramSize;
// Off-screen memory starts right after the visible frame
// NOTE(review): the cursor image at the end of VRAM is not excluded
// from the off-screen region here -- confirm callers account for it.
drv->mode.offscreenBase = vesa.pitch * vesa.height;
// Re-unlock after mode set
clUnlockRegs();
// Reset BLT engine
vgaGfxWrite(CL_GR31_BLT_STATUS, CL_BLT_RESET);
vgaGfxWrite(CL_GR31_BLT_STATUS, 0x00);
// Set up cursor at end of VRAM
priv->cursorOffset = priv->vramSize - CL_HW_CURSOR_BYTES;
// Align the cursor offset down to a CL_HW_CURSOR_BYTES boundary
priv->cursorOffset &= ~(CL_HW_CURSOR_BYTES - 1);
drv->caps = ACAP_RECT_FILL
| ACAP_BITBLT
| ACAP_HOST_BLIT
| ACAP_COLOR_EXPAND
| ACAP_HW_CURSOR;
return true;
}
// ============================================================
// clMoveCursor
// ============================================================
//
// Moves the hardware cursor. On Cirrus GD54xx the 11-bit cursor
// position is written through the sequencer with a split encoding:
// the low 3 bits of the coordinate go into bits 7:5 of the INDEX
// (0x10 for X, 0x11 for Y) and the upper 8 bits are the data byte.
//
// SR12 and SR13 are not position registers. SR12 carries the cursor
// enable bit toggled by clShowCursor, so writing Y into it (as the
// previous code did) flipped the cursor on and off as it moved.
//
// Parameters:
//   x, y  new cursor position in pixels; negative values clamp to 0
static void clMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    (void)drv;

    if (x < 0) { x = 0; }
    if (y < 0) { y = 0; }

    // index = base | (low 3 bits << 5), data = bits 10:3
    vgaSeqWrite(0x10 | ((x & 0x07) << 5), (x >> 3) & 0xFF);
    vgaSeqWrite(0x11 | ((y & 0x07) << 5), (y >> 3) & 0xFF);
}
// ============================================================
// clRectFill
// ============================================================
//
// Solid rectangle fill using the BLT engine. The Cirrus engine
// doesn't have a dedicated "fill" command, so this routine runs a
// color-expand BLT whose monochrome source is all ones: every pixel
// of the rectangle therefore receives the foreground color.
//
// Parameters:
//   x, y   top-left of the rectangle, in pixels
//   w, h   size in pixels; nothing is done if either <= 0
//   color  fill color
//
// Host data path: while a system-memory-source BLT is active, the
// engine takes its source data from CPU writes to display memory (the
// address within the aperture is ignored). The data must NOT go to
// port 0x3CF: the graphics index still selects GR31 after the start
// command, so writing 0xFF there would set the reset bit of the BLT
// control register rather than supply source data.
static void clRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    CirrusPrivateT *priv = (CirrusPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t  bpp        = priv->bytesPerPixel;
    int32_t  pitch      = priv->screenPitch;
    uint32_t dstAddr    = y * pitch + x * bpp;
    int32_t  widthBytes = w * bpp - 1;

    clWaitIdle(drv);

    // Set foreground color for fill
    vgaGfxWrite(CL_GR34_BLT_FGCOLOR_LO, color & 0xFF);
    vgaGfxWrite(CL_GR35_BLT_FGCOLOR_HI, (color >> 8) & 0xFF);
    vgaGfxWrite(CL_GR20_BLT_WIDTH_LO, widthBytes & 0xFF);
    vgaGfxWrite(CL_GR21_BLT_WIDTH_HI, (widthBytes >> 8) & 0x1F);
    vgaGfxWrite(CL_GR22_BLT_HEIGHT_LO, (h - 1) & 0xFF);
    vgaGfxWrite(CL_GR23_BLT_HEIGHT_HI, ((h - 1) >> 8) & 0x07);
    vgaGfxWrite(CL_GR24_BLT_DST_PITCH_LO, pitch & 0xFF);
    vgaGfxWrite(CL_GR25_BLT_DST_PITCH_HI, (pitch >> 8) & 0x1F);
    vgaGfxWrite(CL_GR28_BLT_DST_ADDR_LO, dstAddr & 0xFF);
    vgaGfxWrite(CL_GR29_BLT_DST_ADDR_MID, (dstAddr >> 8) & 0xFF);
    vgaGfxWrite(CL_GR2A_BLT_DST_ADDR_HI, (dstAddr >> 16) & 0x3F);

    // Source = foreground color, color expand with all 1s
    vgaGfxWrite(CL_GR32_BLT_ROP, CL_ROP_COPY);
    vgaGfxWrite(CL_GR30_BLT_MODE, CL_BLT_COLOR_EXPAND | CL_BLT_SRC_SYSTEM);

    // Source pitch is irrelevant for a CPU-fed source; program zero
    vgaGfxWrite(CL_GR26_BLT_SRC_PITCH_LO, 0);
    vgaGfxWrite(CL_GR27_BLT_SRC_PITCH_HI, 0);

    // Start BLT
    vgaGfxWrite(CL_GR31_BLT_STATUS, CL_BLT_START);

    // Feed all-ones data (every pixel is foreground color) as dwords
    // through the framebuffer aperture; one row of mono data is
    // (w + 7) / 8 bytes, padded up to a dword boundary.
    volatile uint32_t *bltWindow = (volatile uint32_t *)drv->mode.framebuffer;
    int32_t srcBytesPerRow = (w + 7) / 8;
    int32_t dwordsPerRow   = (srcBytesPerRow + 3) / 4;

    for (int32_t row = 0; row < h; row++) {
        for (int32_t dw = 0; dw < dwordsPerRow; dw++) {
            bltWindow[0] = 0xFFFFFFFFu;
        }
    }
}
// ============================================================
// clSetClip
// ============================================================
//
// The GD54xx BLT engine doesn't have hardware scissor registers.
// Clipping must be done in software by adjusting coordinates
// before issuing BLT commands. This is a no-op placeholder: all
// arguments are ignored, and clInit does not advertise a clip
// capability for this driver.
static void clSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
// Silence unused-parameter warnings (the build uses -Wextra -Werror)
(void)drv;
(void)x;
(void)y;
(void)w;
(void)h;
}
// ============================================================
// clSetCursor
// ============================================================
//
// Copies a cursor image into the VRAM slot reserved by clInit and
// points the chip at it. The stored layout is 64 rows of 16 bytes:
// 8 bytes of AND mask followed by 8 bytes of XOR mask. Source masks
// are read with a stride of 8 bytes per row; anything outside the
// image's width/height is filled with AND=0xFF, XOR=0x00.
// Passing a NULL image just hides the cursor.
//
// The slot address is programmed through SR2C (low) and SR2D (high)
// in units of 256 bytes.
static void clSetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    CirrusPrivateT *priv = (CirrusPrivateT *)drv->privData;

    if (!image) {
        clShowCursor(drv, false);
        return;
    }

    // Don't touch VRAM while the BLT engine may still be writing
    clWaitIdle(drv);

    uint8_t *dstRow = drv->mode.framebuffer + priv->cursorOffset;

    for (int32_t y = 0; y < CL_HW_CURSOR_SIZE; y++, dstRow += 16) {
        for (int32_t col = 0; col < 8; col++) {
            uint8_t andBits = 0xFF;
            uint8_t xorBits = 0x00;

            if (y < image->height && col < (image->width + 7) / 8) {
                int32_t maskIdx = y * 8 + col;
                andBits = image->andMask[maskIdx];
                xorBits = image->xorMask[maskIdx];
            }

            dstRow[col]     = andBits;
            dstRow[col + 8] = xorBits;
        }
    }

    // Set cursor address (in units of 256 bytes)
    uint16_t slot = priv->cursorOffset / 256;
    vgaSeqWrite(0x2C, slot & 0xFF);
    vgaSeqWrite(0x2D, (slot >> 8) & 0x3F);
}
// ============================================================
// clShowCursor
// ============================================================
//
// Shows or hides the hardware cursor by read-modify-writing bit 0
// of SR12; all other bits of the register are preserved.
static void clShowCursor(AccelDriverT *drv, bool visible) {
    (void)drv;

    uint8_t current = vgaSeqRead(0x12);
    uint8_t updated = visible ? (uint8_t)(current | 0x01)
                              : (uint8_t)(current & ~0x01);

    vgaSeqWrite(0x12, updated);
}
// ============================================================
// clShutdown
// ============================================================
//
// Tears the driver down: hides the hardware cursor, releases the
// framebuffer mapping created by clInit, and returns the display
// to text mode.
static void clShutdown(AccelDriverT *drv) {
CirrusPrivateT *priv = (CirrusPrivateT *)drv->privData;
clShowCursor(drv, false);
dpmiUnmapFramebuffer(&priv->lfbMapping);
vgaRestoreTextMode();
}
// ============================================================
// clUnlockRegs
// ============================================================
//
// Unlock Cirrus extended registers by writing 0x12 to SR6.
// clInit calls this both before and after the VESA mode set.
static void clUnlockRegs(void) {
vgaSeqWrite(0x06, CL_SR6_UNLOCK);
}
// ============================================================
// clWaitIdle
// ============================================================
//
// Spins until the BLT engine reports idle (GR31 busy bit clear).
// Gives up silently after CL_MAX_IDLE_WAIT polls so a hung engine
// cannot lock up the caller.
static void clWaitIdle(AccelDriverT *drv) {
    (void)drv;

    int32_t pollsLeft = CL_MAX_IDLE_WAIT;

    while (pollsLeft-- > 0) {
        if ((vgaGfxRead(CL_GR31_BLT_STATUS) & CL_BLT_BUSY) == 0) {
            return;
        }
    }
}

585
cirrusLaguna.c Normal file
View file

@ -0,0 +1,585 @@
// cirrusLaguna.c -- Cirrus Logic Laguna GD5462/5464/5465 accelerated video driver
//
// Supports the Cirrus Logic Laguna family: GD5462, GD5464, and GD5465.
// These are MMIO-based PCI accelerators completely different from the
// older GD54xx (Alpine) series -- different register set, different
// BLT engine, and different programming model.
//
// The Laguna 2D engine features:
// - Solid rectangle fill
// - Screen-to-screen BitBLT
// - CPU-to-screen blit (host data window)
// - Monochrome color expansion (text/glyph rendering)
// - Hardware clip rectangle
// - 64x64 hardware cursor
//
// BAR layout:
// BAR0 = MMIO registers (4KB)
// BAR1 = linear framebuffer
//
// The 2D engine is programmed via MMIO registers starting at offset
// 0x0100. Commands are initiated by writing to the COMMAND register
// at 0x0118. Host data (for CPU-to-screen and color expand) is fed
// through a 512-byte window at MMIO + 0x0200.
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/nearptr.h>
// ============================================================
// Cirrus Laguna vendor/device IDs
// ============================================================
#define CIRRUS_VENDOR_ID 0x1013
#define LAGUNA_GD5462 0x00D0
#define LAGUNA_GD5464 0x00D4
#define LAGUNA_GD5465 0x00D6
// Flat list of (vendor, device) pairs terminated by a 0,0 pair;
// passed to pciFindDeviceList by lagDetect.
static const uint16_t sLagunaDeviceIds[] = {
CIRRUS_VENDOR_ID, LAGUNA_GD5462,
CIRRUS_VENDOR_ID, LAGUNA_GD5464,
CIRRUS_VENDOR_ID, LAGUNA_GD5465,
0, 0
};
// ============================================================
// MMIO register offsets (from BAR0)
// ============================================================
//
// All offsets are byte offsets; lagWrite/lagRead divide by 4 to index
// the 32-bit MMIO array.
// NOTE(review): these offsets are taken as given by this file --
// verify against the Laguna register reference before relying on them.
// 0x0000-0x00FF: VGA compatible registers (mapped)
// 2D engine registers
#define LAG_CONTROL 0x0100 // engine control / status
#define LAG_FGCOLOR 0x0104 // foreground color
#define LAG_BGCOLOR 0x0108 // background color
#define LAG_DSTXY 0x010C // destination XY (X | Y<<16)
#define LAG_SRCXY 0x0110 // source XY (X | Y<<16)
#define LAG_DSTSIZE 0x0114 // destination size (W | H<<16)
#define LAG_COMMAND 0x0118 // command register (triggers operation)
#define LAG_PITCH 0x011C // pitch (srcPitch<<16 | dstPitch)
#define LAG_PAT0 0x0120 // 8x8 mono pattern (first 32 bits)
#define LAG_PAT1 0x0124 // 8x8 mono pattern (second 32 bits)
#define LAG_CLIPLT 0x0130 // clip left/top (left | top<<16)
#define LAG_CLIPRB 0x0134 // clip right/bottom (right | bottom<<16)
#define LAG_HOST_DATA 0x0200 // host data window (512 bytes)
// Hardware cursor registers
#define LAG_CUR_CTRL 0x0300 // cursor control (bit 0 = enable)
#define LAG_CUR_X 0x0304 // cursor X position
#define LAG_CUR_Y 0x0308 // cursor Y position
#define LAG_CUR_ADDR 0x030C // cursor VRAM address
// ============================================================
// Status register bits
// ============================================================
#define LAG_STATUS_BUSY 0x01 // engine busy (bit 0 of CONTROL)
// ============================================================
// Command register encoding
// ============================================================
//
// A command word is: operation code | (ROP nibble << 4) | option bits.
// Operation codes (bits 3:0)
#define LAG_CMD_NOP 0x00
#define LAG_CMD_BITBLT 0x01 // screen-to-screen BitBlt
#define LAG_CMD_RECTFILL 0x02 // solid rectangle fill
#define LAG_CMD_HOST_BLIT 0x03 // host-to-screen blit
#define LAG_CMD_LINE 0x04 // line draw
#define LAG_CMD_COLOR_EXPAND 0x05 // mono color expansion from host
// ROP encoding (bits 7:4)
#define LAG_CMD_ROP_SHIFT 4
// Direction and option bits
#define LAG_CMD_DIR_REV 0x0100 // bit 8: reverse direction
#define LAG_CMD_PAT_EN 0x0200 // bit 9: pattern enable
#define LAG_CMD_TRANS_EN 0x0400 // bit 10: transparency enable
#define LAG_CMD_COLOREXP 0x0800 // bit 11: color expand (mono source)
// Common ROP values (shifted into bits 7:4). Only a 4-bit ROP nibble
// fits in the field, so these evaluate to 0xC0 and 0xF0 in the
// command word (not the 8-bit Windows codes 0xCC / 0xF0).
#define LAG_ROP_COPY (0x0C << LAG_CMD_ROP_SHIFT) // nibble 0xC -> 0xC0: dest = src
#define LAG_ROP_PAT (0x0F << LAG_CMD_ROP_SHIFT) // nibble 0xF -> 0xF0: dest = pat
// ============================================================
// Constants
// ============================================================
#define LAG_MMIO_SIZE 4096 // bytes of MMIO register space mapped by lagInit
#define LAG_MAX_IDLE_WAIT 1000000
#define LAG_HW_CURSOR_SIZE 64 // cursor is 64x64 pixels
#define LAG_HW_CURSOR_BYTES 1024 // 64x64x2bpp / 8 = 1024
// ============================================================
// Private driver state
// ============================================================
//
// Per-driver state for the Laguna driver, filled in by lagInit and
// reached from the generic driver through drv->privData.
typedef struct {
uint32_t lfbPhysAddr; // physical address of the framebuffer (PCI BAR1)
uint32_t mmioPhysAddr; // physical address of the MMIO registers (PCI BAR0)
uint32_t vramSize; // VRAM size in bytes, from sizing BAR1
uint32_t cursorOffset; // byte offset of the cursor image within VRAM
int32_t bytesPerPixel; // bytes per pixel of the current mode
int32_t screenPitch; // bytes per scanline of the current mode
volatile uint32_t *mmio; // mapped MMIO registers, indexed in 32-bit words
DpmiMappingT lfbMapping; // DPMI mapping of the framebuffer
DpmiMappingT mmioMapping; // DPMI mapping of the MMIO registers
} LagunaPrivateT;
// ============================================================
// Prototypes
// ============================================================
//
// Forward declarations for the file-local driver entry points so they
// can be referenced by the sLagunaDriver table before their definitions.
static void lagBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static void lagColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg);
static bool lagDetect(AccelDriverT *drv);
static void lagHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool lagInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void lagMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void lagRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void lagSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void lagSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void lagShowCursor(AccelDriverT *drv, bool visible);
static void lagShutdown(AccelDriverT *drv);
static void lagWaitIdle(AccelDriverT *drv);
// Store a 32-bit value to the MMIO register at byte offset `reg`.
static inline void lagWrite(LagunaPrivateT *priv, uint32_t reg, uint32_t val) {
    volatile uint32_t *slot = priv->mmio + (reg / 4);

    *slot = val;
}
// Load the 32-bit value of the MMIO register at byte offset `reg`.
static inline uint32_t lagRead(LagunaPrivateT *priv, uint32_t reg) {
    volatile uint32_t *slot = priv->mmio + (reg / 4);

    return *slot;
}
// ============================================================
// Driver instance
// ============================================================
//
// Single static instance of the Laguna driver and its private state.
// .name is a placeholder that lagDetect overwrites with the detected
// chip name; .caps starts at 0 and is filled in by lagInit.
static LagunaPrivateT sLagunaPrivate;
static AccelDriverT sLagunaDriver = {
.name = "Cirrus Logic Laguna",
.chipFamily = "cirrus-laguna",
.caps = 0,
.privData = &sLagunaPrivate,
.detect = lagDetect,
.init = lagInit,
.shutdown = lagShutdown,
.waitIdle = lagWaitIdle,
.setClip = lagSetClip,
.rectFill = lagRectFill,
.rectFillPat = NULL, // patterned fill not implemented
.bitBlt = lagBitBlt,
.hostBlit = lagHostBlit,
.colorExpand = lagColorExpand,
.lineDraw = NULL, // line draw not implemented (LAG_CMD_LINE is unused)
.setCursor = lagSetCursor,
.moveCursor = lagMoveCursor,
.showCursor = lagShowCursor,
};
// ============================================================
// lagunaRegisterDriver
// ============================================================
//
// Public entry point: hands the static Laguna driver instance to the
// framework via accelRegisterDriver.
void lagunaRegisterDriver(void) {
accelRegisterDriver(&sLagunaDriver);
}
// ============================================================
// lagBitBlt
// ============================================================
//
// Screen-to-screen copy of a w x h pixel rectangle. When the
// destination lies below the source, or to its right on the same
// row, the copy runs in reverse and both start coordinates are the
// bottom-right pixel; otherwise it runs forward from the top-left.
// Returns immediately after triggering the command.
static void lagBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    LagunaPrivateT *priv = (LagunaPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    lagWaitIdle(drv);

    bool     reverse = (dstY > srcY) || (dstY == srcY && dstX > srcX);
    uint32_t cmd     = LAG_CMD_BITBLT | LAG_ROP_COPY;

    // Offset from the top-left corner to the start pixel
    int32_t startDx = 0;
    int32_t startDy = 0;

    if (reverse) {
        cmd     |= LAG_CMD_DIR_REV;
        startDx  = w - 1;
        startDy  = h - 1;
    }

    lagWrite(priv, LAG_SRCXY, (uint32_t)(srcX + startDx) | ((uint32_t)(srcY + startDy) << 16));
    lagWrite(priv, LAG_DSTXY, (uint32_t)(dstX + startDx) | ((uint32_t)(dstY + startDy) << 16));
    lagWrite(priv, LAG_DSTSIZE, (uint32_t)(w - 1) | ((uint32_t)(h - 1) << 16));

    uint32_t pitch = (uint32_t)priv->screenPitch;
    lagWrite(priv, LAG_PITCH, (pitch << 16) | pitch);

    // Writing the command register starts the operation
    lagWrite(priv, LAG_COMMAND, cmd);
}
// ============================================================
// lagColorExpand
// ============================================================
//
// Expands a 1bpp bitmap into colored pixels. The foreground and
// background colors are loaded, the color-expand command is started,
// and the bitmap is then streamed through the host data window at
// MMIO + 0x0200, one little-endian dword at a time, each row
// zero-padded to a dword boundary.
// NOTE(review): lagWaitIdle is called before every dword; if the
// busy bit stays set while the engine waits for host data this will
// run to its timeout each time -- confirm on hardware.
static void lagColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg) {
    LagunaPrivateT *priv = (LagunaPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t  bytesPerRow = (w + 7) / 8;
    uint32_t pitch       = (uint32_t)priv->screenPitch;

    lagWaitIdle(drv);
    lagWrite(priv, LAG_FGCOLOR, fg);
    lagWrite(priv, LAG_BGCOLOR, bg);
    lagWrite(priv, LAG_DSTXY, (uint32_t)dstX | ((uint32_t)dstY << 16));
    lagWrite(priv, LAG_DSTSIZE, (uint32_t)(w - 1) | ((uint32_t)(h - 1) << 16));
    lagWrite(priv, LAG_PITCH, (pitch << 16) | pitch);

    // Start color expand operation
    lagWrite(priv, LAG_COMMAND, LAG_CMD_COLOR_EXPAND | LAG_ROP_COPY | LAG_CMD_COLOREXP);

    // Stream the bitmap through the host data window
    volatile uint32_t *hostWin = (volatile uint32_t *)((volatile uint8_t *)priv->mmio + LAG_HOST_DATA);

    for (int32_t row = 0; row < h; row++) {
        const uint8_t *line = srcBuf + row * srcPitch;

        for (int32_t base = 0; base < bytesPerRow; base += 4) {
            // Number of real source bytes in this dword (1..4)
            int32_t avail = bytesPerRow - base;
            if (avail > 4) {
                avail = 4;
            }

            uint32_t packed = 0;
            for (int32_t k = 0; k < avail; k++) {
                packed |= (uint32_t)line[base + k] << (k * 8);
            }

            lagWaitIdle(drv);
            *hostWin = packed;
        }
    }
}
// ============================================================
// lagDetect
// ============================================================
//
// Probes the PCI bus for any device in sLagunaDeviceIds. On a match
// the PCI location is left in drv->pciDev and drv->name is set to a
// chip-specific string. Returns true if a supported chip was found.
static bool lagDetect(AccelDriverT *drv) {
    int32_t matchIdx;
    bool    found = pciFindDeviceList(sLagunaDeviceIds, &drv->pciDev, &matchIdx);

    if (!found) {
        return false;
    }

    // Pick the display name from the device ID that was matched
    if (drv->pciDev.deviceId == LAGUNA_GD5462) {
        drv->name = "Cirrus Logic Laguna GD5462";
    } else if (drv->pciDev.deviceId == LAGUNA_GD5464) {
        drv->name = "Cirrus Logic Laguna GD5464";
    } else if (drv->pciDev.deviceId == LAGUNA_GD5465) {
        drv->name = "Cirrus Logic Laguna GD5465";
    } else {
        drv->name = "Cirrus Logic Laguna";
    }

    return true;
}
// ============================================================
// lagHostBlit
// ============================================================
//
// Copies a w x h pixel block from system RAM to the screen. After
// the host-blit command is started, the pixels are streamed through
// the host data window at MMIO + 0x0200 as little-endian dwords,
// each row zero-padded to a dword boundary.
// NOTE(review): lagWaitIdle is called before every dword; if the
// busy bit stays set while the engine waits for host data this will
// run to its timeout each time -- confirm on hardware.
static void lagHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    LagunaPrivateT *priv = (LagunaPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t  bytesPerRow = w * priv->bytesPerPixel;
    uint32_t pitch       = (uint32_t)priv->screenPitch;

    lagWaitIdle(drv);
    lagWrite(priv, LAG_DSTXY, (uint32_t)dstX | ((uint32_t)dstY << 16));
    lagWrite(priv, LAG_DSTSIZE, (uint32_t)(w - 1) | ((uint32_t)(h - 1) << 16));
    lagWrite(priv, LAG_PITCH, (pitch << 16) | pitch);

    // Start host-to-screen blit
    lagWrite(priv, LAG_COMMAND, LAG_CMD_HOST_BLIT | LAG_ROP_COPY);

    // Stream the pixels through the host data window
    volatile uint32_t *hostWin = (volatile uint32_t *)((volatile uint8_t *)priv->mmio + LAG_HOST_DATA);

    for (int32_t row = 0; row < h; row++) {
        const uint8_t *line = srcBuf + row * srcPitch;

        for (int32_t base = 0; base < bytesPerRow; base += 4) {
            // Number of real source bytes in this dword (1..4)
            int32_t avail = bytesPerRow - base;
            if (avail > 4) {
                avail = 4;
            }

            uint32_t packed = 0;
            for (int32_t k = 0; k < avail; k++) {
                packed |= (uint32_t)line[base + k] << (k * 8);
            }

            lagWaitIdle(drv);
            *hostWin = packed;
        }
    }
}
// ============================================================
// lagInit
// ============================================================
//
// Brings the Laguna up in the requested mode: reads the BARs, maps
// the MMIO registers, sets the VESA mode, maps the framebuffer,
// reserves cursor storage, publishes the capability flags and opens
// the clip rectangle to the full screen.
//
// Parameters:
//   drv  driver instance whose pciDev was filled in by lagDetect
//   req  requested width / height / bpp
//
// Returns true on success. On any failure every mapping created so
// far is released before returning false, so a failed init leaves
// no DPMI mappings behind.
static bool lagInit(AccelDriverT *drv, const AccelModeRequestT *req) {
    LagunaPrivateT *priv = (LagunaPrivateT *)drv->privData;

    // Start from a clean slate so a re-init never sees stale state
    memset(priv, 0, sizeof(*priv));

    // Read BARs from PCI config space
    uint32_t bar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev,
                              drv->pciDev.func, PCI_BAR0);
    uint32_t bar1 = pciRead32(drv->pciDev.bus, drv->pciDev.dev,
                              drv->pciDev.func, PCI_BAR1);

    // Low 4 bits of a memory BAR are flag bits, not address bits
    priv->mmioPhysAddr = bar0 & 0xFFFFFFF0;
    priv->lfbPhysAddr  = bar1 & 0xFFFFFFF0;

    // Size the framebuffer BAR
    priv->vramSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev,
                                drv->pciDev.func, PCI_BAR1);

    // Map MMIO control registers (4KB)
    if (!dpmiMapFramebuffer(priv->mmioPhysAddr, LAG_MMIO_SIZE, &priv->mmioMapping)) {
        return false;
    }
    priv->mmio = (volatile uint32_t *)priv->mmioMapping.ptr;

    // Find and set VESA mode
    VesaModeResultT vesa;
    if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) {
        // Release the MMIO mapping; it was previously leaked on this path
        dpmiUnmapFramebuffer(&priv->mmioMapping);
        priv->mmio = NULL;
        return false;
    }

    // Map framebuffer
    if (!dpmiMapFramebuffer(priv->lfbPhysAddr, priv->vramSize, &priv->lfbMapping)) {
        dpmiUnmapFramebuffer(&priv->mmioMapping);
        priv->mmio = NULL;
        vgaRestoreTextMode();
        return false;
    }

    // Round bits-per-pixel up to whole bytes (e.g. 15 -> 2)
    priv->bytesPerPixel = (vesa.bpp + 7) / 8;
    priv->screenPitch   = vesa.pitch;

    drv->mode.width         = vesa.width;
    drv->mode.height        = vesa.height;
    drv->mode.bpp           = vesa.bpp;
    drv->mode.pitch         = vesa.pitch;
    drv->mode.framebuffer   = priv->lfbMapping.ptr;
    drv->mode.vramSize      = priv->vramSize;
    // Off-screen memory starts right after the visible frame
    drv->mode.offscreenBase = vesa.pitch * vesa.height;

    // Wait for engine idle before configuring
    lagWaitIdle(drv);

    // Set up hardware cursor at end of VRAM, aligned down to a
    // LAG_HW_CURSOR_BYTES boundary
    priv->cursorOffset  = priv->vramSize - LAG_HW_CURSOR_BYTES;
    priv->cursorOffset &= ~(LAG_HW_CURSOR_BYTES - 1);

    drv->caps = ACAP_RECT_FILL
              | ACAP_BITBLT
              | ACAP_HOST_BLIT
              | ACAP_COLOR_EXPAND
              | ACAP_HW_CURSOR
              | ACAP_CLIP;

    // Set full-screen clip rectangle
    lagSetClip(drv, 0, 0, vesa.width, vesa.height);

    return true;
}
// ============================================================
// lagMoveCursor
// ============================================================
//
// Positions the hardware cursor by writing the cursor X and Y
// registers. Negative coordinates are clamped to 0.
static void lagMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    LagunaPrivateT *priv = (LagunaPrivateT *)drv->privData;

    uint32_t cursorX = (x < 0) ? 0u : (uint32_t)x;
    uint32_t cursorY = (y < 0) ? 0u : (uint32_t)y;

    lagWrite(priv, LAG_CUR_X, cursorX);
    lagWrite(priv, LAG_CUR_Y, cursorY);
}
// ============================================================
// lagRectFill
// ============================================================
//
// Solid rectangle fill. Waits for the engine to go idle, loads
// the fill color, destination origin, size and pitch, then
// writes the command register to start the fill. Rectangles
// with a non-positive width or height are ignored.
static void lagRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    // Nothing to draw for empty or inverted rectangles.
    if (!(w > 0 && h > 0)) {
        return;
    }

    LagunaPrivateT *lag = (LagunaPrivateT *)drv->privData;

    // Each register packs the X-axis value in the low half and
    // the Y-axis value in the high half. Size is written as
    // (dimension - 1).
    uint32_t dstOrigin = ((uint32_t)y << 16) | (uint32_t)x;
    uint32_t dstExtent = ((uint32_t)(h - 1) << 16) | (uint32_t)(w - 1);
    uint32_t pitchPair = ((uint32_t)lag->screenPitch << 16) | (uint32_t)lag->screenPitch;

    lagWaitIdle(drv);
    lagWrite(lag, LAG_FGCOLOR, color);
    lagWrite(lag, LAG_DSTXY, dstOrigin);
    lagWrite(lag, LAG_DSTSIZE, dstExtent);
    lagWrite(lag, LAG_PITCH, pitchPair);

    // Writing the command register triggers the fill.
    lagWrite(lag, LAG_COMMAND, LAG_CMD_RECTFILL | LAG_ROP_COPY);
}
// ============================================================
// lagSetClip
// ============================================================
//
// Programs the clip rectangle from an origin and size. The
// second register receives the inclusive far corner
// (x + w - 1, y + h - 1). Both registers pack X in the low
// 16 bits and Y in the high 16 bits.
static void lagSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
    LagunaPrivateT *lag = (LagunaPrivateT *)drv->privData;

    uint32_t nearCorner = ((uint32_t)y << 16) | (uint32_t)x;
    uint32_t farCorner  = ((uint32_t)(y + h - 1) << 16) | (uint32_t)(x + w - 1);

    lagWrite(lag, LAG_CLIPLT, nearCorner);
    lagWrite(lag, LAG_CLIPRB, farCorner);
}
// ============================================================
// lagSetCursor
// ============================================================
//
// Uploads a hardware cursor image into VRAM at the driver's
// cursor offset and points the cursor address register at it.
// Passing a null image hides the cursor instead.
//
// Each of the LAG_HW_CURSOR_SIZE rows occupies 16 bytes in
// VRAM: 8 bytes of AND mask followed by 8 bytes of XOR mask.
// Source masks are read with a fixed stride of 8 bytes per row.
// Bytes outside the supplied image are written as AND=0xFF,
// XOR=0x00 (transparent).
static void lagSetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    LagunaPrivateT *lag = (LagunaPrivateT *)drv->privData;

    if (!image) {
        lagShowCursor(drv, false);
        return;
    }

    lagWaitIdle(drv);

    uint8_t *rowMem = drv->mode.framebuffer + lag->cursorOffset;

    for (int32_t row = 0; row < LAG_HW_CURSOR_SIZE; row++, rowMem += 16) {
        for (int32_t col = 0; col < 8; col++) {
            // Start from the transparent filler and overwrite it
            // only when this byte falls inside the source image.
            uint8_t andByte = 0xFF;
            uint8_t xorByte = 0x00;

            if (row < image->height && col < (image->width + 7) / 8) {
                int32_t srcIdx = row * 8 + col;
                andByte = image->andMask[srcIdx];
                xorByte = image->xorMask[srcIdx];
            }

            rowMem[col]     = andByte;
            rowMem[col + 8] = xorByte;
        }
    }

    // Tell the hardware where the cursor image lives in VRAM.
    lagWrite(lag, LAG_CUR_ADDR, lag->cursorOffset);
}
// ============================================================
// lagShowCursor
// ============================================================
//
// Shows or hides the hardware cursor by read-modify-writing
// bit 0 of the cursor control register. All other bits are
// preserved.
static void lagShowCursor(AccelDriverT *drv, bool visible) {
    LagunaPrivateT *lag = (LagunaPrivateT *)drv->privData;

    // Clear the enable bit, then set it again only if requested.
    uint32_t ctrl = lagRead(lag, LAG_CUR_CTRL) & ~(uint32_t)0x01;
    if (visible) {
        ctrl |= 0x01;
    }

    lagWrite(lag, LAG_CUR_CTRL, ctrl);
}
// ============================================================
// lagShutdown
// ============================================================
//
// Tears the driver down: hides the hardware cursor, releases
// the MMIO and framebuffer mappings, and restores text mode.
static void lagShutdown(AccelDriverT *drv) {
    LagunaPrivateT *lag = (LagunaPrivateT *)drv->privData;

    // The cursor must be hidden first: it needs the MMIO mapping
    // that is released immediately afterwards.
    lagShowCursor(drv, false);

    dpmiUnmapFramebuffer(&lag->mmioMapping);
    dpmiUnmapFramebuffer(&lag->lfbMapping);

    vgaRestoreTextMode();
}
// ============================================================
// lagWaitIdle
// ============================================================
//
// Spins on the CONTROL register until LAG_STATUS_BUSY reads as
// clear. The register is read at most LAG_MAX_IDLE_WAIT times,
// so a wedged engine cannot hang the caller; on timeout the
// function returns silently.
static void lagWaitIdle(AccelDriverT *drv) {
    LagunaPrivateT *lag = (LagunaPrivateT *)drv->privData;
    int32_t attemptsLeft = LAG_MAX_IDLE_WAIT;

    while (attemptsLeft > 0 && (lagRead(lag, LAG_CONTROL) & LAG_STATUS_BUSY)) {
        attemptsLeft--;
    }
}

869
demo.c Normal file
View file

@ -0,0 +1,869 @@
// demo.c -- Test/demo application for accelerated video drivers
//
// Detects the video card, sets a graphics mode, exercises the
// hardware acceleration (fill rects, blit, draw lines, color
// expand), and provides a simple interactive benchmark comparing
// hardware vs software rendering speed.
//
// Usage: demo [width height bpp]
// Defaults to 640x480x16 if no arguments given.
//
// Press ESC to exit, 'b' to run benchmark, space to cycle tests.
#include "accelVid.h"
#include "pci.h"
#include <dpmi.h>
#include <go32.h>
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/nearptr.h>
#include <time.h>
// Scancode for ESC key (readKey() returns the BIOS scancode from AH)
#define KEY_ESC 0x01
// Default video mode, used when width/height/bpp are not all
// supplied on the command line
#define DEFAULT_WIDTH 640
#define DEFAULT_HEIGHT 480
#define DEFAULT_BPP 16
// Benchmark iteration counts (number of operations timed per test)
#define BENCH_FILL_COUNT 1000
#define BENCH_BLIT_COUNT 1000
#define BENCH_LINE_COUNT 5000
// NOTE(review): BENCH_EXPAND_COUNT is not referenced by
// demoBenchmark in this file; no color-expand test is timed.
#define BENCH_EXPAND_COUNT 500
#define BENCH_HBLIT_COUNT 1000
#define BENCH_PATFILL_COUNT 1000
// Host blit test pattern dimensions, in pixels
#define HBLIT_PAT_W 100
#define HBLIT_PAT_H 100
// ============================================================
// External driver registration functions
// ============================================================
extern void atiRegisterDriver(void);
extern void bansheeRegisterDriver(void);
extern void clRegisterDriver(void);
extern void etRegisterDriver(void);
extern void lagunaRegisterDriver(void);
extern void mgaRegisterDriver(void);
extern void nvRegisterDriver(void);
extern void s3RegisterDriver(void);
extern void sisRegisterDriver(void);
extern void tridentRegisterDriver(void);
// ============================================================
// Prototypes
// ============================================================
static void demoBenchmark(AccelDriverT *drv);
static void demoBitBlt(AccelDriverT *drv);
static void demoColorExpand(AccelDriverT *drv);
static void demoFillRects(AccelDriverT *drv);
static void demoHostBlit(AccelDriverT *drv);
static void demoLines(AccelDriverT *drv);
static void demoPatternFill(AccelDriverT *drv);
static bool isKeyPressed(void);
static uint32_t packColor16(uint8_t r, uint8_t g, uint8_t b);
static uint8_t readKey(void);
static void softFillRect(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
// ============================================================
// demoBenchmark
// ============================================================
//
// Times hardware rectangle fill, screen-to-screen blit, line
// draw, host (CPU-to-screen) blit and pattern fill, plus a
// software rectangle fill for comparison. The driver is shut
// down before the results are printed to stdout, so the caller
// must not use drv afterwards. Blocks on a keypress before
// returning.
//
// Timing uses clock(), whose resolution can be coarser than a
// fast test run. benchRate() therefore reports 0 operations per
// second when the elapsed time is zero, instead of dividing by
// zero.
static double benchRate(int32_t count, double elapsedMs) {
    if (elapsedMs <= 0.0) {
        return 0.0;
    }
    return count * 1000.0 / elapsedMs;
}

static void demoBenchmark(AccelDriverT *drv) {
    int32_t screenW = drv->mode.width;
    int32_t screenH = drv->mode.height;

    // Benchmark hardware rect fill. Positions are scattered by
    // multiplying the iteration index and wrapping so the 100x100
    // rectangle always stays on screen.
    clock_t hwFillStart = clock();
    for (int32_t i = 0; i < BENCH_FILL_COUNT; i++) {
        int32_t x = (i * 37) % (screenW - 100);
        int32_t y = (i * 53) % (screenH - 100);
        drv->rectFill(drv, x, y, 100, 100, packColor16(i & 0xFF, (i >> 3) & 0xFF, (i >> 6) & 0xFF));
    }
    // Include the time for queued work to finish.
    drv->waitIdle(drv);
    clock_t hwFillEnd = clock();

    // Benchmark software rect fill (same rectangles and colors)
    clock_t swFillStart = clock();
    for (int32_t i = 0; i < BENCH_FILL_COUNT; i++) {
        int32_t x = (i * 37) % (screenW - 100);
        int32_t y = (i * 53) % (screenH - 100);
        softFillRect(drv, x, y, 100, 100, packColor16(i & 0xFF, (i >> 3) & 0xFF, (i >> 6) & 0xFF));
    }
    clock_t swFillEnd = clock();

    // Benchmark hardware bitblt
    clock_t hwBltStart = clock();
    for (int32_t i = 0; i < BENCH_BLIT_COUNT; i++) {
        int32_t sx = (i * 31) % (screenW - 100);
        int32_t sy = (i * 47) % (screenH - 100);
        int32_t dx = (i * 43) % (screenW - 100);
        int32_t dy = (i * 59) % (screenH - 100);
        drv->bitBlt(drv, sx, sy, dx, dy, 100, 100);
    }
    drv->waitIdle(drv);
    clock_t hwBltEnd = clock();

    // Benchmark hardware line draw
    clock_t hwLineStart = clock();
    for (int32_t i = 0; i < BENCH_LINE_COUNT; i++) {
        int32_t x1 = (i * 37) % screenW;
        int32_t y1 = (i * 53) % screenH;
        int32_t x2 = (i * 71) % screenW;
        int32_t y2 = (i * 89) % screenH;
        drv->lineDraw(drv, x1, y1, x2, y2, packColor16(255, 255, 255));
    }
    drv->waitIdle(drv);
    clock_t hwLineEnd = clock();

    // Benchmark host blit (CPU-to-screen). Skipped, and omitted
    // from the report, if the source buffer cannot be allocated.
    int32_t bytesPerPix = (drv->mode.bpp + 7) / 8;
    int32_t hblitPitch = HBLIT_PAT_W * bytesPerPix;
    uint8_t *hblitBuf = (uint8_t *)malloc(hblitPitch * HBLIT_PAT_H);
    clock_t hwHblitEnd = 0;
    clock_t hwHblitStart = 0;
    bool hblitValid = false;
    if (hblitBuf) {
        // Fill buffer with a checkerboard pattern of 8x8 squares
        for (int32_t row = 0; row < HBLIT_PAT_H; row++) {
            for (int32_t col = 0; col < HBLIT_PAT_W; col++) {
                uint32_t color;
                if ((row / 8 + col / 8) & 1) {
                    color = packColor16(255, 255, 0);
                } else {
                    color = packColor16(0, 0, 128);
                }
                if (bytesPerPix == 2) {
                    ((uint16_t *)(hblitBuf + row * hblitPitch))[col] = (uint16_t)color;
                } else if (bytesPerPix == 4) {
                    ((uint32_t *)(hblitBuf + row * hblitPitch))[col] = color;
                } else {
                    hblitBuf[row * hblitPitch + col] = (uint8_t)color;
                }
            }
        }
        hwHblitStart = clock();
        for (int32_t i = 0; i < BENCH_HBLIT_COUNT; i++) {
            int32_t dx = (i * 37) % (screenW - HBLIT_PAT_W);
            int32_t dy = (i * 53) % (screenH - HBLIT_PAT_H);
            drv->hostBlit(drv, hblitBuf, hblitPitch, dx, dy, HBLIT_PAT_W, HBLIT_PAT_H);
        }
        drv->waitIdle(drv);
        hwHblitEnd = clock();
        hblitValid = true;
        free(hblitBuf);
    }

    // Benchmark pattern fill
    static const uint8_t benchPattern[8] = {
        0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55
    };
    clock_t hwPatStart = clock();
    for (int32_t i = 0; i < BENCH_PATFILL_COUNT; i++) {
        int32_t px = (i * 37) % (screenW - 100);
        int32_t py = (i * 53) % (screenH - 100);
        drv->rectFillPat(drv, px, py, 100, 100, benchPattern, packColor16(255, 255, 255), packColor16(0, 0, 0));
    }
    drv->waitIdle(drv);
    clock_t hwPatEnd = clock();

    // Calculate times in milliseconds
    double hwFillMs = (double)(hwFillEnd - hwFillStart) * 1000.0 / CLOCKS_PER_SEC;
    double swFillMs = (double)(swFillEnd - swFillStart) * 1000.0 / CLOCKS_PER_SEC;
    double hwBltMs = (double)(hwBltEnd - hwBltStart) * 1000.0 / CLOCKS_PER_SEC;
    double hwLineMs = (double)(hwLineEnd - hwLineStart) * 1000.0 / CLOCKS_PER_SEC;
    double hwHblitMs = (double)(hwHblitEnd - hwHblitStart) * 1000.0 / CLOCKS_PER_SEC;
    double hwPatMs = (double)(hwPatEnd - hwPatStart) * 1000.0 / CLOCKS_PER_SEC;

    // All measurements are taken; shut the driver down so the
    // results can be printed as text.
    accelShutdown(drv);

    printf("\n=== Benchmark Results ===\n\n");
    printf("Rectangle Fill (%d x 100x100):\n", BENCH_FILL_COUNT);
    printf(" Hardware: %.1f ms (%.0f rects/sec)\n",
           hwFillMs, benchRate(BENCH_FILL_COUNT, hwFillMs));
    printf(" Software: %.1f ms (%.0f rects/sec)\n",
           swFillMs, benchRate(BENCH_FILL_COUNT, swFillMs));
    // The ratio divides by the hardware time, so that is the value
    // that must be non-zero; a zero software time gives no
    // meaningful ratio either.
    if (hwFillMs > 0 && swFillMs > 0) {
        printf(" Speedup: %.1fx\n", swFillMs / hwFillMs);
    }
    printf("\nBitBlt (%d x 100x100 screen-to-screen):\n", BENCH_BLIT_COUNT);
    printf(" Hardware: %.1f ms (%.0f blits/sec)\n",
           hwBltMs, benchRate(BENCH_BLIT_COUNT, hwBltMs));
    printf("\nLine Draw (%d lines):\n", BENCH_LINE_COUNT);
    printf(" Hardware: %.1f ms (%.0f lines/sec)\n",
           hwLineMs, benchRate(BENCH_LINE_COUNT, hwLineMs));
    if (hblitValid) {
        printf("\nHost Blit (%d x %dx%d CPU-to-screen):\n",
               BENCH_HBLIT_COUNT, HBLIT_PAT_W, HBLIT_PAT_H);
        printf(" Hardware: %.1f ms (%.0f blits/sec)\n",
               hwHblitMs, benchRate(BENCH_HBLIT_COUNT, hwHblitMs));
    }
    printf("\nPattern Fill (%d x 100x100):\n", BENCH_PATFILL_COUNT);
    printf(" Hardware: %.1f ms (%.0f fills/sec)\n",
           hwPatMs, benchRate(BENCH_PATFILL_COUNT, hwPatMs));
    printf("\nPress any key to exit...\n");
    readKey();
}
// ============================================================
// demoBitBlt
// ============================================================
//
// Screen-to-screen BitBLT demo. Paints a row of four 100x100
// colored squares along the top of the screen, then copies the
// 430x100 strip containing them up to five times, stepping
// right and down on each copy.
static void demoBitBlt(AccelDriverT *drv) {
    const int32_t screenW = drv->mode.width;
    const int32_t screenH = drv->mode.height;

    // Source squares: red, green, blue, yellow, 110 pixels apart.
    static const uint8_t squareRgb[4][3] = {
        {255,   0,   0},
        {  0, 255,   0},
        {  0,   0, 255},
        {255, 255,   0}
    };

    // Clear screen
    drv->rectFill(drv, 0, 0, screenW, screenH, 0);

    for (int32_t s = 0; s < 4; s++) {
        uint32_t color = packColor16(squareRgb[s][0], squareRgb[s][1], squareRgb[s][2]);
        drv->rectFill(drv, 10 + s * 110, 10, 100, 100, color);
    }
    drv->waitIdle(drv);

    // Cascade copies down the screen, stopping early once a copy
    // would run past the bottom edge.
    for (int32_t step = 0, dstY = 120; step < 5 && dstY + 100 <= screenH; step++, dstY += 60) {
        drv->bitBlt(drv, 10, 10, 10 + step * 30, dstY, 430, 100);
    }
    drv->waitIdle(drv);
}
// ============================================================
// demoColorExpand
// ============================================================
//
// Monochrome color-expansion demo. Fills the screen with 8x16
// glyph cells; the glyph cycles A..F along the diagonals and
// each text row uses one of six foreground/background pairs.
static void demoColorExpand(AccelDriverT *drv) {
    // 8x16 glyph bitmaps, one byte per scanline, for 'A'..'F'
    static const uint8_t glyphBits[][16] = {
        { 0x00, 0x18, 0x3C, 0x66, 0x66, 0xC3, 0xC3, 0xFF,
          0xFF, 0xC3, 0xC3, 0xC3, 0xC3, 0xC3, 0x00, 0x00 },
        { 0x00, 0xFC, 0xC6, 0xC6, 0xC6, 0xFC, 0xC6, 0xC3,
          0xC3, 0xC3, 0xC6, 0xFC, 0x00, 0x00, 0x00, 0x00 },
        { 0x00, 0x3E, 0x63, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0,
          0xC0, 0xC0, 0x63, 0x3E, 0x00, 0x00, 0x00, 0x00 },
        { 0x00, 0xFC, 0xC6, 0xC3, 0xC3, 0xC3, 0xC3, 0xC3,
          0xC3, 0xC3, 0xC6, 0xFC, 0x00, 0x00, 0x00, 0x00 },
        { 0x00, 0xFF, 0xC0, 0xC0, 0xC0, 0xFE, 0xC0, 0xC0,
          0xC0, 0xC0, 0xC0, 0xFF, 0x00, 0x00, 0x00, 0x00 },
        { 0x00, 0xFF, 0xC0, 0xC0, 0xC0, 0xFE, 0xC0, 0xC0,
          0xC0, 0xC0, 0xC0, 0xC0, 0x00, 0x00, 0x00, 0x00 }
    };
    // Per-row palettes: foreground R,G,B followed by background R,G,B
    static const uint8_t rowPalette[][6] = {
        {255, 255, 255,  0,  0, 128}, // white on dark blue
        {255, 255,   0,  0,  0,   0}, // yellow on black
        {  0, 255,   0,  0, 64,   0}, // green on dark green
        {255, 128,   0, 64,  0,   0}, // orange on dark red
        {  0, 255, 255,  0,  0,  64}, // cyan on navy
        {255,   0, 255, 32,  0,  32}  // magenta on dark purple
    };
    const int32_t glyphCount   = (int32_t)(sizeof(glyphBits) / sizeof(glyphBits[0]));
    const int32_t paletteCount = (int32_t)(sizeof(rowPalette) / sizeof(rowPalette[0]));

    const int32_t screenW = drv->mode.width;
    const int32_t screenH = drv->mode.height;

    // Clear screen
    drv->rectFill(drv, 0, 0, screenW, screenH, packColor16(0, 0, 128));
    drv->waitIdle(drv);

    // Whole cells only; any partial cell at the right or bottom
    // edge is left showing the clear color.
    const int32_t cellCols = screenW / 8;
    const int32_t cellRows = screenH / 16;

    for (int32_t row = 0; row < cellRows; row++) {
        const uint8_t *rgb = rowPalette[row % paletteCount];
        uint32_t fg = packColor16(rgb[0], rgb[1], rgb[2]);
        uint32_t bg = packColor16(rgb[3], rgb[4], rgb[5]);

        for (int32_t col = 0; col < cellCols; col++) {
            // Source pitch is 1 byte: each glyph row is a single byte.
            drv->colorExpand(drv, glyphBits[(row + col) % glyphCount], 1,
                             col * 8, row * 16, 8, 16, fg, bg);
        }
    }
    drv->waitIdle(drv);
}
// ============================================================
// demoFillRects
// ============================================================
//
// Hardware rectangle fill demo. Clears to dark blue, draws a
// stack of shrinking centered squares, then a grid of small
// color-varying squares down the left and right edges.
static void demoFillRects(AccelDriverT *drv) {
    static const uint8_t ringRgb[8][3] = {
        {255,   0,   0},
        {  0, 255,   0},
        {  0,   0, 255},
        {255, 255,   0},
        {255,   0, 255},
        {  0, 255, 255},
        {255, 128,   0},
        {128,   0, 255}
    };
    const int32_t ringCount = 8;

    const int32_t screenW = drv->mode.width;
    const int32_t screenH = drv->mode.height;
    const int32_t midX = screenW / 2;
    const int32_t midY = screenH / 2;

    // Clear screen to dark blue
    drv->rectFill(drv, 0, 0, screenW, screenH, packColor16(0, 0, 64));
    drv->waitIdle(drv);

    // Concentric squares: start at 200 pixels and shrink by 20
    // per ring, largest first so smaller ones land on top.
    for (int32_t ring = 0, side = 200; ring < ringCount && side >= 10; ring++, side -= 20) {
        uint32_t color = packColor16(ringRgb[ring][0], ringRgb[ring][1], ringRgb[ring][2]);
        drv->rectFill(drv, midX - side / 2, midY - side / 2, side, side, color);
    }

    // Left-hand grid of 20x20 squares on a 25-pixel pitch
    for (int32_t gy = 10; gy < screenH - 30; gy += 25) {
        for (int32_t gx = 10; gx < 150; gx += 25) {
            drv->rectFill(drv, gx, gy, 20, 20,
                          packColor16((gx * 7) & 0xFF, (gy * 3) & 0xFF, ((gx + gy) * 5) & 0xFF));
        }
    }

    // Right-hand grid, same layout with a different color mix
    for (int32_t gy = 10; gy < screenH - 30; gy += 25) {
        for (int32_t gx = screenW - 160; gx < screenW - 10; gx += 25) {
            drv->rectFill(drv, gx, gy, 20, 20,
                          packColor16((gx * 3) & 0xFF, (gy * 7) & 0xFF, ((gx + gy) * 2) & 0xFF));
        }
    }
    drv->waitIdle(drv);
}
// ============================================================
// demoHostBlit
// ============================================================
//
// CPU-to-screen blit demo. Builds a 64x64 tile of concentric
// color rings in system RAM, tiles it over every whole tile
// position on screen, then overlays white one-pixel grid lines
// using rectangle fills. Returns early, leaving the cleared
// screen, if the tile buffer cannot be allocated.
static void demoHostBlit(AccelDriverT *drv) {
    const int32_t screenW = drv->mode.width;
    const int32_t screenH = drv->mode.height;
    const int32_t tileW = 64;
    const int32_t tileH = 64;
    const int32_t bytesPerPix = (drv->mode.bpp + 7) / 8;
    const int32_t tilePitch = tileW * bytesPerPix;

    // Clear screen
    drv->rectFill(drv, 0, 0, screenW, screenH, 0);
    drv->waitIdle(drv);

    uint8_t *tileBuf = (uint8_t *)malloc(tilePitch * tileH);
    if (!tileBuf) {
        return;
    }

    // Color each pixel from its squared distance to the tile
    // center; the 8-bit wraparound produces the ring effect.
    for (int32_t row = 0; row < tileH; row++) {
        uint8_t *rowPtr = tileBuf + row * tilePitch;
        int32_t dy = row - tileH / 2;

        for (int32_t col = 0; col < tileW; col++) {
            int32_t dx = col - tileW / 2;
            int32_t distSq = dx * dx + dy * dy;
            uint8_t r = (distSq * 7) & 0xFF;
            uint8_t g = (distSq * 3 + col * 4) & 0xFF;
            uint8_t b = (row * 8 + col * 2) & 0xFF;
            uint32_t color = packColor16(r, g, b);

            // Store at the pixel width of the current mode.
            switch (bytesPerPix) {
                case 2:
                    ((uint16_t *)rowPtr)[col] = (uint16_t)color;
                    break;
                case 4:
                    ((uint32_t *)rowPtr)[col] = color;
                    break;
                default:
                    rowPtr[col] = (uint8_t)color;
                    break;
            }
        }
    }

    // Tile the pattern across the screen (whole tiles only)
    for (int32_t ty = 0; ty + tileH <= screenH; ty += tileH) {
        for (int32_t tx = 0; tx + tileW <= screenW; tx += tileW) {
            drv->hostBlit(drv, tileBuf, tilePitch, tx, ty, tileW, tileH);
        }
    }
    drv->waitIdle(drv);

    // White grid lines along each tile's top and left edges
    const uint32_t gridColor = packColor16(255, 255, 255);
    for (int32_t ty = 0; ty + tileH <= screenH; ty += tileH) {
        drv->rectFill(drv, 0, ty, screenW, 1, gridColor);
    }
    for (int32_t tx = 0; tx + tileW <= screenW; tx += tileW) {
        drv->rectFill(drv, tx, 0, 1, screenH, gridColor);
    }
    drv->waitIdle(drv);

    free(tileBuf);
}
// ============================================================
// demoLines
// ============================================================
//
// Hardware line-draw demo. Draws 120 colored spokes from the
// screen center, one every 3 degrees, then a white one-pixel
// border around the screen.
//
// Spoke endpoints are not computed with sin/cos. Within each
// 90-degree quadrant the endpoint is linearly interpolated
// between the two axis points, so the endpoints trace a diamond
// rather than a circle.
static void demoLines(AccelDriverT *drv) {
    const int32_t screenW = drv->mode.width;
    const int32_t screenH = drv->mode.height;
    const int32_t originX = screenW / 2;
    const int32_t originY = screenH / 2;
    const int32_t reach   = (screenH / 2) - 10;

    // Clear screen
    drv->rectFill(drv, 0, 0, screenW, screenH, 0);
    drv->waitIdle(drv);

    for (int32_t angle = 0; angle < 360; angle += 3) {
        // "grow" rises from 0 towards reach across the quadrant
        // while "shrink" falls from reach towards 0.
        int32_t grow     = (angle % 90) * reach / 90;
        int32_t shrink   = reach - grow;
        int32_t quadrant = (angle / 90) % 4;
        int32_t dx;
        int32_t dy;

        if (quadrant == 0) {
            dx = grow;
            dy = -shrink;
        } else if (quadrant == 1) {
            dx = shrink;
            dy = grow;
        } else if (quadrant == 2) {
            dx = -grow;
            dy = shrink;
        } else {
            dx = -shrink;
            dy = -grow;
        }

        uint32_t color = packColor16((angle * 3) & 0xFF,
                                     (angle * 5 + 100) & 0xFF,
                                     (angle * 7 + 50) & 0xFF);
        drv->lineDraw(drv, originX, originY, originX + dx, originY + dy, color);
    }

    // Screen border: top, right, bottom, left
    const uint32_t white = packColor16(255, 255, 255);
    const int32_t maxX = screenW - 1;
    const int32_t maxY = screenH - 1;
    drv->lineDraw(drv, 0, 0, maxX, 0, white);
    drv->lineDraw(drv, maxX, 0, maxX, maxY, white);
    drv->lineDraw(drv, maxX, maxY, 0, maxY, white);
    drv->lineDraw(drv, 0, maxY, 0, 0, white);
    drv->waitIdle(drv);
}
// ============================================================
// demoPatternFill
// ============================================================
//
// 8x8 pattern-fill demo. Clears to dark gray, then fills a
// 3-column by 2-row grid of cells, each with a different
// monochrome pattern and foreground/background color pair.
static void demoPatternFill(AccelDriverT *drv) {
    // One 8x8 pattern per cell, one byte per pattern row
    static const uint8_t patternBits[6][8] = {
        {0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55, 0xAA, 0x55}, // checkerboard
        {0xFF, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, // crosshatch
        {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80}, // diagonal stripes
        {0x00, 0x22, 0x00, 0x88, 0x00, 0x22, 0x00, 0x88}, // dots
        {0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00}, // horizontal stripes
        {0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA}  // vertical stripes
    };
    // Matching colors: foreground R,G,B followed by background R,G,B
    static const uint8_t patternRgb[6][6] = {
        {255, 255, 255,  0,  0,   0},
        {255, 255,   0,  0,  0, 128},
        {  0, 255,   0,  0, 64,   0},
        {255,   0,   0, 64,  0,   0},
        {  0, 255, 255,  0,  0,  64},
        {255,   0, 255, 64,  0,  64}
    };
    const int32_t patternCount = 6;
    const int32_t gridCols = 3;
    const int32_t gridRows = 2;
    const int32_t margin   = 20;
    const int32_t spacing  = 10;

    const int32_t screenW = drv->mode.width;
    const int32_t screenH = drv->mode.height;

    // Clear screen to dark gray
    drv->rectFill(drv, 0, 0, screenW, screenH, packColor16(32, 32, 32));
    drv->waitIdle(drv);

    // Cell size is what remains after the outer margins and the
    // gaps between cells, split evenly across the grid.
    const int32_t cellW = (screenW - 2 * margin - (gridCols - 1) * spacing) / gridCols;
    const int32_t cellH = (screenH - 2 * margin - (gridRows - 1) * spacing) / gridRows;

    for (int32_t idx = 0; idx < patternCount; idx++) {
        const uint8_t *rgb = patternRgb[idx];
        int32_t cellX = margin + (idx % gridCols) * (cellW + spacing);
        int32_t cellY = margin + (idx / gridCols) * (cellH + spacing);

        drv->rectFillPat(drv, cellX, cellY, cellW, cellH,
                         patternBits[idx],
                         packColor16(rgb[0], rgb[1], rgb[2]),
                         packColor16(rgb[3], rgb[4], rgb[5]));
    }
    drv->waitIdle(drv);
}
// ============================================================
// isKeyPressed
// ============================================================
//
// Polls the keyboard without blocking, using BIOS INT 16h
// function 11h (check for extended keystroke). Returns true
// when a keystroke is waiting. The keystroke is not consumed
// here; readKey() removes it.
static bool isKeyPressed(void) {
    __dpmi_regs regs;

    memset(&regs, 0, sizeof(regs));
    regs.h.ah = 0x11;
    __dpmi_int(0x16, &regs);

    // The BIOS sets ZF (flags bit 6) when no key is available.
    return (regs.x.flags & 0x40) == 0;
}
// ============================================================
// main
// ============================================================
int main(int argc, char *argv[]) {
int32_t reqW = DEFAULT_WIDTH;
int32_t reqH = DEFAULT_HEIGHT;
int32_t reqBpp = DEFAULT_BPP;
if (argc >= 4) {
reqW = atoi(argv[1]);
reqH = atoi(argv[2]);
reqBpp = atoi(argv[3]);
}
printf("DOS Accelerated Video Driver Demo\n");
printf("Requested mode: %ldx%ldx%ld\n\n", (long)reqW, (long)reqH, (long)reqBpp);
// Register all available drivers
atiRegisterDriver();
bansheeRegisterDriver();
clRegisterDriver();
etRegisterDriver();
lagunaRegisterDriver();
mgaRegisterDriver();
nvRegisterDriver();
s3RegisterDriver();
sisRegisterDriver();
tridentRegisterDriver();
// Detect hardware
printf("Scanning PCI bus for supported video hardware...\n");
AccelDriverT *drv = accelDetect();
if (!drv) {
printf("No supported video hardware found.\n");
printf("\nPCI video devices present:\n");
// Enumerate and display all VGA-class PCI devices for diagnostics
for (int32_t bus = 0; bus < 256; bus++) {
for (int32_t dev = 0; dev < 32; dev++) {
uint16_t vid = pciRead16(bus, dev, 0, PCI_VENDOR_ID);
if (vid == 0xFFFF) {
continue;
}
uint8_t baseClass = pciRead8(bus, dev, 0, PCI_BASE_CLASS);
if (baseClass == PCI_CLASS_DISPLAY) {
uint16_t did = pciRead16(bus, dev, 0, PCI_DEVICE_ID);
printf(" %02lX:%02lX.0 vendor=%04X device=%04X\n",
(long)bus, (long)dev, vid, did);
}
}
}
return 1;
}
// Initialize with requested mode
AccelModeRequestT modeReq;
modeReq.width = reqW;
modeReq.height = reqH;
modeReq.bpp = reqBpp;
if (!accelInit(drv, &modeReq)) {
printf("Failed to initialize video driver.\n");
return 1;
}
printf("\nDriver: %s\n", accelGetName(drv));
printf("Mode: %ldx%ldx%ld (pitch=%ld)\n",
(long)drv->mode.width, (long)drv->mode.height,
(long)drv->mode.bpp, (long)drv->mode.pitch);
printf("VRAM: %lu KB\n", (unsigned long)(drv->mode.vramSize / 1024));
printf("\nPress any key to start demos...\n");
printf(" SPACE = next demo\n");
printf(" B = benchmark\n");
printf(" ESC = exit\n");
readKey();
// Run demos in a loop
int32_t currentDemo = 0;
int32_t numDemos = 6;
bool running = true;
while (running) {
switch (currentDemo) {
case 0:
demoFillRects(drv);
break;
case 1:
demoBitBlt(drv);
break;
case 2:
demoLines(drv);
break;
case 3:
demoColorExpand(drv);
break;
case 4:
demoHostBlit(drv);
break;
case 5:
demoPatternFill(drv);
break;
}
// Wait for keypress
while (!isKeyPressed()) {
// spin
}
uint8_t key = readKey();
switch (key) {
case 0x01: // ESC
running = false;
break;
case 0x30: // 'b'
demoBenchmark(drv);
return 0; // benchmark already shut down the driver
case 0x39: // space
currentDemo = (currentDemo + 1) % numDemos;
break;
default:
currentDemo = (currentDemo + 1) % numDemos;
break;
}
}
accelShutdown(drv);
printf("Demo complete.\n");
return 0;
}
// ============================================================
// packColor16
// ============================================================
//
// Converts 8-bit-per-channel RGB to a 16-bit 5:6:5 pixel value:
// red in bits 15:11, green in bits 10:5, blue in bits 4:0. The
// low bits of each channel are discarded.
//
// This is a demo shortcut: the result is always 565, whatever
// pixel format the active mode actually uses.
static uint32_t packColor16(uint8_t r, uint8_t g, uint8_t b) {
    uint32_t red5   = (uint32_t)r >> 3;
    uint32_t green6 = (uint32_t)g >> 2;
    uint32_t blue5  = (uint32_t)b >> 3;

    return (red5 << 11) | (green6 << 5) | blue5;
}
// ============================================================
// readKey
// ============================================================
//
// Waits for a keystroke using BIOS INT 16h function 10h (read
// extended keystroke) and returns its scan code, which the BIOS
// delivers in AH. The value left in AL is ignored.
static uint8_t readKey(void) {
    __dpmi_regs regs;

    memset(&regs, 0, sizeof(regs));
    regs.h.ah = 0x10;
    __dpmi_int(0x16, &regs);

    return regs.h.ah;
}
// ============================================================
// softFillRect
// ============================================================
//
// CPU rectangle fill used as the software baseline in the
// benchmark. Writes pixels straight into the mapped framebuffer
// one at a time, at 2, 4 or 1 byte(s) per pixel depending on the
// mode depth. No clipping is performed: the caller must keep the
// rectangle inside the framebuffer.
static void softFillRect(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    uint8_t *base = drv->mode.framebuffer;
    int32_t stride = drv->mode.pitch;
    int32_t bytesPerPix = (drv->mode.bpp + 7) / 8;

    for (int32_t line = 0; line < h; line++) {
        uint8_t *rowStart = base + (y + line) * stride + x * bytesPerPix;

        switch (bytesPerPix) {
            case 2: {
                uint16_t *px = (uint16_t *)rowStart;
                for (int32_t n = w; n > 0; n--) {
                    *px++ = (uint16_t)color;
                }
                break;
            }
            case 4: {
                uint32_t *px = (uint32_t *)rowStart;
                for (int32_t n = w; n > 0; n--) {
                    *px++ = color;
                }
                break;
            }
            default: {
                // Any other depth is written as one byte per pixel.
                uint8_t *px = rowStart;
                for (int32_t n = w; n > 0; n--) {
                    *px++ = (uint8_t)color;
                }
                break;
            }
        }
    }
}

843
matroxMga.c Normal file
View file

@ -0,0 +1,843 @@
// matroxMga.c -- Matrox Millennium/Mystique/G200/G400 accelerated video driver
//
// Supports the Matrox MGA family: MGA2064W (Millennium), MGA1064SG
// (Mystique), G100, G200, and G400/G450. The Matrox 2D drawing engine
// is widely regarded as the best 2D accelerator of the PCI/AGP era,
// with features including:
// - Solid and pattern rectangle fill
// - Screen-to-screen BitBLT (very fast, pipelined)
// - CPU-to-screen blit with color expansion (ILOAD)
// - Bresenham line draw (antialiased on G200+)
// - Trapezoid fill
// - Hardware clip rectangle
// - 64x64 three-color hardware cursor
//
// Register access:
// The MGA register block is mapped via BAR0 (PCI) or BAR1
// depending on the chip. It's a 16KB MMIO region. The drawing
// engine registers start at offset 0x1C00 within this block.
//
// The drawing engine uses a command-based model: you set up
// parameters (colors, coordinates, dimensions) in the setup
// registers, then write to DWGCTL to start the operation.
// Some operations auto-execute when the last parameter is
// written (e.g., LEN triggers a draw).
//
// FIFO:
// The MGA has a deep command FIFO (64 entries on Millennium).
// The FIFOSTATUS register indicates how many entries are free.
// On G200+, the FIFO is deeper and the STATUS register has
// a busy bit that's more reliable.
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/nearptr.h>
// ============================================================
// Matrox vendor/device IDs
// ============================================================
//
// PCI IDs for the supported MGA chips. The G200 PCI and AGP
// values follow the published PCI ID assignments
// (PCI = 0x0520, AGP = 0x0521).
#define MATROX_VENDOR_ID 0x102B
#define MGA_2064W 0x0519 // Millennium
#define MGA_1064SG 0x051A // Mystique
#define MGA_G100_PCI 0x1000
#define MGA_G100_AGP 0x1001
#define MGA_G200_PCI 0x0520
#define MGA_G200_AGP 0x0521
#define MGA_G400 0x0525
// NOTE(review): public PCI ID lists assign 0x2527 to the G550, and
// the G450 is reported under the G400 ID (0x0525) -- confirm which
// chip this entry is meant to match.
#define MGA_G450 0x2527
// Flat list of (vendor ID, device ID) pairs, terminated by a
// 0, 0 pair.
static const uint16_t sMatroxDeviceIds[] = {
    MATROX_VENDOR_ID, MGA_2064W,
    MATROX_VENDOR_ID, MGA_1064SG,
    MATROX_VENDOR_ID, MGA_G100_PCI,
    MATROX_VENDOR_ID, MGA_G100_AGP,
    MATROX_VENDOR_ID, MGA_G200_PCI,
    MATROX_VENDOR_ID, MGA_G200_AGP,
    MATROX_VENDOR_ID, MGA_G400,
    MATROX_VENDOR_ID, MGA_G450,
    0, 0
};
// ============================================================
// MGA drawing engine register offsets (from MMIO base)
// ============================================================
//
// Each value is added to the mapped MMIO base to address one
// register. Registers annotated "triggers" start a drawing
// operation when written, so they must be written last.
// Drawing engine setup registers (0x1C00 - 0x1CFF)
#define MGA_DWGCTL 0x1C00 // drawing control
#define MGA_MACCESS 0x1C04 // memory access control
#define MGA_MCTLWTST 0x1C08 // memory control wait state
#define MGA_ZORG 0x1C0C // Z origin
#define MGA_PAT0 0x1C10 // pattern register 0
#define MGA_PAT1 0x1C14 // pattern register 1
#define MGA_PLNWT 0x1C1C // plane write mask
#define MGA_BCOL 0x1C20 // background color
#define MGA_FCOL 0x1C24 // foreground color
#define MGA_SRC0 0x1C30 // source data 0 (for color expand)
#define MGA_SRC1 0x1C34
#define MGA_SRC2 0x1C38
#define MGA_SRC3 0x1C3C
#define MGA_XYSTRT 0x1C40 // XY start (for lines)
#define MGA_XYEND 0x1C44 // XY end (triggers line draw)
#define MGA_SHIFT 0x1C50
#define MGA_SGN 0x1C58 // sign register
#define MGA_LEN 0x1C5C // number of lines (triggers rect ops)
#define MGA_AR0 0x1C60 // line draw parameter 0
#define MGA_AR1 0x1C64
#define MGA_AR2 0x1C68
#define MGA_AR3 0x1C6C
#define MGA_AR4 0x1C70
#define MGA_AR5 0x1C74
#define MGA_AR6 0x1C78
#define MGA_CXBNDRY 0x1C80 // clip X boundaries (left | right<<16)
#define MGA_FXBNDRY 0x1C84 // fill X boundaries (left | right<<16)
#define MGA_YDSTLEN 0x1C88 // Y dest and length (triggers fill)
#define MGA_PITCH 0x1C8C // destination pitch (in pixels)
#define MGA_YDST 0x1C90 // Y destination
#define MGA_YDSTORG 0x1C94 // Y destination origin (byte offset)
#define MGA_YTOP 0x1C98 // clip Y top
#define MGA_YBOT 0x1C9C // clip Y bottom
#define MGA_CXLEFT 0x1CA0 // clip X left
#define MGA_CXRIGHT 0x1CA4 // clip X right
#define MGA_FXLEFT 0x1CA8 // fill X left
#define MGA_FXRIGHT 0x1CAC // fill X right
#define MGA_XDST 0x1CB0 // X destination
// Status registers (0x1E00 - 0x1EFF)
#define MGA_FIFOSTATUS 0x1E10 // FIFO status
#define MGA_STATUS 0x1E14 // engine status
#define MGA_ICLEAR 0x1E18 // interrupt clear
#define MGA_IEN 0x1E1C // interrupt enable
// The two registers below sit at 0x2Cxx, outside both ranges
// listed above.
// Source window (for BitBLT)
#define MGA_SRCORG 0x2CB4 // source origin
// DWGSYNC for synchronization
#define MGA_DWGSYNC 0x2C4C
// ============================================================
// MGA DWGCTL command values
// ============================================================
//
// The DWGCTL register is a 32-bit command word that encodes the
// operation type, drawing options, and raster operation. A
// command is built by OR-ing one opcode with the option flags
// and one boolean operation below.
// Operation codes (bits 3:0)
#define MGA_OPCOD_LINE_OPEN 0x00 // line (open)
#define MGA_OPCOD_AUTOLINE_OPEN 0x01
#define MGA_OPCOD_LINE_CLOSE 0x02 // line (closed)
#define MGA_OPCOD_AUTOLINE_CLOSE 0x03
#define MGA_OPCOD_TRAP 0x04 // trapezoid fill
// NOTE(review): public MGA register headers list the texture
// trapezoid opcode as 0x06, not 0x05 -- verify before use.
#define MGA_OPCOD_TEXTURE 0x05 // texture mapping (G200+)
#define MGA_OPCOD_BITBLT 0x08 // screen-to-screen blit
#define MGA_OPCOD_ILOAD 0x09 // CPU-to-screen (image load)
#define MGA_OPCOD_IDUMP 0x0A // screen-to-CPU
// Drawing options (bits 31:4)
#define MGA_ATYPE_RPL 0x0000 // replace
#define MGA_ATYPE_RSTR 0x0010 // raster
#define MGA_ATYPE_ZI 0x0030 // Z interpolate
#define MGA_ATYPE_BLK 0x0040 // block transfer
#define MGA_ATYPE_I 0x0070 // interpolate
#define MGA_ZMODE_NOZCMP 0x0000 // no Z compare
#define MGA_ZMODE_ZE 0x0200 // Z equal
#define MGA_ZMODE_ZNE 0x0300 // Z not equal
#define MGA_SOLID 0x0800 // solid fill (no pattern)
#define MGA_ARZERO 0x1000 // AR regs are zero (solid fill optimization)
#define MGA_SGNZERO 0x2000 // SGN reg is zero
#define MGA_SHFTZERO 0x4000 // SHIFT reg is zero
#define MGA_BOP_MASK 0x000F0000 // boolean operation (ROP) mask
#define MGA_BOP_SHIFT 16
// Boolean operations (ROP2, bits 19:16)
#define MGA_BOP_CLEAR (0x0 << MGA_BOP_SHIFT)
#define MGA_BOP_NOR (0x1 << MGA_BOP_SHIFT)
#define MGA_BOP_COPYINV (0x3 << MGA_BOP_SHIFT)
#define MGA_BOP_AND (0x8 << MGA_BOP_SHIFT)
#define MGA_BOP_XOR (0x6 << MGA_BOP_SHIFT)
#define MGA_BOP_COPY (0xC << MGA_BOP_SHIFT)
#define MGA_BOP_OR (0xE << MGA_BOP_SHIFT)
#define MGA_BOP_SET (0xF << MGA_BOP_SHIFT)
// Transparency
// NOTE(review): 0x00100000 is bit 20; public MGA register headers
// define the transparency enable as bit 30 (0x40000000) -- confirm.
#define MGA_TRANSC 0x00100000 // transparent color compare
#define MGA_BLTMOD_BFCOL 0x04000000 // BLT mode: foreground color
// NOTE(review): public MGA register headers give BU32RGB as
// 0x07 << 25 (0x0E000000), not 0x0C000000 -- confirm.
#define MGA_BLTMOD_BU32RGB 0x0C000000 // BLT mode: 32bpp ILOAD
#define MGA_BLTMOD_BMONOWF 0x08000000 // BLT mode: mono word expand MSB first
// Pattern
#define MGA_PATTERN 0x20000000 // enable pattern
// Linear source
// NOTE(review): public MGA register headers place the linear bit
// at bit 7 (0x80), not bit 31 -- confirm.
#define MGA_LINEAR 0x80000000 // linear addressing (not XY)
// ============================================================
// MGA MACCESS values
// ============================================================
#define MGA_MACCESS_8BPP 0x00
#define MGA_MACCESS_16BPP 0x01
#define MGA_MACCESS_32BPP 0x02
#define MGA_MACCESS_24BPP 0x03
// ============================================================
// MGA SGN register bits
// ============================================================
// Bit positions follow the MGA SGN register layout:
//   bit 0 = scanleft (shares the bit with sdydxl)
//   bit 1 = sdxl (negative X direction)
//   bit 2 = sdy  (negative Y direction)
// A fully reversed blit (right-to-left, bottom-to-top) is therefore 0x05.
#define MGA_SGN_SCANLEFT        0x01    // scan direction left
#define MGA_SGN_SCANRIGHT       0x00    // scan direction right
#define MGA_SGN_SDX_NEG         0x02    // negative X direction
#define MGA_SGN_SDY_NEG         0x04    // negative Y direction
// ============================================================
// MGA STATUS register bits
// ============================================================
#define MGA_STATUS_BUSY 0x00010000 // drawing engine busy
#define MGA_FIFO_FULL_MASK 0x0000007F // FIFO free count
// Maximum wait iterations
#define MGA_MAX_IDLE_WAIT 1000000
// Hardware cursor
#define MGA_HW_CURSOR_SIZE 64
#define MGA_HW_CURSOR_BYTES 1024
// ============================================================
// Private driver state
// ============================================================
// Per-driver state for the Matrox backend. A single static instance is
// attached to the driver descriptor through AccelDriverT.privData.
typedef struct {
    uint32_t            lfbPhysAddr;    // framebuffer physical address (BAR1 with the low 4 bits cleared)
    uint32_t            mmioPhysAddr;   // control register physical address (BAR0 with the low 4 bits cleared)
    uint32_t            vramSize;       // size reported by pciSizeBar() for BAR1, in bytes
    uint32_t            cursorOffset;   // byte offset of the cursor image from the start of the framebuffer
    int32_t             bytesPerPixel;  // (bpp + 7) / 8 of the active VESA mode
    int32_t             screenPitch;    // scanline pitch of the active VESA mode, in bytes
    volatile uint32_t  *mmio;           // mapped MMIO base; indexed in dwords by mgaRead()/mgaWrite()
    DpmiMappingT        lfbMapping;     // DPMI mapping handle for the framebuffer
    DpmiMappingT        mmioMapping;    // DPMI mapping handle for the MMIO window
    bool                isG200Plus;     // G200/G400/G450 (set by mgaDetect())
} MatroxPrivateT;
// ============================================================
// Prototypes
// ============================================================
static void mgaBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static void mgaColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg);
static bool mgaDetect(AccelDriverT *drv);
static void mgaHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool mgaInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void mgaLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color);
static void mgaMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void mgaRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void mgaRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg);
static void mgaSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void mgaSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void mgaShowCursor(AccelDriverT *drv, bool visible);
static void mgaShutdown(AccelDriverT *drv);
static void mgaWaitFifo(MatroxPrivateT *priv, int32_t entries);
static void mgaWaitIdle(AccelDriverT *drv);
// Store a 32-bit value into the MMIO register at byte offset `reg`.
// The mapping is addressed in dwords, so the byte offset is divided by 4.
static inline void mgaWrite(MatroxPrivateT *priv, uint32_t reg, uint32_t val) {
    volatile uint32_t *slot = priv->mmio + (reg >> 2);

    *slot = val;
}
// Load the 32-bit MMIO register at byte offset `reg`.
// The mapping is addressed in dwords, so the byte offset is divided by 4.
static inline uint32_t mgaRead(MatroxPrivateT *priv, uint32_t reg) {
    volatile uint32_t *slot = priv->mmio + (reg >> 2);

    return *slot;
}
// ============================================================
// Driver instance
// ============================================================
// Backing storage for the private state; reached through
// sMatroxDriver.privData.
static MatroxPrivateT sMatroxPrivate;

// Driver descriptor passed to accelRegisterDriver(). Every hook of the
// descriptor is implemented by this driver.
static AccelDriverT sMatroxDriver = {
    .name        = "Matrox Millennium",     // initial value; mgaDetect() replaces it on a successful probe
    .chipFamily  = "matrox",
    .caps        = 0,                       // filled in by mgaInit()
    .privData    = &sMatroxPrivate,
    .detect      = mgaDetect,
    .init        = mgaInit,
    .shutdown    = mgaShutdown,
    .waitIdle    = mgaWaitIdle,
    .setClip     = mgaSetClip,
    .rectFill    = mgaRectFill,
    .rectFillPat = mgaRectFillPat,
    .bitBlt      = mgaBitBlt,
    .hostBlit    = mgaHostBlit,
    .colorExpand = mgaColorExpand,
    .lineDraw    = mgaLineDraw,
    .setCursor   = mgaSetCursor,
    .moveCursor  = mgaMoveCursor,
    .showCursor  = mgaShowCursor,
};
// ============================================================
// mgaRegisterDriver
// ============================================================
//
// Externally visible entry point of this file: passes the Matrox
// driver descriptor to accelRegisterDriver().
void mgaRegisterDriver(void) {
    accelRegisterDriver(&sMatroxDriver);
}
// ============================================================
// mgaBitBlt
// ============================================================
//
// Screen-to-screen copy using the MGA BITBLT opcode.
//
// All coordinates are in pixels. The copy direction is chosen from the
// relative position of source and destination and handed to the engine
// through the SGN register:
//   dstX > srcX  ->  scan right-to-left (MGA_SGN_SCANLEFT)
//   dstY > srcY  ->  walk bottom-to-top (MGA_SGN_SDY_NEG)
// The source origin and the destination start row/column are moved to
// the matching corner of the rectangle, and AR5 receives the pitch in
// pixels, negated when walking bottom-to-top.
//
// Rectangles with w <= 0 or h <= 0 are ignored.
static void mgaBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    const bool rightToLeft = (dstX > srcX);
    const bool bottomToTop = (dstY > srcY);

    uint32_t direction = 0;
    if (rightToLeft) {
        direction |= MGA_SGN_SCANLEFT;
    }
    if (bottomToTop) {
        direction |= MGA_SGN_SDY_NEG;
    }

    // Destination span: first pixel written and last pixel written on a row.
    const int32_t firstX = rightToLeft ? (dstX + w - 1) : dstX;
    const int32_t lastX  = rightToLeft ? dstX : (dstX + w - 1);
    const int32_t firstY = bottomToTop ? (dstY + h - 1) : dstY;

    // Source corner matching the destination start corner, as a byte offset.
    const int32_t  srcCol    = rightToLeft ? (srcX + w - 1) : srcX;
    const int32_t  srcRow    = bottomToTop ? (srcY + h - 1) : srcY;
    const uint32_t srcOrigin = srcRow * priv->screenPitch + srcCol * priv->bytesPerPixel;

    const int32_t pitchPixels = priv->screenPitch / priv->bytesPerPixel;

    mgaWaitFifo(priv, 8);

    mgaWrite(priv, MGA_DWGCTL,
             MGA_OPCOD_BITBLT | MGA_ATYPE_BLK | MGA_BOP_COPY | MGA_SHFTZERO);
    mgaWrite(priv, MGA_SGN, direction);
    mgaWrite(priv, MGA_PLNWT, 0xFFFFFFFF);
    mgaWrite(priv, MGA_SRCORG, srcOrigin);
    mgaWrite(priv, MGA_AR5, bottomToTop ? -pitchPixels : pitchPixels);

    // X boundaries, then Y start/length; the YDSTLEN write starts the blit.
    mgaWrite(priv, MGA_FXBNDRY, ((uint32_t)lastX << 16) | (uint32_t)(firstX & 0xFFFF));
    mgaWrite(priv, MGA_YDSTLEN, ((uint32_t)firstY << 16) | (uint32_t)h);
}
// ============================================================
// mgaColorExpand
// ============================================================
//
// CPU-to-screen monochrome color expansion using the MGA ILOAD opcode
// with MGA_BLTMOD_BMONOWF. Set bits are drawn in `fg`, clear bits in
// `bg`.
//
// srcBuf  - first row of the 1bpp bitmap
// srcPitch- distance between bitmap rows, in bytes
// dstX/Y  - top-left destination pixel
// w, h    - size in pixels; w <= 0 or h <= 0 is ignored
//
// Only the first (w + 7) / 8 bytes of each row are read. Each row is
// sent as whole dwords through MGA_SRC0, the first byte of the row in
// bits 7:0, with missing trailing bytes sent as zero.
//
// NOTE(review): the right edge is programmed as dstX + w here while
// mgaBitBlt programs dstX + w - 1; confirm which form ILOAD expects.
static void mgaColorExpand(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h, uint32_t fg, uint32_t bg) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t bytesPerRow  = (w + 7) / 8;
    int32_t dwordsPerRow = (bytesPerRow + 3) / 4;

    mgaWaitFifo(priv, 6);

    mgaWrite(priv, MGA_DWGCTL,
             MGA_OPCOD_ILOAD | MGA_ATYPE_RPL | MGA_BOP_COPY
             | MGA_BLTMOD_BMONOWF | MGA_SHFTZERO | MGA_SGNZERO);
    mgaWrite(priv, MGA_FCOL, fg);
    mgaWrite(priv, MGA_BCOL, bg);
    mgaWrite(priv, MGA_PLNWT, 0xFFFFFFFF);

    // Each field is 16 bits wide. Mask the low halves so that a negative
    // dstX (or an oversized h) cannot spill into the neighbouring field,
    // matching the packing used by mgaRectFill.
    mgaWrite(priv, MGA_FXBNDRY, ((uint32_t)(dstX + w) << 16) | (uint32_t)(dstX & 0xFFFF));
    mgaWrite(priv, MGA_YDSTLEN, ((uint32_t)(dstY & 0xFFFF) << 16) | (uint32_t)(h & 0xFFFF));

    // Feed monochrome data row by row
    for (int32_t row = 0; row < h; row++) {
        const uint8_t *rowPtr = srcBuf + row * srcPitch;

        for (int32_t dw = 0; dw < dwordsPerRow; dw++) {
            uint32_t val    = 0;
            int32_t  offset = dw * 4;

            // Never read past the last bitmap byte of the row.
            for (int32_t b = 0; b < 4; b++) {
                if (offset + b < bytesPerRow) {
                    val |= (uint32_t)rowPtr[offset + b] << (b * 8);
                }
            }

            mgaWaitFifo(priv, 1);
            mgaWrite(priv, MGA_SRC0, val);
        }
    }
}
// ============================================================
// mgaDetect
// ============================================================
//
// Probe for a supported Matrox device. On a match, drv->pciDev is
// filled in by pciFindDeviceList(), drv->name is set to a chip-specific
// label and priv->isG200Plus is set. Returns false when no device from
// sMatroxDeviceIds is present.
//
// NOTE(review): the G100 IDs are flagged isG200Plus as well, although
// the field is described as G200/G400/G450 -- confirm this is intended.
static bool mgaDetect(AccelDriverT *drv) {
    int32_t matchIdx;

    if (!pciFindDeviceList(sMatroxDeviceIds, &drv->pciDev, &matchIdx)) {
        return false;
    }

    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    // Start from the generic fallback; known device IDs override it below.
    drv->name        = "Matrox MGA";
    priv->isG200Plus = false;

    switch (drv->pciDev.deviceId) {
        case MGA_2064W:
            drv->name = "Matrox Millennium";
            break;

        case MGA_1064SG:
            drv->name = "Matrox Mystique";
            break;

        case MGA_G100_PCI:
        case MGA_G100_AGP:
            drv->name        = "Matrox G100";
            priv->isG200Plus = true;
            break;

        case MGA_G200_PCI:
        case MGA_G200_AGP:
            drv->name        = "Matrox G200";
            priv->isG200Plus = true;
            break;

        case MGA_G400:
            drv->name        = "Matrox G400";
            priv->isG200Plus = true;
            break;

        case MGA_G450:
            drv->name        = "Matrox G450";
            priv->isG200Plus = true;
            break;

        default:
            break;
    }

    return true;
}
// ============================================================
// mgaHostBlit
// ============================================================
//
// CPU-to-screen blit using the MGA ILOAD opcode. Pixel data is copied
// from host memory to the screen by writing it through MGA_SRC0.
//
// srcBuf  - first row of the source image, in the screen's pixel format
// srcPitch- distance between source rows, in bytes
// dstX/Y  - top-left destination pixel
// w, h    - size in pixels; w <= 0 or h <= 0 is ignored
//
// Each row is w * bytesPerPixel bytes and is sent as whole dwords, the
// first byte of the row in bits 7:0; a final partial dword is padded
// with zero bytes.
//
// NOTE(review): the right edge is programmed as dstX + w here while
// mgaBitBlt programs dstX + w - 1; confirm which form ILOAD expects.
static void mgaHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t bytesPerRow  = w * priv->bytesPerPixel;
    int32_t dwordsPerRow = (bytesPerRow + 3) / 4;

    mgaWaitFifo(priv, 5);

    mgaWrite(priv, MGA_DWGCTL,
             MGA_OPCOD_ILOAD | MGA_ATYPE_RPL | MGA_BOP_COPY
             | MGA_SHFTZERO | MGA_SGNZERO);
    mgaWrite(priv, MGA_FCOL, 0xFFFFFFFF);
    mgaWrite(priv, MGA_PLNWT, 0xFFFFFFFF);

    // Each field is 16 bits wide. Mask the low halves so that a negative
    // dstX (or an oversized h) cannot spill into the neighbouring field,
    // matching the packing used by mgaRectFill.
    mgaWrite(priv, MGA_FXBNDRY, ((uint32_t)(dstX + w) << 16) | (uint32_t)(dstX & 0xFFFF));
    mgaWrite(priv, MGA_YDSTLEN, ((uint32_t)(dstY & 0xFFFF) << 16) | (uint32_t)(h & 0xFFFF));

    // Feed pixel data row by row
    for (int32_t row = 0; row < h; row++) {
        const uint8_t *rowPtr = srcBuf + row * srcPitch;

        for (int32_t dw = 0; dw < dwordsPerRow; dw++) {
            uint32_t val    = 0;
            int32_t  offset = dw * 4;

            // Never read past the last pixel byte of the row.
            for (int32_t b = 0; b < 4; b++) {
                if (offset + b < bytesPerRow) {
                    val |= (uint32_t)rowPtr[offset + b] << (b * 8);
                }
            }

            mgaWaitFifo(priv, 1);
            mgaWrite(priv, MGA_SRC0, val);
        }
    }
}
// ============================================================
// mgaInit
// ============================================================
//
// Bring up the accelerator for the requested mode.
//
// Sequence: read BAR0/BAR1, map the 16KB control register window, set
// the VESA mode matching `req`, map the linear framebuffer, program
// MACCESS / PITCH / YDSTORG / PLNWT, reserve the cursor image area at
// the top of VRAM and install a full-screen clip rectangle.
//
// Returns true with drv->mode and drv->caps filled in. Returns false if
// a mapping or the mode set fails; mappings made so far are released,
// and text mode is restored if the VESA mode had already been set.
static bool mgaInit(AccelDriverT *drv, const AccelModeRequestT *req) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    // This code always treats BAR0 as the control registers (16KB) and
    // BAR1 as the framebuffer.
    // NOTE(review): later Matrox parts (Mystique 220, G100/G200/G400) are
    // commonly documented with the two BARs swapped -- confirm against
    // every ID in sMatroxDeviceIds.
    uint32_t bar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev,
                              drv->pciDev.func, PCI_BAR0);
    uint32_t bar1 = pciRead32(drv->pciDev.bus, drv->pciDev.dev,
                              drv->pciDev.func, PCI_BAR1);

    // Strip the BAR flag bits to get the physical base addresses.
    priv->mmioPhysAddr = bar0 & 0xFFFFFFF0;
    priv->lfbPhysAddr  = bar1 & 0xFFFFFFF0;

    // Size the framebuffer BAR
    priv->vramSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev,
                                drv->pciDev.func, PCI_BAR1);

    // Map MMIO control registers (16KB)
    if (!dpmiMapFramebuffer(priv->mmioPhysAddr, 16384, &priv->mmioMapping)) {
        return false;
    }
    priv->mmio = (volatile uint32_t *)priv->mmioMapping.ptr;

    // Find and set VESA mode
    VesaModeResultT vesa;
    if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) {
        dpmiUnmapFramebuffer(&priv->mmioMapping);
        return false;
    }

    // Map framebuffer
    if (!dpmiMapFramebuffer(priv->lfbPhysAddr, priv->vramSize, &priv->lfbMapping)) {
        dpmiUnmapFramebuffer(&priv->mmioMapping);
        vgaRestoreTextMode();
        return false;
    }

    priv->bytesPerPixel = (vesa.bpp + 7) / 8;
    priv->screenPitch   = vesa.pitch;

    drv->mode.width         = vesa.width;
    drv->mode.height        = vesa.height;
    drv->mode.bpp           = vesa.bpp;
    drv->mode.pitch         = vesa.pitch;
    drv->mode.framebuffer   = priv->lfbMapping.ptr;
    drv->mode.vramSize      = priv->vramSize;
    drv->mode.offscreenBase = vesa.pitch * vesa.height;

    // Configure MACCESS for pixel depth. 24bpp has its own encoding and
    // must not fall through to the 16bpp default.
    uint32_t maccess;
    switch (vesa.bpp) {
        case 8:  maccess = MGA_MACCESS_8BPP;  break;
        case 15:
        case 16: maccess = MGA_MACCESS_16BPP; break;
        case 24: maccess = MGA_MACCESS_24BPP; break;
        case 32: maccess = MGA_MACCESS_32BPP; break;
        default: maccess = MGA_MACCESS_16BPP; break;
    }

    mgaWaitIdle(drv);

    mgaWrite(priv, MGA_MACCESS, maccess);

    // Set pitch (in pixels)
    mgaWrite(priv, MGA_PITCH, vesa.pitch / priv->bytesPerPixel);

    // Set YDSTORG to 0 (framebuffer starts at beginning of VRAM)
    mgaWrite(priv, MGA_YDSTORG, 0);

    // Plane write mask: all bits
    mgaWrite(priv, MGA_PLNWT, 0xFFFFFFFF);

    // Reserve the last MGA_HW_CURSOR_BYTES of VRAM for the cursor image,
    // aligned down to a multiple of that size.
    priv->cursorOffset  = priv->vramSize - MGA_HW_CURSOR_BYTES;
    priv->cursorOffset &= ~(MGA_HW_CURSOR_BYTES - 1);

    drv->caps = ACAP_RECT_FILL
              | ACAP_RECT_FILL_PAT
              | ACAP_BITBLT
              | ACAP_HOST_BLIT
              | ACAP_COLOR_EXPAND
              | ACAP_LINE_DRAW
              | ACAP_HW_CURSOR
              | ACAP_CLIP;

    // Full screen clip
    mgaSetClip(drv, 0, 0, vesa.width, vesa.height);

    return true;
}
// ============================================================
// mgaLineDraw
// ============================================================
//
// Solid-color line from (x1, y1) to (x2, y2) using the MGA
// AUTOLINE_CLOSE opcode. The engine is given the two end points
// directly, so no Bresenham terms are computed on the CPU. Each
// coordinate is truncated to 16 bits and packed as y in the high half,
// x in the low half.
static void mgaLineDraw(AccelDriverT *drv, int32_t x1, int32_t y1, int32_t x2, int32_t y2, uint32_t color) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    const uint32_t drawCmd = MGA_OPCOD_AUTOLINE_CLOSE | MGA_ATYPE_RPL | MGA_SOLID
                           | MGA_BOP_COPY | MGA_SHFTZERO | MGA_SGNZERO | MGA_ARZERO;
    const uint32_t startXY = ((uint32_t)(y1 & 0xFFFF) << 16) | (uint32_t)(x1 & 0xFFFF);
    const uint32_t endXY   = ((uint32_t)(y2 & 0xFFFF) << 16) | (uint32_t)(x2 & 0xFFFF);

    mgaWaitFifo(priv, 5);

    mgaWrite(priv, MGA_DWGCTL, drawCmd);
    mgaWrite(priv, MGA_FCOL, color);
    mgaWrite(priv, MGA_PLNWT, 0xFFFFFFFF);
    mgaWrite(priv, MGA_XYSTRT, startXY);

    // Writing the end point is what starts the draw.
    mgaWrite(priv, MGA_XYEND, endXY);
}
// ============================================================
// mgaMoveCursor
// ============================================================
//
// Move the hardware cursor to (x, y). Negative coordinates are clamped
// to 0 and each coordinate is truncated to 12 bits before being packed
// (y in bits 27:16, x in bits 11:0) into the register at MMIO offset
// 0x3C0C.
//
// The original notes describe the cursor as living in the RAMDAC: an
// external TVP3026 on Millennium, an integrated DAC at MMIO 0x3C00+ on
// Mystique and later. Only the integrated-DAC register is written here.
static void mgaMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    const int32_t clampedX = (x < 0) ? 0 : x;
    const int32_t clampedY = (y < 0) ? 0 : y;

    // CURPOS register at MMIO + 0x3C0C
    const uint32_t packedPos = ((uint32_t)(clampedY & 0xFFF) << 16)
                             | (uint32_t)(clampedX & 0xFFF);

    mgaWrite(priv, 0x3C0C, packedPos);
}
// ============================================================
// mgaRectFill
// ============================================================
//
// Fill the rectangle (x, y, w, h) with a single color using the MGA
// TRAP opcode in block mode with the SOLID bit set. The ARZERO,
// SGNZERO and SHFTZERO bits are set so the AR, SGN and SHIFT registers
// need not be written. Rectangles with w <= 0 or h <= 0 are ignored.
static void mgaRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    const uint32_t fillCmd = MGA_OPCOD_TRAP | MGA_ATYPE_BLK | MGA_SOLID
                           | MGA_BOP_COPY | MGA_ARZERO | MGA_SGNZERO | MGA_SHFTZERO;

    // left edge in bits 15:0, right edge (x + w) in bits 31:16
    const uint32_t xBounds = ((uint32_t)(x + w) << 16) | (uint32_t)(x & 0xFFFF);

    // first row in bits 31:16, row count in bits 15:0
    const uint32_t yDstLen = ((uint32_t)(y & 0xFFFF) << 16) | (uint32_t)(h & 0xFFFF);

    mgaWaitFifo(priv, 5);

    mgaWrite(priv, MGA_DWGCTL, fillCmd);
    mgaWrite(priv, MGA_FCOL, color);
    mgaWrite(priv, MGA_FXBNDRY, xBounds);

    // The YDSTLEN write starts the fill.
    mgaWrite(priv, MGA_YDSTLEN, yDstLen);
}
// ============================================================
// mgaRectFillPat
// ============================================================
//
// Fill the rectangle (x, y, w, h) with an 8x8 monochrome pattern using
// the MGA TRAP opcode with MGA_PATTERN set. `pattern` points to 8
// bytes, one per pattern row. Rows 0-3 are packed into PAT0 and rows
// 4-7 into PAT1, the lowest-numbered row in bits 7:0. 1-bits are drawn
// in `fg`, 0-bits in `bg`. Rectangles with w <= 0 or h <= 0 are ignored.
static void mgaRectFillPat(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, const uint8_t *pattern, uint32_t fg, uint32_t bg) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    // patWords[0] -> PAT0 (rows 0-3), patWords[1] -> PAT1 (rows 4-7)
    uint32_t patWords[2] = { 0, 0 };
    for (int32_t row = 0; row < 8; row++) {
        patWords[row / 4] |= (uint32_t)pattern[row] << ((row % 4) * 8);
    }

    const uint32_t fillCmd = MGA_OPCOD_TRAP | MGA_ATYPE_RPL | MGA_PATTERN
                           | MGA_BOP_COPY | MGA_ARZERO | MGA_SGNZERO | MGA_SHFTZERO;
    const uint32_t xBounds = ((uint32_t)(x + w) << 16) | (uint32_t)(x & 0xFFFF);
    const uint32_t yDstLen = ((uint32_t)(y & 0xFFFF) << 16) | (uint32_t)(h & 0xFFFF);

    mgaWaitFifo(priv, 8);

    mgaWrite(priv, MGA_DWGCTL, fillCmd);
    mgaWrite(priv, MGA_FCOL, fg);
    mgaWrite(priv, MGA_BCOL, bg);
    mgaWrite(priv, MGA_PAT0, patWords[0]);
    mgaWrite(priv, MGA_PAT1, patWords[1]);
    mgaWrite(priv, MGA_PLNWT, 0xFFFFFFFF);

    // X boundaries first; the YDSTLEN write starts the fill.
    mgaWrite(priv, MGA_FXBNDRY, xBounds);
    mgaWrite(priv, MGA_YDSTLEN, yDstLen);
}
// ============================================================
// mgaSetClip
// ============================================================
//
// Program the hardware clip rectangle to (x, y, w, h). The right and
// bottom edges are written as inclusive values (x + w - 1, y + h - 1).
// The Y limits are written as row * pitch-in-pixels rather than as
// plain row numbers.
static void mgaSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    const int32_t  pitchPixels = priv->screenPitch / priv->bytesPerPixel;
    const int32_t  lastRow     = y + h - 1;
    const uint32_t xLimits     = ((uint32_t)(x + w - 1) << 16) | (uint32_t)(x & 0xFFFF);

    mgaWaitFifo(priv, 3);

    mgaWrite(priv, MGA_CXBNDRY, xLimits);
    mgaWrite(priv, MGA_YTOP, y * pitchPixels);
    mgaWrite(priv, MGA_YBOT, lastRow * pitchPixels);
}
// ============================================================
// mgaSetCursor
// ============================================================
//
// Upload a cursor image into the reserved area at priv->cursorOffset
// and point the cursor base register at it. A NULL image hides the
// cursor instead.
//
// The stored image is always 64 rows of 16 bytes: 8 bytes of AND mask
// followed by 8 bytes of XOR mask. Source masks are read with a fixed
// row stride of 8 bytes. Bytes outside the image's width/height are
// written as AND = 0xFF, XOR = 0x00.
static void mgaSetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    if (!image) {
        mgaShowCursor(drv, false);
        return;
    }

    // Let the engine finish before touching VRAM from the CPU.
    mgaWaitIdle(drv);

    uint8_t *cursorMem = drv->mode.framebuffer + priv->cursorOffset;

    for (int32_t row = 0; row < MGA_HW_CURSOR_SIZE; row++) {
        uint8_t *andPlane = cursorMem + row * 16;
        uint8_t *xorPlane = andPlane + 8;

        for (int32_t col = 0; col < 8; col++) {
            uint8_t andByte = 0xFF;
            uint8_t xorByte = 0x00;

            if (row < image->height && col < (image->width + 7) / 8) {
                int32_t srcIdx = row * 8 + col;
                andByte = image->andMask[srcIdx];
                xorByte = image->xorMask[srcIdx];
            }

            andPlane[col] = andByte;
            xorPlane[col] = xorByte;
        }
    }

    // Set cursor base address via DAC register
    // CURBASE at MMIO + 0x3C04
    mgaWrite(priv, 0x3C04, priv->cursorOffset);
}
// ============================================================
// mgaShowCursor
// ============================================================
//
// Show or hide the hardware cursor by setting or clearing bit 0 of the
// cursor control register (read-modify-write; other bits are kept).
static void mgaShowCursor(AccelDriverT *drv, bool visible) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    // CURCTL at MMIO + 0x3C00
    const uint32_t current = mgaRead(priv, 0x3C00);
    const uint32_t updated = visible ? (current | 0x01) : (current & ~0x01);

    mgaWrite(priv, 0x3C00, updated);
}
// ============================================================
// mgaShutdown
// ============================================================
//
// Tear the driver down: hide the hardware cursor, release the MMIO and
// framebuffer mappings, and return the display to text mode.
static void mgaShutdown(AccelDriverT *drv) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    // Must run first: mgaShowCursor() accesses MMIO, which is unmapped below.
    mgaShowCursor(drv, false);

    dpmiUnmapFramebuffer(&priv->mmioMapping);
    dpmiUnmapFramebuffer(&priv->lfbMapping);
    vgaRestoreTextMode();
}
// ============================================================
// mgaWaitFifo
// ============================================================
//
// Poll FIFOSTATUS until at least `entries` FIFO slots are free.
// The free-slot count is taken from bits 6:0 of the register.
// Polling stops after MGA_MAX_IDLE_WAIT reads; in that case the
// function returns without reporting the timeout.
static void mgaWaitFifo(MatroxPrivateT *priv, int32_t entries) {
    int32_t attemptsLeft = MGA_MAX_IDLE_WAIT;

    while (attemptsLeft-- > 0) {
        const uint32_t status    = mgaRead(priv, MGA_FIFOSTATUS);
        const int32_t  freeSlots = status & MGA_FIFO_FULL_MASK;

        if (freeSlots >= entries) {
            return;
        }
    }
}
// ============================================================
// mgaWaitIdle
// ============================================================
//
// Poll the STATUS register until the MGA_STATUS_BUSY bit is clear.
// Polling stops after MGA_MAX_IDLE_WAIT reads; in that case the
// function returns without reporting the timeout.
static void mgaWaitIdle(AccelDriverT *drv) {
    MatroxPrivateT *priv = (MatroxPrivateT *)drv->privData;

    int32_t attemptsLeft = MGA_MAX_IDLE_WAIT;

    while (attemptsLeft-- > 0) {
        if ((mgaRead(priv, MGA_STATUS) & MGA_STATUS_BUSY) == 0) {
            return;
        }
    }
}

677
nvidia.c Normal file
View file

@ -0,0 +1,677 @@
// nvidia.c -- Nvidia RIVA 128/TNT/TNT2 accelerated video driver
//
// Supports the Nvidia RIVA family: RIVA 128, RIVA 128 ZX, TNT,
// TNT2, TNT2 Ultra, TNT2 M64, and Vanta. These were high-
// performance 2D/3D accelerators of the late 1990s featuring:
// - Solid rectangle fill
// - Screen-to-screen BitBLT
// - Host-to-screen blit (CPU data transfer)
// - Hardware clip rectangle
// - 64x64 two-color hardware cursor via PRAMDAC
//
// Register access:
// The NV architecture uses memory-mapped I/O via BAR0 (16MB
// MMIO register space) and BAR1 (framebuffer). The 2D engine
// is accessed through the FIFO user space at BAR0 + 0x800000,
// which provides subchannel-based access to graphics objects.
//
// Subchannel layout:
// Sub 0 (0x0000): ROP
// Sub 1 (0x2000): Clip
// Sub 2 (0x4000): Pattern
// Sub 3 (0x6000): GdiRectangle (solid fill)
// Sub 4 (0x8000): ScreenScreenBlt
// Sub 5 (0xA000): ImageFromCpu
//
// Each subchannel has methods starting at +0x0100 within
// its range. The PGRAPH_STATUS register at 0x400700 indicates
// engine busy status (0 = idle).
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/nearptr.h>
// ============================================================
// Nvidia vendor/device IDs
// ============================================================
#define NV_VENDOR_ID 0x10DE
#define NV_RIVA_128 0x0018 // RIVA 128
#define NV_RIVA_128_ZX 0x0019 // RIVA 128 ZX
#define NV_TNT 0x0020 // RIVA TNT
#define NV_TNT2 0x0028 // RIVA TNT2
#define NV_TNT2_ULTRA 0x0029 // RIVA TNT2 Ultra
#define NV_TNT2_M64 0x002D // RIVA TNT2 M64
#define NV_VANTA 0x002C // Vanta
// Supported devices as (vendor ID, device ID) pairs, ending with a
// 0, 0 pair. Passed to pciFindDeviceList() by nvDetect().
static const uint16_t sNvDeviceIds[] = {
    NV_VENDOR_ID, NV_RIVA_128,
    NV_VENDOR_ID, NV_RIVA_128_ZX,
    NV_VENDOR_ID, NV_TNT,
    NV_VENDOR_ID, NV_TNT2,
    NV_VENDOR_ID, NV_TNT2_ULTRA,
    NV_VENDOR_ID, NV_TNT2_M64,
    NV_VENDOR_ID, NV_VANTA,
    0, 0
};
// ============================================================
// MMIO register offsets (from BAR0)
// ============================================================
// PGRAPH status
#define NV_PGRAPH_STATUS 0x400700 // 0 = idle
// PRAMDAC hardware cursor
#define NV_PRAMDAC_CURSOR_CFG 0x680300 // bit 0 = enable, bits 2:1 = color mode
#define NV_PRAMDAC_CURSOR_POS 0x680320 // cursor X/Y position
// PRAMIN area -- cursor image storage offset in VRAM
// The cursor image lives at the top of VRAM, 1KB for 32x32 or 4KB for 64x64.
// PRAMDAC fetches it from the address configured in NV_PRAMDAC_CURSOR_START.
#define NV_PRAMDAC_CURSOR_START 0x680324 // cursor image VRAM offset
// PFB -- framebuffer config (for reading VRAM size)
#define NV_PFB_BOOT_0 0x100000 // boot config (NV3)
#define NV_PFB_CFG_0 0x100200 // framebuffer config (NV4/NV5)
// ============================================================
// FIFO user space offsets (from BAR0 + 0x800000)
// ============================================================
//
// Subchannel base addresses within the user FIFO area.
#define NV_FIFO_BASE 0x800000
// Subchannel 0: ROP
#define NV_ROP_SUBCHAN 0x0000
#define NV_ROP_ROP 0x0300 // raster operation
// Subchannel 1: Clip
#define NV_CLIP_SUBCHAN 0x2000
#define NV_CLIP_POINT 0x2300 // x | y<<16
#define NV_CLIP_SIZE 0x2304 // w | h<<16
// Subchannel 3: GdiRectangle (solid fill)
#define NV_RECT_SUBCHAN 0x6000
#define NV_RECT_COLOR 0x62FC // fill color
#define NV_RECT_POINT 0x6300 // x | y<<16
#define NV_RECT_SIZE 0x6304 // w | h<<16 (triggers fill)
// Subchannel 4: ScreenScreenBlt
#define NV_BLIT_SUBCHAN 0x8000
#define NV_BLIT_POINT_IN 0x8300 // srcX | srcY<<16
#define NV_BLIT_POINT_OUT 0x8304 // dstX | dstY<<16
#define NV_BLIT_SIZE 0x8308 // w | h<<16
// Subchannel 5: ImageFromCpu
#define NV_IMAGE_SUBCHAN 0xA000
#define NV_IMAGE_POINT 0xA300 // dstX | dstY<<16
#define NV_IMAGE_SIZE_OUT 0xA304 // w | h<<16
#define NV_IMAGE_SIZE_IN 0xA308 // srcW | srcH<<16
#define NV_IMAGE_DATA 0xA400 // color data (dwords)
// ============================================================
// Constants
// ============================================================
#define NV_ROP_COPY 0xCC // dest = src
#define NV_MMIO_SIZE 0x1000000 // 16MB MMIO region
#define NV_MAX_IDLE_WAIT 1000000
#define NV_HW_CURSOR_SIZE 64
#define NV_HW_CURSOR_BYTES (NV_HW_CURSOR_SIZE * NV_HW_CURSOR_SIZE * 2 / 8)
// Cursor config bits
#define NV_CURSOR_ENABLE 0x01
#define NV_CURSOR_MODE_2COLOR 0x00 // 2-color mode (bits 2:1 = 0)
// RIVA 128 (NV3) vs TNT (NV4/NV5) detection
#define NV_ARCH_NV3 3
#define NV_ARCH_NV4 4
// ============================================================
// Private driver state
// ============================================================
// Per-driver state for the Nvidia backend. A single static instance is
// attached to the driver descriptor through AccelDriverT.privData and
// is zeroed at the start of nvInit().
typedef struct {
    volatile uint32_t *mmio;            // mapped MMIO base (BAR0)
    volatile uint32_t *fifo;            // FIFO user space (BAR0 + 0x800000)
    uint32_t           mmioPhysAddr;    // BAR0 with the low 4 bits cleared
    uint32_t           lfbPhysAddr;     // BAR1 with the low 4 bits cleared
    uint32_t           vramSize;        // VRAM size in bytes, from nvDetectVram()
    uint32_t           cursorOffset;    // cursor image offset in VRAM
    int32_t            bytesPerPixel;   // bytes per pixel; used by nvHostBlit() to size rows
    int32_t            screenPitch;     // presumably bytes per scanline -- TODO confirm where it is set
    int32_t            arch;            // NV_ARCH_NV3 or NV_ARCH_NV4
    DpmiMappingT       mmioMapping;     // DPMI mapping handle for the MMIO region
    DpmiMappingT       lfbMapping;      // DPMI mapping handle for the framebuffer
} NvPrivateT;
// ============================================================
// Prototypes
// ============================================================
static void nvBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool nvDetect(AccelDriverT *drv);
static uint32_t nvDetectVram(NvPrivateT *priv);
static void nvHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool nvInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void nvMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void nvRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void nvSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void nvSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void nvSetupEngine(NvPrivateT *priv);
static void nvShowCursor(AccelDriverT *drv, bool visible);
static void nvShutdown(AccelDriverT *drv);
static void nvWaitIdle(AccelDriverT *drv);
static void nvWriteFifo(NvPrivateT *priv, uint32_t offset, uint32_t val);
static uint32_t nvReadMmio(NvPrivateT *priv, uint32_t offset);
static void nvWriteMmio(NvPrivateT *priv, uint32_t offset, uint32_t val);
// ============================================================
// Driver instance
// ============================================================
// Backing storage for the private state; reached through
// sNvDriver.privData.
static NvPrivateT sNvPrivate;

// Driver descriptor passed to accelRegisterDriver(). Hooks left NULL
// (pattern fill, color expansion, line draw) have no implementation in
// this driver.
static AccelDriverT sNvDriver = {
    .name        = "Nvidia RIVA",           // initial value; nvDetect() replaces it on a successful probe
    .chipFamily  = "nvidia",
    .caps        = 0,
    .privData    = &sNvPrivate,
    .detect      = nvDetect,
    .init        = nvInit,
    .shutdown    = nvShutdown,
    .waitIdle    = nvWaitIdle,
    .setClip     = nvSetClip,
    .rectFill    = nvRectFill,
    .rectFillPat = NULL,
    .bitBlt      = nvBitBlt,
    .hostBlit    = nvHostBlit,
    .colorExpand = NULL,
    .lineDraw    = NULL,
    .setCursor   = nvSetCursor,
    .moveCursor  = nvMoveCursor,
    .showCursor  = nvShowCursor,
};
// ============================================================
// nvRegisterDriver
// ============================================================
//
// Externally visible entry point of this file: passes the Nvidia
// driver descriptor to accelRegisterDriver().
void nvRegisterDriver(void) {
    accelRegisterDriver(&sNvDriver);
}
// ============================================================
// nvBitBlt
// ============================================================
//
// Screen-to-screen blit via the ScreenScreenBlt subchannel.
//
// Waits for the engine to go idle, then writes the source point, the
// destination point and the size. Each value is packed as two 16-bit
// fields: x (or w) in bits 15:0, y (or h) in bits 31:16. Rectangles
// with w <= 0 or h <= 0 are ignored.
//
// No copy direction is programmed here.
// NOTE(review): overlapping copies rely on the engine handling overlap
// by itself -- confirm.
static void nvBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    nvWaitIdle(drv);

    // Mask every field to 16 bits so a negative or oversized x/w value
    // cannot overwrite the y/h field sharing the same dword.
    nvWriteFifo(priv, NV_BLIT_POINT_IN,  (uint32_t)(srcX & 0xFFFF) | ((uint32_t)(srcY & 0xFFFF) << 16));
    nvWriteFifo(priv, NV_BLIT_POINT_OUT, (uint32_t)(dstX & 0xFFFF) | ((uint32_t)(dstY & 0xFFFF) << 16));
    nvWriteFifo(priv, NV_BLIT_SIZE,      (uint32_t)(w & 0xFFFF)    | ((uint32_t)(h & 0xFFFF) << 16));
}
// ============================================================
// nvDetect
// ============================================================
//
// Probe for a supported Nvidia device. On a match, drv->pciDev is
// filled in by pciFindDeviceList() and drv->name is set to a
// chip-specific label. Returns false when no device from sNvDeviceIds
// is present.
static bool nvDetect(AccelDriverT *drv) {
    int32_t matchIdx;

    if (!pciFindDeviceList(sNvDeviceIds, &drv->pciDev, &matchIdx)) {
        return false;
    }

    if (drv->pciDev.deviceId == NV_RIVA_128) {
        drv->name = "Nvidia RIVA 128";
    } else if (drv->pciDev.deviceId == NV_RIVA_128_ZX) {
        drv->name = "Nvidia RIVA 128 ZX";
    } else if (drv->pciDev.deviceId == NV_TNT) {
        drv->name = "Nvidia RIVA TNT";
    } else if (drv->pciDev.deviceId == NV_TNT2) {
        drv->name = "Nvidia RIVA TNT2";
    } else if (drv->pciDev.deviceId == NV_TNT2_ULTRA) {
        drv->name = "Nvidia RIVA TNT2 Ultra";
    } else if (drv->pciDev.deviceId == NV_TNT2_M64) {
        drv->name = "Nvidia RIVA TNT2 M64";
    } else if (drv->pciDev.deviceId == NV_VANTA) {
        drv->name = "Nvidia Vanta";
    } else {
        // Matched the list but has no dedicated label.
        drv->name = "Nvidia RIVA";
    }

    return true;
}
// ============================================================
// nvDetectVram
// ============================================================
//
// Return the VRAM size in bytes, decoded from bits 1:0 of a PFB
// register: PFB_BOOT_0 when priv->arch is NV_ARCH_NV3, PFB_CFG_0
// otherwise.
//
//   bits 1:0   NV3     NV4/NV5
//      0       8 MB    32 MB
//      1       2 MB     4 MB
//      2       4 MB     8 MB
//      3       4 MB    16 MB
static uint32_t nvDetectVram(NvPrivateT *priv) {
    static const uint32_t nv3SizesMb[4] = {  8, 2, 4,  4 };
    static const uint32_t nv4SizesMb[4] = { 32, 4, 8, 16 };

    const bool      isNv3   = (priv->arch == NV_ARCH_NV3);
    const uint32_t *sizesMb = isNv3 ? nv3SizesMb : nv4SizesMb;
    const uint32_t  strap   = nvReadMmio(priv, isNv3 ? NV_PFB_BOOT_0 : NV_PFB_CFG_0);

    return sizesMb[strap & 0x03] * 1024 * 1024;
}
// ============================================================
// nvHostBlit
// ============================================================
//
// CPU-to-screen blit via the ImageFromCpu subchannel. Transfers pixel
// data from system memory to VRAM through the FIFO.
//
// srcBuf  - first row of the source image, in the screen's pixel format
// srcPitch- distance between source rows, in bytes
// dstX/Y  - top-left destination pixel
// w, h    - size in pixels; w <= 0 or h <= 0 is ignored
//
// Each row is w * bytesPerPixel bytes and is sent as whole dwords, the
// first byte of the row in bits 7:0; a final partial dword is padded
// with zero bytes. The engine is drained before the transfer and after
// every row.
//
// NOTE(review): dword N of a row is written to NV_IMAGE_DATA + 4 * N.
// With the subchannels spaced 0x2000 apart, a row longer than 0x1C00
// bytes would run past this subchannel's range -- confirm the maximum
// row size callers can request.
static void nvHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t rowBytes     = w * priv->bytesPerPixel;
    int32_t dwordsPerRow = (rowBytes + 3) / 4;

    nvWaitIdle(drv);

    // Set up the image transfer. Mask every field to 16 bits so a
    // negative or oversized x/w value cannot overwrite the y/h field
    // sharing the same dword.
    const uint32_t dstPoint = (uint32_t)(dstX & 0xFFFF) | ((uint32_t)(dstY & 0xFFFF) << 16);
    const uint32_t extent   = (uint32_t)(w & 0xFFFF)    | ((uint32_t)(h & 0xFFFF) << 16);

    nvWriteFifo(priv, NV_IMAGE_POINT, dstPoint);
    nvWriteFifo(priv, NV_IMAGE_SIZE_OUT, extent);
    nvWriteFifo(priv, NV_IMAGE_SIZE_IN, extent);

    // Write pixel data row by row
    for (int32_t row = 0; row < h; row++) {
        const uint8_t *rowPtr = srcBuf + row * srcPitch;

        for (int32_t dw = 0; dw < dwordsPerRow; dw++) {
            int32_t  byteOff = dw * 4;
            uint32_t data    = 0;

            // Pack bytes into a dword (little-endian native order),
            // never reading past the last pixel byte of the row.
            for (int32_t b = 0; b < 4; b++) {
                if (byteOff + b < rowBytes) {
                    data |= (uint32_t)rowPtr[byteOff + b] << (b * 8);
                }
            }

            // Write to the color data area; each dword goes to the
            // next sequential offset starting at NV_IMAGE_DATA.
            nvWriteFifo(priv, NV_IMAGE_DATA + (uint32_t)(dw * 4), data);
        }

        // Wait for engine between rows to avoid FIFO overflow
        nvWaitIdle(drv);
    }
}
// ============================================================
// nvInit
// ============================================================
//
// Bring the card up for accelerated use: classify the chip,
// read BAR0/BAR1, enable PCI memory decoding and bus mastering,
// map the MMIO block, detect VRAM, set the requested VESA mode,
// map the framebuffer, fill in drv->mode and drv->caps, and set
// a full-screen clip rectangle.
//
// Returns true on success. Returns false if the MMIO mapping,
// the VESA mode set, or the framebuffer mapping fails; each
// failure path undoes the steps that had already succeeded.
static bool nvInit(AccelDriverT *drv, const AccelModeRequestT *req) {
NvPrivateT *priv = (NvPrivateT *)drv->privData;
// Start from a clean private state on every init.
memset(priv, 0, sizeof(*priv));
// Determine architecture (NV3 vs NV4/NV5)
// Any device ID other than the two RIVA 128 IDs is treated as NV4.
if (drv->pciDev.deviceId == NV_RIVA_128 || drv->pciDev.deviceId == NV_RIVA_128_ZX) {
priv->arch = NV_ARCH_NV3;
} else {
priv->arch = NV_ARCH_NV4;
}
// Get BAR0 (MMIO) and BAR1 (framebuffer) addresses
uint32_t bar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR0);
uint32_t bar1 = pciRead32(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR1);
// Strip the low four BAR flag bits to get the base address.
priv->mmioPhysAddr = bar0 & 0xFFFFFFF0;
priv->lfbPhysAddr = bar1 & 0xFFFFFFF0;
// Size the framebuffer BAR
uint32_t lfbBarSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_BAR1);
// Enable bus mastering and memory space access
uint16_t pciCmd = pciRead16(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_COMMAND);
pciCmd |= PCI_CMD_MEM_ENABLE | PCI_CMD_BUS_MASTER;
pciWrite16(drv->pciDev.bus, drv->pciDev.dev, drv->pciDev.func, PCI_COMMAND, pciCmd);
// Map MMIO region (BAR0, 16MB)
if (!dpmiMapFramebuffer(priv->mmioPhysAddr, NV_MMIO_SIZE, &priv->mmioMapping)) {
return false;
}
// mmio indexes the whole block as dwords; fifo points NV_FIFO_BASE
// bytes into the same mapping.
priv->mmio = (volatile uint32_t *)priv->mmioMapping.ptr;
priv->fifo = (volatile uint32_t *)(priv->mmioMapping.ptr + NV_FIFO_BASE);
// Detect VRAM size
priv->vramSize = nvDetectVram(priv);
// Use whichever is smaller: the BAR size or detected VRAM
// NOTE(review): a BAR size of 0 would also win this comparison;
// confirm pciSizeBar cannot return 0 for a valid BAR.
if (lfbBarSize < priv->vramSize) {
priv->vramSize = lfbBarSize;
}
// Set VESA mode
VesaModeResultT vesa;
if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) {
dpmiUnmapFramebuffer(&priv->mmioMapping);
return false;
}
// Map framebuffer (BAR1)
if (!dpmiMapFramebuffer(priv->lfbPhysAddr, priv->vramSize, &priv->lfbMapping)) {
// The video mode was already changed, so restore text mode too.
vgaRestoreTextMode();
dpmiUnmapFramebuffer(&priv->mmioMapping);
return false;
}
// Round bits-per-pixel up to whole bytes.
priv->bytesPerPixel = (vesa.bpp + 7) / 8;
priv->screenPitch = vesa.pitch;
// Report the mode actually set by VESA, not the requested one.
drv->mode.width = vesa.width;
drv->mode.height = vesa.height;
drv->mode.bpp = vesa.bpp;
drv->mode.pitch = vesa.pitch;
drv->mode.framebuffer = priv->lfbMapping.ptr;
drv->mode.vramSize = priv->vramSize;
// Offscreen memory starts right after the visible scanlines.
drv->mode.offscreenBase = vesa.pitch * vesa.height;
// Reserve space for hardware cursor at end of VRAM
priv->cursorOffset = priv->vramSize - NV_HW_CURSOR_BYTES;
// Round the offset down to a multiple of NV_HW_CURSOR_BYTES
// (the mask assumes that constant is a power of two).
priv->cursorOffset &= ~(uint32_t)(NV_HW_CURSOR_BYTES - 1);
// Initialize the 2D engine
nvSetupEngine(priv);
drv->caps = ACAP_RECT_FILL
| ACAP_BITBLT
| ACAP_HOST_BLIT
| ACAP_HW_CURSOR
| ACAP_CLIP;
// Set full-screen clip
nvSetClip(drv, 0, 0, vesa.width, vesa.height);
nvWaitIdle(drv);
return true;
}
// ============================================================
// nvMoveCursor
// ============================================================
//
// Move the hardware cursor to (x, y). Negative coordinates are
// clamped to 0 before being written.
static void nvMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    const uint32_t posX = (x < 0) ? 0u : (uint32_t)x;
    const uint32_t posY = (y < 0) ? 0u : (uint32_t)y;

    // Position register layout: X in the low half, Y shifted
    // into the high half.
    nvWriteMmio(priv, NV_PRAMDAC_CURSOR_POS, posX | (posY << 16));
}
// ============================================================
// nvReadMmio / nvWriteMmio
// ============================================================
//
// Register access through the mapped MMIO block. Offsets are
// byte offsets; priv->mmio is indexed in 32-bit words.
static uint32_t nvReadMmio(NvPrivateT *priv, uint32_t offset) {
    const uint32_t wordIndex = offset >> 2;
    return priv->mmio[wordIndex];
}
// Store val at byte offset `offset` of the mapped MMIO block
// (priv->mmio is indexed in 32-bit words).
static void nvWriteMmio(NvPrivateT *priv, uint32_t offset, uint32_t val) {
    const uint32_t wordIndex = offset >> 2;
    priv->mmio[wordIndex] = val;
}
// ============================================================
// nvRectFill
// ============================================================
//
// Fill the w x h rectangle at (x, y) with a solid color via the
// GdiRectangle subchannel. Does nothing when w or h is not
// positive.
static void nvRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    if (w < 1 || h < 1) {
        return;
    }

    // X / width in the low half, Y / height in the high half.
    const uint32_t origin = (uint32_t)x | ((uint32_t)y << 16);
    const uint32_t extent = (uint32_t)w | ((uint32_t)h << 16);

    nvWaitIdle(drv);

    nvWriteFifo(priv, NV_RECT_COLOR, color);
    nvWriteFifo(priv, NV_RECT_POINT, origin);
    nvWriteFifo(priv, NV_RECT_SIZE, extent);
}
// ============================================================
// nvSetClip
// ============================================================
//
// Program the hardware clip rectangle (origin x,y and size w,h)
// via the Clip subchannel, after waiting for the engine to go
// idle.
static void nvSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    // X / width in the low half, Y / height in the high half.
    const uint32_t origin = (uint32_t)x | ((uint32_t)y << 16);
    const uint32_t extent = (uint32_t)w | ((uint32_t)h << 16);

    nvWaitIdle(drv);

    nvWriteFifo(priv, NV_CLIP_POINT, origin);
    nvWriteFifo(priv, NV_CLIP_SIZE, extent);
}
// ============================================================
// nvSetCursor
// ============================================================
//
// Convert an AND/XOR mask cursor image to the 2-bits-per-pixel
// layout used here, store it in VRAM at priv->cursorOffset, and
// write that offset to NV_PRAMDAC_CURSOR_START. Passing a NULL
// image just hides the cursor.
//
// The output is NV_HW_CURSOR_SIZE rows of 16 bytes (4 pixels
// per byte, leftmost pixel in the top two bits). The source
// masks are read with a stride of 8 bytes per row, MSB first.
//
// 2bpp encoding (AND bit is the high bit, XOR bit the low bit):
//   00 = cursor color 0 (background)
//   01 = cursor color 1 (foreground)
//   10 = transparent
//   11 = inverted
static void nvSetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    if (!image) {
        nvShowCursor(drv, false);
        return;
    }

    nvWaitIdle(drv);

    uint8_t *dst = drv->mode.framebuffer + priv->cursorOffset;

    for (int32_t line = 0; line < NV_HW_CURSOR_SIZE; line++) {
        for (int32_t col = 0; col < 16; col++) {
            const int32_t firstPx = col * 4;
            uint8_t       packed;

            if (line >= image->height || col >= (image->width + 3) / 4) {
                // Outside the supplied image: four transparent pixels.
                packed = 0xAA;
            } else {
                const int32_t maskByte = firstPx / 8;
                uint8_t       andByte  = 0;
                uint8_t       xorByte  = 0;

                if (maskByte < (image->width + 7) / 8) {
                    andByte = image->andMask[line * 8 + maskByte];
                    xorByte = image->xorMask[line * 8 + maskByte];
                }

                packed = 0;
                for (int32_t i = 0; i < 4; i++) {
                    // Mask bits are stored MSB first within each byte.
                    const int32_t shift  = 7 - ((firstPx + i) % 8);
                    const uint8_t andBit = (andByte >> shift) & 1;
                    const uint8_t xorBit = (xorByte >> shift) & 1;

                    // The 2-bit pixel code is simply AND:XOR.
                    packed |= (uint8_t)(((andBit << 1) | xorBit) << (6 - i * 2));
                }
            }

            dst[line * 16 + col] = packed;
        }
    }

    // Point the PRAMDAC at the freshly written image.
    nvWriteMmio(priv, NV_PRAMDAC_CURSOR_START, priv->cursorOffset);
}
// ============================================================
// nvSetupEngine
// ============================================================
//
// Initialize the 2D acceleration engine. The only setup done
// here is a single FIFO write that stores NV_ROP_COPY at the
// NV_ROP_ROP offset; no other subchannel state is touched.
static void nvSetupEngine(NvPrivateT *priv) {
// Set ROP to copy
nvWriteFifo(priv, NV_ROP_ROP, NV_ROP_COPY);
}
// ============================================================
// nvShowCursor
// ============================================================
//
// Show or hide the hardware cursor by setting or clearing
// NV_CURSOR_ENABLE in NV_PRAMDAC_CURSOR_CFG. All other bits of
// the register are preserved (read-modify-write).
static void nvShowCursor(AccelDriverT *drv, bool visible) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    const uint32_t current   = nvReadMmio(priv, NV_PRAMDAC_CURSOR_CFG);
    const uint32_t enableBit = (uint32_t)NV_CURSOR_ENABLE;
    const uint32_t updated   = visible ? (current | enableBit) : (current & ~enableBit);

    nvWriteMmio(priv, NV_PRAMDAC_CURSOR_CFG, updated);
}
// ============================================================
// nvShutdown
// ============================================================
//
// Tear the driver down: hide the hardware cursor, restore VGA
// text mode, then release the framebuffer and MMIO mappings.
static void nvShutdown(AccelDriverT *drv) {
NvPrivateT *priv = (NvPrivateT *)drv->privData;
// The cursor is hidden first because nvShowCursor needs the MMIO
// mapping, which is released at the end of this function.
nvShowCursor(drv, false);
vgaRestoreTextMode();
dpmiUnmapFramebuffer(&priv->lfbMapping);
dpmiUnmapFramebuffer(&priv->mmioMapping);
}
// ============================================================
// nvWaitIdle
// ============================================================
//
// Poll NV_PGRAPH_STATUS until it reads 0 (engine idle). Gives
// up silently after NV_MAX_IDLE_WAIT polls; the caller is not
// told whether the engine actually went idle.
static void nvWaitIdle(AccelDriverT *drv) {
    NvPrivateT *priv = (NvPrivateT *)drv->privData;

    int32_t polls = 0;
    while (polls < NV_MAX_IDLE_WAIT && nvReadMmio(priv, NV_PGRAPH_STATUS) != 0) {
        polls++;
    }
}
// ============================================================
// nvWriteFifo
// ============================================================
//
// Store val in the FIFO user area. The byte offset is relative
// to priv->fifo (FIFO base, BAR0 + 0x800000), which is indexed
// in 32-bit words.
static void nvWriteFifo(NvPrivateT *priv, uint32_t offset, uint32_t val) {
    const uint32_t wordIndex = offset >> 2;
    priv->fifo[wordIndex] = val;
}

307
pci.c Normal file
View file

@ -0,0 +1,307 @@
// pci.c -- PCI configuration space access for DOS/DJGPP
//
// Implements PCI mechanism 1 (CONFIG_ADDRESS at 0xCF8, CONFIG_DATA
// at 0xCFC). This is the standard PCI configuration access method
// supported by all PCI-capable chipsets.
//
// How mechanism 1 works:
// 1. Write a 32-bit address to port 0xCF8 with bit 31 set (enable),
// bus/dev/func/register fields encoded in bits 23:0
// 2. Read or write the 32-bit data at port 0xCFC
// 3. For sub-dword access (8/16-bit), read the full dword and
// mask/shift, or write with a read-modify-write
//
// Detection: write 0x80000000 to 0xCF8 and read back. If the value
// matches, mechanism 1 is present. This works because bit 31 is the
// enable bit -- on non-PCI systems, port 0xCF8 is either absent
// (a 32-bit read returns 0xFFFFFFFF) or belongs to a different device.
#include "pci.h"
#include <pc.h>
// PCI configuration mechanism 1 I/O ports
#define PCI_CONFIG_ADDR 0x0CF8
#define PCI_CONFIG_DATA 0x0CFC
// ============================================================
// Prototypes
// ============================================================
uint32_t pciBuildAddress(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg);
bool pciDetect(void);
int32_t pciEnumerate(PciEnumCallbackT cb, void *userData);
bool pciFindDevice(uint16_t vendorId, uint16_t deviceId, PciDeviceT *dev);
bool pciFindDeviceList(const uint16_t *idPairs, PciDeviceT *dev, int32_t *matchIdx);
uint8_t pciRead8(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg);
uint16_t pciRead16(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg);
uint32_t pciRead32(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg);
void pciWrite8(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg, uint8_t val);
void pciWrite16(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg, uint16_t val);
void pciWrite32(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg, uint32_t val);
// ============================================================
// pciBuildAddress
// ============================================================
//
// Constructs a PCI configuration space address for mechanism 1.
// Format: [31]=enable, [23:16]=bus, [15:11]=device, [10:8]=function,
//         [7:2]=register (dword-aligned), [1:0]=0
//
// dev is limited to 5 bits and func to 3 bits so an out-of-range
// value cannot spill into the neighbouring field (a dev of 32
// would otherwise be decoded as bus+1, device 0). The low two
// bits of reg are dropped; callers select the byte or word
// within the dword themselves.
uint32_t pciBuildAddress(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg) {
    return 0x80000000u
         | ((uint32_t)bus << 16)
         | (((uint32_t)dev & 0x1F) << 11)
         | (((uint32_t)func & 0x07) << 8)
         | ((uint32_t)reg & 0xFC);
}
// ============================================================
// pciDetect
// ============================================================
//
// Probe for PCI configuration mechanism 1: write a value with
// only the enable bit set to CONFIG_ADDRESS and check that the
// same value reads back. The previous port contents are saved
// first and written back afterwards so the probe leaves the
// port as it found it.
bool pciDetect(void) {
    const uint32_t probe    = 0x80000000;
    const uint32_t previous = inportl(PCI_CONFIG_ADDR);

    outportl(PCI_CONFIG_ADDR, probe);
    const uint32_t echoed = inportl(PCI_CONFIG_ADDR);
    outportl(PCI_CONFIG_ADDR, previous);

    return echoed == probe;
}
// ============================================================
// pciEnumerate
// ============================================================
//
// Walk every bus/device/function and report each function whose
// vendor ID is not 0xFFFF. A slot is skipped entirely when its
// function 0 is absent; functions 1..7 are only probed when
// bit 7 of function 0's header type is set (multi-function).
//
// cb may be NULL, in which case devices are only counted. When
// cb returns true the scan stops immediately. The return value
// is the number of functions found so far, including the one
// that stopped the scan.
int32_t pciEnumerate(PciEnumCallbackT cb, void *userData) {
    int32_t found = 0;

    for (int32_t b = 0; b < PCI_MAX_BUS; b++) {
        for (int32_t slot = 0; slot < PCI_MAX_DEV; slot++) {
            if (pciRead16(b, slot, 0, PCI_VENDOR_ID) == 0xFFFF) {
                continue;
            }

            // Multi-function flag lives in bit 7 of the header type.
            int32_t funcLimit = 1;
            if (pciRead8(b, slot, 0, PCI_HEADER_TYPE) & 0x80) {
                funcLimit = PCI_MAX_FUNC;
            }

            for (int32_t fn = 0; fn < funcLimit; fn++) {
                const uint16_t vid = pciRead16(b, slot, fn, PCI_VENDOR_ID);
                if (vid != 0xFFFF) {
                    PciDeviceT info;

                    info.bus       = b;
                    info.dev       = slot;
                    info.func      = fn;
                    info.vendorId  = vid;
                    info.deviceId  = pciRead16(b, slot, fn, PCI_DEVICE_ID);
                    info.revision  = pciRead8(b, slot, fn, PCI_REVISION_ID);
                    info.baseClass = pciRead8(b, slot, fn, PCI_BASE_CLASS);
                    info.subClass  = pciRead8(b, slot, fn, PCI_SUBCLASS);
                    for (int32_t n = 0; n < 6; n++) {
                        info.bar[n] = pciRead32(b, slot, fn, PCI_BAR0 + n * 4);
                    }

                    found++;
                    if (cb && cb(&info, userData)) {
                        return found;
                    }
                }
            }
        }
    }

    return found;
}
// ============================================================
// pciFindDevice
// ============================================================
//
// Scan the bus for the first function whose vendor and device
// IDs both match. On a match, *dev is filled in (location, IDs,
// revision, class codes and the six raw BAR values) and true is
// returned. Returns false, leaving *dev untouched, when nothing
// matches.
bool pciFindDevice(uint16_t vendorId, uint16_t deviceId, PciDeviceT *dev) {
    for (int32_t b = 0; b < PCI_MAX_BUS; b++) {
        for (int32_t slot = 0; slot < PCI_MAX_DEV; slot++) {
            // Nothing in this slot if function 0 is absent.
            if (pciRead16(b, slot, 0, PCI_VENDOR_ID) == 0xFFFF) {
                continue;
            }

            const uint8_t hdr       = pciRead8(b, slot, 0, PCI_HEADER_TYPE);
            const int32_t funcLimit = (hdr & 0x80) ? PCI_MAX_FUNC : 1;

            for (int32_t fn = 0; fn < funcLimit; fn++) {
                const uint16_t foundVendor = pciRead16(b, slot, fn, PCI_VENDOR_ID);
                const uint16_t foundDevice = pciRead16(b, slot, fn, PCI_DEVICE_ID);

                if (foundVendor != vendorId || foundDevice != deviceId) {
                    continue;
                }

                dev->bus       = b;
                dev->dev       = slot;
                dev->func      = fn;
                dev->vendorId  = foundVendor;
                dev->deviceId  = foundDevice;
                dev->revision  = pciRead8(b, slot, fn, PCI_REVISION_ID);
                dev->baseClass = pciRead8(b, slot, fn, PCI_BASE_CLASS);
                dev->subClass  = pciRead8(b, slot, fn, PCI_SUBCLASS);
                for (int32_t n = 0; n < 6; n++) {
                    dev->bar[n] = pciRead32(b, slot, fn, PCI_BAR0 + n * 4);
                }
                return true;
            }
        }
    }

    return false;
}
// ============================================================
// pciFindDeviceList
// ============================================================
//
// Scan the bus for the first function matching any entry of a
// vendor/device ID list. The list is a flat uint16_t array of
// pairs, ended by an entry whose vendor ID is 0:
//   { vendor1, device1, vendor2, device2, ..., 0, 0 }
// On a match, *dev is filled in, *matchIdx (if not NULL) gets
// the 0-based pair index, and true is returned. On failure
// false is returned and neither output is written.
bool pciFindDeviceList(const uint16_t *idPairs, PciDeviceT *dev, int32_t *matchIdx) {
    for (int32_t b = 0; b < PCI_MAX_BUS; b++) {
        for (int32_t slot = 0; slot < PCI_MAX_DEV; slot++) {
            // Nothing in this slot if function 0 is absent.
            if (pciRead16(b, slot, 0, PCI_VENDOR_ID) == 0xFFFF) {
                continue;
            }

            const uint8_t hdr       = pciRead8(b, slot, 0, PCI_HEADER_TYPE);
            const int32_t funcLimit = (hdr & 0x80) ? PCI_MAX_FUNC : 1;

            for (int32_t fn = 0; fn < funcLimit; fn++) {
                const uint16_t foundVendor = pciRead16(b, slot, fn, PCI_VENDOR_ID);
                const uint16_t foundDevice = pciRead16(b, slot, fn, PCI_DEVICE_ID);

                if (foundVendor == 0xFFFF) {
                    continue;
                }

                // Walk the pair list until the zero-vendor terminator.
                const uint16_t *pair   = idPairs;
                int32_t         pairNo = 0;
                while (pair[0] != 0) {
                    if (pair[0] == foundVendor && pair[1] == foundDevice) {
                        dev->bus       = b;
                        dev->dev       = slot;
                        dev->func      = fn;
                        dev->vendorId  = foundVendor;
                        dev->deviceId  = foundDevice;
                        dev->revision  = pciRead8(b, slot, fn, PCI_REVISION_ID);
                        dev->baseClass = pciRead8(b, slot, fn, PCI_BASE_CLASS);
                        dev->subClass  = pciRead8(b, slot, fn, PCI_SUBCLASS);
                        for (int32_t n = 0; n < 6; n++) {
                            dev->bar[n] = pciRead32(b, slot, fn, PCI_BAR0 + n * 4);
                        }
                        if (matchIdx) {
                            *matchIdx = pairNo;
                        }
                        return true;
                    }
                    pair += 2;
                    pairNo++;
                }
            }
        }
    }

    return false;
}
// ============================================================
// pciRead8
// ============================================================
//
// Read one byte of configuration space. The containing dword is
// fetched and the byte selected by the low two bits of reg is
// shifted out of it.
uint8_t pciRead8(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg) {
    const uint32_t address = pciBuildAddress(bus, dev, func, reg);
    outportl(PCI_CONFIG_ADDR, address);

    const uint32_t dword = inportl(PCI_CONFIG_DATA);
    const uint32_t lane  = reg & 3;

    return (uint8_t)(dword >> (lane * 8));
}
// ============================================================
// pciRead16
// ============================================================
//
// Read a 16-bit word of configuration space. The containing
// dword is fetched; bit 1 of reg selects the upper or lower
// half (bit 0 of reg is ignored).
uint16_t pciRead16(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg) {
    const uint32_t address = pciBuildAddress(bus, dev, func, reg);
    outportl(PCI_CONFIG_ADDR, address);

    const uint32_t dword = inportl(PCI_CONFIG_DATA);
    const uint32_t shift = (reg & 2) ? 16 : 0;

    return (uint16_t)(dword >> shift);
}
// ============================================================
// pciRead32
// ============================================================
//
// Read the configuration-space dword that contains reg (the
// low two bits of reg are dropped by pciBuildAddress).
uint32_t pciRead32(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg) {
    const uint32_t address = pciBuildAddress(bus, dev, func, reg);
    outportl(PCI_CONFIG_ADDR, address);

    const uint32_t value = inportl(PCI_CONFIG_DATA);
    return value;
}
// ============================================================
// pciWrite8
// ============================================================
//
// Write one byte of configuration space using a read-modify-write
// of the containing dword: the dword is read, the byte selected
// by the low two bits of reg is replaced with val, and the whole
// dword is written back.
//
// The byte mask is built in uint32_t. Shifting the int constant
// 0xFF left by 24 (byte lane 3) would overflow a signed int,
// which is undefined behaviour in C.
//
// NOTE(review): the three neighbouring bytes are written back
// with the values just read. For registers whose bits clear when
// written with 1 (the PCI status register is specified that way)
// this can clear pending bits as a side effect -- confirm whether
// a byte-wide port write to CONFIG_DATA would be preferable.
void pciWrite8(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg, uint8_t val) {
    outportl(PCI_CONFIG_ADDR, pciBuildAddress(bus, dev, func, reg));
    uint32_t dword = inportl(PCI_CONFIG_DATA);

    const uint32_t shift = (uint32_t)(reg & 3) * 8;
    const uint32_t mask  = (uint32_t)0xFF << shift;

    dword = (dword & ~mask) | ((uint32_t)val << shift);
    outportl(PCI_CONFIG_DATA, dword);
}
// ============================================================
// pciWrite16
// ============================================================
//
// Write a 16-bit word of configuration space using a
// read-modify-write of the containing dword. Bit 1 of reg
// selects the upper or lower half (bit 0 is ignored); that half
// is replaced with val and the whole dword is written back.
//
// The half-word mask is built in uint32_t. Shifting the int
// constant 0xFFFF left by 16 would overflow a signed int, which
// is undefined behaviour in C.
//
// NOTE(review): the other half of the dword is written back with
// the value just read. When reg is PCI_COMMAND that other half is
// PCI_STATUS, whose bits are specified as write-1-to-clear, so
// this can clear pending status bits as a side effect -- confirm
// whether a 16-bit port write to CONFIG_DATA would be preferable.
void pciWrite16(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg, uint16_t val) {
    outportl(PCI_CONFIG_ADDR, pciBuildAddress(bus, dev, func, reg));
    uint32_t dword = inportl(PCI_CONFIG_DATA);

    const uint32_t shift = (uint32_t)(reg & 2) * 8;
    const uint32_t mask  = (uint32_t)0xFFFF << shift;

    dword = (dword & ~mask) | ((uint32_t)val << shift);
    outportl(PCI_CONFIG_DATA, dword);
}
// ============================================================
// pciWrite32
// ============================================================
//
// Write val to the configuration-space dword that contains reg
// (the low two bits of reg are dropped by pciBuildAddress).
void pciWrite32(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg, uint32_t val) {
    const uint32_t address = pciBuildAddress(bus, dev, func, reg);

    outportl(PCI_CONFIG_ADDR, address);
    outportl(PCI_CONFIG_DATA, val);
}

98
pci.h Normal file
View file

@ -0,0 +1,98 @@
// pci.h -- PCI configuration space access for DOS/DJGPP
//
// Provides functions to read/write PCI configuration registers and
// enumerate devices on the PCI bus. Uses the standard mechanism 1
// (I/O ports 0xCF8/0xCFC) which is supported by all PCI-capable
// systems from 1993 onward.
//
// All functions operate synchronously via inportl/outportl. No BIOS
// calls (INT 1Ah) are used because mechanism 1 is faster, simpler,
// and doesn't require a DPMI real-mode callback.
#ifndef PCI_H
#define PCI_H
#include <stdint.h>
#include <stdbool.h>
// PCI configuration space register offsets (common header)
#define PCI_VENDOR_ID 0x00
#define PCI_DEVICE_ID 0x02
#define PCI_COMMAND 0x04
#define PCI_STATUS 0x06
#define PCI_REVISION_ID 0x08
#define PCI_CLASS_CODE 0x09
#define PCI_SUBCLASS 0x0A
#define PCI_BASE_CLASS 0x0B
#define PCI_HEADER_TYPE 0x0E
#define PCI_BAR0 0x10
#define PCI_BAR1 0x14
#define PCI_BAR2 0x18
#define PCI_BAR3 0x1C
#define PCI_BAR4 0x20
#define PCI_BAR5 0x24
#define PCI_SUBSYS_VENDOR 0x2C
#define PCI_SUBSYS_ID 0x2E
// PCI command register bits
#define PCI_CMD_IO_ENABLE 0x0001
#define PCI_CMD_MEM_ENABLE 0x0002
#define PCI_CMD_BUS_MASTER 0x0004
// PCI base class for display controllers
#define PCI_CLASS_DISPLAY 0x03
// Maximum PCI bus/device/function values
#define PCI_MAX_BUS 256
#define PCI_MAX_DEV 32
#define PCI_MAX_FUNC 8
// PCI device descriptor returned by enumeration
// (filled in by pciEnumerate, pciFindDevice and pciFindDeviceList).
typedef struct {
// Location of the function on the PCI bus.
uint8_t bus;
uint8_t dev;
uint8_t func;
// Identification, read from PCI_VENDOR_ID / PCI_DEVICE_ID.
uint16_t vendorId;
uint16_t deviceId;
// Read from PCI_REVISION_ID, PCI_BASE_CLASS and PCI_SUBCLASS.
uint8_t revision;
uint8_t baseClass;
uint8_t subClass;
// Raw contents of the six BAR registers (PCI_BAR0..PCI_BAR5),
// stored exactly as read, with the low flag bits still in place.
uint32_t bar[6];
} PciDeviceT;
// Callback for pciEnumerate(). Return true to stop enumeration.
typedef bool (*PciEnumCallbackT)(const PciDeviceT *device, void *userData);
// ============================================================
// Prototypes
// ============================================================
// Build a CONFIG_ADDRESS dword for the given bus/dev/func/register.
uint32_t pciBuildAddress(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg);
// Check whether PCI mechanism 1 is available.
bool pciDetect(void);
// Enumerate all PCI devices. Calls cb for each device found.
// Stops early if cb returns true. Returns the number of devices found.
int32_t pciEnumerate(PciEnumCallbackT cb, void *userData);
// Find the first PCI device matching vendorId/deviceId.
// Returns true if found (and fills out dev), false if not.
bool pciFindDevice(uint16_t vendorId, uint16_t deviceId, PciDeviceT *dev);
// Find the first PCI device matching any of the given vendor/device
// pairs. The list is terminated by a {0, 0} entry. Returns true if
// found (and fills out dev and matchIdx), false if not.
bool pciFindDeviceList(const uint16_t *idPairs, PciDeviceT *dev, int32_t *matchIdx);
// Read an 8/16/32-bit value from PCI configuration space.
uint8_t pciRead8(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg);
uint16_t pciRead16(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg);
uint32_t pciRead32(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg);
// Write an 8/16/32-bit value to PCI configuration space.
void pciWrite8(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg, uint8_t val);
void pciWrite16(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg, uint16_t val);
void pciWrite32(uint8_t bus, uint8_t dev, uint8_t func, uint8_t reg, uint32_t val);
#endif // PCI_H

1216
s3Trio.c Normal file

File diff suppressed because it is too large Load diff

561
sis.c Normal file
View file

@ -0,0 +1,561 @@
// sis.c -- SiS 6326/300/305/315/330 accelerated video driver
//
// Supports the SiS 6326, 300, 305, 315, and 330 integrated graphics
// chipsets. These share a similar 2D engine interface based on a
// queue-based command submission model:
// - Hardware rectangle fill
// - Screen-to-screen BitBLT
// - CPU-to-screen blit (host blit via data port)
// - Hardware clip rectangle
// - 64x64 hardware cursor
//
// Register access:
// BAR0 maps the linear framebuffer.
// BAR1 maps 128KB of MMIO registers. The 2D engine registers
// live at offsets 0x8200-0x8244 within this block. Host data
// is written to the MMIO data port at offset 0x8300.
//
// The 2D engine uses a command register at 0x822C to specify the
// operation type and ROP, then a fire register at 0x8230 to trigger
// execution. Engine status is polled at 0x8244.
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// ============================================================
// SiS vendor/device IDs
// ============================================================
#define SIS_VENDOR_ID 0x1039
#define SIS_6326 0x6326
#define SIS_300 0x0300
#define SIS_305 0x0305
#define SIS_315 0x0315
#define SIS_330 0x0330
// Supported chips as { vendor, device } pairs, ended by a { 0, 0 }
// entry. This is the list format pciFindDeviceList() expects.
static const uint16_t sSisDeviceIds[] = {
SIS_VENDOR_ID, SIS_6326,
SIS_VENDOR_ID, SIS_300,
SIS_VENDOR_ID, SIS_305,
SIS_VENDOR_ID, SIS_315,
SIS_VENDOR_ID, SIS_330,
0, 0
};
// ============================================================
// 2D engine register offsets (from MMIO base)
// ============================================================
#define SIS_SRC_ADDR 0x8200 // source address (for blit)
#define SIS_SRC_PITCH 0x8204 // source pitch
#define SIS_SRC_YX 0x8208 // src Y<<16 | X
#define SIS_DST_YX 0x820C // dst Y<<16 | X
#define SIS_RECT_WH 0x8210 // width<<16 | height
#define SIS_FG_COLOR 0x8214 // foreground color
#define SIS_BG_COLOR 0x8218 // background color
#define SIS_MONO_PAT0 0x821C // mono pattern 0
#define SIS_MONO_PAT1 0x8220 // mono pattern 1
#define SIS_CLIP_LT 0x8224 // clip left<<16 | top
#define SIS_CLIP_RB 0x8228 // clip right<<16 | bottom
#define SIS_CMD 0x822C // command register
#define SIS_FIRE 0x8230 // fire trigger
#define SIS_LINE_PARAMS 0x8234 // line parameters
#define SIS_DST_ADDR 0x8238 // destination address
#define SIS_SRC_DST_PITCH 0x823C // src/dst pitch combined
#define SIS_AGP_BASE 0x8240 // AGP base (unused)
// ============================================================
// Engine status register
// ============================================================
#define SIS_ENGINE_STATUS 0x8244 // bit 0 = queues empty, bit 1 = idle
#define SIS_STATUS_QUEUE_EMPTY 0x01
#define SIS_STATUS_ENGINE_IDLE 0x02
#define SIS_STATUS_ALL_IDLE (SIS_STATUS_QUEUE_EMPTY | SIS_STATUS_ENGINE_IDLE)
// ============================================================
// Host data port
// ============================================================
#define SIS_HOST_DATA 0x8300 // write pixel data here as dwords
// ============================================================
// Command register encoding
// ============================================================
// Bits 7:0 = ROP
#define SIS_ROP_COPY 0xCC
#define SIS_ROP_PAT_COPY 0xF0
// Bit 8 = X direction
#define SIS_CMD_XDIR_RIGHT (1 << 8)
// Bit 9 = Y direction
#define SIS_CMD_YDIR_DOWN (1 << 9)
// Bits 13:10 = command type
#define SIS_CMD_BITBLT 0x0000
#define SIS_CMD_COLOREXP 0x0400
#define SIS_CMD_LINEDRAW 0x0800
#define SIS_CMD_TRAPEZOID 0x0C00
// Bit 14 = pattern enable
#define SIS_CMD_PAT_ENABLE (1 << 14)
// Bit 16 = clipping enable
#define SIS_CMD_CLIP_ENABLE (1 << 16)
// Bit 24 = source is mono
#define SIS_CMD_SRC_MONO (1 << 24)
// ============================================================
// Hardware cursor registers
// ============================================================
#define SIS_CURSOR_ENABLE 0x8500 // bit 0 = enable
#define SIS_CURSOR_X 0x8504 // cursor X position
#define SIS_CURSOR_Y 0x8508 // cursor Y position
#define SIS_CURSOR_ADDR 0x850C // cursor VRAM byte offset
// ============================================================
// Misc constants
// ============================================================
#define SIS_MMIO_SIZE 131072 // BAR1: 128KB MMIO
#define SIS_MAX_IDLE_WAIT 1000000
#define SIS_HW_CURSOR_SIZE 64
// ============================================================
// Private driver state
// ============================================================
//
// Per-driver state for the SiS backend, filled in by sisInit().
typedef struct {
// Physical base of the linear framebuffer (BAR0, flag bits masked off).
uint32_t lfbPhysAddr;
// Physical base of the MMIO register block (BAR1, flag bits masked off).
uint32_t mmioPhysAddr;
// Size of the framebuffer BAR as reported by pciSizeBar().
uint32_t vramSize;
// Bytes per pixel of the active mode, bits-per-pixel rounded up.
int32_t bytesPerPixel;
// Pitch of the active mode as reported by VESA.
int32_t screenPitch;
// MMIO block viewed as 32-bit registers (points into mmioMapping).
volatile uint32_t *mmio;
// DPMI mappings, released again in sisInit()'s failure paths.
DpmiMappingT mmioMapping;
DpmiMappingT lfbMapping;
} SisPrivateT;
// ============================================================
// Prototypes
// ============================================================
static void sisBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool sisDetect(AccelDriverT *drv);
static void sisHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool sisInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void sisMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void sisRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void sisSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h);
static void sisSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void sisShowCursor(AccelDriverT *drv, bool visible);
static void sisShutdown(AccelDriverT *drv);
static void sisWaitIdle(AccelDriverT *drv);
// Store val in the MMIO register at byte offset reg
// (priv->mmio is indexed in 32-bit words).
static inline void sisWrite(SisPrivateT *priv, uint32_t reg, uint32_t val) {
    const uint32_t wordIndex = reg >> 2;
    priv->mmio[wordIndex] = val;
}
// Load the MMIO register at byte offset reg
// (priv->mmio is indexed in 32-bit words).
static inline uint32_t sisRead(SisPrivateT *priv, uint32_t reg) {
    const uint32_t wordIndex = reg >> 2;
    return priv->mmio[wordIndex];
}
// ============================================================
// Driver instance
// ============================================================
//
// The single SiS driver object and its private state. The name
// below is only a default: sisDetect() replaces it with the name
// of the chip actually found. caps starts at 0 and is set by
// sisInit(). Entry points left NULL (rectFillPat, colorExpand,
// lineDraw) are not provided by this driver.
static SisPrivateT sSisPrivate;
static AccelDriverT sSisDriver = {
.name = "SiS 6326",
.chipFamily = "sis",
.caps = 0,
.privData = &sSisPrivate,
.detect = sisDetect,
.init = sisInit,
.shutdown = sisShutdown,
.waitIdle = sisWaitIdle,
.setClip = sisSetClip,
.rectFill = sisRectFill,
.rectFillPat = NULL,
.bitBlt = sisBitBlt,
.hostBlit = sisHostBlit,
.colorExpand = NULL,
.lineDraw = NULL,
.setCursor = sisSetCursor,
.moveCursor = sisMoveCursor,
.showCursor = sisShowCursor,
};
// ============================================================
// sisRegisterDriver
// ============================================================
//
// Public entry point of this file: hands the SiS driver object
// to the framework through accelRegisterDriver().
void sisRegisterDriver(void) {
accelRegisterDriver(&sSisDriver);
}
// ============================================================
// sisBitBlt
// ============================================================
//
// Screen-to-screen copy of a w x h rectangle. To stay correct
// when source and destination overlap, each axis is copied in
// increasing order when the destination is at or before the
// source, and in decreasing order otherwise. For a decreasing
// axis the start coordinates are moved to the far edge of the
// rectangle. Does nothing when w or h is not positive.
static void sisBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    SisPrivateT *priv = (SisPrivateT *)drv->privData;

    if (w < 1 || h < 1) {
        return;
    }

    sisWaitIdle(drv);

    const bool forwardX = (dstX <= srcX);
    const bool forwardY = (dstY <= srcY);

    uint32_t command = SIS_CMD_BITBLT | SIS_ROP_COPY | SIS_CMD_CLIP_ENABLE;
    if (forwardX) {
        command |= SIS_CMD_XDIR_RIGHT;
    }
    if (forwardY) {
        command |= SIS_CMD_YDIR_DOWN;
    }

    // Offset to the last column / row when walking backwards.
    const int32_t xBias = forwardX ? 0 : (w - 1);
    const int32_t yBias = forwardY ? 0 : (h - 1);

    const uint32_t srcYX  = ((uint32_t)(srcY + yBias) << 16) | (uint32_t)(srcX + xBias);
    const uint32_t dstYX  = ((uint32_t)(dstY + yBias) << 16) | (uint32_t)(dstX + xBias);
    const uint32_t stride = (uint32_t)priv->screenPitch;

    // Same pitch in both halves of the combined pitch register.
    sisWrite(priv, SIS_SRC_DST_PITCH, (stride << 16) | stride);
    sisWrite(priv, SIS_SRC_YX, srcYX);
    sisWrite(priv, SIS_DST_YX, dstYX);
    sisWrite(priv, SIS_RECT_WH, ((uint32_t)w << 16) | (uint32_t)h);
    sisWrite(priv, SIS_CMD, command);
    sisWrite(priv, SIS_FIRE, 0);
}
// ============================================================
// sisDetect
// ============================================================
//
// Look for any supported SiS chip on the PCI bus. On success
// drv->pciDev describes the device, drv->name is set to the
// matching chip name, and true is returned. Returns false when
// no supported chip is present.
static bool sisDetect(AccelDriverT *drv) {
    int32_t pairIndex;

    if (!pciFindDeviceList(sSisDeviceIds, &drv->pciDev, &pairIndex)) {
        return false;
    }

    const uint16_t chip = drv->pciDev.deviceId;

    if (chip == SIS_6326) {
        drv->name = "SiS 6326";
    } else if (chip == SIS_300) {
        drv->name = "SiS 300";
    } else if (chip == SIS_305) {
        drv->name = "SiS 305";
    } else if (chip == SIS_315) {
        drv->name = "SiS 315";
    } else if (chip == SIS_330) {
        drv->name = "SiS 330";
    } else {
        // Fallback for an ID that is in the list but not named above.
        drv->name = "SiS 6326/3xx";
    }

    return true;
}
// ============================================================
// sisHostBlit
// ============================================================
//
// CPU-to-screen copy of a w x h rectangle. A BitBLT command is
// set up and fired, then the pixel rows from srcBuf (srcPitch
// bytes apart) are written as dwords to the host data port at
// SIS_HOST_DATA. Does nothing when w or h is not positive.
//
// NOTE(review): the command word includes SIS_CMD_SRC_MONO while
// the data streamed below is full-depth pixel data (w *
// bytesPerPixel bytes per row) -- confirm against the chip
// documentation that this is the intended source mode.
static void sisHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    SisPrivateT *priv = (SisPrivateT *)drv->privData;

    if (w < 1 || h < 1) {
        return;
    }

    const int32_t  rowBytes   = w * priv->bytesPerPixel;
    const int32_t  dwordCount = (rowBytes + 3) / 4;
    const uint32_t command    = SIS_CMD_BITBLT | SIS_ROP_COPY | SIS_CMD_CLIP_ENABLE | SIS_CMD_XDIR_RIGHT | SIS_CMD_YDIR_DOWN | SIS_CMD_SRC_MONO;

    sisWaitIdle(drv);

    sisWrite(priv, SIS_SRC_DST_PITCH, (uint32_t)priv->screenPitch);
    sisWrite(priv, SIS_DST_YX, ((uint32_t)dstY << 16) | (uint32_t)dstX);
    sisWrite(priv, SIS_RECT_WH, ((uint32_t)w << 16) | (uint32_t)h);
    sisWrite(priv, SIS_FG_COLOR, 0);
    sisWrite(priv, SIS_CMD, command);
    sisWrite(priv, SIS_FIRE, 0);

    // Stream the image one row at a time through the data port.
    for (int32_t y = 0; y < h; y++) {
        const uint8_t *line = srcBuf + y * srcPitch;

        for (int32_t i = 0; i < dwordCount; i++) {
            const int32_t base = i * 4;

            // The last dword of a row may be partial; the missing
            // high bytes stay zero.
            int32_t valid = rowBytes - base;
            if (valid > 4) {
                valid = 4;
            }

            // Little-endian packing: byte k lands in bits k*8..k*8+7.
            uint32_t packed = 0;
            for (int32_t k = 0; k < valid; k++) {
                packed |= (uint32_t)line[base + k] << (k * 8);
            }

            sisWrite(priv, SIS_HOST_DATA, packed);
        }
    }
}
// ============================================================
// sisInit
// ============================================================
//
// Bring the adapter up in the requested mode.
//
//   1. Read BAR0 (framebuffer) and BAR1 (MMIO) and size BAR0.
//   2. Map SIS_MMIO_SIZE bytes of MMIO control registers.
//   3. Find and set a VESA mode matching req.
//   4. Map the linear framebuffer.
//   5. Publish the mode in drv->mode, wait for the engine to go
//      idle, advertise capabilities and set a full-screen clip.
//
// Returns true on success. On failure everything acquired so far is
// released (text mode is restored if the mode had been set) and
// priv->mmio is reset to NULL so no stale pointer into the unmapped
// MMIO window survives, matching what sisShutdown() does.

static bool sisInit(AccelDriverT *drv, const AccelModeRequestT *req) {
    SisPrivateT *priv = (SisPrivateT *)drv->privData;

    // Read BARs; the low four bits are masked off before use as an address
    uint32_t bar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev,
                              drv->pciDev.func, PCI_BAR0);
    uint32_t bar1 = pciRead32(drv->pciDev.bus, drv->pciDev.dev,
                              drv->pciDev.func, PCI_BAR1);

    priv->lfbPhysAddr  = bar0 & 0xFFFFFFF0;
    priv->mmioPhysAddr = bar1 & 0xFFFFFFF0;

    // Size the framebuffer BAR
    priv->vramSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev,
                                drv->pciDev.func, PCI_BAR0);

    // Map MMIO control registers
    if (!dpmiMapFramebuffer(priv->mmioPhysAddr, SIS_MMIO_SIZE, &priv->mmioMapping)) {
        priv->mmio = NULL;
        return false;
    }

    priv->mmio = (volatile uint32_t *)priv->mmioMapping.ptr;

    // Find and set VESA mode
    VesaModeResultT vesa;

    if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) {
        dpmiUnmapFramebuffer(&priv->mmioMapping);
        priv->mmio = NULL;   // mapping is gone; do not leave it dangling
        return false;
    }

    // Map framebuffer
    if (!dpmiMapFramebuffer(priv->lfbPhysAddr, priv->vramSize, &priv->lfbMapping)) {
        vgaRestoreTextMode();
        dpmiUnmapFramebuffer(&priv->mmioMapping);
        priv->mmio = NULL;   // mapping is gone; do not leave it dangling
        return false;
    }

    priv->bytesPerPixel = (vesa.bpp + 7) / 8;
    priv->screenPitch   = vesa.pitch;

    drv->mode.width         = vesa.width;
    drv->mode.height        = vesa.height;
    drv->mode.bpp           = vesa.bpp;
    drv->mode.pitch         = vesa.pitch;
    drv->mode.framebuffer   = priv->lfbMapping.ptr;
    drv->mode.vramSize      = priv->vramSize;
    // Off-screen memory begins right after the visible scanlines
    drv->mode.offscreenBase = vesa.pitch * vesa.height;

    // Wait for engine idle before configuring
    sisWaitIdle(drv);

    drv->caps = ACAP_RECT_FILL
              | ACAP_BITBLT
              | ACAP_HOST_BLIT
              | ACAP_HW_CURSOR
              | ACAP_CLIP;

    // Full screen clip
    sisSetClip(drv, 0, 0, vesa.width, vesa.height);

    return true;
}
// ============================================================
// sisMoveCursor
// ============================================================
//
// Position the hardware cursor. Negative coordinates are clamped to
// zero before being written to SIS_CURSOR_X / SIS_CURSOR_Y.

static void sisMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    SisPrivateT *priv = (SisPrivateT *)drv->privData;

    const uint32_t posX = (x < 0) ? 0u : (uint32_t)x;
    const uint32_t posY = (y < 0) ? 0u : (uint32_t)y;

    sisWrite(priv, SIS_CURSOR_X, posX);
    sisWrite(priv, SIS_CURSOR_Y, posY);
}
// ============================================================
// sisRectFill
// ============================================================
//
// Solid rectangle fill. Loads the fill color into the foreground
// register, sets both mono pattern words to all ones, programs the
// destination origin and size, then fires a BitBLT with the PAT_COPY
// ROP and pattern enable. A non-positive width or height is a no-op.

static void sisRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    SisPrivateT *priv = (SisPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    // Pack the two-field registers up front: first field in the high
    // half, second in the low half.
    const uint32_t dstYX  = ((uint32_t)y << 16) | (uint32_t)x;
    const uint32_t rectWH = ((uint32_t)w << 16) | (uint32_t)h;
    const uint32_t fillCmd = SIS_CMD_BITBLT | SIS_ROP_PAT_COPY | SIS_CMD_PAT_ENABLE | SIS_CMD_CLIP_ENABLE | SIS_CMD_XDIR_RIGHT | SIS_CMD_YDIR_DOWN;

    sisWaitIdle(drv);

    sisWrite(priv, SIS_SRC_DST_PITCH, (uint32_t)priv->screenPitch);
    sisWrite(priv, SIS_FG_COLOR, color);
    sisWrite(priv, SIS_MONO_PAT0, 0xFFFFFFFF);
    sisWrite(priv, SIS_MONO_PAT1, 0xFFFFFFFF);
    sisWrite(priv, SIS_DST_YX, dstYX);
    sisWrite(priv, SIS_RECT_WH, rectWH);
    sisWrite(priv, SIS_CMD, fillCmd);
    sisWrite(priv, SIS_FIRE, 0);
}
// ============================================================
// sisSetClip
// ============================================================
//
// Program the clip rectangle from an origin and size. The
// left/top register receives (x, y); the right/bottom register
// receives the inclusive far corner (x + w - 1, y + h - 1). In both
// registers the X value occupies the high 16 bits.

static void sisSetClip(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h) {
    SisPrivateT *priv = (SisPrivateT *)drv->privData;

    const uint32_t right  = (uint32_t)(x + w - 1);
    const uint32_t bottom = (uint32_t)(y + h - 1);

    sisWrite(priv, SIS_CLIP_LT, ((uint32_t)x << 16) | (uint32_t)y);
    sisWrite(priv, SIS_CLIP_RB, (right << 16) | bottom);
}
// ============================================================
// sisSetCursor
// ============================================================
//
// Upload a hardware cursor image into VRAM and point the cursor
// address register at it. The image is stored as SIS_HW_CURSOR_SIZE
// rows of 16 bytes: 8 bytes of AND mask followed by 8 bytes of XOR
// mask. Rows and columns outside the supplied image are filled with
// the transparent combination (AND = 0xFF, XOR = 0x00). Passing a
// NULL image just hides the cursor.
//
// NOTE(review): the source masks are indexed with a fixed 8-byte row
// stride regardless of image->width -- presumably HwCursorImageT
// masks are always laid out 64 pixels wide; confirm.

static void sisSetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    SisPrivateT *priv = (SisPrivateT *)drv->privData;

    if (!image) {
        sisShowCursor(drv, false);
        return;
    }

    sisWaitIdle(drv);

    // Last 1 KB of VRAM, rounded down to a 1 KB boundary
    const uint32_t vramOff = (uint32_t)(priv->vramSize - 1024) & ~(uint32_t)0x3FF;
    uint8_t *slot = drv->mode.framebuffer + vramOff;

    for (int32_t y = 0; y < SIS_HW_CURSOR_SIZE; y++) {
        for (int32_t n = 0; n < 8; n++) {
            // Default to transparent, then override when the source
            // image actually covers this byte.
            uint8_t andBits = 0xFF;
            uint8_t xorBits = 0x00;

            if (y < image->height && n < (image->width + 7) / 8) {
                const int32_t src = y * 8 + n;
                andBits = image->andMask[src];
                xorBits = image->xorMask[src];
            }

            slot[y * 16 + n]     = andBits;
            slot[y * 16 + n + 8] = xorBits;
        }
    }

    sisWrite(priv, SIS_CURSOR_ADDR, vramOff);
}
// ============================================================
// sisShowCursor
// ============================================================
//
// Show or hide the hardware cursor by writing 1 or 0 to
// SIS_CURSOR_ENABLE.

static void sisShowCursor(AccelDriverT *drv, bool visible) {
    SisPrivateT *priv = (SisPrivateT *)drv->privData;
    const uint32_t enable = visible ? 1 : 0;

    sisWrite(priv, SIS_CURSOR_ENABLE, enable);
}
// ============================================================
// sisShutdown
// ============================================================
//
// Tear the driver down: hide the cursor while MMIO is still mapped,
// return the display to text mode, release both DPMI mappings and
// clear the MMIO pointer so it cannot be used afterwards.

static void sisShutdown(AccelDriverT *drv) {
    SisPrivateT *priv = (SisPrivateT *)drv->privData;

    // Needs the MMIO window, so it must come before the unmap below
    sisShowCursor(drv, false);

    vgaRestoreTextMode();

    dpmiUnmapFramebuffer(&priv->lfbMapping);
    dpmiUnmapFramebuffer(&priv->mmioMapping);

    priv->mmio = NULL;
}
// ============================================================
// sisWaitIdle
// ============================================================
//
// Spin until every bit in SIS_STATUS_ALL_IDLE is set in the engine
// status register, i.e. the 2D engine is completely idle. Gives up
// silently after SIS_MAX_IDLE_WAIT polls so a wedged engine cannot
// hang the caller forever.

static void sisWaitIdle(AccelDriverT *drv) {
    SisPrivateT *priv = (SisPrivateT *)drv->privData;
    int32_t pollsLeft = SIS_MAX_IDLE_WAIT;

    while (pollsLeft-- > 0) {
        const uint32_t status = sisRead(priv, SIS_ENGINE_STATUS);

        if ((status & SIS_STATUS_ALL_IDLE) == SIS_STATUS_ALL_IDLE) {
            return;
        }
    }
}

62
test/86box.cfg Normal file
View file

@ -0,0 +1,62 @@
# 86Box configuration for testing DOS accelerated video drivers
# Target: S3 Trio64 with 2MB VRAM
[Machine]
machine = Award 430FX
cpu_family = intel_pentium_p54c
cpu_speed = 75000000
cpu_multi = 1.5
fpu_type = internal
mem_size = 16
time_sync = local
[Video]
gfxcard = S3 Trio64
voodoo = off
[Video S3 Trio64]
memory = 2
[Input]
mouse_type = ps2
[Sound]
sndcard = Sound Blaster 16
midi_device = none
mpu401 = none
opl_type = nuked
[Floppy and CD-ROM drives]
fdd_01_type = 35_2hd
fdd_02_type = none
cdrom_01_host_drive = 0
cdrom_01_speed = 8
cdrom_01_type = 86B_CD-ROM_1.00
cdrom_01_bus_type = ide
cdrom_01_ide_channel = 1:0
[Hard disks]
hdd_01_parameters = 63, 16, 507, 0, ide, none
hdd_01_fn = dos622.img
hdd_01_ide_channel = 0:0
[Floppy images]
fdd_01_fn =
fdd_02_fn =
[Storage controllers]
hdc = IDE (PCI)
scsi_card = none
[Network]
net_type = none
[Ports (COM & LPT)]
serial1_enabled = 1
serial2_enabled = 0
lpt1_enabled = 1
lpt1_device = none
[Other peripherals]
bugger = off
postcard = off

121
test/README.txt Normal file
View file

@ -0,0 +1,121 @@
86Box Test Environment Setup
============================
This directory contains configuration files for testing the DOS
accelerated video driver demo under 86Box, an x86 hardware emulator.
The 86box.cfg is configured for:
- Intel Pentium 75 MHz (Award 430FX chipset)
- 16 MB RAM
- S3 Trio64 with 2 MB VRAM
- Sound Blaster 16
- IDE hard disk (504 MB image)
- 3.5" 1.44 MB floppy drive
- IDE CD-ROM
Step 1: Install 86Box
---------------------
Download 86Box from https://86box.net/ and extract it to a
directory of your choice. You also need the ROM set -- place
the roms/ folder alongside the 86Box executable.
Step 2: Create a Hard Disk Image
--------------------------------
Use 86Box's built-in disk creation or an external tool:
- In 86Box: Settings > Hard Disks > New
- Create a 504 MB image named "dos622.img"
- Or use: dd if=/dev/zero of=dos622.img bs=1M count=504
The 86box.cfg expects the image at:
dos622.img (in the same directory as 86box.cfg)
Step 3: Install DOS 6.22
-------------------------
1. Copy 86box.cfg to your 86Box working directory (or point
86Box at this directory with the --vmpath flag).
2. Obtain MS-DOS 6.22 floppy images (disk1.img, disk2.img, disk3.img).
3. Start 86Box. Insert disk1.img in the floppy drive:
Settings > Floppy & CD-ROM > Floppy 1 > select disk1.img
4. Boot from floppy (the machine should boot from A: by default).
5. Follow the DOS setup process:
- FDISK: create a primary partition using all space, set active
- Reboot from floppy after FDISK
- FORMAT C: /S
- Run SETUP from the DOS disks
6. Swap floppy images when prompted for disk 2 and disk 3.
7. After setup completes, remove the floppy image and reboot
to verify DOS boots from the hard drive.
Step 4: Install CWSDPMI
-----------------------
The demo is a DJGPP (32-bit protected mode) executable and needs
a DPMI host. Download CWSDPMI from:
http://sandmann.dotster.com/cwsdpmi/
Copy CWSDPMI.EXE to C:\ on the disk image. DJGPP executables
will load it automatically when no other DPMI host is present.
Alternatively, you can use CWSDPR0.EXE for ring-0 operation,
which provides direct hardware access without virtualization
overhead.
Step 5: Copy the Demo
----------------------
Mount the disk image and copy these files to C:\:
demo.exe - the compiled demo executable
cwsdpmi.exe - DPMI host (see Step 4)
You can mount the DOS partition inside the image on Linux. The
partition starts at sector 63, so the byte offset is 63 * 512 = 32256:
sudo mount -o loop,offset=32256 dos622.img /mnt
Or use mtools:
mcopy -i dos622.img@@32256 demo.exe ::
mcopy -i dos622.img@@32256 cwsdpmi.exe ::
Also copy rundemo.bat for convenience:
mcopy -i dos622.img@@32256 rundemo.bat ::
Step 6: Run the Demo
--------------------
Boot the machine in 86Box and at the C:\> prompt:
C:\>RUNDEMO
Or run directly:
C:\>DEMO 640 480 16
Other supported modes (depending on VRAM):
C:\>DEMO 800 600 16
C:\>DEMO 640 480 32
C:\>DEMO 1024 768 8
Controls:
SPACE - cycle to next demo
B - run benchmark
ESC - exit
Troubleshooting
---------------
- "No supported video hardware found": Verify 86box.cfg has
the S3 Trio64 selected. Check that PCI is enabled.
- Black screen or garbled display: The S3 driver may not support
the requested mode at the configured VRAM size. Try a lower
resolution or color depth.
- "Load error: no DPMI": CWSDPMI.EXE is missing or not in the
PATH. Copy it to the same directory as DEMO.EXE.
- Demo runs but acceleration looks wrong: Some 86Box versions
have incomplete S3 acceleration emulation. Try updating to
the latest 86Box release.

3
test/rundemo.bat Normal file
View file

@ -0,0 +1,3 @@
@ECHO OFF
REM Run the accelerated video driver demo at 640x480 16-bit color
REM Arguments to DEMO.EXE are: width height bits-per-pixel
DEMO.EXE 640 480 16

630
trident.c Normal file
View file

@ -0,0 +1,630 @@
// trident.c -- Trident TGUI9440/9660/9680 accelerated video driver
//
// Supports the Trident TGUI family: TGUI9440, TGUI9660, TGUI9680,
// ProVidia 9685, Blade3D, and CyberBlade. These were common PCI
// chips in low-cost 1990s desktop and laptop systems.
//
// The TGUI 2D engine provides:
// - Solid rectangle fill (pattern source)
// - Screen-to-screen BitBLT
// - CPU-to-screen blit (host data transfer)
// - Hardware cursor (64x64)
//
// Register access:
// The GER (Graphics Engine Register) set uses I/O ports in the
// 0x2120-0x214F range. Operations are programmed by writing
// coordinates, dimensions, ROP, and command byte, then the engine
// executes asynchronously. Status is polled at 0x2120.
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/nearptr.h>
// ============================================================
// Trident vendor/device IDs
// ============================================================
// PCI vendor ID shared by every entry in the table below.
#define TRIDENT_VENDOR_ID 0x1023
// PCI device IDs; tgDetect() maps each one to a display name.
#define TRIDENT_TGUI9440 0x9440
#define TRIDENT_TGUI9660 0x9660
#define TRIDENT_TGUI9680 0x9680
#define TRIDENT_PROVIDIA 0x9685
#define TRIDENT_BLADE3D 0x9880
#define TRIDENT_CYBERBLADE 0x9910
// Flat list of (vendor ID, device ID) pairs handed to
// pciFindDeviceList() by tgDetect(). The final 0, 0 pair is
// presumably the list terminator -- confirm against pciFindDeviceList().
static const uint16_t sTridentDeviceIds[] = {
TRIDENT_VENDOR_ID, TRIDENT_TGUI9440,
TRIDENT_VENDOR_ID, TRIDENT_TGUI9660,
TRIDENT_VENDOR_ID, TRIDENT_TGUI9680,
TRIDENT_VENDOR_ID, TRIDENT_PROVIDIA,
TRIDENT_VENDOR_ID, TRIDENT_BLADE3D,
TRIDENT_VENDOR_ID, TRIDENT_CYBERBLADE,
0, 0
};
// ============================================================
// GER (Graphics Engine Register) ports
// ============================================================
//
// I/O port addresses used with outportb/outportw/outportl. The
// trailing comment on each line gives the intended access width and
// the register's role.
//
// NOTE(review): tgHostBlit() also streams host pixel data as dwords
// to GER_SRC_X; no separate host-data port is defined here -- confirm
// that this is the intended data path.
#define GER_STATUS 0x2120 // word: bit 0 = engine busy
#define GER_OPERMODE 0x2122 // word: bits 2:0 = bpp encoding
#define GER_COMMAND 0x2124 // byte: command register
#define GER_ROP 0x2125 // byte: raster operation
#define GER_FG_COLOR 0x2128 // dword: foreground color
#define GER_BG_COLOR 0x212C // dword: background color
#define GER_PAT_ADDR 0x2130 // dword: pattern address
#define GER_SRC_X 0x2138 // word: source X
#define GER_SRC_Y 0x213A // word: source Y
#define GER_DST_X 0x213C // word: destination X
#define GER_DST_Y 0x213E // word: destination Y
#define GER_DIM_X 0x2140 // word: width - 1
#define GER_DIM_Y 0x2142 // word: height - 1
#define GER_STYLE 0x2144 // dword: line style/pattern
#define GER_CKEY 0x2148 // dword: color key
// ============================================================
// GER status bits
// ============================================================
// Polled by tgWaitIdle(); the engine is idle once this bit reads 0.
#define GER_STATUS_BUSY 0x0001
// ============================================================
// GER command byte encoding
// ============================================================
//
// Bit 0: X direction (0=left, 1=right)
// Bit 1: Y direction (0=up, 1=down)
// Bits 3:2: source select (00=video, 01=system, 10=pattern)
// Bit 4: draw enable (must be set)
// Bit 5: mono source
// Bits 7:6: command type (00=bitblt)
//
// Several of the names below (X_LEFT, Y_UP, SRC_VIDEO, BITBLT) are
// zero; OR-ing them in changes nothing and only documents intent.
// Writing the command byte to GER_COMMAND is the last step of every
// operation in this driver.
#define GER_CMD_X_RIGHT 0x01
#define GER_CMD_X_LEFT 0x00
#define GER_CMD_Y_DOWN 0x02
#define GER_CMD_Y_UP 0x00
#define GER_CMD_SRC_VIDEO 0x00
#define GER_CMD_SRC_SYSTEM 0x04
#define GER_CMD_SRC_PATTERN 0x08
#define GER_CMD_DRAW 0x10
#define GER_CMD_MONO 0x20
#define GER_CMD_BITBLT 0x00
// Composite commands
// All three draw left-to-right, top-to-bottom and differ only in the
// source select field. tgBitBlt() builds its own command byte because
// it may need the reverse direction.
#define GER_CMD_SOLID_FILL (GER_CMD_BITBLT | GER_CMD_SRC_PATTERN | GER_CMD_DRAW | GER_CMD_X_RIGHT | GER_CMD_Y_DOWN)
#define GER_CMD_SCRBLT_FWD (GER_CMD_BITBLT | GER_CMD_SRC_VIDEO | GER_CMD_DRAW | GER_CMD_X_RIGHT | GER_CMD_Y_DOWN)
#define GER_CMD_HOSTBLT (GER_CMD_BITBLT | GER_CMD_SRC_SYSTEM | GER_CMD_DRAW | GER_CMD_X_RIGHT | GER_CMD_Y_DOWN)
// ============================================================
// GER opermode bpp encoding (bits 2:0)
// ============================================================
// Chosen by tgGetBppMode() from the bytes-per-pixel of the current
// mode. There is no entry for 3 bytes per pixel; tgGetBppMode() falls
// back to GER_BPP_8 for anything other than 2 or 4.
#define GER_BPP_8 0x00
#define GER_BPP_16 0x01
#define GER_BPP_32 0x02
// ============================================================
// ROPs for GER engine
// ============================================================
// COPY is used for screen and host blits, PAT_COPY for solid fills.
#define TGUI_ROP_COPY 0xCC
#define TGUI_ROP_PAT_COPY 0xF0
// ============================================================
// Hardware cursor
// ============================================================
//
// 64x64 cursor stored at end of VRAM. Each row is 16 bytes:
// 8 bytes AND mask followed by 8 bytes XOR mask.
// Enable via CRTC extended register 0x50 bit 7.
// Position via CRTC registers 0x40-0x43.
// The cursor image address is written to CRTC registers 0x44/0x45
// by tgInit() (in units of 1024 bytes).
#define TGUI_CURSOR_SIZE 64
#define TGUI_CURSOR_BYTES (TGUI_CURSOR_SIZE * 16) // 1024 bytes
// ============================================================
// CRTC extended registers for cursor
// ============================================================
#define TGUI_CRTC_CURSOR_X_LO 0x40
#define TGUI_CRTC_CURSOR_X_HI 0x41
#define TGUI_CRTC_CURSOR_Y_LO 0x42
#define TGUI_CRTC_CURSOR_Y_HI 0x43
#define TGUI_CRTC_CURSOR_CTRL 0x50
// ============================================================
// Miscellaneous
// ============================================================
// Upper bound on status polls in tgWaitIdle() before it gives up.
#define TGUI_MAX_IDLE_WAIT 1000000
// ============================================================
// Private driver state
// ============================================================
//
// Per-driver state reached through AccelDriverT.privData. Populated
// by tgDetect() (chipId) and tgInit() (everything else).
typedef struct {
uint32_t lfbPhysAddr; // physical framebuffer address: BAR0 with the low 4 bits masked off
uint32_t vramSize; // size of the BAR0 aperture as reported by pciSizeBar()
uint32_t cursorOffset; // byte offset of the cursor image from the start of the framebuffer
int32_t bytesPerPixel; // (bpp + 7) / 8 for the active mode
int32_t screenPitch; // pitch of the active VESA mode, copied from VesaModeResultT.pitch
uint16_t chipId; // PCI device ID of the detected chip
} TridentPrivateT;
// ============================================================
// Prototypes
// ============================================================
//
// Forward declarations, in alphabetical order, so the driver table
// below can reference the functions before their definitions.
static void tgBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool tgDetect(AccelDriverT *drv);
static uint8_t tgGetBppMode(int32_t bytesPerPixel);
static void tgHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool tgInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void tgMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void tgRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void tgSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void tgShowCursor(AccelDriverT *drv, bool visible);
static void tgShutdown(AccelDriverT *drv);
static void tgUnlockRegs(void);
static void tgWaitIdle(AccelDriverT *drv);
// ============================================================
// Driver instance
// ============================================================
//
// Single static instance of the driver and its private state. The
// name is a generic placeholder that tgDetect() replaces with the
// exact chip name; caps starts at 0 and is filled in by tgInit().
// Entry points left NULL are operations this driver does not provide.
static TridentPrivateT sTridentPrivate;
static AccelDriverT sTridentDriver = {
.name = "Trident TGUI",
.chipFamily = "trident",
.caps = 0,
.privData = &sTridentPrivate,
.detect = tgDetect,
.init = tgInit,
.shutdown = tgShutdown,
.waitIdle = tgWaitIdle,
.setClip = NULL,
.rectFill = tgRectFill,
.rectFillPat = NULL,
.bitBlt = tgBitBlt,
.hostBlit = tgHostBlit,
.colorExpand = NULL,
.lineDraw = NULL,
.setCursor = tgSetCursor,
.moveCursor = tgMoveCursor,
.showCursor = tgShowCursor,
};
// ============================================================
// tridentRegisterDriver
// ============================================================
void tridentRegisterDriver(void) {
accelRegisterDriver(&sTridentDriver);
}
// ============================================================
// tgBitBlt
// ============================================================
//
// Screen-to-screen copy of a w x h rectangle. When the destination
// lies below the source, or on the same row and to its right, the
// copy starts at the bottom-right corner and runs right-to-left,
// bottom-to-top so an overlapping source is not overwritten before it
// is read. Otherwise it runs left-to-right, top-to-bottom from the
// top-left corner. A non-positive width or height is a no-op.

static void tgBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    if (w <= 0 || h <= 0) {
        return;
    }

    tgWaitIdle(drv);

    TridentPrivateT *priv = (TridentPrivateT *)drv->privData;

    const bool backwards = (dstY > srcY) || (dstY == srcY && dstX > srcX);

    // For a backwards copy both rectangles are addressed by their
    // bottom-right pixel instead of their top-left one.
    const int32_t shiftX = backwards ? (w - 1) : 0;
    const int32_t shiftY = backwards ? (h - 1) : 0;

    uint8_t command = GER_CMD_BITBLT | GER_CMD_SRC_VIDEO | GER_CMD_DRAW;
    if (backwards) {
        command |= GER_CMD_X_LEFT | GER_CMD_Y_UP;
    } else {
        command |= GER_CMD_X_RIGHT | GER_CMD_Y_DOWN;
    }

    // Pixel format and raster operation (plain copy)
    outportw(GER_OPERMODE, tgGetBppMode(priv->bytesPerPixel));
    outportb(GER_ROP, TGUI_ROP_COPY);

    // Source, then destination start pixel
    outportw(GER_SRC_X, srcX + shiftX);
    outportw(GER_SRC_Y, srcY + shiftY);
    outportw(GER_DST_X, dstX + shiftX);
    outportw(GER_DST_Y, dstY + shiftY);

    // The engine takes size as (width - 1, height - 1)
    outportw(GER_DIM_X, w - 1);
    outportw(GER_DIM_Y, h - 1);

    // Writing the command byte starts the operation
    outportb(GER_COMMAND, command);
}
// ============================================================
// tgDetect
// ============================================================
//
// Probe the PCI bus for any adapter listed in sTridentDeviceIds. On
// a match, pciFindDeviceList() fills drv->pciDev, the PCI device ID
// is remembered in the private state and drv->name is pointed at a
// chip-specific label. Returns false when no listed device is found.

static bool tgDetect(AccelDriverT *drv) {
    int32_t listPos;   // out-parameter required by the call; not used here

    if (!pciFindDeviceList(sTridentDeviceIds, &drv->pciDev, &listPos)) {
        return false;
    }

    TridentPrivateT *priv = (TridentPrivateT *)drv->privData;
    priv->chipId = drv->pciDev.deviceId;

    // Start from the generic family label; a known device ID below
    // replaces it with the exact chip name.
    drv->name = "Trident TGUI";

    switch (drv->pciDev.deviceId) {
        case TRIDENT_TGUI9440:   drv->name = "Trident TGUI9440";      break;
        case TRIDENT_TGUI9660:   drv->name = "Trident TGUI9660";      break;
        case TRIDENT_TGUI9680:   drv->name = "Trident TGUI9680";      break;
        case TRIDENT_PROVIDIA:   drv->name = "Trident ProVidia 9685"; break;
        case TRIDENT_BLADE3D:    drv->name = "Trident Blade3D";       break;
        case TRIDENT_CYBERBLADE: drv->name = "Trident CyberBlade";    break;
        default:
            break;
    }

    return true;
}
// ============================================================
// tgGetBppMode
// ============================================================
//
// Translate bytes per pixel into the GER_OPERMODE depth code:
// 2 -> GER_BPP_16, 4 -> GER_BPP_32, anything else -> GER_BPP_8.

static uint8_t tgGetBppMode(int32_t bytesPerPixel) {
    if (bytesPerPixel == 2) {
        return GER_BPP_16;
    }

    if (bytesPerPixel == 4) {
        return GER_BPP_32;
    }

    return GER_BPP_8;
}
// ============================================================
// tgHostBlit
// ============================================================
//
// CPU-to-screen blit. Programs a w x h destination rectangle at
// (dstX, dstY) with the source select set to system memory, starts
// the engine, then streams the source rows as 32-bit dwords. Each row
// is padded with zero bytes up to a dword boundary; bytes past the
// end of a row are never read from srcBuf. A non-positive width or
// height is a no-op.
//
// NOTE(review): the pixel dwords are written to GER_SRC_X with
// outportl -- confirm that this port is the intended host-data path.

static void tgHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    if (w <= 0 || h <= 0) {
        return;
    }

    TridentPrivateT *priv = (TridentPrivateT *)drv->privData;
    const int32_t lineBytes = w * priv->bytesPerPixel;

    tgWaitIdle(drv);

    // Pixel format and raster operation (plain copy)
    outportw(GER_OPERMODE, tgGetBppMode(priv->bytesPerPixel));
    outportb(GER_ROP, TGUI_ROP_COPY);

    // Source coordinates carry no meaning for host data; zero them
    outportw(GER_SRC_X, 0);
    outportw(GER_SRC_Y, 0);

    outportw(GER_DST_X, dstX);
    outportw(GER_DST_Y, dstY);

    // The engine takes size as (width - 1, height - 1)
    outportw(GER_DIM_X, w - 1);
    outportw(GER_DIM_Y, h - 1);

    // Writing the command byte starts the operation
    outportb(GER_COMMAND, GER_CMD_HOSTBLT);

    for (int32_t y = 0; y < h; y++) {
        const uint8_t *line = srcBuf + y * srcPitch;

        // One dword per started group of four bytes
        for (int32_t pos = 0; pos < lineBytes; pos += 4) {
            int32_t avail = lineBytes - pos;
            if (avail > 4) {
                avail = 4;
            }

            // Pack little-endian: source byte k lands in bits k*8..k*8+7
            uint32_t packed = 0;
            for (int32_t k = avail - 1; k >= 0; k--) {
                packed = (packed << 8) | (uint32_t)line[pos + k];
            }

            outportl(GER_SRC_X, packed);
        }
    }
}
// ============================================================
// tgInit
// ============================================================
//
// Bring the adapter up in the requested mode: read and size BAR0,
// unlock the extended registers, set a matching VESA mode, map the
// linear framebuffer, publish the mode in drv->mode, program the
// engine pixel depth and reserve the last 1 KB-aligned slot of VRAM
// for the hardware cursor image. Returns false if no VESA mode
// matches or the framebuffer cannot be mapped (text mode is restored
// in the latter case).
//
// NOTE(review): the DPMI mapping is kept only in a local variable, so
// nothing retains it for a later dpmiUnmapFramebuffer(); tgShutdown()
// does not unmap the framebuffer. Storing the mapping in
// TridentPrivateT would allow a proper unmap -- confirm intent.

static bool tgInit(AccelDriverT *drv, const AccelModeRequestT *req) {
    TridentPrivateT *priv = (TridentPrivateT *)drv->privData;

    // Framebuffer aperture comes from BAR0 with the low four bits masked off
    const uint32_t rawBar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev,
                                       drv->pciDev.func, PCI_BAR0);
    priv->lfbPhysAddr = rawBar0 & 0xFFFFFFF0;
    priv->vramSize    = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev,
                                   drv->pciDev.func, PCI_BAR0);

    tgUnlockRegs();

    VesaModeResultT modeInfo;
    if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &modeInfo)) {
        return false;
    }

    DpmiMappingT aperture;
    if (!dpmiMapFramebuffer(priv->lfbPhysAddr, priv->vramSize, &aperture)) {
        vgaRestoreTextMode();
        return false;
    }

    priv->bytesPerPixel = (modeInfo.bpp + 7) / 8;
    priv->screenPitch   = modeInfo.pitch;

    drv->mode.width         = modeInfo.width;
    drv->mode.height        = modeInfo.height;
    drv->mode.bpp           = modeInfo.bpp;
    drv->mode.pitch         = modeInfo.pitch;
    drv->mode.framebuffer   = aperture.ptr;
    drv->mode.vramSize      = priv->vramSize;
    // Off-screen memory begins right after the visible scanlines
    drv->mode.offscreenBase = modeInfo.pitch * modeInfo.height;

    // The mode set may have re-locked the extended registers
    tgUnlockRegs();

    outportw(GER_OPERMODE, tgGetBppMode(priv->bytesPerPixel));

    // Cursor image: last TGUI_CURSOR_BYTES of VRAM, rounded down to a
    // TGUI_CURSOR_BYTES boundary
    priv->cursorOffset = (priv->vramSize - TGUI_CURSOR_BYTES)
                       & ~(uint32_t)(TGUI_CURSOR_BYTES - 1);

    // CRTC 0x44/0x45 take the cursor address in units of 1024 bytes
    const uint32_t cursorKb = priv->cursorOffset / 1024;
    vgaCrtcWrite(0x44, cursorKb & 0xFF);
    vgaCrtcWrite(0x45, (cursorKb >> 8) & 0xFF);

    drv->caps = ACAP_RECT_FILL | ACAP_BITBLT | ACAP_HOST_BLIT | ACAP_HW_CURSOR;

    tgWaitIdle(drv);

    return true;
}
// ============================================================
// tgMoveCursor
// ============================================================
//
// Position the hardware cursor through CRTC extended registers
// 0x40-0x43 (X low/high, then Y low/high). Negative coordinates are
// clamped to zero; only the low three bits of each high byte are
// written.

static void tgMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    (void)drv;

    const int32_t posX = (x < 0) ? 0 : x;
    const int32_t posY = (y < 0) ? 0 : y;

    vgaCrtcWrite(TGUI_CRTC_CURSOR_X_LO, posX & 0xFF);
    vgaCrtcWrite(TGUI_CRTC_CURSOR_X_HI, (posX >> 8) & 0x07);
    vgaCrtcWrite(TGUI_CRTC_CURSOR_Y_LO, posY & 0xFF);
    vgaCrtcWrite(TGUI_CRTC_CURSOR_Y_HI, (posY >> 8) & 0x07);
}
// ============================================================
// tgRectFill
// ============================================================
//
// Solid rectangle fill. The fill color goes into the foreground
// color register, the ROP is set to pattern copy (0xF0) and the
// engine is started with the pattern-source fill command. A
// non-positive width or height is a no-op.

static void tgRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    if (w <= 0 || h <= 0) {
        return;
    }

    TridentPrivateT *priv = (TridentPrivateT *)drv->privData;
    const uint8_t depthCode = tgGetBppMode(priv->bytesPerPixel);

    tgWaitIdle(drv);

    // Pixel format, fill color and raster operation
    outportw(GER_OPERMODE, depthCode);
    outportl(GER_FG_COLOR, color);
    outportb(GER_ROP, TGUI_ROP_PAT_COPY);

    // Top-left corner of the rectangle
    outportw(GER_DST_X, x);
    outportw(GER_DST_Y, y);

    // The engine takes size as (width - 1, height - 1)
    outportw(GER_DIM_X, w - 1);
    outportw(GER_DIM_Y, h - 1);

    // Writing the command byte starts the operation
    outportb(GER_COMMAND, GER_CMD_SOLID_FILL);
}
// ============================================================
// tgSetCursor
// ============================================================
//
// Upload a hardware cursor image into VRAM at the offset chosen by
// tgInit(). The image is stored as TGUI_CURSOR_SIZE rows of 16
// bytes: 8 bytes of AND mask followed by 8 bytes of XOR mask. Rows
// and columns outside the supplied image are filled with the
// transparent combination (AND = 0xFF, XOR = 0x00). Passing a NULL
// image just hides the cursor.
//
// NOTE(review): the source masks are indexed with a fixed 8-byte row
// stride regardless of image->width -- presumably HwCursorImageT
// masks are always laid out 64 pixels wide; confirm.

static void tgSetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    TridentPrivateT *priv = (TridentPrivateT *)drv->privData;

    if (!image) {
        tgShowCursor(drv, false);
        return;
    }

    tgWaitIdle(drv);

    uint8_t *slot = drv->mode.framebuffer + priv->cursorOffset;

    for (int32_t y = 0; y < TGUI_CURSOR_SIZE; y++) {
        for (int32_t n = 0; n < 8; n++) {
            // Default to transparent, then override when the source
            // image actually covers this byte.
            uint8_t andBits = 0xFF;
            uint8_t xorBits = 0x00;

            if (y < image->height && n < (image->width + 7) / 8) {
                const int32_t src = y * 8 + n;
                andBits = image->andMask[src];
                xorBits = image->xorMask[src];
            }

            slot[y * 16 + n]     = andBits;
            slot[y * 16 + n + 8] = xorBits;
        }
    }
}
// ============================================================
// tgShowCursor
// ============================================================
//
// Show or hide the hardware cursor with a read-modify-write of
// CRTC extended register 0x50: bit 7 set = visible, clear = hidden.
// All other bits of the register are preserved.

static void tgShowCursor(AccelDriverT *drv, bool visible) {
    (void)drv;

    const uint8_t current = vgaCrtcRead(TGUI_CRTC_CURSOR_CTRL);
    const uint8_t updated = visible ? (uint8_t)(current | 0x80)
                                    : (uint8_t)(current & 0x7F);

    vgaCrtcWrite(TGUI_CRTC_CURSOR_CTRL, updated);
}
// ============================================================
// tgShutdown
// ============================================================
//
// Tear the driver down: hide the cursor, let any running engine
// operation finish, return the display to text mode and disable
// DJGPP near-pointer access.
//
// NOTE(review): the framebuffer mapping created in tgInit() is not
// released here with dpmiUnmapFramebuffer() -- confirm whether
// disabling near pointers is meant to stand in for that.

static void tgShutdown(AccelDriverT *drv)
{
    // Cursor off first, then drain the engine before leaving the mode
    tgShowCursor(drv, false);
    tgWaitIdle(drv);

    vgaRestoreTextMode();

    __djgpp_nearptr_disable();
}
// ============================================================
// tgUnlockRegs
// ============================================================
//
// Unlock the Trident extended registers in two steps:
//   1. Select sequencer index 0x0B and read it. The value read is
//      discarded; per this driver's notes the read itself is what
//      unlocks the extended sequencer/CRTC registers.
//   2. Write 0x01 to sequencer index 0x0E, which per the same notes
//      enables the new-mode registers on TGUI chips.

static void tgUnlockRegs(void)
{
    const uint8_t seqVersionIdx = 0x0B;
    const uint8_t seqNewModeIdx = 0x0E;

    // Step 1: the read is done for its side effect only
    outportb(VGA_SEQ_INDEX, seqVersionIdx);
    (void)inportb(VGA_SEQ_DATA);

    // Step 2: switch on the new-mode register set
    outportb(VGA_SEQ_INDEX, seqNewModeIdx);
    outportb(VGA_SEQ_DATA, 0x01);
}
// ============================================================
// tgWaitIdle
// ============================================================
//
// Spin until the busy bit (bit 0) of GER_STATUS reads clear. Gives
// up silently after TGUI_MAX_IDLE_WAIT polls so a wedged engine
// cannot hang the caller forever.

static void tgWaitIdle(AccelDriverT *drv) {
    (void)drv;

    int32_t pollsLeft = TGUI_MAX_IDLE_WAIT;

    while (pollsLeft-- > 0) {
        const bool busy = (inportw(GER_STATUS) & GER_STATUS_BUSY) != 0;

        if (!busy) {
            return;
        }
    }
}

698
tsengW32.c Normal file
View file

@ -0,0 +1,698 @@
// tsengW32.c -- Tseng ET4000/W32p accelerated video driver
//
// Supports the Tseng Labs ET4000/W32 family: W32, W32i, W32p rev A/B/C/D.
// These chips were common in ISA/VLB and early PCI systems of the early
// 1990s, offering good 2D acceleration for their era.
//
// The W32 ACL (Accelerator) engine provides:
// - Solid rectangle fill
// - 8x8 pattern fill (mono and color)
// - Screen-to-screen BitBLT
// - CPU-to-screen color expansion
// - Bresenham line draw (W32p only)
// - Hardware cursor (64x64 on W32p, not on W32/W32i)
//
// Register access:
// The ACL registers are accessed via I/O ports in the 0x21xx range
// after unlocking with a key sequence. The ACL uses a different
// programming model from S3 or ATI -- operations are set up by
// writing source/destination addresses, dimensions, and mix/ROP
// to indexed registers, then triggered by writing to the
// accelerator control register.
//
// On the W32p, an MMU (Memory Management Unit) provides four
// apertures at the end of the linear address space that can be
// used for CPU-to-screen data transfer, avoiding I/O port
// overhead for host blits.
#include "accelVid.h"
#include "vgaCommon.h"
#include "pci.h"
#include <pc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/nearptr.h>
// ============================================================
// Tseng vendor/device IDs
// ============================================================
// PCI vendor ID shared by every entry in the table below.
#define TSENG_VENDOR_ID 0x100C
// PCI device IDs for the W32, W32i and W32p rev A-D variants.
#define TSENG_W32 0x3202
#define TSENG_W32I 0x3205
#define TSENG_W32P_A 0x3206
#define TSENG_W32P_B 0x3207
#define TSENG_W32P_C 0x3208
#define TSENG_W32P_D 0x4702
// Flat list of (vendor ID, device ID) pairs. The final 0, 0 pair is
// presumably the list terminator -- confirm against the PCI lookup
// helper that consumes this table.
static const uint16_t sTsengDeviceIds[] = {
TSENG_VENDOR_ID, TSENG_W32,
TSENG_VENDOR_ID, TSENG_W32I,
TSENG_VENDOR_ID, TSENG_W32P_A,
TSENG_VENDOR_ID, TSENG_W32P_B,
TSENG_VENDOR_ID, TSENG_W32P_C,
TSENG_VENDOR_ID, TSENG_W32P_D,
0, 0
};
// ============================================================
// Tseng ACL register ports
// ============================================================
//
// The ACL registers are at I/O ports 0x2100-0x217F. They are
// accessed as indexed registers via a base+offset scheme.
//
// The definitions below are grouped by function, not strictly by
// address: ET_ACL_ROUTING_CTRL (0x2126) and ET_ACL_MIX_CONTROL
// (0x2127) sit between ET_ACL_XY_DIR (0x2124) and ET_ACL_X_COUNT
// (0x2128) in the port map.
#define ET_ACL_SUSPEND_TERM 0x2100 // suspend/terminate
#define ET_ACL_OPERATION_STATE 0x2101 // operation state (read)
#define ET_ACL_SYNC_ENABLE 0x2102 // sync enable
#define ET_ACL_INT_STATUS 0x2109 // interrupt status
#define ET_ACL_INT_MASK 0x210A // interrupt mask
// ACL setup registers
#define ET_ACL_PATTERN_ADDR 0x2110 // pattern address (3 bytes)
#define ET_ACL_SOURCE_ADDR 0x2114 // source address (3 bytes)
#define ET_ACL_PATTERN_Y_OFF 0x2118 // pattern Y offset
#define ET_ACL_SOURCE_Y_OFF 0x211A // source Y offset
#define ET_ACL_DEST_Y_OFF 0x211C // destination Y offset
// Virtual bus size affects transfer granularity
#define ET_ACL_VBUS_SIZE 0x2120 // virtual bus size
// X/Y count (dimensions)
#define ET_ACL_XY_DIR 0x2124 // X/Y direction
#define ET_ACL_X_COUNT 0x2128 // X count (width - 1, in bytes)
#define ET_ACL_Y_COUNT 0x212A // Y count (height - 1)
// Routing control
#define ET_ACL_ROUTING_CTRL 0x2126 // routing control
// Mix/ROP registers
#define ET_ACL_MIX_CONTROL 0x2127 // foreground/background source
#define ET_ACL_ROP 0x2130 // raster operation
// Destination address
#define ET_ACL_DEST_ADDR 0x2134 // destination address (3 bytes)
// Pixel depth control
#define ET_ACL_PIXEL_DEPTH 0x2138 // pixel depth (0=8, 1=15/16, 2=24, 3=32)
// CPU source data port (for host-to-screen)
#define ET_ACL_CPU_DATA 0x2140 // CPU data register (32-bit)
// ============================================================
// ACL direction bits (ET_ACL_XY_DIR)
// ============================================================
#define ET_DIR_X_POS 0x00
#define ET_DIR_X_NEG 0x01
#define ET_DIR_Y_POS 0x00
#define ET_DIR_Y_NEG 0x02
// ============================================================
// ACL routing control (ET_ACL_ROUTING_CTRL)
// ============================================================
#define ET_ROUTE_SRC_VRAM 0x00 // source from video memory
#define ET_ROUTE_SRC_CPU 0x02 // source from CPU
#define ET_ROUTE_SRC_PATTERN 0x04 // source from pattern
#define ET_ROUTE_SRC_COLOR_EXP 0x06 // source is mono -> color expand
#define ET_ROUTE_DST_VRAM 0x00 // destination to video memory
// ============================================================
// ACL mix control (ET_ACL_MIX_CONTROL)
// ============================================================
#define ET_MIX_FG_SRC 0x00 // foreground from source
#define ET_MIX_FG_PATTERN 0x04 // foreground from pattern
#define ET_MIX_FG_COLOR 0x08 // foreground from foreground color reg
#define ET_MIX_BG_SRC 0x00 // background from source
#define ET_MIX_BG_PATTERN 0x10 // background from pattern
#define ET_MIX_BG_COLOR 0x20 // background from background color reg
// ============================================================
// ACL operation state bits
// ============================================================
#define ET_ACCEL_BUSY 0x02 // accelerator busy
#define ET_ACCEL_CMD_READY 0x01 // ready for next command
// ============================================================
// ACL suspend/terminate control
// ============================================================
#define ET_ACL_START 0x00 // start/continue operation
#define ET_ACL_SUSPEND 0x01 // suspend
#define ET_ACL_TERMINATE 0x02 // terminate
// Common ROPs
#define ET_ROP_COPY 0xCC // dest = source
#define ET_ROP_PAT_COPY 0xF0 // dest = pattern
#define ET_ROP_ZERO 0x00
#define ET_ROP_ONE 0xFF
#define ET_ROP_XOR 0x66
// Hardware cursor
#define ET_HW_CURSOR_SIZE 64
#define ET_HW_CURSOR_BYTES 1024
// Maximum wait iterations
#define ET_MAX_IDLE_WAIT 1000000
// ============================================================
// Private driver state
// ============================================================
//
// Per-driver state reached through AccelDriverT.privData. isW32p is
// set by etDetect(); the remaining fields are filled in by etInit().
typedef struct {
uint32_t lfbPhysAddr; // PCI BAR0 with the low four flag bits masked off
uint32_t vramSize; // size reported by pciSizeBar() for BAR0, in bytes
uint32_t cursorOffset; // byte offset of the cursor image in VRAM (set only when isW32p)
int32_t bytesPerPixel; // (bpp + 7) / 8 of the VESA mode that was set
int32_t screenPitch; // bytes per scanline as reported by the VESA mode
bool isW32p; // W32p has more features than W32/W32i
} TsengPrivateT;
// ============================================================
// Prototypes
// ============================================================
//
// Forward declarations for the file-local driver entry points, kept
// in alphabetical order. They are needed because sTsengDriver below
// takes the addresses of these functions before they are defined.
static void etBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool etDetect(AccelDriverT *drv);
static void etHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h);
static bool etInit(AccelDriverT *drv, const AccelModeRequestT *req);
static void etMoveCursor(AccelDriverT *drv, int32_t x, int32_t y);
static void etRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color);
static void etSetCursor(AccelDriverT *drv, const HwCursorImageT *image);
static void etShowCursor(AccelDriverT *drv, bool visible);
static void etShutdown(AccelDriverT *drv);
static void etUnlockRegs(void);
static void etWaitIdle(AccelDriverT *drv);
// ============================================================
// Driver instance
// ============================================================
//
// The single driver object registered by etRegisterDriver().
// .name is a default that etDetect() overwrites with the detected
// variant; .caps starts at 0 and is filled in by etInit(). Entry
// points left NULL are operations this driver does not implement.
static TsengPrivateT sTsengPrivate;
static AccelDriverT sTsengDriver = {
.name = "Tseng ET4000/W32p",
.chipFamily = "tseng",
.caps = 0,
.privData = &sTsengPrivate,
.detect = etDetect,
.init = etInit,
.shutdown = etShutdown,
.waitIdle = etWaitIdle,
.setClip = NULL, // W32 has no hardware scissors
.rectFill = etRectFill,
.rectFillPat = NULL,
.bitBlt = etBitBlt,
.hostBlit = etHostBlit,
.colorExpand = NULL,
.lineDraw = NULL, // Line draw is complex on W32, omit for now
.setCursor = etSetCursor,
.moveCursor = etMoveCursor,
.showCursor = etShowCursor,
};
// ============================================================
// etRegisterDriver
// ============================================================
//
// Public entry point of this file: hands the static Tseng driver
// object to the framework via accelRegisterDriver().
void etRegisterDriver(void) {
accelRegisterDriver(&sTsengDriver);
}
// ============================================================
// etBitBlt
// ============================================================
//
// Screen-to-screen copy performed by the ACL engine.
//
// (srcX, srcY) and (dstX, dstY) are pixel coordinates; they are
// converted to linear byte offsets in VRAM using the current pitch
// and bytes per pixel. w and h are in pixels; non-positive sizes are
// ignored. When the destination offset is above the source offset the
// copy runs in the negative X and Y directions, starting from the
// last pixel of the last row, so overlapping regions are not
// clobbered. The call returns once the operation has been started; it
// does not wait for completion.
//
// NOTE(review): in the reverse case the start addresses point at the
// FIRST byte of the last pixel while the X count is in bytes --
// confirm against the W32 data book whether the last byte is expected
// for bytesPerPixel > 1.
static void etBitBlt(AccelDriverT *drv, int32_t srcX, int32_t srcY, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    if (w <= 0 || h <= 0) {
        return;
    }

    TsengPrivateT *state = (TsengPrivateT *)drv->privData;
    const int32_t pixelBytes = state->bytesPerPixel;
    const int32_t stride = state->screenPitch;
    const int32_t xCount = w * pixelBytes - 1;

    uint32_t fromAddr = srcY * stride + srcX * pixelBytes;
    uint32_t toAddr = dstY * stride + dstX * pixelBytes;
    uint8_t blitDir = ET_DIR_X_POS | ET_DIR_Y_POS;

    if (toAddr > fromAddr) {
        // Copy bottom-up, right-to-left: move both pointers to the last pixel.
        const int32_t lastPixel = (h - 1) * stride + (w - 1) * pixelBytes;
        blitDir = ET_DIR_X_NEG | ET_DIR_Y_NEG;
        fromAddr += lastPixel;
        toAddr += lastPixel;
    }

    // Depth code for ET_ACL_PIXEL_DEPTH; anything but 2 or 4 bytes maps to 0.
    uint8_t depthCode;
    switch (pixelBytes) {
        case 2:
            depthCode = 1;
            break;
        case 4:
            depthCode = 3;
            break;
        default:
            depthCode = 0;
            break;
    }

    etWaitIdle(drv);

    outportb(ET_ACL_PIXEL_DEPTH, depthCode);
    outportb(ET_ACL_ROUTING_CTRL, ET_ROUTE_SRC_VRAM | ET_ROUTE_DST_VRAM);
    outportb(ET_ACL_ROP, ET_ROP_COPY);
    outportb(ET_ACL_XY_DIR, blitDir);

    // Both Y offsets are programmed as pitch - 1.
    outportw(ET_ACL_SOURCE_Y_OFF, stride - 1);
    outportw(ET_ACL_DEST_Y_OFF, stride - 1);

    // Counts are "minus one" values: bytes per row and rows.
    outportw(ET_ACL_X_COUNT, xCount);
    outportw(ET_ACL_Y_COUNT, h - 1);

    // 24-bit source address, low byte first.
    outportb(ET_ACL_SOURCE_ADDR, fromAddr & 0xFF);
    outportb(ET_ACL_SOURCE_ADDR + 1, (fromAddr >> 8) & 0xFF);
    outportb(ET_ACL_SOURCE_ADDR + 2, (fromAddr >> 16) & 0xFF);

    // 24-bit destination address, low byte first.
    outportb(ET_ACL_DEST_ADDR, toAddr & 0xFF);
    outportb(ET_ACL_DEST_ADDR + 1, (toAddr >> 8) & 0xFF);
    outportb(ET_ACL_DEST_ADDR + 2, (toAddr >> 16) & 0xFF);

    outportb(ET_ACL_SUSPEND_TERM, ET_ACL_START);
}
// ============================================================
// etDetect
// ============================================================
//
// Probes the PCI bus for any ID in sTsengDeviceIds. On a match the
// located device is stored in drv->pciDev, drv->name is set to the
// detected variant, and the private isW32p flag is set for the four
// W32p device IDs. Returns false when no listed device is present.
static bool etDetect(AccelDriverT *drv) {
    int32_t listIndex;

    if (!pciFindDeviceList(sTsengDeviceIds, &drv->pciDev, &listIndex)) {
        return false;
    }

    TsengPrivateT *state = (TsengPrivateT *)drv->privData;

    if (drv->pciDev.deviceId == TSENG_W32P_A ||
        drv->pciDev.deviceId == TSENG_W32P_B ||
        drv->pciDev.deviceId == TSENG_W32P_C ||
        drv->pciDev.deviceId == TSENG_W32P_D) {
        drv->name = "Tseng ET4000/W32p";
        state->isW32p = true;
    } else if (drv->pciDev.deviceId == TSENG_W32I) {
        drv->name = "Tseng ET4000/W32i";
        state->isW32p = false;
    } else {
        // TSENG_W32 and any unrecognised ID are reported as a plain W32.
        drv->name = "Tseng ET4000/W32";
        state->isW32p = false;
    }

    return true;
}
// ============================================================
// etHostBlit
// ============================================================
//
// Host-memory-to-screen blit through the ACL engine.
//
// srcBuf holds h rows of w pixels in the current screen pixel format,
// srcPitch bytes apart. The engine is set up with the CPU as source,
// then every row is pushed through ET_ACL_CPU_DATA as little-endian
// 32-bit words. Rows whose byte length is not a multiple of four are
// padded with zero bytes in the last word; the source buffer is never
// read past the end of a row. Non-positive sizes are ignored.
static void etHostBlit(AccelDriverT *drv, const uint8_t *srcBuf, int32_t srcPitch, int32_t dstX, int32_t dstY, int32_t w, int32_t h) {
    if (w <= 0 || h <= 0) {
        return;
    }

    TsengPrivateT *state = (TsengPrivateT *)drv->privData;
    const int32_t pixelBytes = state->bytesPerPixel;
    const int32_t stride = state->screenPitch;
    const int32_t lineBytes = w * pixelBytes;
    const int32_t paddedLineBytes = (lineBytes + 3) & ~3;
    const uint32_t target = dstY * stride + dstX * pixelBytes;

    // Depth code for ET_ACL_PIXEL_DEPTH; anything but 2 or 4 bytes maps to 0.
    uint8_t depthCode;
    switch (pixelBytes) {
        case 2:
            depthCode = 1;
            break;
        case 4:
            depthCode = 3;
            break;
        default:
            depthCode = 0;
            break;
    }

    etWaitIdle(drv);

    outportb(ET_ACL_PIXEL_DEPTH, depthCode);
    outportb(ET_ACL_ROUTING_CTRL, ET_ROUTE_SRC_CPU | ET_ROUTE_DST_VRAM);
    outportb(ET_ACL_ROP, ET_ROP_COPY);
    outportb(ET_ACL_XY_DIR, ET_DIR_X_POS | ET_DIR_Y_POS);
    outportw(ET_ACL_DEST_Y_OFF, stride - 1);

    // Counts are "minus one" values: bytes per row and rows.
    outportw(ET_ACL_X_COUNT, lineBytes - 1);
    outportw(ET_ACL_Y_COUNT, h - 1);

    // 24-bit destination address, low byte first.
    outportb(ET_ACL_DEST_ADDR, target & 0xFF);
    outportb(ET_ACL_DEST_ADDR + 1, (target >> 8) & 0xFF);
    outportb(ET_ACL_DEST_ADDR + 2, (target >> 16) & 0xFF);

    outportb(ET_ACL_SUSPEND_TERM, ET_ACL_START);

    // Stream the pixel data, one padded row at a time.
    for (int32_t line = 0; line < h; line++) {
        const uint8_t *pixels = srcBuf + line * srcPitch;

        for (int32_t offset = 0; offset < paddedLineBytes; offset += 4) {
            uint32_t packed = 0;

            // Bytes beyond the end of the row stay zero.
            for (int32_t lane = 0; lane < 4 && offset + lane < lineBytes; lane++) {
                packed |= (uint32_t)pixels[offset + lane] << (lane * 8);
            }

            outportl(ET_ACL_CPU_DATA, packed);
        }
    }
}
// ============================================================
// etInit
// ============================================================
//
// Brings the card into the requested graphics mode and prepares the
// ACL engine.
//
//   drv - driver object; drv->pciDev must already be filled in by
//         etDetect().
//   req - requested width, height and bpp.
//
// Returns true on success with drv->mode and drv->caps filled in.
// Returns false if BAR0 is unusable, no VESA mode could be set, or
// the framebuffer could not be mapped (text mode is restored in the
// last case).
//
// NOTE(review): vramSize is the size of the BAR0 aperture as reported
// by pciSizeBar(), which is not necessarily the amount of memory
// fitted on the card -- confirm that the cursor and fill scratch
// areas placed at the end of this range are backed by real VRAM.
static bool etInit(AccelDriverT *drv, const AccelModeRequestT *req) {
    TsengPrivateT *priv = (TsengPrivateT *)drv->privData;

    // Get LFB from PCI BAR0
    uint32_t bar0 = pciRead32(drv->pciDev.bus, drv->pciDev.dev,
                              drv->pciDev.func, PCI_BAR0);
    priv->lfbPhysAddr = bar0 & 0xFFFFFFF0;
    priv->vramSize = pciSizeBar(drv->pciDev.bus, drv->pciDev.dev,
                                drv->pciDev.func, PCI_BAR0);

    // An unassigned or unimplemented BAR gives address 0 / size 0.
    // Continuing would request a zero-length mapping and make the
    // "end of VRAM" offsets used for the cursor image and the fill
    // scratch pixel wrap around, so bail out before touching the mode.
    if (priv->lfbPhysAddr == 0 || priv->vramSize < ET_HW_CURSOR_BYTES) {
        return false;
    }

    // Unlock Tseng extended registers
    etUnlockRegs();

    // Find and set VESA mode
    VesaModeResultT vesa;
    if (!vesaFindAndSetMode(req->width, req->height, req->bpp, &vesa)) {
        return false;
    }

    // Map LFB via DPMI
    DpmiMappingT lfbMap;
    if (!dpmiMapFramebuffer(priv->lfbPhysAddr, priv->vramSize, &lfbMap)) {
        vgaRestoreTextMode();
        return false;
    }

    // Fill in driver mode info
    priv->bytesPerPixel = (vesa.bpp + 7) / 8;
    priv->screenPitch = vesa.pitch;
    drv->mode.width = vesa.width;
    drv->mode.height = vesa.height;
    drv->mode.bpp = vesa.bpp;
    drv->mode.pitch = vesa.pitch;
    drv->mode.framebuffer = lfbMap.ptr;
    drv->mode.vramSize = priv->vramSize;
    drv->mode.offscreenBase = vesa.pitch * vesa.height;

    // Re-unlock after mode set
    etUnlockRegs();

    // Reset the ACL engine
    outportb(ET_ACL_SUSPEND_TERM, ET_ACL_TERMINATE);
    outportb(ET_ACL_SUSPEND_TERM, ET_ACL_START);

    drv->caps = ACAP_RECT_FILL | ACAP_BITBLT | ACAP_HOST_BLIT;

    // Hardware cursor (W32p only): reserve the last 1 KB-aligned block
    // of the aperture for the cursor image.
    if (priv->isW32p) {
        priv->cursorOffset = priv->vramSize - ET_HW_CURSOR_BYTES;
        priv->cursorOffset &= ~(ET_HW_CURSOR_BYTES - 1);
        drv->caps |= ACAP_HW_CURSOR;
    }

    etWaitIdle(drv);
    return true;
}
// ============================================================
// etMoveCursor
// ============================================================
//
// Positions the W32p hardware cursor.
//
// The position is written through the index/data port pair
// 0x217A/0x217B: indices 0xE0/0xE1 take the low byte and the upper
// three bits of X, indices 0xE2/0xE3 the same for Y. Negative
// coordinates are clamped to 0. Like etSetCursor() and
// etShowCursor(), this is a no-op on parts that are not W32p, since
// etInit() only advertises ACAP_HW_CURSOR for W32p.
static void etMoveCursor(AccelDriverT *drv, int32_t x, int32_t y) {
    TsengPrivateT *priv = (TsengPrivateT *)drv->privData;

    if (!priv->isW32p) {
        return;
    }

    if (x < 0) { x = 0; }
    if (y < 0) { y = 0; }

    // ET4000/W32p cursor position registers
    outportb(0x217A, 0xE0); // cursor X low
    outportb(0x217B, x & 0xFF);
    outportb(0x217A, 0xE1); // cursor X high (3 bits)
    outportb(0x217B, (x >> 8) & 0x07);
    outportb(0x217A, 0xE2); // cursor Y low
    outportb(0x217B, y & 0xFF);
    outportb(0x217A, 0xE3); // cursor Y high (3 bits)
    outportb(0x217B, (y >> 8) & 0x07);
}
// ============================================================
// etRectFill
// ============================================================
//
// Solid fill using the ACL engine. One pixel of the fill colour is
// written by the CPU to a scratch location near the end of VRAM and
// then used as the pattern source of a pattern-copy operation.
//
//   x, y  - top-left corner in pixels
//   w, h  - size in pixels; non-positive sizes are ignored
//   color - pixel value; its low bytesPerPixel bytes are stored
//           low byte first
//
// Scratch placement: on W32p the last 1 KB-aligned block of VRAM
// holds the hardware cursor image (see etInit()/etSetCursor()), so the
// scratch pixel goes in the 64 bytes directly below that block.
// Placing it at vramSize - 64 would overwrite row 60 of the cursor
// image on every fill. On other parts the last 64 bytes of VRAM are
// used.
//
// NOTE(review): nothing here reserves the scratch bytes from users of
// drv->mode.offscreenBase -- confirm callers never allocate the tail
// of VRAM. The pattern wrap setup needed for a one-pixel pattern has
// also not been checked against the W32 data book.
static void etRectFill(AccelDriverT *drv, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    TsengPrivateT *priv = (TsengPrivateT *)drv->privData;

    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t bpp = priv->bytesPerPixel;
    int32_t pitch = priv->screenPitch;

    // Pick a scratch area that does not overlap the cursor image.
    const uint32_t scratchBytes = 64;
    uint32_t scratchTop = priv->isW32p ? priv->cursorOffset : priv->vramSize;
    if (scratchTop < scratchBytes) {
        return; // no room for the scratch pixel; avoid a wrapped offset
    }
    uint32_t patAddr = scratchTop - scratchBytes;
    uint8_t *fb = drv->mode.framebuffer;

    // The engine must be idle before the CPU touches the pattern bytes
    // a previous fill may still be reading.
    etWaitIdle(drv);

    // Write pattern pixel to VRAM, low byte first
    for (int32_t i = 0; i < bpp; i++) {
        fb[patAddr + i] = (color >> (i * 8)) & 0xFF;
    }

    uint32_t dstAddr = y * pitch + x * bpp;
    int32_t widthBytes = w * bpp - 1;

    // Set pixel depth
    uint8_t pixDepth = 0;
    if (bpp == 2) { pixDepth = 1; }
    if (bpp == 4) { pixDepth = 3; }
    outportb(ET_ACL_PIXEL_DEPTH, pixDepth);

    // Routing: pattern fill
    outportb(ET_ACL_ROUTING_CTRL, ET_ROUTE_SRC_PATTERN | ET_ROUTE_DST_VRAM);

    // ROP: pattern copy
    outportb(ET_ACL_ROP, ET_ROP_PAT_COPY);

    // Direction: forward
    outportb(ET_ACL_XY_DIR, ET_DIR_X_POS | ET_DIR_Y_POS);

    // Pattern address and Y offset
    outportb(ET_ACL_PATTERN_ADDR, patAddr & 0xFF);
    outportb(ET_ACL_PATTERN_ADDR + 1, (patAddr >> 8) & 0xFF);
    outportb(ET_ACL_PATTERN_ADDR + 2, (patAddr >> 16) & 0xFF);
    outportw(ET_ACL_PATTERN_Y_OFF, 0); // single-line pattern

    // Dest Y offset
    outportw(ET_ACL_DEST_Y_OFF, pitch - 1);

    // Dimensions ("minus one" values)
    outportw(ET_ACL_X_COUNT, widthBytes);
    outportw(ET_ACL_Y_COUNT, h - 1);

    // Destination address, low byte first
    outportb(ET_ACL_DEST_ADDR, dstAddr & 0xFF);
    outportb(ET_ACL_DEST_ADDR + 1, (dstAddr >> 8) & 0xFF);
    outportb(ET_ACL_DEST_ADDR + 2, (dstAddr >> 16) & 0xFF);

    // Start
    outportb(ET_ACL_SUSPEND_TERM, ET_ACL_START);
}
// ============================================================
// etSetCursor
// ============================================================
//
// Uploads a cursor image to the reserved cursor block in VRAM and
// points the hardware at it (W32p only; no-op otherwise). A NULL
// image hides the cursor instead.
//
// The stored layout is 64 rows of 16 bytes: 8 AND-mask bytes followed
// by 8 XOR-mask bytes. Bytes outside the image's width/height are
// filled with AND = 0xFF, XOR = 0x00.
//
// NOTE(review): the masks are indexed as row * 8 + byte, i.e. the
// source is assumed to have an 8-byte row stride regardless of
// image->width -- confirm against the HwCursorImageT definition.
static void etSetCursor(AccelDriverT *drv, const HwCursorImageT *image) {
    TsengPrivateT *state = (TsengPrivateT *)drv->privData;

    if (!state->isW32p) {
        return;
    }

    if (image == NULL) {
        etShowCursor(drv, false);
        return;
    }

    etWaitIdle(drv);

    uint8_t *dest = drv->mode.framebuffer + state->cursorOffset;

    for (int32_t line = 0; line < ET_HW_CURSOR_SIZE; line++) {
        uint8_t *andOut = dest + line * 16;
        uint8_t *xorOut = andOut + 8;

        for (int32_t col = 0; col < 8; col++) {
            const int32_t maskIdx = line * 8 + col;
            const bool insideImage = line < image->height && col < (image->width + 7) / 8;

            andOut[col] = insideImage ? image->andMask[maskIdx] : 0xFF;
            xorOut[col] = insideImage ? image->xorMask[maskIdx] : 0x00;
        }
    }

    // Cursor image address, in dword units, through index registers
    // 0xE8 (low), 0xE9 (middle) and 0xEA (top four bits).
    const uint32_t dwordAddr = state->cursorOffset / 4;
    outportb(0x217A, 0xE8);
    outportb(0x217B, dwordAddr & 0xFF);
    outportb(0x217A, 0xE9);
    outportb(0x217B, (dwordAddr >> 8) & 0xFF);
    outportb(0x217A, 0xEA);
    outportb(0x217B, (dwordAddr >> 16) & 0x0F);
}
// ============================================================
// etShowCursor
// ============================================================
//
// Shows or hides the hardware cursor (W32p only; no-op otherwise) by
// a read-modify-write of bit 7 of index register 0xF7, accessed
// through the 0x217A/0x217B index/data pair. All other bits of the
// register are preserved.
static void etShowCursor(AccelDriverT *drv, bool visible) {
    TsengPrivateT *state = (TsengPrivateT *)drv->privData;

    if (!state->isW32p) {
        return;
    }

    // Read the current control byte.
    outportb(0x217A, 0xF7);
    const uint8_t current = inportb(0x217B);

    const uint8_t updated = visible ? (uint8_t)(current | 0x80)
                                    : (uint8_t)(current & ~0x80);

    // Select the register again before writing the new value back.
    outportb(0x217A, 0xF7);
    outportb(0x217B, updated);
}
// ============================================================
// etShutdown
// ============================================================
//
// Tears the driver down: hides the hardware cursor, terminates any
// ACL operation, switches back to text mode and disables DJGPP near
// pointers.
// NOTE(review): the DpmiMappingT created in etInit() is a local there
// and only its pointer is kept, so the DPMI mapping itself cannot be
// released from here -- confirm whether that is intended.
static void etShutdown(AccelDriverT *drv) {
etShowCursor(drv, false);
outportb(ET_ACL_SUSPEND_TERM, ET_ACL_TERMINATE);
vgaRestoreTextMode();
__djgpp_nearptr_disable();
}
// ============================================================
// etUnlockRegs
// ============================================================
//
// Unlock Tseng extended registers.
// ET4000 "key" sequence: write 0x03 to port 0x3BF, then 0xA0 to port
// 0x3D8. This enables access to extended CRTC and attribute registers.
// etInit() calls this both before and after the VESA mode set.
static void etUnlockRegs(void) {
outportb(0x3BF, 0x03);
outportb(0x3D8, 0xA0);
}
// ============================================================
// etWaitIdle
// ============================================================
//
// Blocks until the ACL engine reports idle, i.e. until the
// ET_ACCEL_BUSY bit of the operation state register reads as clear.
// The register is polled at most ET_MAX_IDLE_WAIT times; if the
// engine is still busy after that the function returns anyway, and
// the caller gets no indication of the timeout.
static void etWaitIdle(AccelDriverT *drv) {
    (void)drv;

    int32_t pollsLeft = ET_MAX_IDLE_WAIT;

    while (pollsLeft-- > 0) {
        if ((inportb(ET_ACL_OPERATION_STATE) & ET_ACCEL_BUSY) == 0) {
            break;
        }
    }
}

505
vgaCommon.c Normal file
View file

@ -0,0 +1,505 @@
// vgaCommon.c -- Shared VGA register programming
//
// Implements read/write access to the five standard VGA register
// groups. These are used by all chip-specific drivers for basic
// mode setup before enabling acceleration. The file also holds the
// helpers the drivers share for mapping a framebuffer through DPMI,
// sizing a PCI BAR, and finding and setting a VESA VBE mode.
//
// Important timing note: on real hardware, some registers require
// specific sequencing (e.g. the attribute controller flip-flop must
// be reset via a read of Input Status 1 before writing the index).
// These functions handle the sequencing internally.
#include "vgaCommon.h"
#include "pci.h"
#include <dpmi.h>
#include <go32.h>
#include <pc.h>
#include <stdio.h>
#include <string.h>
#include <sys/farptr.h>
#include <sys/nearptr.h>
// VESA mode scoring weights (same as DVX)
//
// vesaFindAndSetMode() gives each candidate a base score by colour
// depth, then adds MODE_SCORE_PREF_BPP when the depth equals the
// requested one and MODE_SCORE_EXACT_RES when the resolution matches
// exactly. The highest total wins.
// NOTE(review): with these values a requested 8 bpp mode scores at
// most 70 + 20 + 10 = 100, which ties with (and does not beat) an
// unrequested 16 bpp mode at a larger resolution and loses to a
// 16 bpp mode at the exact resolution (110) -- confirm this
// preference for 16 bpp is intended.
#define MODE_SCORE_16BPP 100
#define MODE_SCORE_15BPP 90
#define MODE_SCORE_32BPP 85
#define MODE_SCORE_8BPP 70
#define MODE_SCORE_PREF_BPP 20
#define MODE_SCORE_EXACT_RES 10
// ============================================================
// Prototypes
// ============================================================
//
// Declarations of every externally visible function defined in this
// file. These presumably repeat the declarations in vgaCommon.h --
// if so, the two lists must be kept in sync.
bool dpmiMapFramebuffer(uint32_t physAddr, uint32_t size, DpmiMappingT *mapping);
void dpmiUnmapFramebuffer(DpmiMappingT *mapping);
uint32_t pciSizeBar(uint8_t bus, uint8_t dev, uint8_t func, uint8_t barReg);
uint8_t vgaAttrRead(uint8_t index);
void vgaAttrReset(void);
void vgaAttrWrite(uint8_t index, uint8_t val);
void vgaBlankScreen(bool blank);
uint8_t vgaCrtcRead(uint8_t index);
void vgaCrtcLock(void);
void vgaCrtcUnlock(void);
void vgaCrtcWrite(uint8_t index, uint8_t val);
void vgaDacReadColor(uint8_t index, uint8_t *r, uint8_t *g, uint8_t *b);
void vgaDacWriteColor(uint8_t index, uint8_t r, uint8_t g, uint8_t b);
uint8_t vgaGfxRead(uint8_t index);
void vgaGfxWrite(uint8_t index, uint8_t val);
uint8_t vgaMiscRead(void);
void vgaMiscWrite(uint8_t val);
void vgaRestoreTextMode(void);
uint8_t vgaSeqRead(uint8_t index);
void vgaSeqWrite(uint8_t index, uint8_t val);
bool vesaFindAndSetMode(int32_t reqW, int32_t reqH, int32_t reqBpp, VesaModeResultT *result);
void vgaWaitVRetrace(void);
// ============================================================
// dpmiMapFramebuffer
// ============================================================
//
// Maps a physical address region into the DJGPP near pointer
// address space via DPMI. This is the three-step process that
// every driver needs:
// 1. Map physical address to linear address
// 2. Lock the pages to prevent swapping
// 3. Enable near pointers for direct C pointer access
//
//   physAddr - physical base address of the region
//   size     - length of the region in bytes; 0 is rejected
//   mapping  - receives the pointer, linear address and size
//
// Returns true on success. On failure *mapping is left zeroed (so
// mapping->ptr is NULL) and anything acquired along the way is
// released again. A failure to lock the pages is tolerated: the
// mapping is still usable and not every DPMI host supports locking.
bool dpmiMapFramebuffer(uint32_t physAddr, uint32_t size, DpmiMappingT *mapping) {
    __dpmi_meminfo info;
    __dpmi_meminfo lockInfo;
    bool locked;

    memset(mapping, 0, sizeof(*mapping));

    if (size == 0) {
        fprintf(stderr, "dpmiMap: Refusing zero-length mapping of 0x%08lX\n",
                (unsigned long)physAddr);
        return false;
    }

    // Step 1: physical -> linear. On success info.address is rewritten
    // with the linear address.
    memset(&info, 0, sizeof(info));
    info.address = physAddr;
    info.size = size;
    if (__dpmi_physical_address_mapping(&info) != 0) {
        fprintf(stderr, "dpmiMap: Failed to map 0x%08lX (%lu bytes)\n",
                (unsigned long)physAddr, (unsigned long)size);
        return false;
    }

    // Step 2: lock (best effort). Remember the outcome so the failure
    // path below only unlocks what was actually locked.
    memset(&lockInfo, 0, sizeof(lockInfo));
    lockInfo.address = info.address;
    lockInfo.size = size;
    locked = (__dpmi_lock_linear_region(&lockInfo) == 0);

    // Step 3: near pointers. Undo steps 1 and 2 if this fails so the
    // linear mapping is not leaked.
    if (__djgpp_nearptr_enable() == 0) {
        fprintf(stderr, "dpmiMap: Failed to enable near pointers\n");
        if (locked) {
            (void)__dpmi_unlock_linear_region(&lockInfo);
        }
        (void)__dpmi_free_physical_address_mapping(&info);
        return false;
    }

    mapping->ptr = (uint8_t *)(info.address + __djgpp_conventional_base);
    mapping->linearAddr = info.address;
    mapping->size = size;
    return true;
}
// ============================================================
// dpmiUnmapFramebuffer
// ============================================================
//
// Releases a mapping created by dpmiMapFramebuffer(): disables near
// pointers, unlocks the pages and frees the DPMI linear mapping, then
// clears *mapping so a second call is a no-op. A NULL or already
// cleared mapping is ignored.
//
// Unlock and free are best effort: the lock may never have succeeded,
// and some DPMI hosts do not implement freeing a physical mapping, so
// their return values are deliberately ignored.
void dpmiUnmapFramebuffer(DpmiMappingT *mapping) {
    if (mapping == NULL || mapping->ptr == NULL) {
        return;
    }

    __djgpp_nearptr_disable();

    __dpmi_meminfo region;
    memset(&region, 0, sizeof(region));
    region.address = mapping->linearAddr;
    region.size = mapping->size;
    (void)__dpmi_unlock_linear_region(&region);
    (void)__dpmi_free_physical_address_mapping(&region);

    mapping->ptr = NULL;
    mapping->linearAddr = 0;
    mapping->size = 0;
}
// ============================================================
// pciSizeBar
// ============================================================
//
// Determines the size of a PCI BAR by writing all 1s and reading
// back the mask. Saves and restores the original BAR value.
//
//   bus, dev, func - PCI address of the device
//   barReg         - configuration-space offset of the BAR
//
// Returns the decoded size in bytes, or 0 if the BAR is not
// implemented (no writable address bits).
//
// Both BAR flavours are decoded: memory BARs (bit 0 clear) carry
// four flag bits, I/O BARs (bit 0 set) carry two, and I/O BARs may
// leave the upper 16 address bits hardwired to zero. Only the low
// dword is sized, so a 64-bit memory BAR larger than 4 GB is not
// reported correctly.
uint32_t pciSizeBar(uint8_t bus, uint8_t dev, uint8_t func, uint8_t barReg) {
    uint32_t saved = pciRead32(bus, dev, func, barReg);

    pciWrite32(bus, dev, func, barReg, 0xFFFFFFFF);
    uint32_t mask = pciRead32(bus, dev, func, barReg);
    pciWrite32(bus, dev, func, barReg, saved);

    if (mask & 0x00000001) {
        // I/O space BAR: bits 1:0 are flags.
        mask &= 0xFFFFFFFC;
        if (mask == 0) {
            return 0;
        }
        // Treat hardwired-zero upper bits as set so the size is not
        // inflated by address bits the device does not decode.
        if ((mask & 0xFFFF0000) == 0) {
            mask |= 0xFFFF0000;
        }
        return (~mask) + 1;
    }

    // Memory space BAR: bits 3:0 are type/prefetch flags.
    mask &= 0xFFFFFFF0;
    if (mask == 0) {
        return 0;
    }

    // Decode: invert the writable bits, add 1
    return (~mask) + 1;
}
// ============================================================
// vesaFindAndSetMode
// ============================================================
//
// Enumerates VESA VBE modes, scores them against the requested
// resolution and bpp, sets the best match with LFB enabled, and
// returns the mode details. This replaces ~150 lines of identical
// code in every driver.
//
//   reqW, reqH - minimum acceptable resolution; an exact match earns
//                MODE_SCORE_EXACT_RES
//   reqBpp     - preferred colour depth; a match earns
//                MODE_SCORE_PREF_BPP (other depths are still eligible)
//   result     - receives width, height, bpp, pitch and LFB physical
//                address of the mode that was chosen
//
// Returns true once the mode has been set. Returns false (after
// printing a message to stderr) when VBE is missing, no mode
// qualifies, or the BIOS refuses the mode set.
//
// A mode qualifies only if the BIOS reports it as supported by the
// installed hardware, as a graphics mode, with a linear framebuffer
// at a non-zero physical address, at 8/15/16/32 bpp, and at least as
// large as the request. Ties keep the first mode in the BIOS list.
bool vesaFindAndSetMode(int32_t reqW, int32_t reqH, int32_t reqBpp, VesaModeResultT *result) {
    // ModeAttributes bits that must all be set: bit 0 = supported by
    // the hardware configuration, bit 4 = graphics mode, bit 7 = linear
    // framebuffer available.
    const uint16_t requiredAttr = 0x0001 | 0x0010 | 0x0080;
    __dpmi_regs r;

    memset(result, 0, sizeof(*result));

    // Get VBE controller info ("VBE2" in the buffer requests the
    // VBE 2.0+ form of the info block)
    _farpokeb(_dos_ds, __tb + 0, 'V');
    _farpokeb(_dos_ds, __tb + 1, 'B');
    _farpokeb(_dos_ds, __tb + 2, 'E');
    _farpokeb(_dos_ds, __tb + 3, '2');
    memset(&r, 0, sizeof(r));
    r.x.ax = 0x4F00;
    r.x.es = __tb >> 4;
    r.x.di = __tb & 0x0F;
    __dpmi_int(0x10, &r);
    if (r.x.ax != 0x004F) {
        fprintf(stderr, "vesaFindAndSetMode: VBE not available\n");
        return false;
    }

    // Copy mode list before 4F01h overwrites __tb. The list pointer is
    // a real-mode far pointer (offset at +14, segment at +16) and the
    // list ends with 0xFFFF.
    uint16_t modeListOff = _farpeekw(_dos_ds, __tb + 14);
    uint16_t modeListSeg = _farpeekw(_dos_ds, __tb + 16);
    uint32_t modeListAddr = ((uint32_t)modeListSeg << 4) + modeListOff;
    uint16_t modes[256];
    int32_t modeCount = 0;
    for (int32_t i = 0; i < 256; i++) {
        uint16_t mode = _farpeekw(_dos_ds, modeListAddr + i * 2);
        if (mode == 0xFFFF) {
            break;
        }
        modes[modeCount++] = mode;
    }

    // Score each mode and find the best
    uint16_t bestMode = 0;
    int32_t bestScore = -1;
    for (int32_t i = 0; i < modeCount; i++) {
        memset(&r, 0, sizeof(r));
        r.x.ax = 0x4F01;
        r.x.cx = modes[i];
        r.x.es = __tb >> 4;
        r.x.di = __tb & 0x0F;
        __dpmi_int(0x10, &r);
        if (r.x.ax != 0x004F) {
            continue;
        }

        // ModeInfoBlock fields
        uint16_t attr = _farpeekw(_dos_ds, __tb + 0);
        int32_t w = _farpeekw(_dos_ds, __tb + 18);
        int32_t h = _farpeekw(_dos_ds, __tb + 20);
        int32_t bpp = _farpeekb(_dos_ds, __tb + 25);
        int32_t pitch = _farpeekw(_dos_ds, __tb + 16);
        uint32_t phys = _farpeekl(_dos_ds, __tb + 40);

        // Must be usable on this hardware, be a graphics mode and have
        // an LFB. Without the "supported" bit an unusable mode could
        // win the scoring and then fail in 4F02h.
        if ((attr & requiredAttr) != requiredAttr) {
            continue;
        }

        // An LFB mode without a framebuffer address cannot be mapped
        if (phys == 0) {
            continue;
        }

        // Must meet requested resolution
        if (w < reqW || h < reqH) {
            continue;
        }

        // Only 8/15/16/32 bpp
        if (bpp != 8 && bpp != 15 && bpp != 16 && bpp != 32) {
            continue;
        }

        int32_t score = 0;
        if (bpp == 16) { score = MODE_SCORE_16BPP; }
        else if (bpp == 15) { score = MODE_SCORE_15BPP; }
        else if (bpp == 32) { score = MODE_SCORE_32BPP; }
        else { score = MODE_SCORE_8BPP; }
        if (bpp == reqBpp) { score += MODE_SCORE_PREF_BPP; }
        if (w == reqW && h == reqH) { score += MODE_SCORE_EXACT_RES; }

        if (score > bestScore) {
            bestScore = score;
            bestMode = modes[i];
            result->width = w;
            result->height = h;
            result->bpp = bpp;
            result->pitch = pitch;
            result->lfbPhysAddr = phys;
        }
    }

    if (bestScore < 0) {
        fprintf(stderr, "vesaFindAndSetMode: No suitable mode for %ldx%ldx%ld\n",
                (long)reqW, (long)reqH, (long)reqBpp);
        return false;
    }

    // Set the mode with LFB enabled (bit 14)
    memset(&r, 0, sizeof(r));
    r.x.ax = 0x4F02;
    r.x.bx = bestMode | 0x4000; // bit 14 = enable LFB
    __dpmi_int(0x10, &r);
    if (r.x.ax != 0x004F) {
        fprintf(stderr, "vesaFindAndSetMode: Failed to set mode 0x%04X\n", bestMode);
        return false;
    }

    return true;
}
// ============================================================
// vgaAttrRead
// ============================================================
//
// The attribute controller is unusual: reading Input Status 1
// resets its flip-flop so the next write to 0x3C0 is treated as
// an index (not data). We must reset before every access.
//
// Returns the byte read from VGA_ATTR_DATA_R after selecting `index`.
// The index byte is written exactly as passed; bit 5 (palette address
// source) is not added here, so the caller chooses its state.
uint8_t vgaAttrRead(uint8_t index) {
inportb(VGA_INPUT_STATUS_1);
outportb(VGA_ATTR_INDEX, index);
return inportb(VGA_ATTR_DATA_R);
}
// ============================================================
// vgaAttrReset
// ============================================================
//
// Resets the attribute controller flip-flop by reading Input
// Status 1. After this, the next write to 0x3C0 is an index write.
// The value read is discarded; only the read's side effect matters.
void vgaAttrReset(void) {
inportb(VGA_INPUT_STATUS_1);
}
// ============================================================
// vgaAttrWrite
// ============================================================
//
// Writes to the attribute controller. The flip-flop mechanism
// means we must: (1) read Input Status 1 to reset, (2) write
// the index to 0x3C0, (3) write the data to 0x3C0.
// Bit 5 of the index byte must be set to keep the palette
// address source enabled (otherwise the screen goes black).
// This function does NOT set that bit itself: `index` is written
// exactly as passed, so the caller must OR in 0x20 when the display
// should stay enabled after the write.
void vgaAttrWrite(uint8_t index, uint8_t val) {
inportb(VGA_INPUT_STATUS_1);
outportb(VGA_ATTR_INDEX, index);
outportb(VGA_ATTR_DATA_W, val);
}
// ============================================================
// vgaBlankScreen
// ============================================================
//
// Turns the display off (blank = true) or back on (blank = false)
// with a read-modify-write of the sequencer clocking mode register:
// VGA_SEQ_SCREEN_OFF (bit 5) is set to blank and cleared to unblank,
// all other bits are preserved. Blanking prevents visible garbage
// during mode transitions.
void vgaBlankScreen(bool blank) {
    const uint8_t current = vgaSeqRead(VGA_SEQ_CLOCK_MODE);
    const uint8_t updated = blank ? (uint8_t)(current | VGA_SEQ_SCREEN_OFF)
                                  : (uint8_t)(current & ~VGA_SEQ_SCREEN_OFF);

    vgaSeqWrite(VGA_SEQ_CLOCK_MODE, updated);
}
// ============================================================
// vgaCrtcLock
// ============================================================
//
// Turns CRTC write protection back on: reads the vertical sync end
// register and writes it back with bit 7 set, leaving the other bits
// unchanged.
void vgaCrtcLock(void) {
    vgaCrtcWrite(VGA_CRTC_V_SYNC_END, vgaCrtcRead(VGA_CRTC_V_SYNC_END) | 0x80);
}
// ============================================================
// vgaCrtcRead
// ============================================================
//
// Reads one CRTC register: selects `index` through VGA_CRTC_INDEX
// and returns the byte read from VGA_CRTC_DATA.
uint8_t vgaCrtcRead(uint8_t index) {
outportb(VGA_CRTC_INDEX, index);
return inportb(VGA_CRTC_DATA);
}
// ============================================================
// vgaCrtcUnlock
// ============================================================
//
// Turns CRTC write protection off. CRTC registers 0x00-0x07 are
// guarded by bit 7 of the vertical sync end register (0x11); this
// reads that register and writes it back with bit 7 cleared, leaving
// the other bits unchanged, so those registers become writable.
void vgaCrtcUnlock(void) {
    vgaCrtcWrite(VGA_CRTC_V_SYNC_END, vgaCrtcRead(VGA_CRTC_V_SYNC_END) & 0x7F);
}
// ============================================================
// vgaCrtcWrite
// ============================================================
//
// Writes one CRTC register: selects `index` through VGA_CRTC_INDEX,
// then writes `val` to VGA_CRTC_DATA. No write-protection handling is
// done here; see vgaCrtcUnlock() for registers 0x00-0x07.
void vgaCrtcWrite(uint8_t index, uint8_t val) {
outportb(VGA_CRTC_INDEX, index);
outportb(VGA_CRTC_DATA, val);
}
// ============================================================
// vgaDacReadColor
// ============================================================
//
// Read one DAC palette entry. Write the index to 0x3C7, then
// read three bytes (R, G, B) from 0x3C9. DAC values are 6-bit
// (0-63) on standard VGA, 8-bit on some SVGA cards.
// r, g and b are output pointers and are dereferenced
// unconditionally, so none of them may be NULL. The three reads must
// stay in R, G, B order.
void vgaDacReadColor(uint8_t index, uint8_t *r, uint8_t *g, uint8_t *b) {
outportb(VGA_DAC_READ_ADDR, index);
*r = inportb(VGA_DAC_DATA);
*g = inportb(VGA_DAC_DATA);
*b = inportb(VGA_DAC_DATA);
}
// ============================================================
// vgaDacWriteColor
// ============================================================
//
// Write one DAC palette entry. Write the starting index to 0x3C8,
// then write three bytes (R, G, B) to 0x3C9.
// The values are written as given, with no scaling between 6-bit and
// 8-bit ranges. The three writes must stay in R, G, B order.
void vgaDacWriteColor(uint8_t index, uint8_t r, uint8_t g, uint8_t b) {
outportb(VGA_DAC_WRITE_ADDR, index);
outportb(VGA_DAC_DATA, r);
outportb(VGA_DAC_DATA, g);
outportb(VGA_DAC_DATA, b);
}
// ============================================================
// vgaGfxRead
// ============================================================
//
// Reads one graphics controller register: selects `index` through
// VGA_GFX_INDEX and returns the byte read from VGA_GFX_DATA.
uint8_t vgaGfxRead(uint8_t index) {
outportb(VGA_GFX_INDEX, index);
return inportb(VGA_GFX_DATA);
}
// ============================================================
// vgaGfxWrite
// ============================================================
//
// Writes one graphics controller register: selects `index` through
// VGA_GFX_INDEX, then writes `val` to VGA_GFX_DATA.
void vgaGfxWrite(uint8_t index, uint8_t val) {
outportb(VGA_GFX_INDEX, index);
outportb(VGA_GFX_DATA, val);
}
// ============================================================
// vgaMiscRead
// ============================================================
//
// Returns the current miscellaneous output register value, read from
// its read port VGA_MISC_OUT_R (a different port from the write side).
uint8_t vgaMiscRead(void) {
return inportb(VGA_MISC_OUT_R);
}
// ============================================================
// vgaMiscWrite
// ============================================================
//
// Writes `val` to the miscellaneous output register through its
// write port VGA_MISC_OUT_W.
void vgaMiscWrite(uint8_t val) {
outportb(VGA_MISC_OUT_W, val);
}
// ============================================================
// vgaRestoreTextMode
// ============================================================
//
// Restores VGA text mode 3 (80x25, 16 color). Uses INT 10h
// because manually reprogramming all VGA registers for text mode
// is error-prone and varies by chipset. The BIOS handles it
// correctly for all VGA-compatible cards.
void vgaRestoreTextMode(void) {
__dpmi_regs r;
memset(&r, 0, sizeof(r));
r.x.ax = 0x0003;
__dpmi_int(0x10, &r);
}
// ============================================================
// vgaSeqRead
// ============================================================
//
// Reads one sequencer register: selects `index` through
// VGA_SEQ_INDEX and returns the byte read from VGA_SEQ_DATA.
uint8_t vgaSeqRead(uint8_t index) {
outportb(VGA_SEQ_INDEX, index);
return inportb(VGA_SEQ_DATA);
}
// ============================================================
// vgaSeqWrite
// ============================================================
//
// Writes one sequencer register: selects `index` through
// VGA_SEQ_INDEX, then writes `val` to VGA_SEQ_DATA.
void vgaSeqWrite(uint8_t index, uint8_t val) {
outportb(VGA_SEQ_INDEX, index);
outportb(VGA_SEQ_DATA, val);
}
// ============================================================
// vgaWaitVRetrace
// ============================================================
//
// Blocks until the start of the next vertical retrace, using bit 3
// of Input Status 1 (port 0x3DA). Two phases: first let any retrace
// that is already in progress finish (bit clear), then wait for the
// bit to become set again. There is no timeout, so the call spins
// for as long as the bit does not change.
void vgaWaitVRetrace(void) {
    const uint8_t retraceBit = 0x08;

    // Phase 1: leave the current retrace, if any.
    for (;;) {
        if ((inportb(VGA_INPUT_STATUS_1) & retraceBit) == 0) {
            break;
        }
    }

    // Phase 2: catch the beginning of the next one.
    for (;;) {
        if ((inportb(VGA_INPUT_STATUS_1) & retraceBit) != 0) {
            break;
        }
    }
}

198
vgaCommon.h Normal file
View file

@ -0,0 +1,198 @@
// vgaCommon.h -- Shared VGA register programming for DOS/DJGPP
//
// Provides low-level access to the standard VGA register sets that
// are common across all VGA-compatible video cards. Every chipset
// driver needs these for basic mode setup before enabling its
// chip-specific acceleration extensions.
//
// The five standard VGA register groups:
// - Miscellaneous Output (0x3C2 write, 0x3CC read)
// - Sequencer (0x3C4/0x3C5)
// - CRTC (0x3D4/0x3D5 for color, 0x3B4/0x3B5 for mono)
// - Graphics Controller (0x3CE/0x3CF)
// - Attribute Controller (0x3C0/0x3C1, toggle via 0x3DA read)
//
// All functions use DJGPP's inportb/outportb for port I/O.
#ifndef VGA_COMMON_H
#define VGA_COMMON_H
#include <stdint.h>
#include <stdbool.h>
// ============================================================
// VGA I/O port addresses
// ============================================================
//
// Several port numbers appear twice below because the same address
// is one register when written and a different one when read.
// Miscellaneous output register
#define VGA_MISC_OUT_W 0x3C2 // write
#define VGA_MISC_OUT_R 0x3CC // read
// Input status registers
#define VGA_INPUT_STATUS_0 0x3C2 // same port number as VGA_MISC_OUT_W
#define VGA_INPUT_STATUS_1 0x3DA // color mode
#define VGA_INPUT_STATUS_1M 0x3BA // mono mode
// Sequencer
#define VGA_SEQ_INDEX 0x3C4
#define VGA_SEQ_DATA 0x3C5
// CRTC (color mode addresses -- we always use color)
#define VGA_CRTC_INDEX 0x3D4
#define VGA_CRTC_DATA 0x3D5
// Graphics Controller
#define VGA_GFX_INDEX 0x3CE
#define VGA_GFX_DATA 0x3CF
// Attribute Controller (index and data share 0x3C0)
#define VGA_ATTR_INDEX 0x3C0
#define VGA_ATTR_DATA_W 0x3C0
#define VGA_ATTR_DATA_R 0x3C1
// DAC (palette)
#define VGA_DAC_READ_ADDR 0x3C7
#define VGA_DAC_WRITE_ADDR 0x3C8
#define VGA_DAC_DATA 0x3C9
#define VGA_DAC_STATE 0x3C7 // same port number as VGA_DAC_READ_ADDR
// Feature control
#define VGA_FEATURE_W 0x3DA // write (color mode); same port number as VGA_INPUT_STATUS_1
#define VGA_FEATURE_R 0x3CA // read
// ============================================================
// Sequencer register indices
// ============================================================
// Register numbers within the sequencer block. Presumably passed
// as the 'index' argument of vgaSeqRead/vgaSeqWrite -- confirm
// against vgaCommon.c.
#define VGA_SEQ_RESET 0x00
#define VGA_SEQ_CLOCK_MODE 0x01
#define VGA_SEQ_PLANE_MASK 0x02
#define VGA_SEQ_CHAR_MAP 0x03
#define VGA_SEQ_MEM_MODE 0x04
// Sequencer clock mode bits
// Bit mask (not a register index). vgaBlankScreen is documented
// as toggling this bit of the sequencer clocking mode register.
#define VGA_SEQ_SCREEN_OFF 0x20 // bit 5: blank the screen
// ============================================================
// CRTC register indices
// ============================================================
// Register numbers within the CRTC block, 0x00 through 0x18 with
// no gaps. Presumably passed as the 'index' argument of
// vgaCrtcRead/vgaCrtcWrite -- confirm against vgaCommon.c.
#define VGA_CRTC_H_TOTAL 0x00
#define VGA_CRTC_H_DISP_END 0x01
#define VGA_CRTC_H_BLANK_START 0x02
#define VGA_CRTC_H_BLANK_END 0x03
#define VGA_CRTC_H_SYNC_START 0x04
#define VGA_CRTC_H_SYNC_END 0x05
#define VGA_CRTC_V_TOTAL 0x06
#define VGA_CRTC_OVERFLOW 0x07
#define VGA_CRTC_PRESET_ROW 0x08
#define VGA_CRTC_MAX_SCAN 0x09
#define VGA_CRTC_CURSOR_START 0x0A
#define VGA_CRTC_CURSOR_END 0x0B
#define VGA_CRTC_START_ADDR_HI 0x0C
#define VGA_CRTC_START_ADDR_LO 0x0D
#define VGA_CRTC_CURSOR_HI 0x0E
#define VGA_CRTC_CURSOR_LO 0x0F
#define VGA_CRTC_V_SYNC_START 0x10
// Bit 7 of this register write-protects some CRTC registers;
// see vgaCrtcLock/vgaCrtcUnlock.
#define VGA_CRTC_V_SYNC_END 0x11
#define VGA_CRTC_V_DISP_END 0x12
#define VGA_CRTC_OFFSET 0x13
#define VGA_CRTC_UNDERLINE 0x14
#define VGA_CRTC_V_BLANK_START 0x15
#define VGA_CRTC_V_BLANK_END 0x16
#define VGA_CRTC_MODE_CTRL 0x17
#define VGA_CRTC_LINE_COMPARE 0x18
// ============================================================
// Graphics controller register indices
// ============================================================
// Register numbers within the graphics controller block, 0x00
// through 0x08 with no gaps. Presumably passed as the 'index'
// argument of vgaGfxRead/vgaGfxWrite -- confirm against
// vgaCommon.c.
#define VGA_GFX_SET_RESET 0x00
#define VGA_GFX_ENABLE_SET_RESET 0x01
#define VGA_GFX_COLOR_COMPARE 0x02
#define VGA_GFX_DATA_ROTATE 0x03
#define VGA_GFX_READ_MAP_SEL 0x04
#define VGA_GFX_MODE 0x05
#define VGA_GFX_MISC 0x06
#define VGA_GFX_COLOR_DONT_CARE 0x07
#define VGA_GFX_BIT_MASK 0x08
// ============================================================
// VESA mode result (returned by vesaFindAndSetMode)
// ============================================================
// Describes the mode that vesaFindAndSetMode selected. The caller
// supplies the struct and the function fills it in through its
// 'result' pointer argument; the function's own return value is
// only the success flag.
typedef struct {
int32_t width; // mode width (presumably in pixels -- confirm)
int32_t height; // mode height (presumably in pixels -- confirm)
int32_t bpp; // colour depth of the mode (presumably bits per pixel)
int32_t pitch; // NOTE(review): presumably bytes per scanline -- confirm
uint32_t lfbPhysAddr; // physical address of LFB from VBE
} VesaModeResultT;
// ============================================================
// DPMI LFB mapping result (returned by dpmiMapFramebuffer)
// ============================================================
// Bookkeeping for one mapped region. dpmiMapFramebuffer fills it
// in through its 'mapping' pointer argument, and the same struct
// is later handed to dpmiUnmapFramebuffer. A zeroed struct is
// documented as safe to pass to dpmiUnmapFramebuffer.
typedef struct {
uint8_t *ptr; // near pointer to mapped region
uint32_t linearAddr; // linear address (for unmapping)
uint32_t size; // mapped size in bytes
} DpmiMappingT;
// ============================================================
// Prototypes
// ============================================================
// Find the best VESA VBE mode matching the requested resolution
// and bpp, set it with LFB enabled, and return the mode details.
// Returns true on success. This replaces ~150 lines of duplicated
// code in every driver.
//   reqW, reqH, reqBpp - requested resolution and colour depth
//   result             - caller-supplied struct that receives the
//                        mode details
// NOTE(review): the contents of *result on failure are not
// specified here -- confirm before reading it after a false return.
bool vesaFindAndSetMode(int32_t reqW, int32_t reqH, int32_t reqBpp, VesaModeResultT *result);
// Map a physical address region into the DJGPP near pointer space
// via DPMI. Handles physical address mapping, page locking, and
// near pointer enable. Returns true on success.
//   physAddr - physical start address of the region
//   size     - region size (presumably bytes, matching
//              DpmiMappingT.size)
//   mapping  - caller-supplied struct that receives the mapping
bool dpmiMapFramebuffer(uint32_t physAddr, uint32_t size, DpmiMappingT *mapping);
// Unmap a previously mapped framebuffer region and disable near
// pointers. Safe to call with a zeroed mapping struct.
void dpmiUnmapFramebuffer(DpmiMappingT *mapping);
// Size a PCI BAR by writing all 1s and reading back. Returns the
// decoded size in bytes. Saves and restores the original BAR value.
//   bus, dev, func - PCI location of the device
//   barReg         - which BAR to size (presumably its config-space
//                    register offset -- confirm against pci.h)
uint32_t pciSizeBar(uint8_t bus, uint8_t dev, uint8_t func, uint8_t barReg);
// Read/write individual VGA register sets
// For the indexed groups (Attr, Crtc, Gfx, Seq), 'index' selects
// the register within the group and 'val' is the byte to store;
// the Read variants return the register's current byte. The Misc
// pair takes no index.
// NOTE(review): vgaAttrReset presumably resets the attribute
// controller's index/data flip-flop -- confirm in vgaCommon.c.
uint8_t vgaAttrRead(uint8_t index);
void vgaAttrReset(void);
void vgaAttrWrite(uint8_t index, uint8_t val);
uint8_t vgaCrtcRead(uint8_t index);
void vgaCrtcWrite(uint8_t index, uint8_t val);
uint8_t vgaGfxRead(uint8_t index);
void vgaGfxWrite(uint8_t index, uint8_t val);
uint8_t vgaMiscRead(void);
void vgaMiscWrite(uint8_t val);
uint8_t vgaSeqRead(uint8_t index);
void vgaSeqWrite(uint8_t index, uint8_t val);
// CRTC register protection: some CRTC registers are write-protected
// by bit 7 of the V_SYNC_END register. These functions unlock/lock.
// (V_SYNC_END is CRTC index VGA_CRTC_V_SYNC_END.)
void vgaCrtcLock(void);
void vgaCrtcUnlock(void);
// Palette (DAC) operations
//   index   - palette entry to read or write
//   r, g, b - colour components; the Read variant stores them
//             through the pointers
// NOTE(review): the component range is not stated here (standard
// VGA DACs hold 6 bits per component) -- confirm whether callers
// pass 0-63 or 0-255.
void vgaDacReadColor(uint8_t index, uint8_t *r, uint8_t *g, uint8_t *b);
void vgaDacWriteColor(uint8_t index, uint8_t r, uint8_t g, uint8_t b);
// Restore VGA text mode (mode 3). Uses INT 10h for reliability
// across all chipsets.
void vgaRestoreTextMode(void);
// Wait for vertical retrace. Spins on Input Status 1 bit 3.
// Useful for timing-sensitive register writes and tear-free updates.
// Busy-waits with no timeout: it first waits out any retrace
// already in progress, then returns when the next one starts.
void vgaWaitVRetrace(void);
// Enable/disable VGA display output by toggling sequencer clocking
// mode bit 5. Used during mode transitions to prevent screen garbage.
//   blank - presumably true blanks the display (sets
//           VGA_SEQ_SCREEN_OFF) and false restores it -- confirm
void vgaBlankScreen(bool blank);
#endif // VGA_COMMON_H