diff --git a/examples/a23d2/src/a23d2.c b/examples/a23d2/src/a23d2.c index 859de75..4c64cb0 100644 --- a/examples/a23d2/src/a23d2.c +++ b/examples/a23d2/src/a23d2.c @@ -26,13 +26,13 @@ #include // For memcopy -cameraT *_camera = (cameraT *)0x200; // Simulation copy of camera. -cameraT *_cameraInDatabase; +volatile cameraT *_camera = (cameraT *)CAMERA_SHARED_START; // Simulation copy of camera. +volatile cameraT *_cameraInDatabase; +volatile byte *_pointer; uint16_t _drawlist; uint16_t _drawlistInDatabase; -byte *_pointer; uint16_t _bytes; uint8_t _x1; uint8_t _y1; @@ -41,6 +41,7 @@ uint8_t _y2; bool _useColor; byte _mmu; byte _ram; +//float _trig; #define SEGMENT_A23D2 @@ -49,6 +50,30 @@ byte _ram; // There's a lot of global use in here. We can't use the virtual stack. +#if 0 +void a23d2Cos(void) { // Disabled: passes _tdata to A2-3D2's COSEX and stores the scaled result in _trig. + // We need to manually page 0x54000 into 0x6000. + // This isn't actually large enough for A2-3D2 but we should only + // lose Apple ][ stuff we're not using anyway. + POKE(MMU_MEM_BANK_3, A23D2_FAR_BLOCK); + + // Map 0-359 into 0-255. + _tdata = _tdata % 360; // 360 degrees per turn; % 359 would wrap 359 back to 0. + _y1 = (uint8_t)(((uint16_t)_tdata * 256u) / 360u); // Multiply before dividing: the integer constant (360/256) is just 1. + + // Call COSEX. NOTE(review): no zero-page save/restore surrounds this call, unlike a23d2Render - confirm COSEX does not need it. + POKE(A23D2_TDATA, _y1); + __attribute__((leaf)) asm volatile("jsr %[addy]":: [addy] "i"(A23D2_COSEX) : "a","x","y","c","v"); // Same clobber list as the NXTPT call in a23d2Render. + + // Convert to float. + _trig = (float)PEEKW(A23D2_TDATA) / 32768; + + // Restore memory map. + POKE(MMU_MEM_BANK_3, 3); +} +#endif + + void a23d2Draw(void) { _pointer = (byte *)_drawlist; @@ -106,23 +131,34 @@ void a23d2Init(void) { // lose Apple ][ stuff we're not using anyway. POKE(MMU_MEM_BANK_3, A23D2_FAR_BLOCK); - // We're going to clobber from 0x80fb to 0x8101. Back it up. - memcpy((byte *)0x2f7, (byte *)0x80fb, 0x06); + // We're going to clobber from 0x80fb to 0x8101 with our setup database. + // Usually we'd swap in bank 4, but we'd have to save that, too. Back it up. + scdMemCpy((byte *)(SCRATCH_END - 0x06), (byte *)A23D2_TEST_DATABASE, 0x06); // Initialize A2-3D2 so we can use the "fast entry point" (NXTPT) when rendering. 
_bytes = 0; - _pointer = (byte *)0x80fb; // Standard location for test database in A2-3D2. - _pointer[_bytes++] = SCRSZ; // Screen size. 256x240. Center is 0,0. + _pointer = (byte *)A23D2_TEST_DATABASE; // Standard location for test database in A2-3D2. + _pointer[_bytes++] = SCRSZ; // Screen size. 256x240. Center is 0,0. _pointer[_bytes++] = 255; _pointer[_bytes++] = 239; _pointer[_bytes++] = 0; _pointer[_bytes++] = 0; - _pointer[_bytes++] = END; // Setup complete! + _pointer[_bytes++] = END; // Setup complete! - asm("jsr 0x606c"); // Call ENTRYS. This preserves the ZP for us. + // Save ZP that A2-3D2 is going to clobber. + scdMemCpy((byte *)COMPILER_ZP_SAVE, (byte *)COMPILER_ZP_START, COMPILER_ZP_LENGTH); + + // Call ENTRYN. This sets up A2-3D2. + __attribute__((leaf)) asm volatile("jsr %[addy]":: [addy] "i"(A23D2_ENTRYN) : "a","x","y","c","v"); // Declare the registers and flags the routine may change, as the NXTPT call does. + + // Save A2-3D2's ZP for later. + scdMemCpy((byte *)A23D2_ZP_SAVE, (byte *)A23D2_ZP_START, A23D2_ZP_LENGTH); + + // Restore ZP that A2-3D2 clobbered. + scdMemCpy((byte *)COMPILER_ZP_START, (byte *)COMPILER_ZP_SAVE, COMPILER_ZP_LENGTH); // Put back the RAM we clobbered. - memcpy((byte *)0x80fb, (byte *)0x2f7, 0x06); + scdMemCpy((byte *)A23D2_TEST_DATABASE, (byte *)(SCRATCH_END - 0x06), 0x06); // Move our 3D/2D data buffer at 0x56000 into slot 4. POKE(MMU_MEM_BANK_4, DATABASE_FAR_BLOCK); @@ -167,12 +203,49 @@ void a23d2Render(void) { _cameraInDatabase->b = _camera->b; _cameraInDatabase->h = _camera->h; - memcpy((byte *)0x29d, (byte *)0x60, 0x62); // Save the ZP we're going to clobber. - POKEW(0x9b, DATABASE); // Set IBP. - asm("jsr 0x6118":::"a","x","y","c","v"); // Call NXTPT. - memcpy((byte *)0x60, (byte *)0x29d, 0x62); // Put the ZP back. + // Save ZP that A2-3D2 is going to clobber. + scdMemCpy((byte *)COMPILER_ZP_SAVE, (byte *)COMPILER_ZP_START, COMPILER_ZP_LENGTH); + + // Restore A2-3D2's ZP. + scdMemCpy((byte *)A23D2_ZP_START, (byte *)A23D2_ZP_SAVE, A23D2_ZP_LENGTH); + + // Set IBP. This must follow the ZP restore above because IBP lives inside the restored window. + POKEW(A23D2_IBP, DATABASE); + + // Call NXTPT. 
+ __attribute__((leaf)) asm volatile("jsr %[addy]":: [addy] "i"(A23D2_NXTPT) : "a","x","y","c","v"); // Call NXTPT. + + // Save A2-3D2's ZP for later. + scdMemCpy((byte *)A23D2_ZP_SAVE, (byte *)A23D2_ZP_START, A23D2_ZP_LENGTH); + + // Restore ZP that A2-3D2 clobbered. + scdMemCpy((byte *)COMPILER_ZP_START, (byte *)COMPILER_ZP_SAVE, COMPILER_ZP_LENGTH); // Restore memory map. POKE(MMU_MEM_BANK_4, 4); POKE(MMU_MEM_BANK_3, 3); } + + +#if 0 +void a23d2Sin(void) { // Disabled: passes _tdata to A2-3D2's SINEX and stores the scaled result in _trig. + // We need to manually page 0x54000 into 0x6000. + // This isn't actually large enough for A2-3D2 but we should only + // lose Apple ][ stuff we're not using anyway. + POKE(MMU_MEM_BANK_3, A23D2_FAR_BLOCK); + + // Map 0-359 into 0-255. + _tdata = _tdata % 360; // 360 degrees per turn; % 359 would wrap 359 back to 0. + _y1 = (uint8_t)(((uint16_t)_tdata * 256u) / 360u); // Multiply before dividing: the integer constant (360/256) is just 1. + + // Call SINEX. NOTE(review): no zero-page save/restore surrounds this call, unlike a23d2Render - confirm SINEX does not need it. + POKE(A23D2_TDATA, _y1); + __attribute__((leaf)) asm volatile("jsr %[addy]":: [addy] "i"(A23D2_SINEX) : "a","x","y","c","v"); // Same clobber list as the NXTPT call in a23d2Render. + + // Convert to float. + _trig = (float)PEEKW(A23D2_TDATA) / 32768; + + // Restore memory map. + POKE(MMU_MEM_BANK_3, 3); +} +#endif diff --git a/examples/a23d2/src/a23d2.h b/examples/a23d2/src/a23d2.h index c66ae2a..bb59b86 100644 --- a/examples/a23d2/src/a23d2.h +++ b/examples/a23d2/src/a23d2.h @@ -30,6 +30,16 @@ typedef uint8_t byte; #endif +//#define scdMemCpy memcpy +#define scdMemCpy(d,s,l) /* Copy l bytes from s to d, one volatile byte at a time, lowest address first. Each argument is evaluated exactly once. */ \ + ({ \ + volatile byte *scdDst_ = (d); \ + volatile byte *scdSrc_ = (s); \ + uint16_t scdLen_ = (l); /* Suffixed names avoid capturing a caller's dp, sp or i. */ \ + while (scdLen_--) *scdDst_++ = *scdSrc_++; \ + }) + + #define A23D2_FAR_BLOCK 42 #define DATABASE_FAR_BLOCK 43 @@ -42,6 +52,31 @@ typedef uint8_t byte; #define DRAWLIST_P1 0x9800 +// A2-3D2 Function Addresses. +#define A23D2_ENTRYN 0x6090 +#define A23D2_NXTPT 0x6118 +#define A23D2_SINEX 0x61f6 +#define A23D2_COSEX 0x620f + +// A2-3D2 Data Addresses. +#define A23D2_TEST_DATABASE 0x80fb +#define A23D2_TDATA 0x613e +#define A23D2_IBP 0x9b + +// Stuff A2-3D2 is going to clobber that our compiler may want. 
+#define COMPILER_ZP_START 0x60 // 0x60 up to, but not including, 0xC2. +#define COMPILER_ZP_LENGTH 0x62 // Bytes copied on each zero-page save or restore. +#define A23D2_ZP_START COMPILER_ZP_START // A2-3D2 uses the same zero-page window as the compiler. +#define A23D2_ZP_LENGTH COMPILER_ZP_LENGTH + +// Our Scratch Addresses. +#define CAMERA_SHARED_START 0x200 // 0x200 up to, but not including, 0x209. +#define CAMERA_SHARED_LENGTH 0x9 // Matches the 9-byte cameraS noted below. +#define SCRATCH_END 0x2ff // a23d2Init backs up 6 bytes at SCRATCH_END - 0x06. +#define COMPILER_ZP_SAVE (CAMERA_SHARED_START + CAMERA_SHARED_LENGTH) // = 0x209, directly after the camera. +#define A23D2_ZP_SAVE (COMPILER_ZP_SAVE + COMPILER_ZP_LENGTH) // = 0x26B, directly after the compiler's copy. + + // A2-3D1 Commands. Commented out items are Apple ][ only. #define PNT 0x00 // xLSB, xMSB, yLSB, yMSB, zLSB, zMSB - Define 3D Point #define SPNT 0x01 // xLSB, xMSB, yLSB, yMSB, zLSB, zMSB - Define 3D Start Point @@ -78,7 +113,7 @@ typedef uint8_t byte; #define END 0x79 // End of Database -typedef struct cameraS { +typedef struct cameraS { // 9 bytes. int16_t x; int16_t y; int16_t z; @@ -88,24 +123,27 @@ typedef struct cameraS { } cameraT; -extern cameraT *_camera; -extern cameraT *_cameraInDatabase; +extern volatile cameraT *_camera; +extern volatile cameraT *_cameraInDatabase; +extern volatile byte *_pointer; extern uint16_t _drawlist; extern uint16_t _drawlistInDatabase; -extern byte *_pointer; extern uint16_t _bytes; extern uint8_t _x1; +#define _tdata _x1 // Alias. NOTE(review): _x1 is uint8_t, so _tdata cannot hold angles above 255. extern uint8_t _y1; extern uint8_t _x2; extern uint8_t _y2; extern bool _useColor; extern byte _mmu; extern byte _ram; +extern float _trig; // NOTE(review): the definition in a23d2.c is commented out (//float _trig;) - confirm before use. -// NOTE: -// There are no function prototypes in this header. -// This library lives in it's own overlay segment and the overlay tool -// will generate prototypes as well as trampoline macros for us. +void a23d2Cos(void); // NOTE(review): the body is under #if 0 in a23d2.c. +void a23d2Draw(void); +void a23d2Init(void); +void a23d2Render(void); +void a23d2Sin(void); // NOTE(review): the body is under #if 0 in a23d2.c. diff --git a/examples/a23d2/src/main.c b/examples/a23d2/src/main.c index 28aed62..dae0df9 100644 --- a/examples/a23d2/src/main.c +++ b/examples/a23d2/src/main.c @@ -113,6 +113,11 @@ * code calls into slots 2 to 4 has a program counter between 0xa000 and * 0xbfff - well out of the way of the 3D stuff. 
* + * When enabling compiler optimizations, both the compiler and A2-3D2 want + * use of the remaining zero page. We maintain copies of the area that + * gets clobbered by each and make sure the right one is in place at the + * right time. + * * The last thing to worry about is data needed by both the simulator and * A2-3D2. This is basically only the camera position and rotation. These * 10 bytes will be stored directly into RAM from 0x200 to 0x20a overwriting diff --git a/f256lib/f256.h b/f256lib/f256.h index fa23486..8ed628d 100644 --- a/f256lib/f256.h +++ b/f256lib/f256.h @@ -218,7 +218,7 @@ typedef struct colorS { // Address (three bytes) //#define PEEKA -#define POKEA(addy, value) POKE(addy, value & 0xFF); POKE(addy + 1, (value >> 8) & 0xFF); POKE(addy + 2, (value >> 16) & 0xFF) +#define POKEA(addy, value) do { POKE((addy), (uint32_t)(value) & 0xFF); POKE((addy) + 1, ((uint32_t)(value) >> 8) & 0xFF); POKE((addy) + 2, ((uint32_t)(value) >> 16) & 0xFF); } while (0) /* Stores the low three bytes of value at addy, lowest byte first. The do/while makes it one statement, so it is safe under an unbraced if. The uint32_t cast keeps the 16-bit shift defined when value is a 16-bit int. Both arguments are still evaluated three times, so avoid side effects. */ // Double-word (four bytes) #define PEEKD(addy) ((uint32_t)*(volatile uint32_t *)(addy)) diff --git a/f256lib/f_dma.c b/f256lib/f_dma.c index 954146f..e468d61 100644 --- a/f256lib/f_dma.c +++ b/f256lib/f_dma.c @@ -29,7 +29,7 @@ #endif -static void dmaWait(void); +//static void dmaWait(void); void dmaFill(uint32_t start, uint32_t length, byte value) { @@ -69,9 +69,10 @@ void dma2dFill(uint32_t start, uint16_t width, uint16_t height, uint16_t stride, } +/* static void dmaWait(void) { - //***FIX*** This whole block seems unneeded if DMA halts the CPU while it transfers. + // ***FIX*** This whole block seems unneeded if DMA halts the CPU while it transfers. // First, wait for DMA to be complete. while (PEEK(DMA_STATUS) & DMA_STAT_BUSY) @@ -83,6 +84,7 @@ static void dmaWait(void) { // Then wait for a VBL because two DMAs per frame will crash. 
//graphicsWaitVerticalBlank(); } +*/ #endif diff --git a/tools/overlay/overlay.c b/tools/overlay/overlay.c index 0b39663..fbdecae 100644 --- a/tools/overlay/overlay.c +++ b/tools/overlay/overlay.c @@ -231,8 +231,8 @@ void parseCFile(char *filename, char *targetFile, FILE *trampoline, char *trampo if (strcmp(arguments, "void") == 0) arguments[0] = 0; // Create trampoline function. - fprintf(out, "__attribute__((optnone))\n"); - //fprintf(out, "__attribute__((noinline))\n"); + fprintf(out, "#pragma clang optimize off\n"); // Written just before the generated trampoline; replaces the per-function optnone attribute. + fprintf(out, "__attribute__((noinline))\n"); // The generated trampoline is also marked noinline. fprintf(out, "%s%c%s {\n", buffer, isPointer ? '*' : ' ', start); fprintf(out, "\tvolatile unsigned char ___mmu = (unsigned char)*(volatile unsigned char *)%#06x;\n", swapSlot); if (strcmp(buffer, "void") != 0) fprintf(out, "\t%s%cr;\n", buffer, isPointer ? '*' : ' '); @@ -242,7 +242,8 @@ void parseCFile(char *filename, char *targetFile, FILE *trampoline, char *trampo fprintf(out, "FAR%d_%s(%s);\n", _currentBank, functionName, arguments); fprintf(out, "\t*(volatile unsigned char *)%#06x = ___mmu;\n", swapSlot); if (strcmp(buffer, "void") != 0) fprintf(out, "\treturn r;"); - fprintf(out, "}\n\n"); + fprintf(out, "}\n"); // Closes the generated trampoline body. + fprintf(out, "#pragma clang optimize on\n\n"); /* Written right after the trampoline's closing brace, pairing with the "optimize off" pragma written before it. */ // Write out new function definition. fprintf(out, "__attribute__((noinline, section(\".block%d\")))\n", _currentBank);