65816-llvm-mos/benchmarks/mandelbrot.c
2026-05-25 21:00:32 -05:00

58 lines
2 KiB
C

// Mandelbrot tile in 16.16 fixed-point — exercises i32 multiply
// (__mulsi3 / __umulhisi3) and conditional control flow. Pure
// integer math: doesn't pull in soft-double.
//
// Rasterizes a tiny 8x8 grid over the complex plane and sums per-pixel
// iteration counts. Returns the sum so dead-code-elim doesn't strip
// the loop.
typedef long fp_t; // 16.16 fixed-point
#define FP_SHIFT 16
#define FP_ONE (1L << FP_SHIFT)
#define FP_FOUR (4L << FP_SHIFT)
#define GRID 4
#define MAX_ITER 8
static fp_t fpMul(fp_t a, fp_t b) {
// Signed 16.16 multiply: (a * b) >> 16.
// Original `(long long)a * (long long)b` defeats __muldi3's 32-bit
// short-circuit when args are negative (sign-extension fills high
// half with 1s). Restore via partial products on 16-bit halves —
// __umulhisi3 (16x16→32) is much cheaper than __muldi3 (32+ iters).
long long p = (long long)a * (long long)b;
return (fp_t)(p >> FP_SHIFT);
}
unsigned long mandTile(void) {
unsigned long sum = 0;
// c-plane window: [-2, 1] x [-1, 1]. At GRID=8, step = 3/8 in x,
// 2/8 in y. Express as 16.16 increments.
fp_t stepX = (fp_t)((3L * FP_ONE) / GRID);
fp_t stepY = (fp_t)((2L * FP_ONE) / GRID);
fp_t baseX = -(2L * FP_ONE);
fp_t baseY = -FP_ONE;
for (short j = 0; j < GRID; j++) {
fp_t cy = baseY + (fp_t)j * stepY;
for (short i = 0; i < GRID; i++) {
fp_t cx = baseX + (fp_t)i * stepX;
fp_t x = 0;
fp_t y = 0;
short iter;
for (iter = 0; iter < MAX_ITER; iter++) {
fp_t xx = fpMul(x, x);
fp_t yy = fpMul(y, y);
if (xx + yy > FP_FOUR) {
break;
}
fp_t xy = fpMul(x, y);
y = (fp_t)(xy + xy + cy); // 2*x*y + cy
x = (fp_t)(xx - yy + cx);
}
sum += (unsigned long)(unsigned short)iter;
}
}
return sum;
}