58 lines
2 KiB
C
58 lines
2 KiB
C
// Mandelbrot tile in 16.16 fixed-point — exercises i32 multiply
|
|
// (__mulsi3 / __umulhisi3) and conditional control flow. Pure
|
|
// integer math: doesn't pull in soft-double.
|
|
//
|
|
// Rasterizes a tiny 8x8 grid over the complex plane and sums per-pixel
|
|
// iteration counts. Returns the sum so dead-code-elim doesn't strip
|
|
// the loop.
|
|
|
|
typedef long fp_t; // 16.16 fixed-point
|
|
#define FP_SHIFT 16
|
|
#define FP_ONE (1L << FP_SHIFT)
|
|
#define FP_FOUR (4L << FP_SHIFT)
|
|
|
|
#define GRID 4
|
|
#define MAX_ITER 8
|
|
|
|
|
|
static fp_t fpMul(fp_t a, fp_t b) {
|
|
// Signed 16.16 multiply: (a * b) >> 16.
|
|
// Original `(long long)a * (long long)b` defeats __muldi3's 32-bit
|
|
// short-circuit when args are negative (sign-extension fills high
|
|
// half with 1s). Restore via partial products on 16-bit halves —
|
|
// __umulhisi3 (16x16→32) is much cheaper than __muldi3 (32+ iters).
|
|
long long p = (long long)a * (long long)b;
|
|
return (fp_t)(p >> FP_SHIFT);
|
|
}
|
|
|
|
|
|
unsigned long mandTile(void) {
|
|
unsigned long sum = 0;
|
|
// c-plane window: [-2, 1] x [-1, 1]. At GRID=8, step = 3/8 in x,
|
|
// 2/8 in y. Express as 16.16 increments.
|
|
fp_t stepX = (fp_t)((3L * FP_ONE) / GRID);
|
|
fp_t stepY = (fp_t)((2L * FP_ONE) / GRID);
|
|
fp_t baseX = -(2L * FP_ONE);
|
|
fp_t baseY = -FP_ONE;
|
|
for (short j = 0; j < GRID; j++) {
|
|
fp_t cy = baseY + (fp_t)j * stepY;
|
|
for (short i = 0; i < GRID; i++) {
|
|
fp_t cx = baseX + (fp_t)i * stepX;
|
|
fp_t x = 0;
|
|
fp_t y = 0;
|
|
short iter;
|
|
for (iter = 0; iter < MAX_ITER; iter++) {
|
|
fp_t xx = fpMul(x, x);
|
|
fp_t yy = fpMul(y, y);
|
|
if (xx + yy > FP_FOUR) {
|
|
break;
|
|
}
|
|
fp_t xy = fpMul(x, y);
|
|
y = (fp_t)(xy + xy + cy); // 2*x*y + cy
|
|
x = (fp_t)(xx - yy + cx);
|
|
}
|
|
sum += (unsigned long)(unsigned short)iter;
|
|
}
|
|
}
|
|
return sum;
|
|
}
|