// Mandelbrot tile in 16.16 fixed-point — exercises i32 multiply
// (__mulsi3 / __umulhisi3) and conditional control flow. Pure
// integer math: doesn't pull in soft-double.
//
// Rasterizes a tiny GRID x GRID grid (currently 4x4) over the complex
// plane and sums per-pixel iteration counts. Returns the sum so
// dead-code-elim doesn't strip the loop.

typedef long fp_t;  // 16.16 fixed-point

#define FP_SHIFT 16
#define FP_ONE   (1L << FP_SHIFT)
#define FP_FOUR  (4L << FP_SHIFT)

#define GRID     4
#define MAX_ITER 8

// Signed 16.16 multiply: returns floor((a * b) / 2^16).
//
// Implemented with partial products on the integer/fraction halves so
// that no `long long` multiply is needed. Writing a = aHi * 2^16 + aLo
// (aHi = a >> 16, sign-carrying; aLo = low 16 bits, 0..65535) and the
// same for b:
//
//   (a * b) >> 16 = aHi*bHi * 2^16 + aHi*bLo + aLo*bHi + ((aLo*bLo) >> 16)
//
// The first three terms are whole multiples of the result unit, so only
// the low*low term needs the shift. All arithmetic is done in unsigned
// long so intermediate wrap-around is well defined; the final sum is
// correct modulo the width of long and is converted back to fp_t.
//
// Relies on `>>` of a negative fp_t being an arithmetic shift
// (implementation-defined in C; the `(long long)a * b >> 16` form has
// the same dependency).
static fp_t fpMul(fp_t a, fp_t b)
{
    const unsigned long fracMask = (unsigned long)FP_ONE - 1UL;

    unsigned long aHi = (unsigned long)(a >> FP_SHIFT);
    unsigned long bHi = (unsigned long)(b >> FP_SHIFT);
    unsigned long aLo = (unsigned long)a & fracMask;
    unsigned long bLo = (unsigned long)b & fracMask;

    // aLo * bLo < 2^32, so it cannot wrap even when long is 32 bits.
    unsigned long r = ((aHi * bHi) << FP_SHIFT)
                    + aHi * bLo
                    + aLo * bHi
                    + ((aLo * bLo) >> FP_SHIFT);

    return (fp_t)r;
}

// Iterates z = z^2 + c for each of the GRID x GRID sample points and
// returns the sum of the per-point iteration counts. A point's count is
// the number of iterations completed before |z|^2 exceeded 4, capped at
// MAX_ITER.
unsigned long mandTile(void)
{
    unsigned long sum = 0;

    // c-plane window: [-2, 1] x [-1, 1]. Step is 3/GRID in x and
    // 2/GRID in y, expressed as 16.16 increments. Samples start at the
    // lower-left corner, so the upper/right edges are not sampled.
    fp_t stepX = (fp_t)((3L * FP_ONE) / GRID);
    fp_t stepY = (fp_t)((2L * FP_ONE) / GRID);
    fp_t baseX = -(2L * FP_ONE);
    fp_t baseY = -FP_ONE;

    for (short j = 0; j < GRID; j++) {
        fp_t cy = baseY + (fp_t)j * stepY;

        for (short i = 0; i < GRID; i++) {
            fp_t cx = baseX + (fp_t)i * stepX;
            fp_t x = 0;
            fp_t y = 0;
            short iter;

            for (iter = 0; iter < MAX_ITER; iter++) {
                fp_t xx = fpMul(x, x);
                fp_t yy = fpMul(y, y);

                // Escape test: |z|^2 > 4 (strict, so |z| == 2 stays in).
                if (xx + yy > FP_FOUR) {
                    break;
                }

                fp_t xy = fpMul(x, y);
                y = (fp_t)(xy + xy + cy);   // 2*x*y + cy
                x = (fp_t)(xx - yy + cx);   // x^2 - y^2 + cx
            }

            sum += (unsigned long)(unsigned short)iter;
        }
    }

    return sum;
}