// Security library: DH key exchange + XTEA-CTR cipher for DJGPP // // Diffie-Hellman uses the RFC 2409 Group 2 (1024-bit) safe prime with // Montgomery multiplication for modular exponentiation. Private exponents // are 256 bits for fast computation on 486-class hardware. // // XTEA in CTR mode provides symmetric encryption. No lookup tables, // no key schedule — just shifts, adds, and XORs. #include #include #include #include #include #include #include #include "security.h" // ======================================================================== // Internal defines // ======================================================================== #define BN_BITS 1024 #define BN_WORDS (BN_BITS / 32) #define BN_BYTES (BN_BITS / 8) #define DH_PRIVATE_BITS 256 #define DH_PRIVATE_BYTES (DH_PRIVATE_BITS / 8) #define XTEA_ROUNDS 32 #define XTEA_DELTA 0x9E3779B9 // ======================================================================== // Types // ======================================================================== typedef struct { uint32_t w[BN_WORDS]; } BigNumT; struct SecDhS { BigNumT privateKey; BigNumT publicKey; BigNumT sharedSecret; bool hasKeys; bool hasSecret; }; struct SecCipherS { uint32_t key[4]; uint32_t nonce[2]; uint32_t counter[2]; }; typedef struct { uint32_t key[4]; uint32_t counter[2]; bool seeded; } RngStateT; // ======================================================================== // Static globals // ======================================================================== // RFC 2409 Group 2 (1024-bit MODP) prime, little-endian word order static const BigNumT sDhPrime = { .w = { 0x39E38FAF, 0xCDB1CEDC, 0x51FF5DB8, 0x85E28A20, 0x1E9C284F, 0x2BB72AE0, 0x60F89D81, 0x4E664FD5, 0x45E6F3A1, 0x92F2129E, 0xB8E51B21, 0x35C7D431, 0x14A0C959, 0x137E2179, 0x5BE0CD19, 0x7A51F1D7, 0xF25F1468, 0x302B0A6D, 0xCD3A431B, 0xEF9519B3, 0x8E3404DD, 0x514A0879, 0x3B139B22, 0x020BBEA6, 0x8A67CC74, 0x29024E08, 0x80DC1CD1, 0xC4C6628B, 0x2168C234, 0xC90FDAA2, 0xFFFFFFFF, 
0xFFFFFFFF }}; // Generator g = 2 static const BigNumT sDhGenerator = { .w = { 2 } }; // Montgomery constants (computed lazily) static BigNumT sDhR2; // R^2 mod p static uint32_t sDhM0Inv; // -p[0]^(-1) mod 2^32 static bool sDhInited = false; // RNG state static RngStateT sRng = { .seeded = false }; // ======================================================================== // Static prototypes (alphabetical) // ======================================================================== static int bnAdd(BigNumT *result, const BigNumT *a, const BigNumT *b); static int bnBit(const BigNumT *a, int n); static int bnBitLength(const BigNumT *a); static void bnClear(BigNumT *a); static int bnCmp(const BigNumT *a, const BigNumT *b); static void bnCopy(BigNumT *dst, const BigNumT *src); static void bnFromBytes(BigNumT *a, const uint8_t *buf); static void bnModExp(BigNumT *result, const BigNumT *base, const BigNumT *exp, const BigNumT *mod, uint32_t m0inv, const BigNumT *r2); static void bnMontMul(BigNumT *result, const BigNumT *a, const BigNumT *b, const BigNumT *mod, uint32_t m0inv); static void bnSet(BigNumT *a, uint32_t val); static int bnShiftLeft1(BigNumT *a); static int bnSub(BigNumT *result, const BigNumT *a, const BigNumT *b); static void bnToBytes(uint8_t *buf, const BigNumT *a); static uint32_t computeM0Inv(uint32_t m0); static void computeR2(BigNumT *r2, const BigNumT *m); static void dhInit(void); static void secureZero(void *ptr, int len); static void xteaEncryptBlock(uint32_t v[2], const uint32_t key[4]); // ======================================================================== // BigNum functions (alphabetical) // ======================================================================== static int __attribute__((unused)) bnAdd(BigNumT *result, const BigNumT *a, const BigNumT *b) { uint64_t carry = 0; for (int i = 0; i < BN_WORDS; i++) { uint64_t sum = (uint64_t)a->w[i] + b->w[i] + carry; result->w[i] = (uint32_t)sum; carry = sum >> 32; } return (int)carry; } 
// Returns bit n of a (0 or 1). n must be in [0, BN_BITS); it is not checked.
static int bnBit(const BigNumT *a, int n) {
    return (a->w[n / 32] >> (n % 32)) & 1;
}


// Returns the number of significant bits in a (0 when a is zero).
static int bnBitLength(const BigNumT *a) {
    for (int i = BN_WORDS - 1; i >= 0; i--) {
        uint32_t v = a->w[i];
        if (v == 0) {
            continue;
        }
        int bits = i * 32;
        while (v) {
            bits++;
            v >>= 1;
        }
        return bits;
    }
    return 0;
}


// Sets a to zero.
static void bnClear(BigNumT *a) {
    memset(a->w, 0, sizeof(a->w));
}


// Three-way compare: returns 1 if a > b, -1 if a < b, 0 if equal.
static int bnCmp(const BigNumT *a, const BigNumT *b) {
    // Scan from the most significant word; the first difference decides.
    for (int i = BN_WORDS - 1; i >= 0; i--) {
        if (a->w[i] > b->w[i]) {
            return 1;
        }
        if (a->w[i] < b->w[i]) {
            return -1;
        }
    }
    return 0;
}


// dst = src
static void bnCopy(BigNumT *dst, const BigNumT *src) {
    memcpy(dst->w, src->w, sizeof(dst->w));
}


// Loads a from BN_BYTES big-endian bytes at buf.
static void bnFromBytes(BigNumT *a, const uint8_t *buf) {
    for (int i = 0; i < BN_WORDS; i++) {
        // Word i (little-endian word order) sits (BN_WORDS - 1 - i) words
        // from the start of the big-endian byte string.
        const uint8_t *p = buf + (BN_WORDS - 1 - i) * 4;
        a->w[i] = ((uint32_t)p[0] << 24)
                | ((uint32_t)p[1] << 16)
                | ((uint32_t)p[2] << 8)
                |  (uint32_t)p[3];
    }
}


// result = base^exp mod mod, using Montgomery multiplication.
//
// m0inv must be -mod[0]^(-1) mod 2^32 and r2 must be R^2 mod mod with
// R = 2^1024 (see computeM0Inv / computeR2). An exponent of zero yields 1.
// result is only written at the end, so it may alias base or exp.
//
// NOTE: the loop branches on the exponent bits, so the running time depends
// on the exponent value.
static void bnModExp(BigNumT *result, const BigNumT *base, const BigNumT *exp, const BigNumT *mod, uint32_t m0inv, const BigNumT *r2) {
    BigNumT montBase;
    BigNumT montResult;
    BigNumT one;
    int     bits;
    bool    started;

    bnSet(&one, 1);

    // Convert base to Montgomery form: montBase = base * R mod m
    bnMontMul(&montBase, base, r2, mod, m0inv);

    // Initialize montResult to 1 in Montgomery form (= R mod m)
    bnMontMul(&montResult, &one, r2, mod, m0inv);

    // Left-to-right binary square-and-multiply. Squaring is skipped until
    // the first set bit has been consumed (the accumulator is still 1).
    bits    = bnBitLength(exp);
    started = false;

    for (int i = bits - 1; i >= 0; i--) {
        if (started) {
            bnMontMul(&montResult, &montResult, &montResult, mod, m0inv);
        }
        if (bnBit(exp, i)) {
            if (!started) {
                bnCopy(&montResult, &montBase);
                started = true;
            } else {
                bnMontMul(&montResult, &montResult, &montBase, mod, m0inv);
            }
        }
    }

    // Convert back from Montgomery form: result = montResult * 1 * R^(-1) mod m
    bnMontMul(result, &montResult, &one, mod, m0inv);

    // The temporaries are derived from the (possibly secret) exponent and
    // result; do not leave them on the stack.
    secureZero(&montBase, sizeof(montBase));
    secureZero(&montResult, sizeof(montResult));
}


// Montgomery product: result = a * b * R^(-1) mod mod, with R = 2^1024.
//
// Word-serial (CIOS) form: each outer iteration adds a[i] * b, then adds the
// multiple of mod that clears the low word and shifts right by one word.
// a and b are expected to be below mod (true for every caller in this file).
// result may alias a and/or b.
static void bnMontMul(BigNumT *result, const BigNumT *a, const BigNumT *b, const BigNumT *mod, uint32_t m0inv) {
    // Two guard words above the 1024-bit value: after step 1 the accumulator
    // can need one bit more than BN_WORDS + 1 words hold, so a single guard
    // word could silently wrap.
    uint32_t t[BN_WORDS + 2];
    uint32_t u;
    uint64_t carry;
    uint64_t prod;
    uint64_t sum;

    memset(t, 0, sizeof(t));

    for (int i = 0; i < BN_WORDS; i++) {
        // Step 1: t += a[i] * b
        carry = 0;
        for (int j = 0; j < BN_WORDS; j++) {
            prod  = (uint64_t)a->w[i] * b->w[j] + t[j] + carry;
            t[j]  = (uint32_t)prod;
            carry = prod >> 32;
        }
        sum             = (uint64_t)t[BN_WORDS] + carry;
        t[BN_WORDS]     = (uint32_t)sum;
        t[BN_WORDS + 1] = (uint32_t)(sum >> 32);

        // Step 2: Montgomery reduction factor
        u = t[0] * m0inv;

        // Step 3: t = (t + u * mod) >> 32
        // First word: result is zero by construction, take carry only
        prod  = (uint64_t)u * mod->w[0] + t[0];
        carry = prod >> 32;

        // Remaining words: store one position lower (the >> 32)
        for (int j = 1; j < BN_WORDS; j++) {
            prod     = (uint64_t)u * mod->w[j] + t[j] + carry;
            t[j - 1] = (uint32_t)prod;
            carry    = prod >> 32;
        }
        sum              = (uint64_t)t[BN_WORDS] + carry;
        t[BN_WORDS - 1]  = (uint32_t)sum;
        t[BN_WORDS]      = t[BN_WORDS + 1] + (uint32_t)(sum >> 32);
    }

    // Copy result
    memcpy(result->w, t, BN_WORDS * sizeof(uint32_t));

    // Conditional subtract if result >= mod
    if (t[BN_WORDS] || bnCmp(result, mod) >= 0) {
        bnSub(result, result, mod);
    }

    // The scratch accumulator still holds the unreduced product.
    secureZero(t, sizeof(t));
}


// a = val
static void bnSet(BigNumT *a, uint32_t val) {
    bnClear(a);
    a->w[0] = val;
}


// a <<= 1 in place. Returns the bit shifted out of the top word (0 or 1).
static int bnShiftLeft1(BigNumT *a) {
    uint32_t carry = 0;

    for (int i = 0; i < BN_WORDS; i++) {
        uint32_t topBit = a->w[i] >> 31;
        a->w[i] = (a->w[i] << 1) | carry;
        carry   = topBit;
    }
    return carry;
}


// result = a - b (mod 2^1024). Returns the borrow out of the top word
// (1 when a < b). result may alias a or b.
static int bnSub(BigNumT *result, const BigNumT *a, const BigNumT *b) {
    uint64_t borrow = 0;

    for (int i = 0; i < BN_WORDS; i++) {
        uint64_t diff = (uint64_t)a->w[i] - b->w[i] - borrow;
        result->w[i] = (uint32_t)diff;
        // The 64-bit difference wraps on underflow, which sets its top bit.
        borrow = (diff >> 63) & 1;
    }
    return (int)borrow;
}


// Stores a as BN_BYTES big-endian bytes at buf.
static void bnToBytes(uint8_t *buf, const BigNumT *a) {
    for (int i = 0; i < BN_WORDS; i++) {
        uint8_t  *p = buf + (BN_WORDS - 1 - i) * 4;
        uint32_t  w = a->w[i];
        p[0] = (uint8_t)(w >> 24);
        p[1] = (uint8_t)(w >> 16);
        p[2] = (uint8_t)(w >> 8);
        p[3] = (uint8_t)(w);
    }
}


// ========================================================================
// Helper functions (alphabetical)
======================================================================== static uint32_t computeM0Inv(uint32_t m0) { // Newton's method: compute m0^(-1) mod 2^32 // Converges quadratically: 1 → 2 → 4 → 8 → 16 → 32 correct bits uint32_t x = 1; for (int i = 0; i < 5; i++) { x = x * (2 - m0 * x); } // Return -m0^(-1) mod 2^32 return ~x + 1; } static void computeR2(BigNumT *r2, const BigNumT *m) { // Compute R^2 mod m where R = 2^1024 // Method: start with 1, double 2048 times, reduce mod m each step bnSet(r2, 1); for (int i = 0; i < 2 * BN_BITS; i++) { bnShiftLeft1(r2); if (bnCmp(r2, m) >= 0) { bnSub(r2, r2, m); } } } static void dhInit(void) { if (sDhInited) { return; } sDhM0Inv = computeM0Inv(sDhPrime.w[0]); computeR2(&sDhR2, &sDhPrime); sDhInited = true; } static void secureZero(void *ptr, int len) { // Volatile prevents the compiler from optimizing away the zeroing volatile uint8_t *p = (volatile uint8_t *)ptr; for (int i = 0; i < len; i++) { p[i] = 0; } } static void xteaEncryptBlock(uint32_t v[2], const uint32_t key[4]) { uint32_t v0 = v[0]; uint32_t v1 = v[1]; uint32_t sum = 0; for (int i = 0; i < XTEA_ROUNDS; i++) { v0 += (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + key[sum & 3]); sum += XTEA_DELTA; v1 += (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + key[(sum >> 11) & 3]); } v[0] = v0; v[1] = v1; } // ======================================================================== // RNG functions (alphabetical) // ======================================================================== void secRngAddEntropy(const uint8_t *data, int len) { // XOR additional entropy into the key for (int i = 0; i < len; i++) { ((uint8_t *)sRng.key)[i % 16] ^= data[i]; } // Re-mix: encrypt the key with itself uint32_t block[2]; block[0] = sRng.key[0] ^ sRng.key[2]; block[1] = sRng.key[1] ^ sRng.key[3]; xteaEncryptBlock(block, sRng.key); sRng.key[0] ^= block[0]; sRng.key[1] ^= block[1]; block[0] = sRng.key[2] ^ sRng.key[0]; block[1] = sRng.key[3] ^ sRng.key[1]; xteaEncryptBlock(block, sRng.key); 
sRng.key[2] ^= block[0]; sRng.key[3] ^= block[1]; } void secRngBytes(uint8_t *buf, int len) { // Auto-seed from hardware if never seeded if (!sRng.seeded) { uint8_t entropy[16]; int got = secRngGatherEntropy(entropy, sizeof(entropy)); secRngSeed(entropy, got); } uint32_t block[2]; int pos = 0; while (pos < len) { block[0] = sRng.counter[0]; block[1] = sRng.counter[1]; xteaEncryptBlock(block, sRng.key); int take = len - pos; if (take > 8) { take = 8; } memcpy(buf + pos, block, take); pos += take; // Increment counter if (++sRng.counter[0] == 0) { sRng.counter[1]++; } } } int secRngGatherEntropy(uint8_t *buf, int len) { int out = 0; // Read PIT channel 0 counter (1.193 MHz, ~10 bits of entropy in LSBs) outportb(0x43, 0x00); uint8_t pitLo = inportb(0x40); uint8_t pitHi = inportb(0x40); // BIOS tick count (18.2 Hz) uint32_t ticks = _farpeekl(_dos_ds, 0x46C); if (out < len) { buf[out++] = pitLo; } if (out < len) { buf[out++] = pitHi; } if (out < len) { buf[out++] = (uint8_t)(ticks); } if (out < len) { buf[out++] = (uint8_t)(ticks >> 8); } if (out < len) { buf[out++] = (uint8_t)(ticks >> 16); } if (out < len) { buf[out++] = (uint8_t)(ticks >> 24); } // Second PIT reading for jitter outportb(0x43, 0x00); pitLo = inportb(0x40); pitHi = inportb(0x40); if (out < len) { buf[out++] = pitLo; } if (out < len) { buf[out++] = pitHi; } return out; } void secRngSeed(const uint8_t *entropy, int len) { memset(&sRng, 0, sizeof(sRng)); // XOR-fold entropy into the key for (int i = 0; i < len; i++) { ((uint8_t *)sRng.key)[i % 16] ^= entropy[i]; } // Derive counter from key bits sRng.counter[0] = sRng.key[2] ^ sRng.key[0]; sRng.counter[1] = sRng.key[3] ^ sRng.key[1]; sRng.seeded = true; // Mix state by generating and discarding 64 bytes uint8_t discard[64]; sRng.seeded = true; // prevent recursion in secRngBytes secRngBytes(discard, sizeof(discard)); secureZero(discard, sizeof(discard)); } // ======================================================================== // DH functions 
(alphabetical) // ======================================================================== int secDhComputeSecret(SecDhT *dh, const uint8_t *remotePub, int len) { BigNumT remote; BigNumT two; if (!dh || !remotePub) { return SEC_ERR_PARAM; } if (len != SEC_DH_KEY_SIZE) { return SEC_ERR_PARAM; } if (!dh->hasKeys) { return SEC_ERR_NOT_READY; } dhInit(); bnFromBytes(&remote, remotePub); // Validate remote public key: must be in range [2, p-2] bnSet(&two, 2); if (bnCmp(&remote, &two) < 0 || bnCmp(&remote, &sDhPrime) >= 0) { secureZero(&remote, sizeof(remote)); return SEC_ERR_PARAM; } // shared = remote^private mod p bnModExp(&dh->sharedSecret, &remote, &dh->privateKey, &sDhPrime, sDhM0Inv, &sDhR2); dh->hasSecret = true; secureZero(&remote, sizeof(remote)); return SEC_SUCCESS; } SecDhT *secDhCreate(void) { SecDhT *dh = (SecDhT *)calloc(1, sizeof(SecDhT)); return dh; } int secDhDeriveKey(SecDhT *dh, uint8_t *key, int keyLen) { uint8_t secretBytes[BN_BYTES]; if (!dh || !key || keyLen <= 0) { return SEC_ERR_PARAM; } if (!dh->hasSecret) { return SEC_ERR_NOT_READY; } if (keyLen > BN_BYTES) { keyLen = BN_BYTES; } bnToBytes(secretBytes, &dh->sharedSecret); // XOR-fold 128-byte shared secret down to keyLen bytes memset(key, 0, keyLen); for (int i = 0; i < BN_BYTES; i++) { key[i % keyLen] ^= secretBytes[i]; } secureZero(secretBytes, sizeof(secretBytes)); return SEC_SUCCESS; } void secDhDestroy(SecDhT *dh) { if (dh) { secureZero(dh, sizeof(SecDhT)); free(dh); } } int secDhGenerateKeys(SecDhT *dh) { if (!dh) { return SEC_ERR_PARAM; } dhInit(); // Generate 256-bit random private key bnClear(&dh->privateKey); secRngBytes((uint8_t *)dh->privateKey.w, DH_PRIVATE_BYTES); // Ensure private key >= 2 if (bnBitLength(&dh->privateKey) <= 1) { dh->privateKey.w[0] = 2; } // public = g^private mod p bnModExp(&dh->publicKey, &sDhGenerator, &dh->privateKey, &sDhPrime, sDhM0Inv, &sDhR2); dh->hasKeys = true; dh->hasSecret = false; return SEC_SUCCESS; } int secDhGetPublicKey(SecDhT *dh, uint8_t 
*buf, int *len) { if (!dh || !buf || !len) { return SEC_ERR_PARAM; } if (*len < SEC_DH_KEY_SIZE) { return SEC_ERR_PARAM; } if (!dh->hasKeys) { return SEC_ERR_NOT_READY; } bnToBytes(buf, &dh->publicKey); *len = SEC_DH_KEY_SIZE; return SEC_SUCCESS; } // ======================================================================== // Cipher functions (alphabetical) // ======================================================================== SecCipherT *secCipherCreate(const uint8_t *key) { SecCipherT *c; if (!key) { return 0; } c = (SecCipherT *)calloc(1, sizeof(SecCipherT)); if (!c) { return 0; } memcpy(c->key, key, SEC_XTEA_KEY_SIZE); return c; } void secCipherCrypt(SecCipherT *c, uint8_t *data, int len) { uint32_t block[2]; uint8_t *keystream; int pos; int take; if (!c || !data || len <= 0) { return; } keystream = (uint8_t *)block; pos = 0; while (pos < len) { // Encrypt counter to generate keystream block[0] = c->counter[0]; block[1] = c->counter[1]; xteaEncryptBlock(block, c->key); // XOR keystream with data take = len - pos; if (take > 8) { take = 8; } for (int i = 0; i < take; i++) { data[pos + i] ^= keystream[i]; } pos += take; // Increment counter if (++c->counter[0] == 0) { c->counter[1]++; } } } void secCipherDestroy(SecCipherT *c) { if (c) { secureZero(c, sizeof(SecCipherT)); free(c); } } void secCipherSetNonce(SecCipherT *c, uint32_t nonceLo, uint32_t nonceHi) { if (!c) { return; } c->nonce[0] = nonceLo; c->nonce[1] = nonceHi; c->counter[0] = nonceLo; c->counter[1] = nonceHi; }