// DVX_GUI/security/security.c
// 689 lines, 18 KiB, C

// Security library: DH key exchange + XTEA-CTR cipher for DJGPP
//
// Diffie-Hellman uses the RFC 2409 Group 2 (1024-bit) safe prime with
// Montgomery multiplication for modular exponentiation. Private exponents
// are 256 bits for fast computation on 486-class hardware.
//
// XTEA in CTR mode provides symmetric encryption. No lookup tables,
// no key schedule — just shifts, adds, and XORs.
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <pc.h>
#include <sys/farptr.h>
#include <go32.h>
#include "security.h"
// ========================================================================
// Internal defines
// ========================================================================
// Width of every BigNumT in bits (the size of the DH modulus).
#define BN_BITS 1024
// Number of 32-bit words needed to hold BN_BITS bits.
#define BN_WORDS (BN_BITS / 32)
// Number of bytes needed to hold BN_BITS bits (serialized big-number size).
#define BN_BYTES (BN_BITS / 8)
// Size in bits of the random DH private exponent.
#define DH_PRIVATE_BITS 256
// Size in bytes of the random DH private exponent.
#define DH_PRIVATE_BYTES (DH_PRIVATE_BITS / 8)
// Number of iterations of the mixing loop in xteaEncryptBlock().
#define XTEA_ROUNDS 32
// Constant added to the running sum on every XTEA iteration.
#define XTEA_DELTA 0x9E3779B9
// ========================================================================
// Types
// ========================================================================
// Fixed-width unsigned big number made of BN_WORDS 32-bit words.
// Words are stored least-significant first: w[0] holds bits 0..31.
typedef struct {
uint32_t w[BN_WORDS];
} BigNumT;
// State of one Diffie-Hellman exchange.
// (Presumably exposed to callers as the opaque SecDhT via security.h.)
struct SecDhS {
BigNumT privateKey; // random exponent chosen by secDhGenerateKeys()
BigNumT publicKey; // generator^privateKey mod prime
BigNumT sharedSecret; // remote^privateKey mod prime, set by secDhComputeSecret()
bool hasKeys; // true once secDhGenerateKeys() has filled the key pair
bool hasSecret; // true once secDhComputeSecret() has filled sharedSecret
};
// State of one XTEA-CTR cipher stream.
// (Presumably exposed to callers as the opaque SecCipherT via security.h.)
struct SecCipherS {
uint32_t key[4]; // 128-bit XTEA key copied in by secCipherCreate()
uint32_t nonce[2]; // value last passed to secCipherSetNonce()
uint32_t counter[2]; // 64-bit block counter (low word first); starts at the nonce
};
// State of the XTEA-CTR based random number generator.
typedef struct {
uint32_t key[4]; // XTEA key built by XOR-folding entropy
uint32_t counter[2]; // 64-bit block counter (low word first), encrypted to produce output
bool seeded; // false until secRngSeed() has run; triggers auto-seeding in secRngBytes()
} RngStateT;
// ========================================================================
// Static globals
// ========================================================================
// RFC 2409 Group 2 (Second Oakley Group, 1024-bit MODP) prime, stored in
// little-endian word order: w[0] is the least-significant 32 bits.
//
//   p = 2^1024 - 2^960 - 1 + 2^64 * { [2^894 pi] + 129093 }
//
// Big-endian hex as published in RFC 2409 section 6.2:
//   FFFFFFFF FFFFFFFF C90FDAA2 2168C234 C4C6628B 80DC1CD1
//   29024E08 8A67CC74 020BBEA6 3B139B22 514A0879 8E3404DD
//   EF9519B3 CD3A431B 302B0A6D F25F1437 4FE1356D 6D51C245
//   E485B576 625E7EC6 F44C42E9 A637ED6B 0BFF5CB6 F406B7ED
//   EE386BFB 5A899FA5 AE9F2411 7C4B1FE6 49286651 ECE65381
//   FFFFFFFF FFFFFFFF
//
// The table below is exactly that value with the word order reversed. Both
// the top and the bottom 64 bits are all ones, as for every Oakley MODP prime.
static const BigNumT sDhPrime = { .w = {
    0xFFFFFFFF, 0xFFFFFFFF, 0xECE65381, 0x49286651,
    0x7C4B1FE6, 0xAE9F2411, 0x5A899FA5, 0xEE386BFB,
    0xF406B7ED, 0x0BFF5CB6, 0xA637ED6B, 0xF44C42E9,
    0x625E7EC6, 0xE485B576, 0x6D51C245, 0x4FE1356D,
    0xF25F1437, 0x302B0A6D, 0xCD3A431B, 0xEF9519B3,
    0x8E3404DD, 0x514A0879, 0x3B139B22, 0x020BBEA6,
    0x8A67CC74, 0x29024E08, 0x80DC1CD1, 0xC4C6628B,
    0x2168C234, 0xC90FDAA2, 0xFFFFFFFF, 0xFFFFFFFF
}};
// Generator g = 2 (only w[0] is set; the remaining words are zero-initialized)
static const BigNumT sDhGenerator = { .w = { 2 } };
// Montgomery constants (computed lazily by dhInit() on first DH use)
static BigNumT sDhR2; // R^2 mod p, with R = 2^BN_BITS; filled by computeR2()
static uint32_t sDhM0Inv; // -p[0]^(-1) mod 2^32; filled by computeM0Inv()
static bool sDhInited = false; // set once the two constants above are valid
// RNG state (unseeded until secRngSeed() runs, directly or via secRngBytes())
static RngStateT sRng = { .seeded = false };
// ========================================================================
// Static prototypes (alphabetical)
// ========================================================================
// Forward declarations of every file-local helper, so the definitions below
// can be kept in alphabetical order regardless of call order.
// --- Big-number arithmetic on BigNumT ---
static int bnAdd(BigNumT *result, const BigNumT *a, const BigNumT *b);
static int bnBit(const BigNumT *a, int n);
static int bnBitLength(const BigNumT *a);
static void bnClear(BigNumT *a);
static int bnCmp(const BigNumT *a, const BigNumT *b);
static void bnCopy(BigNumT *dst, const BigNumT *src);
static void bnFromBytes(BigNumT *a, const uint8_t *buf);
static void bnModExp(BigNumT *result, const BigNumT *base, const BigNumT *exp, const BigNumT *mod, uint32_t m0inv, const BigNumT *r2);
static void bnMontMul(BigNumT *result, const BigNumT *a, const BigNumT *b, const BigNumT *mod, uint32_t m0inv);
static void bnSet(BigNumT *a, uint32_t val);
static int bnShiftLeft1(BigNumT *a);
static int bnSub(BigNumT *result, const BigNumT *a, const BigNumT *b);
static void bnToBytes(uint8_t *buf, const BigNumT *a);
// --- Montgomery setup, memory wiping and the XTEA block primitive ---
static uint32_t computeM0Inv(uint32_t m0);
static void computeR2(BigNumT *r2, const BigNumT *m);
static void dhInit(void);
static void secureZero(void *ptr, int len);
static void xteaEncryptBlock(uint32_t v[2], const uint32_t key[4]);
// ========================================================================
// BigNum functions (alphabetical)
// ========================================================================
// result = a + b, truncated to BN_BITS bits.
// Returns the carry out of the most-significant word (0 or 1).
// Each word of a and b is read before the matching word of result is
// written, so result may be the same object as a or b.
static int __attribute__((unused)) bnAdd(BigNumT *result, const BigNumT *a, const BigNumT *b) {
    uint64_t acc = 0;
    int idx = 0;

    while (idx < BN_WORDS) {
        // acc holds the incoming carry (0 or 1) at this point
        acc += (uint64_t)a->w[idx];
        acc += b->w[idx];
        result->w[idx] = (uint32_t)acc;
        acc >>= 32;
        idx++;
    }

    return (int)acc;
}
// Returns bit n of a (0 or 1); bit 0 is the least-significant bit of w[0].
// No range check is performed: n must lie in [0, BN_BITS).
static int bnBit(const BigNumT *a, int n) {
    const uint32_t word = a->w[n / 32];
    const int shift = n % 32;

    return (int)((word >> shift) & 1u);
}
// Returns the number of significant bits in a, i.e. the index of the highest
// set bit plus one. Returns 0 when a is zero.
static int bnBitLength(const BigNumT *a) {
    int top = BN_WORDS - 1;

    // Skip leading zero words
    while (top >= 0 && a->w[top] == 0) {
        top--;
    }
    if (top < 0) {
        return 0;
    }

    // All lower words count fully; add the used bits of the top word
    int length = top * 32;
    for (uint32_t rest = a->w[top]; rest != 0; rest >>= 1) {
        length++;
    }
    return length;
}
// Sets every word of a to zero.
static void bnClear(BigNumT *a) {
    for (int i = 0; i < BN_WORDS; i++) {
        a->w[i] = 0;
    }
}
// Three-way unsigned comparison.
// Returns 1 if a > b, -1 if a < b, and 0 if they are equal.
static int bnCmp(const BigNumT *a, const BigNumT *b) {
    int idx = BN_WORDS;

    // Scan from the most-significant word; the first difference decides
    while (idx-- > 0) {
        const uint32_t lhs = a->w[idx];
        const uint32_t rhs = b->w[idx];
        if (lhs != rhs) {
            return (lhs > rhs) ? 1 : -1;
        }
    }
    return 0;
}
// Copies all words of src into dst.
static void bnCopy(BigNumT *dst, const BigNumT *src) {
    *dst = *src;
}
// Loads a big number from BN_BYTES bytes in big-endian order
// (buf[0] is the most-significant byte). buf must hold BN_BYTES bytes.
static void bnFromBytes(BigNumT *a, const uint8_t *buf) {
    // Walk the buffer backwards in 4-byte groups: the last group is w[0]
    const uint8_t *src = buf + BN_BYTES;

    for (int word = 0; word < BN_WORDS; word++) {
        uint32_t value = 0;

        src -= 4;
        for (int k = 0; k < 4; k++) {
            value = (value << 8) | src[k];
        }
        a->w[word] = value;
    }
}
// Modular exponentiation: result = base^exp mod mod.
//
// Uses left-to-right binary square-and-multiply on Montgomery-form values.
//   base   - value to raise; passed straight to bnMontMul(), so it is
//            expected to be already reduced (< mod)
//   exp    - exponent; exp == 0 yields result == 1
//   mod    - modulus (must be odd for Montgomery reduction to work)
//   m0inv  - -mod->w[0]^(-1) mod 2^32, as produced by computeM0Inv()
//   r2     - R^2 mod mod with R = 2^BN_BITS, as produced by computeR2()
//
// NOTE: the multiply step runs only for set exponent bits, so running time
// depends on the exponent value (not constant-time).
//
// The Montgomery-form temporaries are derived from the (possibly secret)
// exponent and result, so they are wiped before returning.
static void bnModExp(BigNumT *result, const BigNumT *base, const BigNumT *exp, const BigNumT *mod, uint32_t m0inv, const BigNumT *r2) {
    BigNumT montBase;
    BigNumT montResult;
    BigNumT one;
    bool started = false;

    // bnMontMul never writes through its const operands, so a single
    // constant 1 serves both conversions below
    bnClear(&one);
    one.w[0] = 1;

    // Convert base to Montgomery form: montBase = base * R mod m
    bnMontMul(&montBase, base, r2, mod, m0inv);

    // 1 in Montgomery form (= R mod m); this is the answer when exp == 0
    bnMontMul(&montResult, &one, r2, mod, m0inv);

    for (int i = bnBitLength(exp) - 1; i >= 0; i--) {
        // Squaring is skipped until the first set bit has been consumed
        if (started) {
            bnMontMul(&montResult, &montResult, &montResult, mod, m0inv);
        }
        if (bnBit(exp, i)) {
            if (!started) {
                // The top set bit just loads the base (saves one multiplication)
                bnCopy(&montResult, &montBase);
                started = true;
            } else {
                bnMontMul(&montResult, &montResult, &montBase, mod, m0inv);
            }
        }
    }

    // Convert back from Montgomery form: result = montResult * 1 * R^(-1) mod m
    bnMontMul(result, &montResult, &one, mod, m0inv);

    // Do not leave key-dependent intermediates on the stack
    secureZero(&montBase, sizeof(montBase));
    secureZero(&montResult, sizeof(montResult));
}
// Montgomery multiplication: result = a * b * R^(-1) mod mod, R = 2^BN_BITS.
//
// Word-serial CIOS (coarsely integrated operand scanning): for every word of
// a, one multiply-accumulate pass is followed by one reduction pass.
//   a, b   - operands, expected to be < mod
//   mod    - odd modulus
//   m0inv  - -mod->w[0]^(-1) mod 2^32
// result is written only after all reads are done, so it may be the same
// object as a and/or b.
//
// The accumulator has BN_WORDS + 2 words. Between iterations t < 2 * mod, so
// t[BN_WORDS] is 0 or 1; adding a[i] * b on top of that can carry out of
// t[BN_WORDS] when mod is close to 2^BN_BITS, and that carry must be kept in
// an extra word until the following right shift absorbs it.
static void bnMontMul(BigNumT *result, const BigNumT *a, const BigNumT *b, const BigNumT *mod, uint32_t m0inv) {
    uint32_t t[BN_WORDS + 2];

    memset(t, 0, sizeof(t));

    for (int i = 0; i < BN_WORDS; i++) {
        const uint32_t ai = a->w[i];
        uint64_t carry = 0;
        uint64_t acc;

        // Step 1: t += a[i] * b
        for (int j = 0; j < BN_WORDS; j++) {
            acc = (uint64_t)ai * b->w[j] + t[j] + carry;
            t[j] = (uint32_t)acc;
            carry = acc >> 32;
        }
        acc = (uint64_t)t[BN_WORDS] + carry;
        t[BN_WORDS] = (uint32_t)acc;
        t[BN_WORDS + 1] = (uint32_t)(acc >> 32);

        // Step 2: reduction factor u, chosen so that t + u * mod has a zero low word
        const uint32_t u = t[0] * m0inv;

        // Step 3: t = (t + u * mod) >> 32
        // Low word is zero by construction; only its carry is needed
        acc = (uint64_t)u * mod->w[0] + t[0];
        carry = acc >> 32;
        // Remaining words are stored one position lower (the >> 32)
        for (int j = 1; j < BN_WORDS; j++) {
            acc = (uint64_t)u * mod->w[j] + t[j] + carry;
            t[j - 1] = (uint32_t)acc;
            carry = acc >> 32;
        }
        acc = (uint64_t)t[BN_WORDS] + carry;
        t[BN_WORDS - 1] = (uint32_t)acc;
        // The shifted value is < 2 * mod, so this sum is 0 or 1
        t[BN_WORDS] = t[BN_WORDS + 1] + (uint32_t)(acc >> 32);
        t[BN_WORDS + 1] = 0;
    }

    memcpy(result->w, t, BN_WORDS * sizeof(uint32_t));

    // Final conditional subtraction brings the value into [0, mod)
    if (t[BN_WORDS] || bnCmp(result, mod) >= 0) {
        bnSub(result, result, mod);
    }

    // The accumulator holds the (possibly secret) product
    secureZero(t, sizeof(t));
}
// Sets a to the small value val: w[0] = val, all higher words zero.
static void bnSet(BigNumT *a, uint32_t val) {
    a->w[0] = val;
    for (int i = 1; i < BN_WORDS; i++) {
        a->w[i] = 0;
    }
}
// Shifts a left by one bit in place (a = 2 * a, truncated to BN_BITS bits).
// Returns the bit shifted out of the most-significant word (0 or 1).
static int bnShiftLeft1(BigNumT *a) {
    uint32_t carryIn = 0;

    for (int i = 0; i < BN_WORDS; i++) {
        const uint32_t word = a->w[i];

        a->w[i] = (word << 1) | carryIn;
        // Top bit of this word becomes the bottom bit of the next one
        carryIn = word >> 31;
    }
    return carryIn;
}
// result = a - b, truncated to BN_BITS bits (wraps around when a < b).
// Returns the final borrow: 1 if a < b, otherwise 0.
// Each word of a and b is read before the matching word of result is
// written, so result may be the same object as a or b.
static int bnSub(BigNumT *result, const BigNumT *a, const BigNumT *b) {
    uint32_t borrow = 0;

    for (int i = 0; i < BN_WORDS; i++) {
        const uint32_t x = a->w[i];
        const uint32_t y = b->w[i];
        const uint32_t diff = x - y - borrow;

        // A borrow leaves this word exactly when x < y + borrow
        borrow = (x < y || (x == y && borrow != 0)) ? 1u : 0u;
        result->w[i] = diff;
    }
    return (int)borrow;
}
static void bnToBytes(uint8_t *buf, const BigNumT *a) {
for (int i = 0; i < BN_WORDS; i++) {
int j = (BN_WORDS - 1 - i) * 4;
uint32_t w = a->w[i];
buf[j] = (uint8_t)(w >> 24);
buf[j + 1] = (uint8_t)(w >> 16);
buf[j + 2] = (uint8_t)(w >> 8);
buf[j + 3] = (uint8_t)(w);
}
}
// ========================================================================
// Helper functions (alphabetical)
// ========================================================================
// Returns -m0^(-1) mod 2^32, the per-word constant used by Montgomery
// reduction. m0 must be odd, otherwise no inverse exists.
//
// The inverse is found by Newton iteration x <- x * (2 - m0 * x). The start
// value 1 is correct to one bit for any odd m0 and every step doubles the
// number of correct low bits, so five steps reach 32 bits.
static uint32_t computeM0Inv(uint32_t m0) {
    uint32_t inv = 1;

    for (int step = 5; step > 0; step--) {
        const uint32_t correction = 2u - m0 * inv;
        inv *= correction;
    }

    // Two's-complement negation: -inv mod 2^32
    return 0u - inv;
}
// Computes r2 = R^2 mod m, where R = 2^BN_BITS.
// Starts from 1 and doubles 2 * BN_BITS times, reducing after every doubling
// so that the value always stays below m.
static void computeR2(BigNumT *r2, const BigNumT *m) {
    int remaining = 2 * BN_BITS;

    bnSet(r2, 1);
    while (remaining > 0) {
        // A bit shifted out of the top means the true value exceeds m as well
        const bool overflowed = (bnShiftLeft1(r2) != 0);

        if (overflowed || bnCmp(r2, m) >= 0) {
            bnSub(r2, r2, m);
        }
        remaining--;
    }
}
// Computes the Montgomery constants for sDhPrime on the first call; later
// calls do nothing.
static void dhInit(void) {
    if (!sDhInited) {
        sDhM0Inv = computeM0Inv(sDhPrime.w[0]);
        computeR2(&sDhR2, &sDhPrime);
        sDhInited = true;
    }
}
// Overwrites len bytes starting at ptr with zero. Does nothing if len <= 0.
// The stores go through a volatile pointer so the compiler cannot drop them
// as dead writes to memory that is about to go out of scope or be freed.
static void secureZero(void *ptr, int len) {
    volatile uint8_t *cursor = (volatile uint8_t *)ptr;

    for (int left = len; left > 0; left--) {
        *cursor++ = 0;
    }
}
// Encrypts one 64-bit block in place with XTEA.
//   v   - the two 32-bit halves of the block; overwritten with the ciphertext
//   key - 128-bit key as four 32-bit words
// Runs XTEA_ROUNDS iterations; each one updates both halves, with the running
// sum selecting which key word is mixed in.
static void xteaEncryptBlock(uint32_t v[2], const uint32_t key[4]) {
    uint32_t left = v[0];
    uint32_t right = v[1];
    uint32_t sum = 0;

    for (int round = XTEA_ROUNDS; round > 0; round--) {
        const uint32_t mixRight = ((right << 4) ^ (right >> 5)) + right;
        left += mixRight ^ (sum + key[sum & 3]);

        sum += XTEA_DELTA;

        const uint32_t mixLeft = ((left << 4) ^ (left >> 5)) + left;
        right += mixLeft ^ (sum + key[(sum >> 11) & 3]);
    }

    v[0] = left;
    v[1] = right;
}
// ========================================================================
// RNG functions (alphabetical)
// ========================================================================
// Mixes len bytes of caller-supplied entropy into the RNG key.
// The bytes are XOR-folded over the 16 key bytes, then each 64-bit half of
// the key is XORed with an XTEA encryption (under the current key) of the
// XOR of both halves. The counter and the seeded flag are not touched.
void secRngAddEntropy(const uint8_t *data, int len) {
    uint8_t *keyBytes = (uint8_t *)sRng.key;
    uint32_t mix[2];

    for (int i = 0; i < len; i++) {
        keyBytes[i & 15] ^= data[i];
    }

    // Re-mix: first key words 0..1, then key words 2..3 (which therefore
    // already see the updated first half)
    for (int half = 0; half < 4; half += 2) {
        const int other = half ^ 2;

        mix[0] = sRng.key[half] ^ sRng.key[other];
        mix[1] = sRng.key[half + 1] ^ sRng.key[other + 1];
        xteaEncryptBlock(mix, sRng.key);
        sRng.key[half] ^= mix[0];
        sRng.key[half + 1] ^= mix[1];
    }
}
// Fills buf with len pseudo-random bytes.
//
// Output is XTEA in counter mode: the 64-bit RNG counter is encrypted under
// the RNG key, up to 8 bytes of the resulting block (in its in-memory byte
// order) are copied out, and the counter is incremented. If the generator
// has never been seeded, it is first seeded from secRngGatherEntropy().
//
// A NULL buf is ignored, and len <= 0 produces no output. The seed entropy
// and the last keystream block are wiped from the stack, because the output
// is used for key material such as DH private exponents.
void secRngBytes(uint8_t *buf, int len) {
    uint32_t block[2];
    int pos = 0;

    // Auto-seed from hardware if never seeded
    if (!sRng.seeded) {
        uint8_t entropy[16];
        int got = secRngGatherEntropy(entropy, sizeof(entropy));
        secRngSeed(entropy, got);
        secureZero(entropy, sizeof(entropy));
    }

    if (!buf) {
        return;
    }

    while (pos < len) {
        block[0] = sRng.counter[0];
        block[1] = sRng.counter[1];
        xteaEncryptBlock(block, sRng.key);

        int take = len - pos;
        if (take > 8) {
            take = 8;
        }
        memcpy(buf + pos, block, take);
        pos += take;

        // 64-bit increment: carry into the high word when the low word wraps
        if (++sRng.counter[0] == 0) {
            sRng.counter[1]++;
        }
    }

    // The last block is a copy of bytes just handed to the caller
    secureZero(block, sizeof(block));
}
// Collects timer-based entropy bytes from the PC hardware into buf.
//
// Up to 8 bytes are produced, in this order: first PIT reading (low, high),
// the four bytes of the BIOS tick count (least-significant first), second PIT
// reading (low, high). Output stops early once len bytes have been written.
// Returns the number of bytes stored (at most 8, never more than len).
int secRngGatherEntropy(uint8_t *buf, int len) {
int out = 0;
// Read PIT channel 0 counter (1.193 MHz, ~10 bits of entropy in LSBs)
// Control word 0x00 goes to port 0x43 first, then the count is read from
// port 0x40 as two bytes, low byte first
outportb(0x43, 0x00);
uint8_t pitLo = inportb(0x40);
uint8_t pitHi = inportb(0x40);
// BIOS tick count (18.2 Hz)
// Read as a 32-bit value at linear address 0x46C through the _dos_ds selector
uint32_t ticks = _farpeekl(_dos_ds, 0x46C);
if (out < len) { buf[out++] = pitLo; }
if (out < len) { buf[out++] = pitHi; }
if (out < len) { buf[out++] = (uint8_t)(ticks); }
if (out < len) { buf[out++] = (uint8_t)(ticks >> 8); }
if (out < len) { buf[out++] = (uint8_t)(ticks >> 16); }
if (out < len) { buf[out++] = (uint8_t)(ticks >> 24); }
// Second PIT reading for jitter
// (taken after the work above, so it differs from the first by the time elapsed)
outportb(0x43, 0x00);
pitLo = inportb(0x40);
pitHi = inportb(0x40);
if (out < len) { buf[out++] = pitLo; }
if (out < len) { buf[out++] = pitHi; }
return out;
}
// Resets the RNG and seeds it from len bytes of entropy.
// The whole state is zeroed, the entropy is XOR-folded over the 16 key
// bytes, the counter is derived from the key words, and 64 output bytes are
// then generated and thrown away (advancing the counter by eight blocks).
void secRngSeed(const uint8_t *entropy, int len) {
    uint8_t scratch[64];
    uint8_t *keyBytes;

    memset(&sRng, 0, sizeof(sRng));

    keyBytes = (uint8_t *)sRng.key;
    for (int i = 0; i < len; i++) {
        keyBytes[i & 15] ^= entropy[i];
    }

    // Starting counter = XOR of the two key halves
    sRng.counter[0] = sRng.key[0] ^ sRng.key[2];
    sRng.counter[1] = sRng.key[1] ^ sRng.key[3];

    // Must be set before calling secRngBytes(): an unseeded generator would
    // auto-seed there and call back into this function
    sRng.seeded = true;

    secRngBytes(scratch, sizeof(scratch));
    secureZero(scratch, sizeof(scratch));
}
// ========================================================================
// DH functions (alphabetical)
// ========================================================================
// Computes the DH shared secret from the peer's public key.
//
//   dh        - exchange state; secDhGenerateKeys() must have been called
//   remotePub - peer public key, SEC_DH_KEY_SIZE bytes, big-endian
//   len       - length of remotePub; must equal SEC_DH_KEY_SIZE
//
// Returns SEC_SUCCESS and stores remote^private mod p in dh->sharedSecret.
// Returns SEC_ERR_PARAM for NULL arguments, a wrong length, or a public key
// outside [2, p-2]. Returns SEC_ERR_NOT_READY if no key pair exists yet.
//
// The range check rejects 0, 1 and p-1 (and anything >= p). Those values
// would force the shared secret to 0, 1, or one of {1, p-1}, regardless of
// the local private key.
int secDhComputeSecret(SecDhT *dh, const uint8_t *remotePub, int len) {
    BigNumT remote;
    BigNumT two;
    BigNumT pMinus1;

    if (!dh || !remotePub) {
        return SEC_ERR_PARAM;
    }
    if (len != SEC_DH_KEY_SIZE) {
        return SEC_ERR_PARAM;
    }
    if (!dh->hasKeys) {
        return SEC_ERR_NOT_READY;
    }

    dhInit();
    bnFromBytes(&remote, remotePub);

    // Validate remote public key: must be in range [2, p-2]
    bnSet(&two, 2);
    // p is odd, so its low word is non-zero and p - 1 needs no borrow
    pMinus1 = sDhPrime;
    pMinus1.w[0] -= 1;
    if (bnCmp(&remote, &two) < 0 || bnCmp(&remote, &pMinus1) >= 0) {
        secureZero(&remote, sizeof(remote));
        return SEC_ERR_PARAM;
    }

    // shared = remote^private mod p
    bnModExp(&dh->sharedSecret, &remote, &dh->privateKey, &sDhPrime, sDhM0Inv, &sDhR2);
    dh->hasSecret = true;

    secureZero(&remote, sizeof(remote));
    return SEC_SUCCESS;
}
// Allocates a zero-initialized DH state (no keys, no secret).
// Returns NULL if the allocation fails. Release with secDhDestroy().
SecDhT *secDhCreate(void) {
    return calloc(1, sizeof(SecDhT));
}
// Derives a symmetric key from the shared secret by XOR-folding its
// BN_BYTES big-endian bytes into the output buffer.
//
//   dh     - exchange state; secDhComputeSecret() must have succeeded
//   key    - output buffer
//   keyLen - requested key length in bytes; must be > 0
//
// If keyLen exceeds BN_BYTES, only the first BN_BYTES bytes of key are
// written. Returns SEC_SUCCESS, SEC_ERR_PARAM for NULL arguments or
// keyLen <= 0, or SEC_ERR_NOT_READY if no shared secret exists yet.
int secDhDeriveKey(SecDhT *dh, uint8_t *key, int keyLen) {
    uint8_t raw[BN_BYTES];
    int slot = 0;

    if (dh == NULL || key == NULL || keyLen <= 0) {
        return SEC_ERR_PARAM;
    }
    if (!dh->hasSecret) {
        return SEC_ERR_NOT_READY;
    }

    const int outLen = (keyLen > BN_BYTES) ? BN_BYTES : keyLen;

    bnToBytes(raw, &dh->sharedSecret);

    // Secret byte i lands on output byte i mod outLen
    memset(key, 0, outLen);
    for (int i = 0; i < BN_BYTES; i++) {
        key[slot] ^= raw[i];
        if (++slot == outLen) {
            slot = 0;
        }
    }

    secureZero(raw, sizeof(raw));
    return SEC_SUCCESS;
}
// Wipes and frees a DH state created by secDhCreate(). NULL is ignored.
void secDhDestroy(SecDhT *dh) {
    if (!dh) {
        return;
    }
    // Erase the private key and shared secret before releasing the memory
    secureZero(dh, sizeof(*dh));
    free(dh);
}
// Generates a fresh DH key pair in dh.
// The private key is DH_PRIVATE_BYTES random bytes from secRngBytes(), placed
// in the low words of an otherwise zero big number. The public key is
// g^private mod p. Any earlier shared secret is marked invalid.
// Returns SEC_SUCCESS, or SEC_ERR_PARAM if dh is NULL.
int secDhGenerateKeys(SecDhT *dh) {
    if (!dh) {
        return SEC_ERR_PARAM;
    }

    dhInit();

    BigNumT *priv = &dh->privateKey;
    bnClear(priv);
    secRngBytes((uint8_t *)priv->w, DH_PRIVATE_BYTES);

    // Exponents 0 and 1 are replaced by 2
    if (bnBitLength(priv) < 2) {
        priv->w[0] = 2;
    }

    // public = g^private mod p
    bnModExp(&dh->publicKey, &sDhGenerator, priv, &sDhPrime, sDhM0Inv, &sDhR2);

    dh->hasSecret = false;
    dh->hasKeys = true;
    return SEC_SUCCESS;
}
// Writes the local public key to buf as SEC_DH_KEY_SIZE big-endian bytes.
//
//   dh  - exchange state; secDhGenerateKeys() must have been called
//   buf - output buffer
//   len - in: capacity of buf; out: number of bytes written
//
// Returns SEC_SUCCESS, SEC_ERR_PARAM for NULL arguments or a buffer smaller
// than SEC_DH_KEY_SIZE, or SEC_ERR_NOT_READY if no key pair exists yet.
int secDhGetPublicKey(SecDhT *dh, uint8_t *buf, int *len) {
    if (dh == NULL || buf == NULL || len == NULL || *len < SEC_DH_KEY_SIZE) {
        return SEC_ERR_PARAM;
    }
    if (!dh->hasKeys) {
        return SEC_ERR_NOT_READY;
    }

    *len = SEC_DH_KEY_SIZE;
    bnToBytes(buf, &dh->publicKey);
    return SEC_SUCCESS;
}
// ========================================================================
// Cipher functions (alphabetical)
// ========================================================================
// Creates an XTEA-CTR cipher context from a SEC_XTEA_KEY_SIZE-byte key.
// Nonce and counter start at zero. Returns NULL if key is NULL or the
// allocation fails. Release with secCipherDestroy().
SecCipherT *secCipherCreate(const uint8_t *key) {
    SecCipherT *cipher = NULL;

    if (key) {
        cipher = calloc(1, sizeof(*cipher));
        if (cipher) {
            memcpy(cipher->key, key, SEC_XTEA_KEY_SIZE);
        }
    }
    return cipher;
}
// Encrypts or decrypts len bytes of data in place (the same operation in CTR
// mode). For every 8 bytes, or the shorter final piece, the current counter
// is encrypted with XTEA to form the keystream, the keystream is XORed into
// the data, and the 64-bit counter is incremented. A partial final block
// still consumes a whole counter value.
// Does nothing if c or data is NULL or len <= 0.
void secCipherCrypt(SecCipherT *c, uint8_t *data, int len) {
    if (!c || !data || len <= 0) {
        return;
    }

    uint8_t *cursor = data;
    int remaining = len;

    while (remaining > 0) {
        // Keystream block = XTEA(counter)
        uint32_t block[2] = { c->counter[0], c->counter[1] };
        xteaEncryptBlock(block, c->key);

        const uint8_t *keystream = (const uint8_t *)block;
        const int chunk = (remaining < 8) ? remaining : 8;

        for (int i = 0; i < chunk; i++) {
            cursor[i] ^= keystream[i];
        }
        cursor += chunk;
        remaining -= chunk;

        // 64-bit increment: carry into the high word when the low word wraps
        c->counter[0]++;
        if (c->counter[0] == 0) {
            c->counter[1]++;
        }
    }
}
// Wipes and frees a cipher context created by secCipherCreate().
// NULL is ignored.
void secCipherDestroy(SecCipherT *c) {
    if (!c) {
        return;
    }
    // Erase the key before releasing the memory
    secureZero(c, sizeof(*c));
    free(c);
}
// Stores the 64-bit nonce (low word, high word) and restarts the block
// counter at that value. Does nothing if c is NULL.
void secCipherSetNonce(SecCipherT *c, uint32_t nonceLo, uint32_t nonceHi) {
    if (c) {
        const uint32_t words[2] = { nonceLo, nonceHi };

        for (int i = 0; i < 2; i++) {
            c->nonce[i] = words[i];
            c->counter[i] = words[i];
        }
    }
}