singe/thirdparty/uthash/tests/hashscan.c
2023-10-23 19:38:18 -05:00

678 lines
22 KiB
C

/*
Copyright (c) 2005-2022, Troy D. Hanson https://troydhanson.github.io/uthash/
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <inttypes.h>
#include <sys/types.h> /* on OSX, must come before ptrace.h */
#include <sys/ptrace.h>
#include <unistd.h>
#include <sys/wait.h>
#include <assert.h>
#ifdef __FreeBSD__
#include <sys/param.h> /* MAXPATHLEN */
#include <vm/vm.h> /* VM_PROT_* flags */
#endif
#if defined(PT_ATTACH) && !defined(PTRACE_ATTACH)
#define PTRACE_ATTACH PT_ATTACH
#define PTRACE_DETACH PT_DETACH
#endif
/* need this defined so offsetof can give us bloom offsets in UT_hash_table */
#define HASH_BLOOM 16
#include "uthash.h"
#ifdef __FreeBSD__
typedef struct {
void *start;
void *end;
} vma_t;
#else
typedef struct {
off_t start;
off_t end;
char perms[4]; /* rwxp */
char device[5]; /* fd:01 or 00:00 */
} vma_t;
#endif
const uint32_t sig = HASH_SIGNATURE;
int verbose=0;
int getkeys=0;
#define vv(...) do {if (verbose>0) printf(__VA_ARGS__);} while(0)
#define vvv(...) do {if (verbose>1) printf(__VA_ARGS__);} while(0)
/* these id's are arbitrary, only meaningful within this file */
#define JEN 1
#define BER 2
#define SFH 3
#define SAX 4
#define FNV 5
#define OAT 6
#define NUM_HASH_FUNCS 7 /* includes id 0, the non-function */
const char *hash_fcns[] = {"???","JEN","BER","SFH","SAX","FNV","OAT"};
/* given a peer key/len/hashv, reverse engineer its hash function */
static int infer_hash_function(char *key, size_t keylen, uint32_t hashv)
{
uint32_t ohashv;
/* BER SAX FNV OAT JEN SFH */
HASH_JEN(key,keylen,ohashv);
if (ohashv == hashv) {
return JEN;
}
HASH_BER(key,keylen,ohashv);
if (ohashv == hashv) {
return BER;
}
HASH_SFH(key,keylen,ohashv);
if (ohashv == hashv) {
return SFH;
}
HASH_SAX(key,keylen,ohashv);
if (ohashv == hashv) {
return SAX;
}
HASH_FNV(key,keylen,ohashv);
if (ohashv == hashv) {
return FNV;
}
HASH_OAT(key,keylen,ohashv);
if (ohashv == hashv) {
return OAT;
}
return 0;
}
/* read peer's memory from addr for len bytes, store into our dst */
#ifdef __FreeBSD__
static int read_mem(void *dst, pid_t pid, void *start, size_t len)
{
struct ptrace_io_desc io_desc;
int ret;
io_desc.piod_op = PIOD_READ_D;
io_desc.piod_offs = start;
io_desc.piod_addr = dst;
io_desc.piod_len = len;
ret = ptrace(PT_IO, pid, (void *) &io_desc, 0);
if (ret) {
vv("read_mem: ptrace failed: %s\n", strerror(errno));
return -1;
} else if (io_desc.piod_len != len) {
vv("read_mem: short read!\n");
return -1;
}
return 0;
}
#else
static int read_mem(void *dst, int fd, off_t start, size_t len)
{
int rc;
size_t bytes_read=0;
if (lseek(fd, start, SEEK_SET) == (off_t)-1) {
fprintf(stderr, "lseek failed: %s\n", strerror(errno));
return -1;
}
while ( len && ((rc=read(fd, (char*)dst+bytes_read, len)) > 0)) {
len -= rc;
bytes_read += rc;
}
if (rc==-1) {
vv("read_mem failed (%s)\n",strerror(errno));
}
if ((len != 0 && rc >= 0)) {
vv("INTERNAL ERROR\n");
}
return (rc == -1) ? -1 : 0;
}
#endif
/* later compensate for possible presence of bloom filter */
static char *tbl_from_sig_addr(char *sig)
{
return (sig - offsetof(UT_hash_table,signature));
}
#define HS_BIT_TEST(v,i) (v[i/8] & (1U << (i%8)))
static void found(int fd, char* peer_sig, pid_t pid)
{
UT_hash_table *tbl=NULL;
UT_hash_bucket *bkts=NULL;
UT_hash_handle hh;
size_t i, bloom_len, bloom_bitlen, bloom_on_bits=0,bloom_off_bits=0;
char *peer_tbl, *peer_bloom_sig, *peer_bloom_nbits, *peer_bloombv_ptr,
*peer_bloombv, *peer_bkts, *peer_hh, *key=NULL;
const char *peer_key;
const char *hash_fcn = NULL;
unsigned char *bloombv=NULL;
static int fileno=0;
char keyfile[50];
unsigned char bloom_nbits=0;
int keyfd=-1, mode=S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
hash_fcn_hits[NUM_HASH_FUNCS], hash_fcn_winner;
unsigned max_chain=0;
uint32_t bloomsig;
int has_bloom_filter_fields = 0;
for(i=0; i < NUM_HASH_FUNCS; i++) {
hash_fcn_hits[i]=0;
}
if (getkeys) {
snprintf(keyfile, sizeof(keyfile), "/tmp/%u-%u.key", (unsigned)pid,fileno++);
if ( (keyfd = open(keyfile, O_WRONLY|O_CREAT|O_TRUNC, mode)) == -1) {
fprintf(stderr, "can't open %s: %s\n", keyfile, strerror(errno));
exit(-1);
}
}
vv("found signature at peer %p\n", (void*)peer_sig);
peer_tbl = tbl_from_sig_addr(peer_sig);
vvv("reading table at peer %p\n", (void*)peer_tbl);
if ( (tbl = (UT_hash_table*)malloc(sizeof(UT_hash_table))) == NULL) {
fprintf(stderr, "out of memory\n");
exit(-1);
}
#ifdef __FreeBSD__
if (read_mem(tbl, pid, (void *)peer_tbl, sizeof(UT_hash_table)) != 0) {
#else
if (read_mem(tbl, fd, (off_t)peer_tbl, sizeof(UT_hash_table)) != 0) {
#endif
fprintf(stderr, "failed to read peer memory\n");
goto done;
}
/* got the table. how about the buckets */
peer_bkts = (char*)tbl->buckets;
vvv("reading %u buckets at peer %p\n", tbl->num_buckets, (void*)peer_bkts);
bkts = (UT_hash_bucket*)malloc(sizeof(UT_hash_bucket)*tbl->num_buckets);
if (bkts == NULL) {
fprintf(stderr, "out of memory\n");
goto done;
}
#ifdef __FreeBSD__
if (read_mem(bkts, pid, (void *)peer_bkts, sizeof(UT_hash_bucket)*tbl->num_buckets) != 0) {
#else
if (read_mem(bkts, fd, (off_t)peer_bkts, sizeof(UT_hash_bucket)*tbl->num_buckets) != 0) {
#endif
fprintf(stderr, "failed to read peer memory\n");
goto done;
}
vvv("scanning %u peer buckets\n", tbl->num_buckets);
for(i=0; i < tbl->num_buckets; i++) {
vvv("bucket %u has %u items\n", (unsigned)i, (unsigned)(bkts[i].count));
if (bkts[i].count > max_chain) {
max_chain = bkts[i].count;
}
if (bkts[i].expand_mult) {
vvv(" bucket %u has expand_mult %u\n", (unsigned)i, (unsigned)(bkts[i].expand_mult));
}
vvv("scanning bucket %u chain:\n", (unsigned)i);
peer_hh = (char*)bkts[i].hh_head;
while(peer_hh) {
#ifdef __FreeBSD__
if (read_mem(&hh, pid, (void *)peer_hh, sizeof(hh)) != 0) {
#else
if (read_mem(&hh, fd, (off_t)peer_hh, sizeof(hh)) != 0) {
#endif
fprintf(stderr, "failed to read peer memory\n");
goto done;
}
if ((char*)hh.tbl != peer_tbl) {
goto done;
}
peer_hh = (char*)hh.hh_next;
peer_key = (const char*)(hh.key);
/* malloc space to read the key, and read it */
if ( (key = (char*)malloc(sizeof(hh.keylen))) == NULL) {
fprintf(stderr, "out of memory\n");
exit(-1);
}
#ifdef __FreeBSD__
if (read_mem(key, pid, (void*)peer_key, hh.keylen) != 0) {
#else
if (read_mem(key, fd, (off_t)peer_key, hh.keylen) != 0) {
#endif
fprintf(stderr, "failed to read peer memory\n");
goto done;
}
hash_fcn_hits[infer_hash_function(key,hh.keylen,hh.hashv)]++;
/* write the key if requested */
if (getkeys) {
write(keyfd, &hh.keylen, sizeof(unsigned));
write(keyfd, key, hh.keylen);
}
free(key);
key=NULL;
}
}
/* does it have a bloom filter? */
peer_bloom_sig = peer_tbl + offsetof(UT_hash_table, bloom_sig);
peer_bloombv_ptr = peer_tbl + offsetof(UT_hash_table, bloom_bv);
peer_bloom_nbits = peer_tbl + offsetof(UT_hash_table, bloom_nbits);
vvv("looking for bloom signature at peer %p\n", (void*)peer_bloom_sig);
#ifdef __FreeBSD__
if ((read_mem(&bloomsig, pid, (void *)peer_bloom_sig, sizeof(uint32_t)) == 0) &&
(bloomsig == HASH_BLOOM_SIGNATURE)) {
#else
if ((read_mem(&bloomsig, fd, (off_t)peer_bloom_sig, sizeof(uint32_t)) == 0) &&
(bloomsig == HASH_BLOOM_SIGNATURE)) {
#endif
vvv("bloom signature (%x) found\n",bloomsig);
/* bloom found. get at bv, nbits */
#ifdef __FreeBSD__
if (read_mem(&bloom_nbits, pid, (void *)peer_bloom_nbits, sizeof(char)) == 0) {
#else
if (read_mem(&bloom_nbits, fd, (off_t)peer_bloom_nbits, sizeof(char)) == 0) {
#endif
/* scan bloom filter, calculate saturation */
bloom_bitlen = (1ULL << bloom_nbits);
bloom_len = (bloom_bitlen / 8) + ((bloom_bitlen % 8) ? 1 : 0);
vvv("bloom bitlen is %u, bloom_bytelen is %u\n", (unsigned)bloom_bitlen, (unsigned)bloom_len);
if ( (bloombv = (unsigned char*)malloc(bloom_len)) == NULL) {
fprintf(stderr, "out of memory\n");
exit(-1);
}
/* read the address of the bitvector in the peer, then read the bv itself */
#ifdef __FreeBSD__
if ((read_mem(&peer_bloombv, pid, (void *)peer_bloombv_ptr, sizeof(void*)) == 0) &&
(read_mem(bloombv, pid, (void *)peer_bloombv, bloom_len) == 0)) {
#else
if ((read_mem(&peer_bloombv, fd, (off_t)peer_bloombv_ptr, sizeof(void*)) == 0) &&
(read_mem(bloombv, fd, (off_t)peer_bloombv, bloom_len) == 0)) {
#endif
/* calculate saturation */
vvv("read peer bloom bitvector from %p (%u bytes)\n", (void*)peer_bloombv, (unsigned)bloom_len);
for(i=0; i < bloom_bitlen; i++) {
if (HS_BIT_TEST(bloombv,(unsigned)i)) {
/* vvv("bit %u set\n",(unsigned)i); */
bloom_on_bits++;
} else {
bloom_off_bits++;
}
}
has_bloom_filter_fields = 1;
vvv("there were %u on_bits among %u total bits\n", (unsigned)bloom_on_bits, (unsigned)bloom_bitlen);
}
}
}
/* choose apparent hash function */
hash_fcn_winner=0;
for(i=0; i<NUM_HASH_FUNCS; i++) {
if (hash_fcn_hits[i] > hash_fcn_hits[hash_fcn_winner]) {
hash_fcn_winner=i;
}
}
hash_fcn = hash_fcns[hash_fcn_winner];
/*
Address ideal items buckets mc fl bloom sat fcn keys saved to
------------------ ----- -------- -------- -- -- ----- ----- --- -------------
0x10aa4090 98% 10000000 32000000 10 ok BER /tmp/9110-0.key
0x10abcdef 100% 10000000 32000000 9 NX 27 12% BER /tmp/9110-1.key
*/
printf("Address ideal items buckets mc fl bloom sat fcn keys saved to\n");
printf("------------------ ----- -------- -------- -- -- ----- ----- --- -------------\n");
if (has_bloom_filter_fields) {
printf("%-18p %4.0f%% %8u %8u %2u %2s %5u %4.0f%c %3s %s\n",
(void*)peer_tbl,
(tbl->num_items - tbl->nonideal_items) * 100.0 / tbl->num_items,
tbl->num_items,
tbl->num_buckets,
max_chain,
tbl->noexpand ? "NX" : "ok",
bloom_nbits,
bloom_on_bits * 100.0 / bloom_bitlen, '%',
hash_fcn,
(getkeys ? keyfile : ""));
} else {
printf("%-18p %4.0f%% %8u %8u %2u %2s %5s %4s%c %3s %s\n",
(void*)peer_tbl,
(tbl->num_items - tbl->nonideal_items) * 100.0 / tbl->num_items,
tbl->num_items,
tbl->num_buckets,
max_chain,
tbl->noexpand ? "NX" : "ok",
"",
"", ' ',
hash_fcn,
(getkeys ? keyfile : ""));
}
#if 0
printf("read peer tbl:\n");
printf("num_buckets: %u\n", tbl->num_buckets);
printf("num_items: %u\n", tbl->num_items);
printf("nonideal_items: %u (%.2f%%)\n", tbl->nonideal_items,
tbl->nonideal_items*100.0/tbl->num_items);
printf("expand: %s\n", tbl->noexpand ? "inhibited": "normal");
if (getkeys) {
printf("keys written to %s\n", keyfile);
}
#endif
done:
if (bkts) {
free(bkts);
}
if (tbl) {
free(tbl);
}
if (key) {
free(key);
}
if (keyfd != -1) {
close(keyfd);
}
if (bloombv) {
free(bloombv);
}
}
#ifdef __FreeBSD__
static void sigscan(pid_t pid, void *start, void *end, uint32_t sig)
{
struct ptrace_io_desc io_desc;
int page_size = getpagesize();
char *buf;
char *pos;
/* make sure page_size is a multiple of the signature size, code below assumes this */
assert(page_size % sizeof(sig) == 0);
buf = malloc(page_size);
if (buf == NULL) {
fprintf(stderr, "malloc failed in sigscan()\n");
return;
}
io_desc.piod_op = PIOD_READ_D;
io_desc.piod_offs = start;
io_desc.piod_addr = buf;
io_desc.piod_len = page_size;
/* read in one page after another and search sig */
while(!ptrace(PT_IO, pid, (void *) &io_desc, 0)) {
if (io_desc.piod_len != page_size) {
fprintf(stderr, "PT_IO returned less than page size in sigscan()\n");
return;
}
/* iterate over the the page using the signature size and look for the sig */
for (pos = buf; pos < (buf + page_size); pos += sizeof(sig)) {
if (*(uint32_t *) pos == sig) {
found(pid, (char *) io_desc.piod_offs + (pos - buf), pid);
}
}
/*
* 'end' is inclusive (the address of the last valid byte), so if the current offset
* plus a page is beyond 'end', we're already done. since all vm map entries consist
* of entire pages and 'end' is inclusive, current offset plus one page should point
* exactly one byte beyond 'end'. this is assert()ed below to be on the safe side.
*/
if (io_desc.piod_offs + page_size > end) {
assert(io_desc.piod_offs + page_size == (end + 1));
break;
}
/* advance to the next page */
io_desc.piod_offs += page_size;
}
}
#else
static void sigscan(int fd, off_t start, off_t end, uint32_t sig, pid_t pid)
{
int rlen;
uint32_t u;
off_t at=0;
if (lseek(fd, start, SEEK_SET) == (off_t)-1) {
fprintf(stderr, "lseek failed: %s\n", strerror(errno));
return;
}
while ( (rlen = read(fd,&u,sizeof(u))) == sizeof(u)) {
if (!memcmp(&u,&sig,sizeof(u))) {
found(fd, (char*)(start+at),pid);
}
at += sizeof(u);
if ((off_t)(at + sizeof(u)) > end-start) {
break;
}
}
if (rlen == -1) {
//fprintf(stderr,"read failed: %s\n", strerror(errno));
//exit(-1);
}
}
#endif
#ifdef __FreeBSD__
static int scan(pid_t pid)
{
vma_t *vmas=NULL, vma;
unsigned i, num_vmas = 0;
int ret;
struct ptrace_vm_entry vm_entry;
char path[MAXPATHLEN];
vv("attaching to peer\n");
if (ptrace(PT_ATTACH,pid,NULL,0) == -1) {
fprintf(stderr,"failed to attach to %u: %s\n", (unsigned)pid, strerror(errno));
exit(EXIT_FAILURE);
}
vv("waiting for peer to suspend temporarily\n");
if (waitpid(pid,NULL,0) != pid) {
fprintf(stderr,"failed to wait for pid %u: %s\n",(unsigned)pid, strerror(errno));
goto die;
}
/* read memory map using ptrace */
vv("listing peer virtual memory areas\n");
vm_entry.pve_entry = 0;
vm_entry.pve_path = path; /* not used but required to make vm_entry.pve_pathlen work */
while(1) {
/* set pve_pathlen every turn, it gets overwritten by ptrace */
vm_entry.pve_pathlen = MAXPATHLEN;
errno = 0;
ret = ptrace(PT_VM_ENTRY, pid, (void *) &vm_entry, 0);
if (ret) {
if (errno == ENOENT) {
/* we've reached the last entry */
break;
}
fprintf(stderr, "fetching vm map entry failed: %s (%i)\n", strerror(errno), errno);
goto die;
}
vvv("vmmap entry: start: %p, end: %p", (void *) vm_entry.pve_start, (void *) vm_entry.pve_end);
/* skip unreadable or vnode-backed entries */
if (!(vm_entry.pve_prot & VM_PROT_READ) || vm_entry.pve_pathlen > 0) {
vvv(" -> skipped (not readable or vnode-backed)\n");
vm_entry.pve_path[0] = 0;
continue;
}
/* useful entry, add to list */
vvv(" -> will be scanned\n");
vma.start = (void *)vm_entry.pve_start;
vma.end = (void *)vm_entry.pve_end;
vmas = (vma_t *) realloc(vmas, (num_vmas + 1) * sizeof(vma_t));
if (vmas == NULL) {
exit(-1);
}
vmas[num_vmas++] = vma;
}
vv("peer has %u virtual memory areas\n", num_vmas);
/* look for the hash signature */
vv("scanning peer memory for hash table signatures\n");
for(i=0; i<num_vmas; i++) {
vma = vmas[i];
sigscan(pid, vma.start, vma.end, sig);
}
die:
vv("detaching and resuming peer\n");
if (ptrace(PT_DETACH, pid, NULL, 0) == -1) {
fprintf(stderr,"failed to detach from %u: %s\n", (unsigned)pid, strerror(errno));
}
return 0;
}
# else
static int scan(pid_t pid)
{
FILE *mapf;
char mapfile[30], memfile[30], line[100];
vma_t *vmas=NULL, vma;
unsigned i, num_vmas = 0;
int memfd;
void *pstart, *pend, *unused;
/* attach to the target process and wait for it to suspend */
vv("attaching to peer\n");
if (ptrace(PTRACE_ATTACH, pid, NULL, 0) == -1) {
fprintf(stderr,"failed to attach to %u: %s\n", (unsigned)pid, strerror(errno));
exit(-1);
}
vv("waiting for peer to suspend temporarily\n");
if (waitpid(pid,NULL,0) != pid) {
fprintf(stderr,"failed to wait for pid %u: %s\n",(unsigned)pid, strerror(errno));
goto die;
}
/* get ready to open its memory map. this gives us its valid memory areas */
snprintf(mapfile,sizeof(mapfile),"/proc/%u/maps",(unsigned)pid);
snprintf(memfile,sizeof(memfile),"/proc/%u/mem", (unsigned)pid);
vv("opening peer memory map [%s]\n", mapfile);
if ( (mapf = fopen(mapfile,"r")) == NULL) {
fprintf(stderr,"failed to open %s: %s\n", mapfile, strerror(errno));
goto die;
}
vv("listing peer virtual memory areas\n");
while(fgets(line,sizeof(line),mapf)) {
if (sscanf(line, "%p-%p %4c %p %5c", &pstart, &pend, vma.perms,
&unused, vma.device) == 5) {
vma.start = (off_t)pstart;
vma.end = (off_t)pend;
if (vma.perms[0] != 'r') {
continue; /* only readable vma's */
}
if (memcmp(vma.device,"fd",2)==0) {
continue; /* skip mapped files */
}
vmas = (vma_t*)realloc(vmas, (num_vmas+1) * sizeof(vma_t));
if (vmas == NULL) {
exit(-1);
}
vmas[num_vmas++] = vma;
}
}
vv("peer has %u virtual memory areas\n",num_vmas);
fclose(mapf);
/* ok, open up its memory and start looking around in there */
vv("opening peer memory\n");
if ( (memfd=open(memfile,O_RDONLY)) == -1) {
fprintf(stderr,"failed to open %s: %s\n", memfile, strerror(errno));
goto die;
}
/* look for the hash signature */
vv("scanning peer memory for hash table signatures\n");
for(i=0; i<num_vmas; i++) {
vma = vmas[i];
pstart = (void*)vma.start;
pend = (void*)vma.end;
/*fprintf(stderr,"scanning %p-%p %.4s %.5s\n", pstart, pend,
vma.perms, vma.device);*/
sigscan(memfd, vma.start, vma.end, sig, pid);
}
/* done. close memory and detach. this resumes the target process */
close(memfd);
die:
vv("detaching and resuming peer\n");
if (ptrace(PTRACE_DETACH, pid, NULL, 0) == -1) {
fprintf(stderr,"failed to detach from %u: %s\n", (unsigned)pid, strerror(errno));
}
return 0;
}
#endif
static int usage(const char *prog)
{
fprintf(stderr,"usage: %s [-v] [-k] <pid>\n", prog);
return -1;
}
int main(int argc, char *argv[])
{
int opt;
while ( (opt = getopt(argc, argv, "kv")) != -1) {
switch (opt) {
case 'v':
verbose++;
break;
case 'k':
getkeys++;
break;
default:
return usage(argv[0]);
}
}
if (optind < argc) {
pid_t pid = atoi(argv[optind++]);
return scan(pid);
} else {
return usage(argv[0]);
}
}