From cf7cd8be60c254b44b444c97dcb238d7cf3afd4c Mon Sep 17 00:00:00 2001 From: Brett Weiland Date: Tue, 21 Sep 2021 10:50:33 -0500 Subject: palloc smp safe (testing required, NOT pfree) --- src/page_backup.c | 465 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 465 insertions(+) create mode 100644 src/page_backup.c (limited to 'src/page_backup.c') diff --git a/src/page_backup.c b/src/page_backup.c new file mode 100644 index 0000000..cf6834a --- /dev/null +++ b/src/page_backup.c @@ -0,0 +1,465 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +//just using char because c is a lil bitch and won't let us use void +extern char _kernel_shared_zone_begin; + + + +// PAGE MAPPING +#define PAGEMAP_LOCATION 0x10000 + +#define MAX_BUDDY_ORDER 8 +#define PALLOC_AUTO_BLEVEL MAX_BUDDY_ORDER + +typedef struct phys_map { + struct phys_map *next; + unsigned int max_buddy; + + bool mutex; //we might improve the speed of this later + + uint64_t bsize[MAX_BUDDY_ORDER]; + uint64_t *buddy[MAX_BUDDY_ORDER]; +} pmap_t; + +static pmap_t *first_pmap; + +#define MEM_AVAILABLE 1 +#define MEM_RESERVED 2 +#define MEM_APCI_RECLAIMABLE 3 +#define MEM_APCI_NVS 4 +#define MEM_BAD 5 + +// ZONES +#define ZONE_MAP_PLOC 0x7000 +#define ZONE_MAP PHYS_TO_VIRT(ZONE_MAP_PLOC) + + + +//reorganized (moved) from header +typedef struct __attribute__((packed)) { + unsigned int present : 1; // present, must be one when accessed. + unsigned int read_write : 1; // if set to one, read and write is set + unsigned int user : 1; // For seperating CPL 0-2 and 3+ + unsigned int writethrough_cache : 1; // honestly maybe I should look into caching + unsigned int cachable : 1; // hardware chaching. 0 is enabled, whats the worst that could happen? + unsigned int accessed : 1; // we'll never use any of these! + unsigned int zg0 : 1; // needs to be (and will be) zero'd + unsigned int size : 1; // if set to 1, this entry points to physical memory + unsigned int zg1 : 1; // needs to be (and will be) zero'd + unsigned int software_marks : 3; // available for our own use, I doubt we'll use it in such a simple thing + + uintptr_t base_ptr : 40; + unsigned int avail:11; + unsigned int no_exec:1; +} page_table; + + +struct memory_table { + void *base; + uint64_t length; + uint32_t type; + uint32_t ACPI; +} __attribute__((packed)); + +static bool NX_capable; +static bool huge_page_capable; + + +void get_mem_capabilities() { + uint32_t unused, edx; + __get_cpuid(0x80000001, &unused, &unused, &unused, &edx); + huge_page_capable = (edx >> 26) & 1; + NX_capable = (edx >> 20) & 1; +} + +void fix_stack() { + struct stack_frame *frame; + + asm("addq rsp, %0\n" + "addq rbp, %0\n" + "mov %0, rbp" + :"=r"(frame) + :"r"(PA_OFFSET)); + + while(frame->next != 0) { + printf("%p\n", frame->function_base); + frame->next = PHYS_TO_VIRT((void *)frame->next); + frame = frame->next; + } +} + + +void unmap_lowmem() { + //[future] + //eventually, you should use the function that unmaps pages when you write it + page_table *entry = (page_table *)PAGEMAP_LOCATION; + entry[0].present = 0; +} + +void debug_pzone() { + struct memory_table *memtable = (void *)ZONE_MAP; + printf(" __________________________________________________________________________\n"); + printf("| type\tstart\t\t\tend\t\t\tsize\t\t |\n"); + printf("|--------------------------------------------------------------------------|\n"); + for(unsigned int i = 0; memtable[i].length > 0; i++) { + printf("| %u %u\t0x%p\t0x%p\t0x%p |\n", memtable[i].type, memtable[i].ACPI, memtable[i].base, (memtable[i].base + memtable[i].length), memtable[i].length); + } + printf("----------------------------------------------------------------------------\n"); +} + +void ram_stresser() { + struct memory_table *memtable = (void *)ZONE_MAP - PA_OFFSET; + memtable[6].length = 0x10000; +} + +void debug_pmap() { + pmap_t *pmap = first_pmap; + int pmap_i = 0, order; + uint64_t blong_i, bbit_i, buddy_chunksize, omit_cnt; + printf("Maximum buddy order: %u (up to %#x sized chunks)\n", MAX_BUDDY_ORDER, ((0x1000 << MAX_BUDDY_ORDER) - 1)); + for(; pmap != 0; pmap = pmap->next) { + printf("Table %u:\n" + "\tPhysical/pmap start:\t%#p\n" + "\tTable Size:\t%u\n", pmap_i, pmap, + (uint64_t)(pmap->buddy[MAX_BUDDY_ORDER - 1] + pmap->bsize[MAX_BUDDY_ORDER - 1]) - (uint64_t)pmap); + + for(order = 0; order <= MAX_BUDDY_ORDER - 1; order++) { + buddy_chunksize = (0x1000 << order); //TODO just put it in the for loop + printf("\tbuddy[%u]:\n" + "\t\tAddress:\t%#p\n" + "\t\tSize:\t\t%u\n" + "\t\tBuddies:\t\t\n", order, pmap->buddy[order], pmap->bsize[order]); + + omit_cnt = 0; + + for(blong_i = 0; blong_i < pmap->bsize[order]; blong_i++) { + for(bbit_i = 0; bbit_i < 64; bbit_i++) { + if((pmap->buddy[order][blong_i]) & ((uint64_t)1 << bbit_i)) { + if((omit_cnt < 20) || (blong_i == pmap->bsize[order] - 1)) { + printf("address %#p\tbit %u: %p\t is free\n", + pmap->buddy[order] + blong_i, + bbit_i, + ((uint64_t)pmap - PA_OFFSET) + ((((blong_i * 64) + bbit_i) * buddy_chunksize))); + } + omit_cnt++; + if(omit_cnt == 20) { + printf("\t\t\t[more entries ommited]\n"); + } + } + } + } + } + pmap_i++; + } +} + +//TODO I know you don't want to, but you need to thoroughly check this. +void pfree(void *addr, size_t size) { + int blevel = 0; + uint64_t *onbyte; //the byte out buddy resides on in the current level + uint64_t page_bitloc; // how many bits we are away from buddy[0]. Helps calculate bitshifts + int bbitlen; //length of free'd area in current level + int lshift; //lshift is how many bits we shift over, rightbit is what it sounds like dumbass + pmap_t *pmap = first_pmap; + + /* note: there's no security check to see if the page is actually allocated, + * or if we are freeing the table itself. + * This should be okay, as only the kernel will be calling it. + * If it gets too messy we can always come back. + */ + + + if(((uintptr_t)addr & 4095) || (size & 4095)) { + PANIC(KERNEL_PANIC_INVALID_PFREE); + return; //TODO [minor] some more specificity, not a huge deal + } + size /= 0x1000; + for(; pmap != 0; pmap = pmap->next) { + page_bitloc = (addr - (void *)pmap) / 0x1000; + onbyte = pmap->buddy[0] + (page_bitloc / 64); + if((addr >= (void *)pmap) && onbyte < pmap->buddy[1]) break; + } + + while(blevel < MAX_BUDDY_ORDER) { + lshift = (page_bitloc / (1 << blevel)) & 63; + onbyte = pmap->buddy[blevel] + ((page_bitloc / 64) / (1 << blevel)); + bbitlen = size / (1 << blevel); + + + //TODO clean up this part ------------------------------------------------------------- (below) + if(bbitlen <= 1) { + if(lshift & 1) { + if((*onbyte >> (lshift - 1)) & 1) { + *onbyte &= ~(((uint64_t)1 << (lshift - 1)) | ((uint64_t)1 << lshift)); + size += (1 << blevel); + page_bitloc -= (1 << blevel); + bbitlen = size / (1 << blevel); + } + } + else if((*onbyte >> (lshift + 1)) & 1) { + *onbyte &= ~(((uint64_t)1 << (lshift + 1)) | ((uint64_t)1 << lshift)); + size += (1 << blevel); + bbitlen = size / (1 << blevel); + } + } + else if(((lshift + bbitlen) & 1) && ((*onbyte >> (lshift + bbitlen)) & 1)) { + *onbyte ^= ((uint64_t)1 << (lshift + bbitlen)); + size += (1 << blevel); + bbitlen = size / (1 << blevel); + } + //TODO clean up this part ------------------------------------------------------------- (above) + + if((!((size - 1) & size)) && (bbitlen != 1)) { + blevel = 63 - __builtin_clzl(size); + } + else { + if(bbitlen <= 1) { + *onbyte |= ((uint64_t)1 << lshift); + break; + } else if(bbitlen & 1) { //check me + size -= (1 << blevel); + *onbyte |= ((uint64_t)1 << (bbitlen + lshift)); + } + blevel++; + } + } +} + + +void *palloc(size_t size) { + bool self_alloc; + int min_blevel, blevel; + uint64_t bbit, unshifted_entry, threshold, bloc; //TODO move when you've confirmed casting stuff + uint64_t buddy_i, *ret, *bentry; + int itercount; + pmap_t *pmap = first_pmap; + + + if(size == 0) return 0; + if(size & 4095) { + size = DIV_ROUND_UP(size, 0x1000); + } + else { + size = size / 0x1000; + } + + //checking if pmap has been initilized; if not we've been called to self allocate + //the first buddy should never be allocated; that's where our pmap lives + if(pmap->buddy[pmap->max_buddy][0] & 1) { + self_alloc = true; + min_blevel = pmap->max_buddy; + } + else { + //log(size, 2) + self_alloc = false; + min_blevel = 63 - __builtin_clzl(size); + if(size & (size - 1)) min_blevel++; + if(min_blevel > MAX_BUDDY_ORDER - 1) return 0; + } + + for(blevel = min_blevel; blevel < MAX_BUDDY_ORDER; blevel++) { + for(pmap = first_pmap; pmap != 0; pmap = pmap->next) { + //pmap->mutex = true; + /** + if(!maps_transversed && get_set_mutex(&pmap->mutex)) { + //change get_coreid once we multithread + asm("mov al, 1\n" + "mov cl, %0\n" + "shl al, cl\n" + "lock or [%1], al\n" + ::"r"(get_coreid()), "m"(pmap->threads_searched) + :); + } + **/ + + for(buddy_i = 0; buddy_i < pmap->bsize[blevel]; buddy_i++) { + if(pmap->buddy[blevel][buddy_i] > (uint64_t)0) { //found buddy + bentry = &pmap->buddy[blevel][buddy_i]; + bbit = __builtin_ctzl(*bentry); + bloc = bbit; + + *bentry ^= (uint64_t)1 << bbit; + + ret = (((buddy_i * 64) + bbit) * (0x1000 << blevel)) + (void *)pmap; + + threshold = 0b11; + + itercount = 1; + for(blevel--; blevel >= 0; blevel--) { + bentry = pmap->buddy[blevel] + ((bentry - pmap->buddy[blevel + 1]) * 2); + itercount++; + if(bloc >= 32) bentry += 1; + bloc = (bloc * 2) & 63; // will be the amount we need to shift + bbit = ceil((float)size / (1 << blevel)); + + + unshifted_entry = ((uint64_t)1 << bbit) & threshold; + if(unshifted_entry) { + threshold = ((uint64_t)1 << (bbit * 2)) - 1; + } + else { + threshold = (threshold << 2) | threshold; + } + *bentry |= (unshifted_entry << bloc); + } + if(!self_alloc) bzero(ret, size * 0x1000); //TODO do we really need to bezero here? + return ret; + } + } + } + } + return 0; +} + + +//returns size of pages needed +size_t map_complete_physical() { + uint64_t total_mem; + unsigned int pdpe_cnt, pde_cnt, pde_max_i; + int zone_i, entry_i; + struct memory_table *zones = (void *)ZONE_MAP_PLOC; + + + page_table *pml4 = (page_table *)PAGEMAP_LOCATION; + page_table *pdpe = (page_table *)&_kernel_shared_zone_begin; + page_table *pde; + + for(zone_i = 0; zones[zone_i].length > 0; zone_i++); + total_mem = (uint64_t)zones[zone_i - 1].base + zones[zone_i - 1].length; + + pdpe_cnt = (total_mem + (0x40000000 - 1)) / 0x40000000; + + + entry_i = (PA_OFFSET >> 39) & 0x1ff; + pml4[entry_i].base_ptr = (uintptr_t)&_kernel_shared_zone_begin >> 12; + pml4[entry_i].read_write = 1; + pml4[entry_i].user = 0; + pml4[entry_i].size = 0; + pml4[entry_i].no_exec = 1; + pml4[entry_i].present = 1; + + if(huge_page_capable) { + for(int pdpe_i = 0; pdpe_i < pdpe_cnt; pdpe_i++) { + pdpe[pdpe_i].base_ptr = pdpe_i << 18; + pdpe[pdpe_i].read_write = 1; + pdpe[pdpe_i].user = 0; + pdpe[pdpe_i].size = 1; + pdpe[pdpe_i].no_exec = NX_capable; + pdpe[pdpe_i].present = 1; + } + return pdpe_cnt * 0x1000; + } + else { + pde_cnt = (total_mem + 0x100000) / 0x200000; + for(int pdpe_i = 0; pdpe_i < pdpe_cnt; pdpe_i++) { + pde = (page_table *)(&_kernel_shared_zone_begin + (pdpe_cnt * 0x1000) + (pdpe_i * 0x1000)); + + if((pdpe_i < pdpe_cnt - 1) || (!(pde_cnt & 511))) { + pde_max_i = 512; + } + else { + pde_max_i = pde_cnt & 511; + } + + pdpe[pdpe_i].base_ptr = (uintptr_t)pde >> 12; + pdpe[pdpe_i].read_write = 1; + pdpe[pdpe_i].user = 0; + pdpe[pdpe_i].size = 0; + pdpe[pdpe_i].no_exec = NX_capable; + pdpe[pdpe_i].present = 1; + + for(int pde_i = 0; pde_i < pde_max_i; pde_i++) { + pde[pde_i].base_ptr = ((pdpe_i << 9) + pde_i) << 9; + pde[pde_i].read_write = 1; + pde[pde_i].user = 0; + pde[pde_i].size = 1; + pde[pde_i].no_exec = NX_capable; + pde[pde_i].present = 1; + } + } + return (pdpe_cnt * 2) * 0x1000; + } +} + +pmap_t *init_pmap(size_t pagetable_size) { + pmap_t *pmap, *last_pmap; + struct memory_table *zones = (void *)ZONE_MAP; + int budorder, zone_i; + uint64_t pmap_size, pmap_bbitsize, zone_size; + bool first_pmap_i = true; + + + + for(zone_i = 0; zones[zone_i].length > 0; zone_i++) { + if((zones[zone_i].type == MEM_AVAILABLE) && (zones[zone_i].ACPI & 1) && + zones[zone_i].length >= (0x2000)) { + printf("found allocatable map at %p\n", zones[zone_i].base); + last_pmap = pmap; + if(zones[zone_i].base == (void *)0x100000) { + zone_size = zones[zone_i].length - (((uint64_t)&_kernel_shared_zone_begin - 0x100000) + pagetable_size); + pmap = PHYS_TO_VIRT((void *)&_kernel_shared_zone_begin + pagetable_size); + } + else { + zone_size = zones[zone_i].length; + pmap = PHYS_TO_VIRT(zones[zone_i].base); + } + if(first_pmap_i) { + pmap->next = NULL; + first_pmap_i = false; + } + else { + pmap->next = last_pmap; + } + + pmap->mutex = false; + + for(budorder = 0; budorder < MAX_BUDDY_ORDER; budorder++) { + pmap_bbitsize = zone_size / (0x1000 << budorder); + pmap->bsize[budorder] = DIV_ROUND_UP(pmap_bbitsize , 64); + if(budorder) { + pmap->buddy[budorder] = pmap->buddy[budorder - 1] + pmap->bsize[budorder - 1]; + } + else { + pmap->buddy[0] = (void *)pmap + sizeof(*pmap); + } + if(budorder < MAX_BUDDY_ORDER - 1) { + bzero(pmap->buddy[budorder], pmap->bsize[budorder] * 8); + if(pmap_bbitsize & 1) { + pmap->buddy[budorder][pmap->bsize[budorder] - 1] = + ((uint64_t)1 << ((pmap_bbitsize - 1) & 63)); + } + if(pmap_bbitsize == 1) { + pmap->max_buddy = budorder; + for(budorder++; budorder < MAX_BUDDY_ORDER; budorder++) { + pmap->buddy[budorder] = 0; + pmap->bsize[budorder] = 0; + } + break; + } + } + else { + pmap->max_buddy = MAX_BUDDY_ORDER - 1; + memset(pmap->buddy[budorder], UINT8_MAX, pmap->bsize[budorder] * 8); + if((pmap_bbitsize / 64) != (pmap->bsize[budorder])) { + pmap->buddy[budorder][pmap->bsize[budorder] - 1] = + (((uint64_t)1 << (pmap_bbitsize & 63)) - 1); + } + } + } + + pmap_size = (uint64_t)(pmap->buddy[pmap->max_buddy] + pmap->bsize[pmap->max_buddy]) - (uint64_t)pmap; + first_pmap = pmap; //we spoof palloc into allocating from the specific required pmap. + palloc(pmap_size); + } + } + return pmap; +} -- cgit v1.2.3