#define KAL_EXPORT_RAW_MUTEX_CMDS
#include <MemoryMgr.h>
#include <string.h>
#include "memmap.h"
#include "printf.h"
#include "boot.h"
#include "pace.h"
#include "sony.h"
#include "heap.h"
#include "ral.h"
#include "dal.h"
#include "kal.h"
#include "mpu.h"




#define RAL_NUM_MODULES_BELOW_R9	5	//dal, boot, ui, TAL on tapwave (reserved for future compat with zodiac), rePalmTab

//Pairs a dispatch table pointer with a module id.
//NOTE(review): nothing in the visible part of this file references this struct - confirm it is still needed
struct DispatchTableStruct {
	void* dispatchTable;
	uint32_t moduleId;
};

//Per-module bookkeeping stored immediately BELOW the address kept in mModules[] for that module.
//ralGetBelowR9Locked() finds it by stepping back one struct BelowR9 from that address.
struct BelowR9 {					//structure BELOW R9 for every module
	struct RalModuleDescr self;		//info on current module
	uint32_t useCount;				//ref count on module
	uint32_t gcMarker;				// "sometimes called lastDispatchTablPt" - not sure about this one. In this file it is the "visited" flag of the dependency cycle search
	uint32_t *(sysTables[RAL_NUM_MODULES_BELOW_R9]);		//system entry tables. The slot for the UI table is RAL_NUM_MODULES_BELOW_R9 - 1 - RAL_MODULE_ID_UI; the dormant app world copies the mGlobR9tab order (rePalm, TAL, UI, Boot, DAL)
};

//Layout of the start of a module's code ('amdc') resource. Every descr->codePtr in this file is read through this struct.
//The module id is not stored directly: ralGetModuleId() rebuilds it as (modIdHi / 4) * 32 + modIdLo / 4
struct ModAmdcHeader {
	uint32_t	jumpToEntry;
	uint32_t	entryptJumptableOfst;
	uint16_t	modIdLo;	//low part of module id, scaled by 4 (see ralGetModuleId)
	uint16_t	modIdHi;	//high part of module id (id / 32), scaled by 4 (see ralGetModuleId)
	uint32_t	revision;	//reported as RalModuleInfo.revision
	uint32_t	numExportedFuncs;	//reported as RalModuleInfo.numEntrypts; also sizes the entry/jump tables of non-system modules
	uint32_t	dbType;		//not needed for loading as such, but used here to recognize specific libraries and to find the db when logging
	uint32_t	dbCrid;		//same as dbType
	uint32_t	rsrcType;	//unused
	uint16_t	rsrcId;
	uint16_t	flags;		//TBD. compared against RAL_AMDC_FLAGS_VAL_REPALM to recognize our own modules
};

#define RAL_MODULE_FLAG_PATCHEABLE_MODULE		0x0001			//patcheable. If not set, module cannot be patched using RALPatchEntry
#define RAL_MODULE_FLAG_SYSTEM_MODULE			0x0002			//usually set only for DAL/BOOT/UI (and Zodiac's TAL). these are implicitly patcheable
#define RAL_MODULE_FLAG_UNKNOWN					0x0004			//seen sometimes, code seems to never check for it...


//Layout of a module's info ('amdi') resource (descr->infoPtr). The resource is optional.
//The two size fields are read with ralReadUnalignedBE32(), i.e. they are stored big-endian and may be unaligned.
struct ModAmdiHeader {
	uint32_t	spaceBelowR9;	//should be 0x28 always and assumed as such (we actually assume more for TAL & rePalm tables)
	uint32_t	spaceAboveR9;	//should be >= 0x10, if no amdi resource, is assumed as such
	uint32_t	unknown2;
	uint32_t	unknown3;
	uint32_t	unknown4;
	uint32_t	unknown5;
	uint32_t	unknown6;
	uint32_t	unknown7;
	uint32_t	unknown8;
};

//Start of a module's initialized-data ('amdd') resource.
//NOTE(review): not referenced by the visible code; ralUnpackInitedData() is documented as taking "amdd + 4"
struct ModAmddHeader {
	uint32_t	unknown0;
	uint32_t	something;		//loaded first at mod load time
};

//One "module `me` auto-loaded module `needed`" record. Records form a singly linked list whose head is mDeps.
//Module ids are squeezed into 15-bit fields, so only ids below 0x8000 can be represented.
struct ModDependency {
	struct ModDependency *next;	//next record, or NULL at the end of the list
	uint32_t me				: 15;	//module that caused the load
	uint32_t needed			: 15;	//module that was loaded on its behalf
};

#define RAL_MODULE_REF_NO_INVALID			0xFFFFFFFFUL		//"no such loaded module" result of ralFindModuleRefNoLocked()
#define RAL_MODULE_ID_INVALID				0xFFFFFFFFUL

#define MOD_TABLES_AFTER_R9					4		//for global r9: number of module-table pointers stored at and after R9
#define MOD_TAB_OFFSET_AFTER_R9				0x20	//each of those pointers is this many mModules[] slots past the previous one

//capacities (in entries, not counting the leading "num entries" word) of the system entry tables
#ifdef SUPPORT_ZODIAC
	#define MAX_ENTRIES_DAL						0x180
	#define MAX_ENTRIES_TAL						0x40
	#define TAL_PTR								(mEntriesTal + 1)
#else
	#define MAX_ENTRIES_DAL						0x100
	#define TAL_PTR								NULL
#endif

#ifdef SUPPORT_OS_54
	#define MAX_ENTRIES_BOOT					0x3B0
#else
	#define MAX_ENTRIES_BOOT					0x38d
#endif
#define MAX_ENTRIES_UI						0x180
#define MAX_ENTRIES_REPALM					0x60


//A complete "R9 world" (the below-R9 bookkeeping followed by the module table pointers that sit at R9)
//used for the app module slot while impl_RALSetA5() has been given an odd value.
struct DormantAppWorld {
	struct BelowR9 br9;
	// <--- mRalCurrentAppDormantBR9 points here
	void *above[MOD_TABLES_AFTER_R9];	//filled from the tail of mGlobR9tab by ralMemMgrAvail()
};

//storage for the dormant app world; its tables are filled in by ralMemMgrAvail()
static struct DormantAppWorld mRalCurrentAppDormantBR9store = {};
#define mRalCurrentAppDormantBR9		(&mRalCurrentAppDormantBR9store.br9)
#define mRalCurrentAppDormantR9World	(mRalCurrentAppDormantBR9 + 1)		//address just past the BelowR9 part - what impl_RALSetA5() installs for the app


static uint32_t mModulesLock;		//KAL mutex created by ralInit(); reserved around accesses to the module tables

//boot-time storage, used until ralMemMgrAvail() replaces it with allocated tables
static void* mModulesInitialStore[] = {[RAL_MODULE_ID_BOOT] = 0};	//only needs to fit up to BOOT until we move it
static uint32_t* mModuleEntriesInitialStore[] = {[RAL_MODULE_ID_BOOT] = 0};

static void **mModules = mModulesInitialStore;						//we allocate space to store all modules so for us refNum is always module id
static uint32_t **mModuleEntries = mModuleEntriesInitialStore;		//each points to table of entries. entry index "0" is "num entries" in table, further N entries are actual entries, then there are N "jump" instructions used for RALLinkClient. those do not exist for DAL/Boot/UI

static uint32_t XRAM1 mEntriesDal[MAX_ENTRIES_DAL + 1] = {0,};		//in each of these the first entry is "num entries", the next are pointers
static uint32_t XRAM1 mEntriesBoot[MAX_ENTRIES_BOOT + 1] = {0,};
static uint32_t *mEntriesUi = NULL;									//allocated in ralMemMgrAvail(), NULL before that
#ifdef SUPPORT_ZODIAC
	static uint32_t XRAM1 mEntriesTal[MAX_ENTRIES_TAL + 1] = {0,};
#endif
static uint32_t XRAM2 mEntriesRePalm[MAX_ENTRIES_REPALM + 1] = {0,};
static bool mIsZodiac = false, mMemoryPivoted = false;				//mMemoryPivoted is set once ralMemMgrAvail() has run
static struct ModDependency *mDeps = NULL;							//head of the auto-load dependency list

//The "global" R9 world. R9 is pointed at element RAL_NUM_MODULES_BELOW_R9 of this array, so the first
//RAL_NUM_MODULES_BELOW_R9 slots (system entry tables, each skipping its "num entries" word) lie below R9
//and the MOD_TABLES_AFTER_R9 module-table pointers lie at and above it.
static const void* mGlobR9tab[RAL_NUM_MODULES_BELOW_R9 + MOD_TABLES_AFTER_R9] = {
	mEntriesRePalm + 1, TAL_PTR, /* mEntriesUi + 1*/ NULL, mEntriesBoot + 1, mEntriesDal + 1,
	mModulesInitialStore + 0 * MOD_TAB_OFFSET_AFTER_R9, mModulesInitialStore + 1 * MOD_TAB_OFFSET_AFTER_R9,	//yes these point past the end. i am ok with it :)
	mModulesInitialStore + 2 * MOD_TAB_OFFSET_AFTER_R9, mModulesInitialStore + 3 * MOD_TAB_OFFSET_AFTER_R9};



//Given a code address, follow ARM-mode thunks as far as we can and return where they lead.
//Addresses with bit 0 set (not ARM code) are returned untouched.
//Two thunk shapes are understood:
//	- an unconditional ARM "B": followed, and the target is examined again
//	- the 4-word "LDR R12, [PC, #4]; ADD R12, R12, PC (either operand order); BX R12; .word ofst" veneer:
//	  resolved to its final target, after which we stop
//Anything else ends the walk with the current address as the answer.
static uint32_t ralResolveArmThunks(const void* armPtr)
{
	uint32_t cur = (uint32_t)armPtr;

	if (cur & 1)		//was not arm pointer? just return it
		return (uintptr_t)armPtr;

	for (;;) {
		const uint32_t *code = (const uint32_t*)cur;

		if ((code[0] & 0xFF000000UL) != 0xEA000000UL) {

			//not a plain branch. the only other thing we know how to see through is the pc-relative veneer
			if (code[0] == 0xE59FC004) {						//LDR R12, [Pc, #4]
				if (code[1] == 0xE08CC00F ||					//ADD     R12, R12, PC
						code[1] == 0xE08FC00C) {				//ADD     R12, PC, R12
					if (code[2] == 0xE12FFF1C) {				//BX R12

						// resolved to thumb addr!! PC reads as (addr of ADD) + 8 == cur + 12
						cur = cur + 12 + code[3];
					}
				}
			}
			break;
		}

		//jump - follow it: sign-extend the 24-bit word offset, scale by 4, PC reads as cur + 8
		cur = (((int32_t)(code[0] << 8)) >> 6) + cur + 8;
	}

	logvst("RESOLVED 0x%08x -> 0x%08x\n", armPtr, cur);

	return cur;
}

//Decode the jump instruction found at ptrToJumpInstr and return the address it jumps to, in a form
//suitable for use as a branch target (Thumb targets come back with bit 0 set).
//Understood encodings: ARM "B" (further resolved through ralResolveArmThunks), Thumb-2 "B.W", and
//Thumb "B.N" followed by a NOP. Anything else is fatal.
static const void* ralResolveJumpTarget(const void* ptrToJumpInstr)
{
	const uint32_t where = (uint32_t)ptrToJumpInstr;
	uint32_t insn = *(uint32_t*)where;
	uint32_t target = 0;

	if ((insn & 0xFF000000UL) == 0xEA000000UL) {			//ARM mode

		target = ralResolveArmThunks(ptrToJumpInstr);
	}
	else if ((insn & 0xD000F800UL) == 0x9000F000UL) {		//B.W

		uint32_t imm24;

		//with S clear, J1 and J2 are stored inverted. flip them so they can be used as offset bits directly
		if (!(insn & 0x00000400))
			insn ^= 0x28000000;

		//gather the scattered offset fields (they do not overlap)
		imm24 =	(((insn >> 16) & 0x7FF) <<  0) |
				(((insn >>  0) & 0x3FF) << 11) |
				(((insn >> 27) & 0x001) << 21) |
				(((insn >> 29) & 0x001) << 22) |
				(((insn >> 10) & 0x001) << 23);

		//sign extend, convert halfwords to bytes, PC reads as +4, and +1 marks a thumb target
		target = (((int32_t)(imm24 << 8)) >> 7) + where + 4 + 1;
	}
	else if ((insn & 0xFFFFF800) == 0x46C0E000) {			//B.N + NOP (v6m does this)

		//11-bit halfword offset: sign extend and scale by two
		target = (((int32_t)(insn << 21)) >> 20) + where + 4 + 1;
	}
	else {

		fatal("not a jump instr 0x%08x @ 0x%08x\n", insn, where);
	}

	return (const void*)target;
}

//Default occupant of every rePalm table slot that nobody has filled in (installed by ralInit).
//Calling it is fatal; the message includes this function's return address, i.e. the caller.
static void ralRepalmUnimplFunc(void)
{
	fatal("Unimplemented rePalm func called from 0x%08x\n", __builtin_return_address(0)	);
}

static struct BelowR9* ralGetBelowR9Locked(uint32_t modId)
{
	uint32_t maxId = mMemoryPivoted ? RAL_MAX_MODULE_ID : RAL_MODULE_ID_BOOT + 1;
	
	return (modId < maxId && mModules[modId]) ? (((struct BelowR9*)(mModules[modId])) - 1) : NULL;
}

//Install `func` as entry number `idx` of the rePalm function table.
//Returns false (and changes nothing) if idx is beyond the table, true otherwise.
bool ralSetRePalmTabFunc(uint32_t idx, void* func)
{
	const bool fits = idx < MAX_ENTRIES_REPALM;

	if (fits)
		mEntriesRePalm[1 + idx] = (uintptr_t)func;		//slot 0 is the entry count, hence the +1

	return fits;
}

//The "memory pivot": called once MemChunkNew() can be used. Replaces the small static boot-time module
//tables with allocated ones sized for RAL_MAX_MODULE_ID modules, allocates the UI entry table, and
//repoints everything that referred to the old storage (the global R9 table, every already-loaded
//module's below/above-R9 pointers, and the dormant app world). Runs with mModulesLock held.
//Calling it a second time is fatal, as is any allocation failure.
void ralMemMgrAvail(void)
{
	struct BelowR9 *br9;
	uint32_t i, j, sz;
	
	(void)impl_KALMutexReserve(mModulesLock, -1);

	if (mMemoryPivoted)
		fatal("Cannot pivot memory again\n");
	
	//UI entry table: count word plus MAX_ENTRIES_UI entries, zeroed
	sz = sizeof(*mEntriesUi) * (MAX_ENTRIES_UI + 1);
	mEntriesUi = MemChunkNew(0, sz, 0x1200);
	if (!mEntriesUi)
		fatal("ral: Failed to alloc UI table\n");
	memset(mEntriesUi, 0, sz);
	
	//full-size table of module pointers, zeroed
	sz = sizeof(*mModules) * RAL_MAX_MODULE_ID;
	mModules = MemChunkNew(0, sz, 0x1200);
	if (!mModules)
		fatal("ral: Failed to alloc modules table\n");
	memset(mModules, 0, sz);
	
	//full-size table of per-module entry table pointers, zeroed
	sz = sizeof(*mModuleEntries) * RAL_MAX_MODULE_ID;
	mModuleEntries = MemChunkNew(0, sz, 0x1200);
	if (!mModuleEntries)
		fatal("ral: Failed to alloc module entries table\n");
	memset(mModuleEntries, 0, sz);
	
	//copy what we have already
	for (i = 0; i < sizeof(mModulesInitialStore) / sizeof (*mModulesInitialStore); i++)
		mModules[i] = mModulesInitialStore[i];
	
	for (i = 0; i < sizeof(mModuleEntriesInitialStore) / sizeof (*mModuleEntriesInitialStore); i++)
		mModuleEntries[i] = mModuleEntriesInitialStore[i];
	
	//set up some more state that we now can
	mModuleEntries[RAL_MODULE_ID_UI] = mEntriesUi;
	#ifdef SUPPORT_ZODIAC
		mModuleEntries[RAL_MODULE_ID_TAL] = mEntriesTal;
	#endif

	//the UI slot of the global R9 table was NULL until now
	mGlobR9tab[RAL_NUM_MODULES_BELOW_R9 - RAL_MODULE_ID_UI - 1] = mEntriesUi + 1;
	
	//module-table pointers at and above R9 must point into the new mModules
	for (i = 0; i < MOD_TABLES_AFTER_R9; i++)
		mGlobR9tab[RAL_NUM_MODULES_BELOW_R9 + i] = mModules + i * MOD_TAB_OFFSET_AFTER_R9;

	//adjust existing modules. mMemoryPivoted is still false here, so ralGetBelowR9Locked() only
	//reports modules with ids up to and including BOOT
	for (i = 0; i < RAL_MAX_MODULE_ID; i++) {
		
		br9 = ralGetBelowR9Locked(i);
		if (!br9)
			continue;
		
		//give it a UI table pointer
		br9->sysTables[RAL_NUM_MODULES_BELOW_R9 - 1 - RAL_MODULE_ID_UI] = mEntriesUi + 1;
		
		//give new module table pointers (they live right after the BelowR9)
		for (j = 0; j < MOD_TABLES_AFTER_R9; j++)
			((void**)(br9 + 1))[j] = mModules + j * MOD_TAB_OFFSET_AFTER_R9;
	}

	//init dormant app BR9 world as a copy of the (now final) global R9 table
	for (i = 0; i < RAL_NUM_MODULES_BELOW_R9; i++)
		mRalCurrentAppDormantBR9store.br9.sysTables[i] = (void*)mGlobR9tab[i];
	for (i = 0; i < MOD_TABLES_AFTER_R9; i++)
		mRalCurrentAppDormantBR9store.above[i] = (void*)mGlobR9tab[i + RAL_NUM_MODULES_BELOW_R9];
	
	mMemoryPivoted = true;

	(void)impl_KALMutexRelease(mModulesLock);
}

//init the ral. do this early, as before this R9 points nowhere.
//Points R9 at the global table, registers the DAL and Boot entry tables, fills every still-empty
//rePalm table slot with the "unimplemented" trap, and creates the modules lock.
//Returns true if the lock was created.
bool ralInit(void)
{
	uint32_t slot;

	asm volatile("mov r9, %0"::"r"(mGlobR9tab + RAL_NUM_MODULES_BELOW_R9));

	mModuleEntries[RAL_MODULE_ID_DAL] = mEntriesDal;
	mModuleEntries[RAL_MODULE_ID_BOOT] = mEntriesBoot;

	//slot 0 is the entry count, real entries are 1..MAX_ENTRIES_REPALM
	mEntriesRePalm[0] = MAX_ENTRIES_REPALM;
	for (slot = 1; slot <= MAX_ENTRIES_REPALM; slot++) {

		//if someone called ralSetRePalmTabFunc() before we got here, honor that
		if (mEntriesRePalm[slot])
			continue;

		mEntriesRePalm[slot] = (uintptr_t)&ralRepalmUnimplFunc;
	}

	return impl_KALMutexCreate(&mModulesLock, CREATE_4CC('R','a','l','L')) == errNone;
}

//Point R9 at the global R9 table (the same value ralInit() installs) and return the value R9 held
//before, so the caller can put it back later with ralRestoreR9().
uint32_t __attribute__((used)) ralSetSafeR9(void)
{
	uint32_t oldR9;
	
	asm volatile(
		"	mov %0, r9	\n\t"
		"	mov r9, %1	\n\t"
		:"=&r"(oldR9)		//early-clobber: written before the input is consumed, so it must not share a register with it
		:"r"(mGlobR9tab + RAL_NUM_MODULES_BELOW_R9)
	);
	
	return oldR9;
}

//Load `state` into R9. Counterpart of ralSetSafeR9(): pass it the value that function returned.
void ralRestoreR9(uint32_t state)
{
	asm volatile("mov r9, %0"::"r"(state));
}

//Read a 32-bit big-endian value from `ptr`, which need not be aligned (it is read a byte at a time).
static uint32_t ralReadUnalignedBE32(const void* ptr)
{
	const uint8_t *bytes = (const uint8_t*)ptr;

	//first byte in memory is the most significant one
	return	(((uint32_t)bytes[0]) << 24) |
			(((uint32_t)bytes[1]) << 16) |
			(((uint32_t)bytes[2]) <<  8) |
			(((uint32_t)bytes[3]) <<  0);
}

//Read a 32-bit little-endian value from `ptr`, which need not be aligned.
//Done as a big-endian read followed by a byte swap.
static uint32_t ralReadUnalignedLE32(const void* ptr)
{
	const uint32_t asBigEndian = ralReadUnalignedBE32(ptr);

	return __builtin_bswap32(asBigEndian);
}

//Extract the module id from a module's code resource header.
//The header carries it as two pieces, each scaled by 4: modIdHi selects a group of 32 ids and
//modIdLo the id within that group.
static uint32_t ralGetModuleId(const void *codePtr)
{
	const struct ModAmdcHeader *amdc = (const struct ModAmdcHeader*)codePtr;
	const uint32_t group = amdc->modIdHi / 4;
	const uint32_t withinGroup = amdc->modIdLo / 4;

	return group * 32 + withinGroup;
}

//Translate a module reference number into a module id.
//For us the two are the same number, so this is the identity. It exists so that callers already go
//through one place if that ever stops being true.
static uint32_t ralLookupLoadedModuleLocked(uint32_t refNo)
{
	const uint32_t modId = refNo;

	return modId;
}

//Translate a module's BelowR9 into its reference number.
//Since reference number == module id for us, this just reads the id out of the module's code header.
//It exists so that callers already go through one place if that ever stops being true.
static uint32_t ralGetLoadedModuleReferenceNumberLocked(const struct BelowR9 *br9)
{
	const uint32_t modId = ralGetModuleId(br9->self.codePtr);

	return modId;
}

static uint32_t ralFindModuleRefNoLocked(const struct RalModuleDescr *descr)	//if module is loaded, find its reference number. if not, return RAL_MODULE_REF_NO_INVALID
{
	uint32_t i, ret = RAL_MODULE_REF_NO_INVALID, maxId = mMemoryPivoted ? RAL_MAX_MODULE_ID : RAL_MODULE_ID_BOOT + 1;
	
	for (i = 0; i < maxId; i++){
		
		if (mModules[i] && ralGetBelowR9Locked(i)->self.codePtr == descr->codePtr) {
			ret = i;
			break;
		}
	}
	
	return ret;
}

//Check whether the first 'amdc' resource of an open resource database is the code at `ptr`.
//Returns the local id of the database if so, else 0 (also 0 if the id cannot be obtained).
//Assumes Dm is alive.
//The resource is only locked long enough to compare its address: it is unlocked and released on
//every path, so repeated lookups do not pile up lock counts on the module's code resource.
static LocalID ralLogModuleNameHelperCheckDbForOurCodeRes(DmOpenRef openDb, const void *ptr)
{
	uint16_t potentialResIdx;
	MemHandle resH;
	LocalID dbh;
	bool isOurs;
	
	potentialResIdx = DmFindResourceType(openDb, CREATE_4CC('a','m','d','c'), 0);
	if (potentialResIdx == 0xffff)		//no such resource in this db
		return 0;
	
	resH = DmGetResourceIndex(openDb, potentialResIdx);
	if (!resH)
		return 0;
	
	//we only need the address for the comparison, not the data, so let go of it right away
	isOurs = MemHandleLock(resH) == ptr;
	MemHandleUnlock(resH);
	DmReleaseResource(resH);
	
	if (!isOurs)
		return 0;
	
	if (DmOpenDatabaseInfo(openDb, &dbh, NULL, NULL, NULL))
		dbh = 0;
	
	return dbh;
}

//Log the name of the database a module came from, on behalf of `whomFor`. Does nothing unless LOG_DEBUG.
//Early in boot the name is read straight out of the rom db header. Later, the owning database is
//located (using descr->dbH if given, otherwise by three progressively slower searches) and its name
//is asked of the Data Manager. Not finding the database at all is fatal.
static void ralLogModuleName(const char *whomFor, const struct RalModuleDescr *descr)
{
	char name[33] = "<UNKNOWN MODULE>";		//32 name bytes + terminator; the initializer zero-fills the rest, so byte 32 stays 0
	
	if (LOG_DEBUG) {
	
		if (dalGetInitStage() < DAL_INIT_STAGE_MEM_AND_DM) {	//at this point we can only be called with a rom db. For them dbH is pointer to Db header, where the name is the first field. but it may not be terminated
			
			if (descr->dbH)
				memcpy(name, (void*)descr->dbH, 32);
		}
		else {													//Dm is alive and we can do more things
				
			LocalID dbh = descr->dbH;
			DmOpenRef openDb = NULL;
	
			//attempt 1: look through the resource databases that are already open
			if (!dbh) {
				
				logt("No DBH given to ralLogModuleName. Going to do it the slow way\n");
				
				while ((openDb = DmNextOpenResDatabase(openDb)) && !dbh)
					dbh = ralLogModuleNameHelperCheckDbForOurCodeRes(openDb, descr->codePtr);
			}
			
			//attempt 2: open the database the code header claims to belong to
			if (!dbh) {
				const struct ModAmdcHeader *amdc = (const struct ModAmdcHeader*)descr->codePtr;
				
				logt("Still no DBH known to ralLogModuleName. Trying to believe the code resource\n");
			
				openDb = DmOpenDatabaseByTypeCreator(amdc->dbType, amdc->dbCrid, dmModeReadOnly);
				if (openDb) {
					dbh = ralLogModuleNameHelperCheckDbForOurCodeRes(openDb, descr->codePtr);
					DmCloseDatabase(openDb);
				}
			}
			
			//attempt 3: open every database in turn until one matches
			if (!dbh) {
				uint32_t i, n = DmNumDatabases();
				
				logt("Still no DBH known to ralLogModuleName. Going to do it the even slower way\n");
				
				for (i = 0; i < n && !dbh; i++) {
					
					openDb = DmOpenDatabase(DmGetDatabase(i), dmModeReadOnly);
					if (openDb) {
						dbh = ralLogModuleNameHelperCheckDbForOurCodeRes(openDb, descr->codePtr);
						DmCloseDatabase(openDb);
					}
				}
			}
			
			if (!dbh)
				fatal("WTF: no DBH known to ralLogModuleName. Giving up\n");
				
			//on failure `name` keeps its "<UNKNOWN MODULE>" default (assuming DmDatabaseInfo leaves it alone - TODO confirm)
			if (DmDatabaseInfo(dbh, name, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
				loge("ralLogModuleName: cannot get db info\n");
		}
		
		logd("Module name passed to %s: '%s'\n", whomFor, name);
	}
}

//Work out how much memory a module's data area needs. Returns the total in bytes; each of the three
//out-pointers, if not NULL, receives one component of that total:
//	*memNeededBelowR9P                 - bytes below R9 (never less than sizeof(struct BelowR9))
//	*memNeededAboveR9P                 - bytes at/above R9, rounded up to a multiple of 4
//	*memNeededForJumpAndEntryTablesP   - bytes for the entry + RALLinkClient jump tables (0 for system modules)
//extraEntryptSlots is only checked, never added to any size: a nonzero value is fatal for non-system modules.
static uint32_t ralFigureOutModuleMemoryNeeds(const struct RalModuleDescr *descr, uint32_t *memNeededBelowR9P, uint32_t *memNeededAboveR9P, uint32_t *memNeededForJumpAndEntryTablesP, uint32_t extraEntryptSlots)
{
	const struct ModAmdcHeader *amdc = (const struct ModAmdcHeader*)descr->codePtr;
	const struct ModAmdiHeader *info = (const struct ModAmdiHeader*)descr->infoPtr;
	uint32_t nEntries = 0, jumptableEntries = 0, jumptableBytes = 0;
	uint32_t above = MOD_TABLES_AFTER_R9 * sizeof(uint32_t);	//minimum: room for the module table pointers
	uint32_t below = sizeof(struct BelowR9);					//minimum: room for our bookkeeping
	
	switch (ralGetModuleId(descr->codePtr)) {
		case RAL_MODULE_ID_DAL:
		case RAL_MODULE_ID_BOOT:
		case RAL_MODULE_ID_UI:
	#ifdef SUPPORT_ZODIAC
		case RAL_MODULE_ID_TAL:
	#endif
			//these do not require any memory specially for addresses as we store them in mEntriesDal, mEntriesBoot, mEntriesUi, mEntriesTal respectively
			break;
		default:
			if (extraEntryptSlots)
				fatal("Allocating non-preloaded modules with extra jumptable space not supported\n");
			//these do require space for addresses AND RalLinkClient jumptables
			nEntries = amdc->numExportedFuncs;
			jumptableEntries = 1 /* num_entries entry */ + nEntries /* addresses */ + nEntries /* arm jump instructions */;
			jumptableBytes = jumptableEntries * sizeof(uint32_t);
			break;
	}
	
	//the info resource is optional; if present it can only grow the two areas beyond our minimums
	if (info) {
		uint32_t infoBelow = ralReadUnalignedBE32(&info->spaceBelowR9);
		uint32_t infoAbove = ralReadUnalignedBE32(&info->spaceAboveR9);
		
		if (infoBelow > below)
			below = infoBelow;
		if (infoAbove > above)
			above = infoAbove;
	}
	
	//NOTE(review): `below` starts at sizeof(struct BelowR9) and is only ever raised above, so this
	//condition cannot be true as written and the log line never appears
	if (below < sizeof(struct BelowR9)) {
		logd("BelowR9 area size not as expected: %u bytes\n", below);
		below = sizeof(struct BelowR9);
	}
	
	above = (above + sizeof(uint32_t) - 1) / sizeof(uint32_t) * sizeof(uint32_t);			//round "above" size up to uint32_t size since we place jumptable after it and it needs to be uint32-t aligned
	
	if (memNeededBelowR9P)
		*memNeededBelowR9P = below;
	if (memNeededAboveR9P)
		*memNeededAboveR9P = above;
	if (memNeededForJumpAndEntryTablesP)
		*memNeededForJumpAndEntryTablesP = jumptableBytes;

	return above + below + jumptableBytes;
}

//Unpack a module's compressed initialized data.
//src is amdd + 4, returns src after we've used as much as we need.
//The stream is exactly three chunks. Each chunk starts with a big-endian 32-bit signed offset, which is
//added to dstEnd to get where the chunk's output goes, followed by opcodes until a 0 opcode:
//	0x80 | n		copy the next n + 1 source bytes
//	0x40 | n		write n + 1 zero bytes
//	0x20 | n, b		write byte b, n + 2 times
//	0x10 | n		write n + 1 bytes of 0xFF
//	0x00			end of chunk
//	0x01, s0 s1		write 00 00 00 00 FF FF s0 s1
//	0x02, s0 s1 s2	write 00 00 00 00 FF s0 s1 s2
//	0x03, s0 s1 s2	write A9 F0 00 00 s0 s1 00 s2
//	0x04, s0..s3	write A9 F0 00 s0 s1 s2 00 s3
//Any other opcode is fatal. No bounds are checked on either side: the stream is trusted.
const uint8_t* ralUnpackInitedData(const uint8_t *srcP, void *dstEnd)
{
	const uint8_t *src = srcP;
	uint32_t i, j, v;
	uint8_t *dst;
	
	for (i = 0; i < 3; i++) {
		
		bool finishInnerLoop = false;
		
		//it points somewhere: offset is relative to dstEnd, and interpreted as signed
		dst = ((uint8_t*)dstEnd) + (int32_t)ralReadUnalignedBE32(src);
		src += 4;
		
		while (!finishInnerLoop) {
			
			v = *src++;
			if (v & 0x80) {				//literal run
				
				v = (v & 0x7F) + 1;
				while(v--)
					*dst++ = *src++;
			}
			else if (v & 0x40) {		//run of zeroes
				
				v = (v & 0x3F) + 1;
				while(v--)
					*dst++ = 0;
			}
			else if (v & 0x20) {		//run of one given byte
				
				uint8_t valToWrite = *src++;
				v = (v & 0x1F) + 2;
				
				while(v--)
					*dst++ = valToWrite;
			}
			else if (v & 0x10) {		//run of 0xFF
				
				v = (v & 0x0F) + 1;
				
				while(v--)
					*dst++ = 0xFF;
			}
			else switch (v) {			//fixed 8-byte patterns with a few bytes filled in from the source
				case 0:			//end of this chunk.
					finishInnerLoop = true;
					break;
				case 1:
					for (j = 0; j < 4; j++)
						*dst++ = 0;
					for (j = 0; j < 2; j++)
						*dst++ = 0xFF;
					for (j = 0; j < 2; j++)
						*dst++ = *src++;
					break;
				case 2:
					for (j = 0; j < 4; j++)
						*dst++ = 0;
					*dst++ = 0xFF;
					for (j = 0; j < 3; j++)
						*dst++ = *src++;
					break;
				case 3:
					*dst++ = 0xA9;
					*dst++ = 0xF0;
					for (j = 0; j < 2; j++)
						*dst++ = 0;
					for (j = 0; j < 2; j++)
						*dst++ = *src++;
					*dst++ = 0;
					*dst++ = *src++;
					break;
				case 4:
					*dst++ = 0xA9;
					*dst++ = 0xF0;
					*dst++ = 0;
					for (j = 0; j < 3; j++)
						*dst++ = *src++;
					*dst++ = 0;
					*dst++ = *src++;
					break;
				default:
					//src has already moved past the offending byte, hence the -1
					fatal("Invalid packed data byte 0x%02X at 0x%08x\n", v, (unsigned)(uintptr_t)(src - 1));
					break;
			}
		}
	}
	
	return src;
}

//Apply a Zodiac-style relocation list. The list is a little-endian 32-bit count followed by that many
//little-endian 32-bit byte offsets into `dst`; the 32-bit word at each offset has the address
//`relocAgainst` added to it. Returns a pointer to the first source byte after the list.
const uint8_t* ralPerformRelocsZodiac(const uint8_t *src, void *dst, const void *relocAgainst)
{
	const uint8_t *cursor = src + sizeof(uint32_t);
	uint32_t remaining = ralReadUnalignedLE32(src);

	logt("%u relocs\n", remaining);

	for (; remaining; remaining--, cursor += sizeof(uint32_t)) {

		uint32_t *patchMe = (uint32_t*)(((uintptr_t)dst) + ralReadUnalignedLE32(cursor));

		*patchMe += (uintptr_t)relocAgainst;
	}

	return cursor;
}

//Apply a normal (non-Zodiac) relocation list: a little-endian 32-bit count, then one variable-length
//record per relocation. Each record encodes a delta that is added to a running byte offset into `dst`
//(starting at 0); the 32-bit word at the resulting offset then has the address `relocAgainst` added.
//Record size is chosen by the top bits of its first byte: 1 byte (bit 7 set), 2 bytes (bit 6 set), else 4 bytes.
//returns pointer to first unused source byte. this code is correct despite looking wrong
const uint8_t* ralPerformRelocsNormal(const uint8_t *srcP, void *dst, const void *relocAgainst)
{
	const uint8_t *src = ((const uint8_t*)srcP) + 4;
	uint32_t nRelocs, ofst = 0, v, j;
	
	nRelocs = ralReadUnalignedLE32(srcP);
	logt("%u relocs\n", nRelocs);
	while(nRelocs--) {
		
		v = *src++;
	
		logt("byte is 0x%02x\n", v);	
		
		//read offset
		if (v & 0x80) {				//u7 offset
			
			//yes, unsigned as per zire 31 dal
			//doubling inside a uint8_t drops the flag bit: result is (v & 0x7F) * 2
			v = (uint8_t)(v * 2);
		}
		else if (v & 0x40) {		//s14 offset >= 0x80
			
			//second byte goes on top; keep the low 14 bits, sign-extend from bit 13, and double
			v = (((uint32_t)*src++) << 8) | v;
			v <<= 18;
			v = ((int32_t)v) >> 17;
		}
		else {						//s30 offset, but not able to represent some numbers (yes)
			
			//three more bytes go on top; keep the low 30 bits, sign-extend from bit 29, and double
			v = (((uint32_t)src[2]) << 24) | (((uint32_t)src[1]) << 16) | (((uint32_t)src[0]) << 8) | v;
			src += 3;
			v <<= 2;
			v = ((int32_t)v) >> 1;
		}
		
		logt("val is 0x%08x\n", v);
		logt("ofst was 0x%08x, now is 0x%08x\n", ofst, ofst + v);
		
		//apply offset (deltas accumulate from one record to the next)
		ofst += v;
		
		//apply reloc
		(*(uint32_t*)(((uintptr_t)dst) + ofst)) += (uintptr_t)relocAgainst;
	}
	
	return src;
}

//Remember that module `self` caused module `needed` to be auto-loaded.
//The new record goes at the head of the mDeps list. Failing to allocate it is fatal.
static void ralDependencyAddLocked(uint32_t self, uint32_t needed)
{
	struct ModDependency *node = (struct ModDependency*)kheapAlloc(sizeof(*node));

	if (node) {

		logt("dep: recording module 0x%04x auto-loading module 0x%04x\n", self, needed);

		node->me = self;
		node->needed = needed;
		node->next = mDeps;
		mDeps = node;
		return;
	}

	fatal("Failed to allocate dep struct\n");
}

//Called when a module is ultimately unloaded: for every module it had auto-loaded, drop one
//reference and forget the dependency record.
//One would think that this needs to be recursive, but it does not. We only release that first
//level of things here. Once any of them is itself unloaded, we will be back here to handle the
//next level.
static void ralDependencyNotifyUnloadLocked(uint32_t mod)
{
	//points at whichever pointer (mDeps or some record's "next") leads to the record being looked at,
	//so that unlinking is the same operation at the head and in the middle of the list
	struct ModDependency **link = &mDeps;

	while (*link) {

		struct ModDependency *cur = *link;
		struct BelowR9 *neededBr9;

		if (cur->me != mod) {		//somebody else's record: step over it
			link = &cur->next;
			continue;
		}

		neededBr9 = ralGetBelowR9Locked(cur->needed);
		if (!neededBr9) {
			fatal("Cannot find data for module to be auto-unloaded\n");
			return;
		}

		if (!neededBr9->useCount) {
			fatal("Already zero refcnt on module to be auto-unloaded\n");
			return;
		}

		neededBr9->useCount--;

		logt("dep: auto-unloading module 0x%04x (was auto-loaded by 0x%04x). refcnt now %u\n", cur->needed, cur->me, neededBr9->useCount);

		//unlink and free; `link` stays put since it now leads to the following record
		*link = cur->next;
		kheapFree(cur);
	}
}

//Recursive step of the dependency cycle search. Follows the "auto-loaded" records depth-first from
//`mod`, using gcMarker as an "on the current path" flag. Returns true if the walk comes back to a
//module that is already on the path, false otherwise.
//Side effects when a cycle is seen: useCount is decremented on the module where it was detected and
//on each module the `true` result passes back through, and those modules keep gcMarker == 1.
//As written, detecting a cycle calls fatal() first; the handling after it is marked UNTESTED.
static bool ralDependencyCheckForOneCycleRecurse(uint32_t mod)
{
	struct BelowR9 *br9 = ralGetBelowR9Locked(mod);
	struct ModDependency *dep;
	
	if (!br9) {
		fatal("loaded module has no br9?\n");
		return false;
	}
	
	//already on the path we are walking? then we have gone around in a circle
	if (br9->gcMarker) {
		br9->useCount--;
		fatal("dep: cycle found at 0x%04x (this code path is UNTESTED)\n", mod);
		return true;
	}
	
	br9->gcMarker = 1;		//entering this module
	for (dep = mDeps; dep; dep = dep->next) {
		
		if (dep->me != mod)		//only records of things this module auto-loaded
			continue;
		
		if (ralDependencyCheckForOneCycleRecurse(dep->needed)) {
			br9->useCount--;
			logw("dep: cycle continues to 0x%04x (this code path is UNTESTED)\n", dep->needed);
			return true;
		}
	}
	br9->gcMarker = 0;		//leaving this module with no cycle found through it
	return false;
}

//find one cycle or none. The intent is to return true if one was found and eliminated, but cycle
//elimination is not really handled yet, so for now this always returns false.
static bool ralDependencyCheckForOneCycleLocked(uint32_t modId)
{
	uint32_t id;

	//clear the "visited" marker on every module we know about
	for (id = 0; id < RAL_MAX_MODULE_ID; id++) {

		struct BelowR9 *br9 = ralGetBelowR9Locked(id);

		if (br9)
			br9->gcMarker = 0;
	}

	//recursive (depth-first) search for a cycle reachable from modId. the result is not used
	(void)ralDependencyCheckForOneCycleRecurse(modId);

	//XXXX: TODO: we do not handle this really

	return false;
}

//called when a module is "unloaded" but not to refcnt of 0.
//Keeps looking for (and eliminating) dependency cycles, one per pass, until a pass finds none.
static void ralDependencyCheckForCyclesLocked(uint32_t modId)
{
	bool eliminatedOne;

	do {
		eliminatedOne = ralDependencyCheckForOneCycleLocked(modId);
	} while (eliminatedOne);
}

//Describe a module: fills *nfo with its revision, number of entry points, whether it is the app
//module, its reference number if it is already loaded (else RAL_MODULE_REF_NO_INVALID), and how much
//memory loading it would take (0 if already loaded). Always returns errNone.
Err DALEXPORT impl_RALGetModuleInfo(const struct RalModuleDescr *descr, struct RalModuleInfo* nfo)
{
	const struct ModAmdcHeader *hdr = (const struct ModAmdcHeader*)descr->codePtr;
	uint32_t loadedAs;

	logt("%s\n", __func__);
	ralLogModuleName(__func__, descr);

	//things that come straight from the code header
	nfo->revision = hdr->revision;
	nfo->numEntrypts = hdr->numExportedFuncs;
	if (ralGetModuleId(descr->codePtr) == RAL_MODULE_ID_APP)
		nfo->isApp = 1;
	else
		nfo->isApp = 0;

	//is it loaded? the module tables may only be looked at under the lock
	(void)impl_KALMutexReserve(mModulesLock, -1);
	loadedAs = ralFindModuleRefNoLocked(descr);
	(void)impl_KALMutexRelease(mModulesLock);
	nfo->refNo = loadedAs;

	if (nfo->refNo == RAL_MODULE_REF_NO_INVALID)	//not loaded: figure out how much memory it needs
		nfo->neededMem = ralFigureOutModuleMemoryNeeds(descr, NULL, NULL, NULL, 0);
	else											//already loaded: no more memory is needed
		nfo->neededMem = 0;

	return errNone;
}

//Replacement PACE (68k) dispatcher for PmSystemLib. Always returns errNone.
//	trap 0:                 stores the library name string into slot 5 of the pointer array at `param`
//	trap 1:                 nothing to do
//	trap 0xa805:            pretends to write tokens, 68k result is -1
//	traps 0xa801/2, 0xa821: allowed, 68k result is errNone
//	anything else:          fatal
static Err pmSystemLibPaceDispatchPatch(EmulStateRef ref, void* param, uint32_t trapNum)
{
	switch (trapNum) {

		case 0:
			((const char**)param)[5] = "PmSystemLib-PmSy";
			break;

		case 1:
			break;

		case 0xa805:
			logw("PmSystemLibPatch: Pretending to write tokens\n");
			PceSet68KInt16ReturnResult(ref, -1);
			logw("PmSystemLibPatch: Pretended to write tokens\n");
			break;

		case 0xa801:		//open and close are allowed
		case 0xa802:
		case 0xa821:		//PmSysScreenFrame
			PceSet68KInt16ReturnResult(ref, errNone);
			break;

		default:
			fatal("PmSystemLib: trap 0x%04x\n", trapNum);
			break;
	}

	return errNone;
}

//Common landing point for every patched PmSystemLib entry point that has no specific replacement.
//The per-entry stubs built in ralPreLoadPatchLibrary() pass the entry point's index as `num`. Always fatal.
static void pmSystemLibReplacementFuncForAllEntrypts(uint32_t num)
{
	fatal("PmSystemLib: func %u called\n", num);
}

//Replacement main entry point for PmSystemLib. The only command it acts on is RAL_CMD_GET_PACE_ENTRY,
//for which it hands back our PACE dispatcher through cmdPBP. Every command is answered with errNone.
static uint32_t pmSystemLibMainEntryptPatch(uint16_t cmd, void* cmdPBP, uint16_t flags)
{
	if (cmd != RAL_CMD_GET_PACE_ENTRY)
		return errNone;

	*(void**)cmdPBP = (void*)pmSystemLibPaceDispatchPatch;

	return errNone;
}

//Replacement for PmSystemLib entry points that should simply fail: always returns sysErrNotAllowed.
static Err pmSystemLibReplacementFuncRetErr(void)
{
	return sysErrNotAllowed;
}

//Replacement for PmSystemLib entry points that should quietly succeed: always returns errNone.
static Err pmSystemLibReplacementFuncRetNoError(void)
{
	return errNone;
}

//Give library-specific code a chance to replace a module's entry point addresses before it is used.
//entryptAddrs holds nEntries addresses and is modified in place.
//PmSystemLib ('libr'/'PmSy') is handled here: every entry point is replaced. All other libraries are
//passed on to sonyPatchLibEntrypts().
static void ralPreLoadPatchLibrary(const struct RalModuleDescr *descr, uint32_t* entryptAddrs, uint32_t nEntries)
{
	const struct ModAmdcHeader *amdc = (const struct ModAmdcHeader*)descr->codePtr;
	uint32_t i;
	
	
	if (amdc->dbType == CREATE_4CC('l','i','b','r') && amdc->dbCrid == CREATE_4CC('P','m','S','y')) {	//leaky but who cares :)
		
		//A table of tiny Thumb stubs, one 32-bit word (two 16-bit instructions) per entry point.
		//Stub i loads i into R0 and branches to the shared tail at the end of the table, which
		//loads the address stored right after it and jumps there. The net effect is a call of
		//pmSystemLibReplacementFuncForAllEntrypts(i).
		
		static const uint32_t patches[] = {		// 0x30 times this:		<< MOV R0, #i, B.N common  >>, for i == 0 .. 0x2f
			0xE05D2000, 0xE05B2001, 0xE0592002, 0xE0572003, 0xE0552004, 0xE0532005, 0xE0512006, 0xE04F2007,
			0xE04D2008, 0xE04B2009, 0xE049200A, 0xE047200B, 0xE045200C, 0xE043200D, 0xE041200E, 0xE03F200F,
			0xE03D2010, 0xE03B2011, 0xE0392012, 0xE0372013, 0xE0352014, 0xE0332015, 0xE0312016, 0xE02F2017,
			0xE02D2018, 0xE02B2019, 0xE029201A, 0xE027201B, 0xE025201C, 0xE023201D, 0xE021201E, 0xE01F201F,
			0xE01D2020, 0xE01B2021, 0xE0192022, 0xE0172023, 0xE0152024, 0xE0132025, 0xE0112026, 0xE00F2027,
			0xE00D2028, 0xE00B2029, 0xE009202A, 0xE007202B, 0xE005202C, 0xE003202D, 0xE001202E, 0xE7FF202F, 
			//common:
			0x47104a00,// LDR R2, . + 4, bx r2
			(uint32_t)&pmSystemLibReplacementFuncForAllEntrypts
		};
		
		logi("Patching PmSystemLib\n");
		
		//the last two words of the table are the shared tail, not stubs
		if (nEntries > (sizeof(patches) - 8) / sizeof(uint32_t))
			fatal("PmSystemLib has too many entry points\n");
		
		for (i = 0; i < nEntries; i++)
			entryptAddrs[i] = 1 /* thumb */ + (uint32_t)(patches + i);
		
		//a few entry points get a real (if trivial) implementation instead of the fatal stub
		//NOTE(review): these three stores are not guarded by nEntries; they assume the library has at least 22 entry points
		entryptAddrs[12] = (uint32_t)&pmSystemLibReplacementFuncRetErr;
		entryptAddrs[13] = (uint32_t)&pmSystemLibReplacementFuncRetErr;
		entryptAddrs[21] = (uint32_t)&pmSystemLibReplacementFuncRetNoError;	// PmSysScreenFrame
		
		return;
	}

	//should be last
	sonyPatchLibEntrypts(amdc->dbType, amdc->dbCrid, entryptAddrs, nEntries);
}

//Give library-specific code a chance to replace a module's main entry point (*entryPt) before it is called.
//With SUPPORT_OS_54, PmSystemLib ('libr'/'PmSy') gets pmSystemLibMainEntryptPatch.
//With SONY_SUPPORT_ENABLED, the sony code is then asked as well and may replace it again.
//With neither defined this function does nothing.
static void ralModifyLibEntryPr(const struct RalModuleDescr *descr, ModuleEntryPoint* entryPt)
{
	const struct ModAmdcHeader *amdc = (const struct ModAmdcHeader*)descr->codePtr;
	
#ifdef SUPPORT_OS_54
	if (amdc->dbType == CREATE_4CC('l','i','b','r') && amdc->dbCrid == CREATE_4CC('P','m','S','y')) {
		
		*entryPt = pmSystemLibMainEntryptPatch;
	}
#endif

#ifdef SONY_SUPPORT_ENABLED
	//should be last
	sonyPatchLibMainEntry(amdc->dbType, amdc->dbCrid, entryPt);
#endif
}

//Decide whether a module may be loaded at all, based on the database type/creator in its code header.
//Returns true to allow, false to refuse (every refusal is logged with the reason).
//Our own modules (rePalm flag pattern in amdc->flags) are always allowed. Slot drivers ('libs') are
//refused, with the TapWave internal one as the only exception. A list of other specific drivers is
//refused too, and sonyDenyLibLoad() can veto more. Everything else is allowed.
static bool ralPreLoadCheckPermittedLibrary(const struct RalModuleDescr *descr)
{
	const struct ModAmdcHeader *amdc = (const struct ModAmdcHeader*)descr->codePtr;
	
	if ((amdc->flags & RAL_AMDC_FLAGS_MASK_REPALM) == RAL_AMDC_FLAGS_VAL_REPALM)		//ours always allowed
		return true;
	
	//slot drivers: deny by default. the cases exist so that the log names the driver
	if (amdc->dbType == CREATE_4CC('l','i','b','s')) {
		switch (amdc->dbCrid) {
			case CREATE_4CC('N','F','S','D'):
				logw("Refusing to load %s slot driver\n", "NVFS");
				break;
			
			case CREATE_4CC('T','F','F','S'):
				logw("Refusing to load %s slot driver\n", "TFFS");
				break;
			
			case CREATE_4CC('s','d','s','d'):
				logw("Refusing to load %s slot driver\n", "SD");
				break;
			
			case CREATE_4CC('S','l','M','s'):
				logw("Refusing to load %s slot driver\n", "MS");
				break;
			
			case CREATE_4CC('p','n','p','s'):
				logw("Refusing to load %s slot driver\n", "SDIO");
				break;
			
			case CREATE_4CC('S','l','C','f'):
				logw("Refusing to load %s slot driver\n", "CF");
				break;
			
			case CREATE_4CC('t','w','S','0'):		//the one slot driver we do let through
				logw("Allowing to load %s slot driver\n", "TapWave Internal");
				return true;
			
			default:
				logw("Refusing to load some other unknown slot driver: " FMT_4CC "\n", CNV_4CC(amdc->dbCrid));
				break;
		}
		return false;
	}
	//everything else: allow by default. each branch below that matches logs and falls out to "return false"
	else if (amdc->dbType == CREATE_4CC('l','i','b','r') && amdc->dbCrid == CREATE_4CC('N','a','n','d'))
		logw("Refusing to load SonyUX NAND driver\n");
	else if (amdc->dbType == CREATE_4CC('l','i','b','f') && amdc->dbCrid == CREATE_4CC('N','F','F','S'))
		logw("Refusing to load NVFS fs driver\n");
	else if (amdc->dbType == CREATE_4CC('a','e','x','t') && amdc->dbCrid == CREATE_4CC('b','t','s','e'))
		logw("Refusing to load Bluetooth Transport driver\n");
	else if (amdc->dbType == CREATE_4CC('a','e','x','t') && amdc->dbCrid == CREATE_4CC('m','t','_','e'))
		logw("Refusing to load MfgTest driver\n");
	else if (amdc->dbType == CREATE_4CC('v','d','r','v'))
		logw("Refusing to load serial port drivers driver\n");
	else if (sonyDenyLibLoad(amdc->dbType, amdc->dbCrid))
		logw("Refusing to load sony-related lib (" FMT_4CC "," FMT_4CC ")\n", CNV_4CC(amdc->dbType), CNV_4CC(amdc->dbCrid));
	else if (amdc->dbType == CREATE_4CC('l','i','b','r') && amdc->dbCrid == CREATE_4CC('T','F','F','S'))
		logw("Refusing to load TFFS slot driver\n");
	else if (amdc->dbType == CREATE_4CC('l','i','b','r') && amdc->dbCrid == CREATE_4CC('s','d','s','d'))
		logw("Refusing to load old SD slot driver\n");
	else
		return true;
	
	return false;
}

//Set what the app module slot (mModules[RAL_MODULE_ID_APP]) points to.
//An odd `val` selects the built-in dormant app world; any other value is stored as given.
//The semantics of this func are weird to say the least. Yes, the following code *IS* correct.
//Fatal if called before the memory pivot.
void DALEXPORT impl_RALSetA5(uintptr_t val)
{
	void *appWorld;

	logt("%s(0x%08x)\n", __func__, val);

	if (!mMemoryPivoted)
		fatal("%s cannot be called before we pivot\n", __func__);

	//neither candidate depends on the module tables, so the choice can be made before locking
	appWorld = (val & 1) ? (void*)mRalCurrentAppDormantR9World : (void*)val;

	(void)impl_KALMutexReserve(mModulesLock, -1);
	mModules[RAL_MODULE_ID_APP] = appWorld;
	(void)impl_KALMutexRelease(mModulesLock);
}

//Report the pointer recorded in the module table for module reference `refNo`.
//It is stored to *baseP if baseP is not NULL; it may be NULL if no module is recorded there.
//Returns RAL_ERROR_INTERNAL_ERR for an out-of-range refNo, errNone otherwise.
//Fatal if called before the memory pivot.
Err DALEXPORT impl_RALGetStaticBase(uint32_t refNo, void **baseP)
{
	logt("%s(0x%08x)\n", __func__, refNo);

	if (!mMemoryPivoted)
		fatal("%s cannot be called before we pivot\n", __func__);

	if (!(refNo < RAL_MAX_MODULE_ID))
		return RAL_ERROR_INTERNAL_ERR;

	//the lock is taken even when there is nowhere to store the answer
	(void)impl_KALMutexReserve(mModulesLock, -1);
	if (baseP != NULL) {
		void *base = (void*)mModules[refNo];

		*baseP = base;
	}
	(void)impl_KALMutexRelease(mModulesLock);

	return errNone;
}

//Load a module, optionally making its dispatch table bigger so that it has at least
// wantedMinEntries slots.
//  descr            - descriptor of the module to load (db handle, code, inited data, info)
//  dataArea         - caller-provided memory for the module's data; it is zeroed and
//                     subdivided here, and only touched when a fresh load is performed
//  entryPtAddrP     - optional out: the module's main entry point (after ralModifyLibEntryPr)
//  refNoP           - optional out: module reference number, or RAL_REF_NO_NO_GLOBALS
//  wantedMinEntries - minimum number of dispatch table slots; slots beyond the module's
//                     exported function count are filled with 0xffffffff
// Returns errNone for a fresh load or a use-count bump, RAL_ERROR_NO_GLOBALS when an app is
// loaded while the dormant app world is installed, or an error code. On the failure paths
// that jump to out_failure, *refNoP and *entryPtAddrP are NOT written.
Err ralPrvLoadModuleEx(const struct RalModuleDescr *descr, void* dataArea, const void **entryPtAddrP, uint32_t *refNoP, uint32_t wantedMinEntries)
{
	uint32_t i, spaceBelowR9, spaceAboveR9 = 0, spaceJumpTable, spaceTotal, nEntries, nRealEntries;
	const struct ModAmdcHeader *amdc = (const struct ModAmdcHeader*)descr->codePtr;
	uint32_t modId, extraEntryptSlots = 0, refNo = RAL_MODULE_REF_NO_INVALID;
	uint32_t *nEntriesStorePtr, *addrTable, *ralLinkClientJumptable;
	Err retErr = RAL_ERROR_INTERNAL_ERR;
	ModuleEntryPoint moduleMainEntrypt;
	bool callLoadedEntryPt = false;
	uint8_t *dataAboveR9;
	struct BelowR9 *br9;
		
		
	//the code end address is only logged once the DAL init stage allows MemPtrSize() to be used
	logt("%s() from 0x%08x. DESCR: {dbH:0x%08x code:0x%08x (->0x%08x) data:0x%08x+0x%x info:0x%08x}\n",
				__func__, __builtin_return_address(0), descr->dbH, descr->codePtr, dalGetInitStage() >= DAL_INIT_STAGE_MEM_AND_DM ? ((uintptr_t)descr->codePtr) + MemPtrSize((void*)descr->codePtr) : 0,
				descr->initedDataPtr, descr->initedDataSz, descr->infoPtr);
	ralLogModuleName(__func__, descr);
	
	modId = ralGetModuleId(descr->codePtr);
	logd("Module ID: 0x%03x\n", modId);
	
	//before the memory pivot only the DAL and Boot modules may be loaded
	if (!mMemoryPivoted && modId != RAL_MODULE_ID_DAL && modId != RAL_MODULE_ID_BOOT)
		fatal("Cannot load any more modules before memory pivot\n");
	
	//module ID 3 with type 'aexo' and creator 'twOS' is recognized as the Zodiac TAL
	if (modId == 3 && amdc->dbType == CREATE_4CC('a','e','x','o') && amdc->dbCrid == CREATE_4CC('t','w','O','S')) {
		
		#ifndef SUPPORT_ZODIAC
			fatal("Zodiac TAL detected but Zodiac support is disabled!\n");
		#endif
		
		logi("Zodiac TAL detected. Switching to zodiac method of module data handling\n");
	
		mIsZodiac = true;
	}
	
	if (modId >= RAL_MAX_MODULE_ID) {
		fatal("Module ID 0x%03x is too high\n", modId);
		return RAL_ERROR_INVALID_MODULE_ID;		//only reached if fatal() returns
	}
	
	//everything from here until out_failure runs with the modules lock held
	(void)impl_KALMutexReserve(mModulesLock, -1);
	
	//see if it is already loaded, and if so, just up the ref count
	br9 = ralGetBelowR9Locked(modId);
	
	if (RAL_MODULE_REF_NO_INVALID != ralFindModuleRefNoLocked(descr)) {
	
		if (!br9)
			fatal("loaded module MUST have an R9-world!\n");
	
		logt("re-loading existing module - just upping the refnt\n");
		br9->useCount++;
		retErr = errNone;
		
		//grab ref no
		refNo = ralGetLoadedModuleReferenceNumberLocked(br9);
	}
	
	//the app module ID is handled specially
	else if (modId == RAL_MODULE_ID_APP && br9 == mRalCurrentAppDormantBR9) {
		
		logd("App launch, no globals. br9=0x%08x\n", br9);
		
		//another app is running and its world was replaced with the dormant one
		// to hide its globals from interference of a sub call, so load new app with no globals!
	
		refNo = RAL_REF_NO_NO_GLOBALS;
		retErr = RAL_ERROR_NO_GLOBALS;
	}
	else if (modId == RAL_MODULE_ID_APP && br9) {
			
		//an app world is installed that is not the dormant one: refuse
		loge("Refusing to load app over another's globals not hidden by dormancy (0x%08x)\n", br9);
		retErr = RAL_ERROR_INTERNAL_ERR;
		goto out_failure;
	}
	else {
		
		//fresh load: first make sure this library is one we are willing to load at all
		if (!ralPreLoadCheckPermittedLibrary(descr)) {
			retErr = RAL_ERROR_INTERNAL_ERR;
			goto out_failure;
		}

		//find how much memory we'll use. while we do assume caller provided the memory, we still need to know how to subdivide it
		//NOTE(review): extraEntryptSlots is still 0 at this point - it is only derived from
		// wantedMinEntries further down - so the sizes computed here do not account for any extra
		// slots. DAL/Boot/UI/TAL keep their tables in separate static arrays, so they are not
		// affected; for any other module the padded table would live in dataArea. Confirm that no
		// caller passes wantedMinEntries above the exported count for such modules.
		spaceTotal = ralFigureOutModuleMemoryNeeds(descr, &spaceBelowR9, &spaceAboveR9, &spaceJumpTable, extraEntryptSlots);
		
		//zero the memory
		memset(dataArea, 0, spaceTotal);
	
		//calculate where everything is
		// layout of dataArea: [spaceBelowR9 bytes][spaceAboveR9 bytes][entry table area]
		// the BelowR9 struct is placed so that it ends exactly where the above-R9 data begins
		dataAboveR9 = ((uint8_t*)dataArea) + spaceBelowR9;
		addrTable = (uint32_t*)(dataAboveR9 + spaceAboveR9);
		br9 = ((struct BelowR9*)dataAboveR9) - 1;
		
		//unpack and reloc inited data as needed
		if (descr->initedDataPtr) {
			//the first 32-bit word of the inited data is skipped here; the Zodiac path below
			// reads that word and subtracts it from dst to form its data-to-data reloc base
			const uint8_t *src = ((const uint8_t*)descr->initedDataPtr) + sizeof(uint32_t);
			const uint8_t *srcEnd = src + descr->initedDataSz;
			uint8_t *dst = dataAboveR9 + spaceAboveR9;
			
			//each step returns the new read position, so src walks through the packed stream
			src = ralUnpackInitedData(src, dst);
			if (mIsZodiac) {
				
				if (src < srcEnd)		//we might have data-to-data relocs to process. if so, process them
					src = ralPerformRelocsZodiac(src, dataAboveR9, dst - *(uint32_t*)descr->initedDataPtr);
				
				if (src < srcEnd)		//we might have data-to-code relocs to process. if so, process them
					src = ralPerformRelocsZodiac(src, dataAboveR9, descr->codePtr);
			}
			else {
			
				if (src < srcEnd)		//we might have data-to-data relocs to process. if so, process them
					src = ralPerformRelocsNormal(src, dst, dst);
				
				if (src < srcEnd)		//we might have data-to-code relocs to process. if so, process them
					src = ralPerformRelocsNormal(src, dst, descr->codePtr);
			}
			
			//leftover (or overrun) is only logged, it does not fail the load
			if (src != srcEnd)
				logt("Not all inited data used in module init, %d bytes left\n", (int)(srcEnd - src));
		}
		
		//fill in the below-r9 data
		// sysTables[0] is the rePalm table; a system module with ID n goes into slot
		// (RAL_NUM_MODULES_BELOW_R9 - 1 - n). The "+ 1" skips the leading "num entries"
		// word of each static table so the stored pointer is that table's address array.
		br9->self = *descr;
		br9->useCount = 1;
		br9->sysTables[0] = mEntriesRePalm + 1;
		br9->sysTables[RAL_NUM_MODULES_BELOW_R9 - 1 - RAL_MODULE_ID_TAL] = TAL_PTR;
		br9->sysTables[RAL_NUM_MODULES_BELOW_R9 - 1 - RAL_MODULE_ID_UI] = mEntriesUi ? (mEntriesUi + 1) : NULL;
		br9->sysTables[RAL_NUM_MODULES_BELOW_R9 - 1 - RAL_MODULE_ID_BOOT] = mEntriesBoot + 1;
		br9->sysTables[RAL_NUM_MODULES_BELOW_R9 - 1 - RAL_MODULE_ID_DAL] = mEntriesDal + 1;
		
		//init the mandatory 4 words above r9 (done after data & reloc processing since they may clear them)
		// each word points into mModules, MOD_TAB_OFFSET_AFTER_R9 elements apart
		for (i = 0; i < MOD_TABLES_AFTER_R9; i++)
			((void**)dataAboveR9)[i] = mModules + i * MOD_TAB_OFFSET_AFTER_R9;
		
		//calculate where we'll store "num entries", addresses, and RalLinkClient jumptable entries, with deference to special modules
		// nRealEntries = what the module exports; nEntries = that, padded up to wantedMinEntries
		nRealEntries = amdc->numExportedFuncs;
		if (nRealEntries < wantedMinEntries)
			extraEntryptSlots = wantedMinEntries - nRealEntries;
		nEntries = nRealEntries + extraEntryptSlots;
		switch (modId) {
			//system modules: table lives in a fixed-size static array (count word first),
			// and no RalLinkClient jumptable is generated for them
			case RAL_MODULE_ID_DAL:
				if (nEntries > MAX_ENTRIES_DAL)	 {
					fatal("Too many entries in DAL\n");
					retErr = RAL_ERROR_TOO_MANY_ENTRIES;
					goto out_failure;
				}
				nEntriesStorePtr = mEntriesDal;
				addrTable = mEntriesDal + 1;
				ralLinkClientJumptable = NULL;
				break;
			case RAL_MODULE_ID_BOOT:
				if (nEntries > MAX_ENTRIES_BOOT)	 {
					fatal("Too many entries in Boot\n");
					retErr = RAL_ERROR_TOO_MANY_ENTRIES;
					goto out_failure;
				}
				nEntriesStorePtr = mEntriesBoot;
				addrTable = mEntriesBoot + 1;
				ralLinkClientJumptable = NULL;
				break;
			case RAL_MODULE_ID_UI:
				if (nEntries > MAX_ENTRIES_UI)	 {
					fatal("Too many entries in UI\n");
					retErr = RAL_ERROR_TOO_MANY_ENTRIES;
					goto out_failure;
				}
				nEntriesStorePtr = mEntriesUi;
				addrTable = mEntriesUi + 1;
				ralLinkClientJumptable = NULL;
				break;
			
		#ifdef SUPPORT_ZODIAC
		
			case RAL_MODULE_ID_TAL:
				if (nEntries > MAX_ENTRIES_TAL)	 {
					fatal("Too many entries in TAL\n");
					retErr = RAL_ERROR_TOO_MANY_ENTRIES;
					goto out_failure;
				}
				nEntriesStorePtr = mEntriesTal;
				addrTable = mEntriesTal + 1;
				ralLinkClientJumptable = NULL;
				break;
				
		#endif
		
			//all other modules: table lives in the module's own data area, laid out as
			// [num entries][nEntries addresses][nEntries jump instructions]
			default:
				nEntriesStorePtr = addrTable;
				addrTable++;
				ralLinkClientJumptable = addrTable + nEntries;
				break;
		}
		
		//write "num entries" entry
		*nEntriesStorePtr = nEntries;
		
		//write the address table
		// exported entries are resolved from the module's jump table, which starts
		// entryptJumptableOfst bytes from the start of the amdc header; padding slots get 0xffffffff
		for (i = 0; i < nRealEntries; i++)
			addrTable[i] = (uint32_t)ralResolveJumpTarget((uint8_t*)amdc->entryptJumptableOfst + i * sizeof(uint32_t) + (uintptr_t)amdc);
		for (; i < nEntries; i++)
			addrTable[i] = 0xffffffff;
		
		//write the RalLinkClient jumptable if needed
		// every slot gets the same instruction word: jump slot i sits exactly nEntries words
		// after address slot i, so the load offset does not depend on i
		if (ralLinkClientJumptable) {
			for (i = 0; i < nEntries; i++)
				ralLinkClientJumptable[i] = 0xE51FF000 + (2 + nEntries) * 4;	//LDR PC, [PC, -(8 + nEntries * 4))]
			
			//the words just written will be executed as code, so do cache maintenance on them
			mpuInstrCacheClearDataCacheClean((uintptr_t)ralLinkClientJumptable, sizeof(*ralLinkClientJumptable) * nEntries);
		}
		
		//store module data pointer into the relevant array
		mModules[modId] = dataAboveR9;
		mModuleEntries[modId] = nEntriesStorePtr;
		
	#ifdef SUPPORT_OS_54
		//check for special things we want to patch
		ralPreLoadPatchLibrary(descr, addrTable, nEntries);
	#endif
	
		//call entry point
		// (only requested here - the actual call happens below, after the entry point is resolved)
		callLoadedEntryPt = true;

		//grab ref no
		refNo = ralGetLoadedModuleReferenceNumberLocked(br9);
		
		//return no error
		retErr = errNone;
	}
	
	//get entrypt ptr
	// this runs for the use-count bump and the no-globals app cases too, not only for fresh loads
	moduleMainEntrypt = (ModuleEntryPoint)ralResolveJumpTarget(amdc);
	
	//modify entrypt?
	ralModifyLibEntryPr(descr, &moduleMainEntrypt);
	
	if (callLoadedEntryPt) {
		//call the entry point with "entered" code
		// the call is made with the modules lock still held, and is skipped if the entry point is NULL
		logt("Calling module entry point 0x%08x with 'loading' call\n", moduleMainEntrypt);
		if (moduleMainEntrypt)
			(void)moduleMainEntrypt(RAL_CMD_LOAD, NULL, 0);
		logt("Module entry point returned\n");
		retErr = errNone;		//already errNone on this path; the entry point's return value is ignored
	}

	//return module reference number and entry point (if requested)
	if (refNoP)
		*refNoP = refNo;
	if (entryPtAddrP)
		*entryPtAddrP = (const void*)moduleMainEntrypt;
	
	//success paths fall through to here as well; failure paths jump here directly
out_failure:
	//we're done with the mutex
	(void)impl_KALMutexRelease(mModulesLock);
	
	logt("%s() -> 0x%04x. DESCR: {dbH:0x%08x code:0x%08x data:0x%08x+0x%x info:0x%08x}\n",
				__func__, retErr, descr->dbH, descr->codePtr,
				descr->initedDataPtr, descr->initedDataSz, descr->infoPtr);
	return retErr;
}

//Public module-load entry point: forwards to ralPrvLoadModuleEx, requesting a
// minimum dispatch table size for the Boot (909 slots) and UI (352 slots) modules
// and no minimum for everything else.
Err DALEXPORT impl_RALLoadModule(const struct RalModuleDescr *descr, void* dataArea, const void **entryPtAddrP, uint32_t *refNoP)
{
	uint32_t minSlots;
	
	switch (ralGetModuleId(descr->codePtr)) {
		case RAL_MODULE_ID_BOOT:
			minSlots = 909;
			break;
		
		case RAL_MODULE_ID_UI:
			minSlots = 352;
			break;
		
		default:
			minSlots = 0;
			break;
	}
	
	return ralPrvLoadModuleEx(descr, dataArea, entryPtAddrP, refNoP, minSlots);
}

//Drop one reference to a loaded module and, if that was the last one, unload it.
//  refNo               - module reference number; RAL_REF_NO_NO_GLOBALS is accepted and is a no-op
//  codeDescriptorOutP  - optional out: copy of the module's descriptor (only written on a real unload)
//  dataAreaToFreeP     - optional out: memory the caller should free. Set to NULL when nothing is to
//                        be freed (no-globals ref, or use count merely decremented), set to the
//                        module's BelowR9 pointer on a real unload, and left untouched on error paths.
// Returns errNone on success, RAL_ERROR_MODULE_NOT_LOADED / RAL_ERROR_INTERNAL_ERR /
// RAL_ERROR_MODULE_CANNOT_UNLOAD otherwise. DAL, Boot and UI can never be fully unloaded.
Err DALEXPORT impl_RALUnloadModule(uint32_t refNo, struct RalModuleDescr *codeDescriptorOutP, void **dataAreaToFreeP)
{
	struct BelowR9 *br9;
	uint32_t modId;
	Err ret;
	
	logt("%s(ref 0x%x)\n", __func__, refNo);
	
	if (refNo == RAL_REF_NO_NO_GLOBALS) {
		logd("no globals -> no unload\n");
		if (dataAreaToFreeP)
			*dataAreaToFreeP = NULL;
		
		return errNone;
	}
	
	(void)impl_KALMutexReserve(mModulesLock, -1);
	modId = ralLookupLoadedModuleLocked(refNo);
	if (modId == RAL_MODULE_ID_INVALID) {
		fatal("Unloading a module that is not loaded\n");
		ret = RAL_ERROR_MODULE_NOT_LOADED;
	}
	else {
		br9 = ralGetBelowR9Locked(modId);
		if (!br9) {
			fatal("Cannot find data for module to be unloaded\n");
			ret = RAL_ERROR_INTERNAL_ERR;
		}
		else if (br9 == mRalCurrentAppDormantBR9) {
			
			fatal("Cannot unload dormant state\n");
			ret = RAL_ERROR_INTERNAL_ERR;
		}
		else if (br9->useCount > 1) {
			//other users remain: just drop our reference
			logt("Unloading a module with refCnt %u. Simply decrementing\n", br9->useCount);
			br9->useCount--;
			if (dataAreaToFreeP)
				*dataAreaToFreeP = NULL;
			ralDependencyCheckForCyclesLocked(modId);
			ret = errNone;
		}
		else if (modId == RAL_MODULE_ID_DAL || modId == RAL_MODULE_ID_BOOT || modId == RAL_MODULE_ID_UI) {
			loge("Cannot unload module with id %u\n", modId);
			ret = RAL_ERROR_MODULE_CANNOT_UNLOAD;
		}
		else {
			ModuleEntryPoint entryptToCall = (ModuleEntryPoint)ralResolveJumpTarget(br9->self.codePtr);
			
			//provide descriptor if requested
			if (codeDescriptorOutP)
				*codeDescriptorOutP = br9->self;
			
			//call the entry point with "exited" code
			
			//modify entrypt?
			ralModifyLibEntryPr(&br9->self, &entryptToCall);
			
			//mirror the load path, which skips the call when the entry point is NULL
			logt("Calling module entry point 0x%08x with 'unloading' call\n", entryptToCall);
			if (entryptToCall)
				(void)entryptToCall(RAL_CMD_UNLOAD, NULL, 0);
			
			//tell caller what to free
			if (dataAreaToFreeP)
				*dataAreaToFreeP = br9;
				
			//forget the module
			mModuleEntries[modId] = NULL;
			mModules[modId] = NULL;
			
			ralDependencyNotifyUnloadLocked(modId);
			
			ret = errNone;
		}
	}
	(void)impl_KALMutexRelease(mModulesLock);
	
	return ret;
}

//Find the highest-numbered module (other than the DAL and the app slot) whose use count is
// zero and unload it.
//  refNo               - in/out scan cursor. On input, the scan starts just below *refNo (values
//                        >= RAL_MAX_MODULE_ID, or a NULL pointer, mean "start from the top"; 0 means
//                        nothing is scanned). On a hit it is reset to RAL_MAX_MODULE_ID.
//  codeDescriptorOutP  - optional out: descriptor of the module that was picked
//  dataAreaToFreeP     - passed through to impl_RALUnloadModule
// Returns the result of impl_RALUnloadModule for the picked module, or
// RAL_ERROR_NO_MODE_MODULES_TO_UNLOAD when no candidate was found.
Err DALEXPORT impl_RALUnloadNext(uint32_t *refNo /* in and out */, struct RalModuleDescr *codeDescriptorOutP /* out */, void **dataAreaToFreeP)
{
	Err ret = RAL_ERROR_NO_MODE_MODULES_TO_UNLOAD;
	struct BelowR9 *br9;
	int32_t i;
	
	logt("%s\n", __func__);
	
	if (!mMemoryPivoted)
		fatal("%s cannot be called before we pivot\n", __func__);
	
	//refNo is treated as optional further down, so do not dereference it blindly here
	if (!refNo || *refNo >= RAL_MAX_MODULE_ID)
		i = RAL_MAX_MODULE_ID - 1;
	else if (*refNo)
		i = *refNo - 1;
	else
		i = 0;
	
	(void)impl_KALMutexReserve(mModulesLock, -1);
	for (; i > 0; i--) {	//0 is the dal and is never unloaded or considered for unloading
		if (i == RAL_MODULE_ID_APP)
			continue;
		br9 = ralGetBelowR9Locked(i);
		if (br9 && !br9->useCount) {
			
			uint32_t ref = ralGetLoadedModuleReferenceNumberLocked(br9);
			struct RalModuleDescr descr;
			
			//copy the descriptor out before the module goes away
			descr = br9->self;
			
			if (refNo)
				*refNo = RAL_MAX_MODULE_ID;		//start from top again next time, the shortcut is not worth it
			if (codeDescriptorOutP)
				*codeDescriptorOutP = descr;
			
			//NOTE(review): impl_RALUnloadModule reserves mModulesLock again while we still hold
			// it - this relies on the mutex allowing nested reservation by its owner; confirm
			logt("dep: next unloading mod 0x%04x\n", i);
			ret = impl_RALUnloadModule(ref, &descr, dataAreaToFreeP);
			break;
		}
	}
	
	(void)impl_KALMutexRelease(mModulesLock);
	
	logt("%s -> 0x%04x\n", __func__, ret);
	return ret;
}

//Decide whether entries of the module described by hdr may be replaced.
// A module is patcheable if it carries the SYSTEM flag, or if its ID is at or above
// RAL_NUM_MODULES_BELOW_R9 and it carries the PATCHEABLE flag.
static bool ralIsModulePatcheable(const struct ModAmdcHeader* hdr)
{
	const uint32_t moduleId = ralGetModuleId(hdr);
	const uint32_t hdrFlags = hdr->flags;
	
	//system modules are implicitly patcheable
	if (hdrFlags & RAL_MODULE_FLAG_SYSTEM_MODULE)
		return true;
	
	//low module IDs without the system flag are never patcheable
	if (moduleId < RAL_NUM_MODULES_BELOW_R9)
		return false;
	
	return (hdrFlags & RAL_MODULE_FLAG_PATCHEABLE_MODULE) != 0;
}

//Replace one entry in a loaded module's address table.
//  refNo           - reference number of the module to patch
//  entryptNo       - index of the entry to replace; must be below the table's entry count
//  replacementFunc - new target; stored after being passed through ralResolveArmThunks
//  oldFuncP        - optional out: the previous table value for that entry
// Returns errNone on success, RAL_ERROR_MODULE_NOT_LOADED if refNo is not loaded, and
// RAL_ERROR_NONEXISTENT_ENTRY if the index is out of range or the module is not patcheable.
Err DALEXPORT impl_RALPatchEntry(uint32_t refNo, uint32_t entryptNo, const void *replacementFunc, const void **oldFuncP)
{
	uint32_t *nEntriesStorePtr, *addrTable, nEntries;
	uint32_t modId;
	Err ret;
	
	logt("%s(refNo=%u entryptNo=%u replacementFunc=0x%08X)\n", __func__, refNo, entryptNo, replacementFunc);
	(void)impl_KALMutexReserve(mModulesLock, -1);
	modId = ralLookupLoadedModuleLocked(refNo);
	if (modId == RAL_MODULE_ID_INVALID) {
		fatal("Patching a module that is not loaded\n");
		ret = RAL_ERROR_MODULE_NOT_LOADED;
	}
	else {
		//table layout: one "num entries" word followed by the addresses
		nEntriesStorePtr = mModuleEntries[modId];
		addrTable = nEntriesStorePtr + 1;
		nEntries = *nEntriesStorePtr;
		
		if (entryptNo >= nEntries) {
			fatal("Patching an entry beyond how many there are (%u >= %u)\n", entryptNo, nEntries);
			ret = RAL_ERROR_NONEXISTENT_ENTRY;
		}
		else {
			
			if (ralIsModulePatcheable((const struct ModAmdcHeader*)ralGetBelowR9Locked(modId)->self.codePtr)) {
			
				//hand back the old target before overwriting it
				if (oldFuncP)
					*oldFuncP = (void*)addrTable[entryptNo];
				
				addrTable[entryptNo] = ralResolveArmThunks(replacementFunc);
				ret = errNone;
			}
			else {
				
				//conversion specifier was missing its 'x', which garbled the logged module id
				loge("Patching unpatcheable module id 0x%04x\n", modId);
				ret = RAL_ERROR_NONEXISTENT_ENTRY;
			}
		}
	}
	
	(void)impl_KALMutexRelease(mModulesLock);
	return ret;
}

//Copy a range of entry addresses out of a loaded module's address table.
//  refNo                  - reference number of the module
//  firstEntryPtOfInterest - first entry index (inclusive)
//  lastEntryPtOfInterest  - last entry index (inclusive); must be below the entry count
//  addrsP                 - out array receiving (last - first + 1) addresses
// Special case: when both indices equal RAL_ENTRYPT_NUM_MAIN_ENTRY, the module's main entry
// point (after ralModifyLibEntryPr) is returned in addrsP[0] instead.
// Returns errNone, RAL_ERROR_MODULE_NOT_LOADED or RAL_ERROR_NONEXISTENT_ENTRY.
Err DALEXPORT impl_RALGetEntryAddresses(uint32_t refNo, uint32_t firstEntryPtOfInterest, UInt32 lastEntryPtOfInterest, const void **addrsP)
{
	uint32_t *countSlot, *table, numEntries, modId, idx, wanted;
	Err ret;
	
	logt("%s(refNo=%u entries=[%u,%u])\n", __func__, refNo, firstEntryPtOfInterest, lastEntryPtOfInterest);
	
	//an inverted range is rejected before the lock is ever taken
	if (firstEntryPtOfInterest > lastEntryPtOfInterest) {
		fatal("requested range is negative\n");
		return RAL_ERROR_NONEXISTENT_ENTRY;
	}
	
	(void)impl_KALMutexReserve(mModulesLock, -1);
	
	modId = ralLookupLoadedModuleLocked(refNo);
	if (modId == RAL_MODULE_ID_INVALID) {
		fatal("Getting entry addrs for a module that is not loaded\n");
		ret = RAL_ERROR_MODULE_NOT_LOADED;
		goto out_unlock;
	}
	
	//table layout: one "num entries" word followed by the addresses
	countSlot = mModuleEntries[modId];
	table = countSlot + 1;
	numEntries = *countSlot;
	
	if (firstEntryPtOfInterest == RAL_ENTRYPT_NUM_MAIN_ENTRY && lastEntryPtOfInterest == firstEntryPtOfInterest) {
		
		//caller wants the module's main entry point rather than a table entry
		struct RalModuleDescr *modDescr = &ralGetBelowR9Locked(modId)->self;
		ModuleEntryPoint mainEntry = (ModuleEntryPoint)ralResolveJumpTarget(modDescr->codePtr);
		
		ralModifyLibEntryPr(modDescr, &mainEntry);
		
		*addrsP = (void*)mainEntry;
		ret = errNone;
		goto out_unlock;
	}
	
	if (lastEntryPtOfInterest >= numEntries) {
		fatal("requested range goes beyond the number of entries available\n");
		ret = RAL_ERROR_NONEXISTENT_ENTRY;
		goto out_unlock;
	}
	
	//table values are already resolved, so they are copied as-is
	wanted = lastEntryPtOfInterest - firstEntryPtOfInterest + 1;
	for (idx = 0; idx < wanted; idx++)
		addrsP[idx] = (void*)table[firstEntryPtOfInterest + idx];
	ret = errNone;
	
out_unlock:
	(void)impl_KALMutexRelease(mModulesLock);
	return ret;
}

//Link a client to a loaded module by filling in the client's dispatch table struct.
//  refNo       - reference number of the module to link with (DAL, Boot and UI are refused)
//  clientId    - ID of the client; recorded as depending on the module
//  dispatchTbl - address of a struct DispatchTableStruct to fill in, with flag bits in the
//                low two bits (masked off before use). Bit 0 set means the caller knows about
//                our THUMB extension: it then receives a pointer to the address table itself
//                (to load targets from directly) rather than to the table of jump instructions
//                that follows it.
// Returns errNone, RAL_ERROR_MODULE_NOT_LOADED or RAL_ERROR_MODULE_CANNOT_BE_LINKED_WITH.
Err DALEXPORT impl_RALLinkClient(UInt32 refNo, UInt32 clientId, void **dispatchTbl)
{
	const uintptr_t rawTblArg = (uintptr_t)dispatchTbl;
	const bool thumbEntryptsSupported = (rawTblArg & 1) != 0;
	struct DispatchTableStruct *clientTab = (struct DispatchTableStruct*)(rawTblArg &~ 3);
	uint32_t *countSlot, *table, numEntries, modId;
	Err ret = errNone;
	
	logt("%s(refNo=%u clientId=%u dispatchTab=0x%08X) [thumb entrypts supported: %s]\n", __func__, refNo, clientId, dispatchTbl, thumbEntryptsSupported ? "YES" : "NO");
	(void)impl_KALMutexReserve(mModulesLock, -1);
	
	modId = ralLookupLoadedModuleLocked(refNo);
	if (modId == RAL_MODULE_ID_INVALID) {
		fatal("Linknig with a module that is not loaded\n");
		ret = RAL_ERROR_MODULE_NOT_LOADED;
		goto out_unlock;
	}
	
	if (modId == RAL_MODULE_ID_DAL || modId == RAL_MODULE_ID_BOOT || modId == RAL_MODULE_ID_UI) {
		fatal("Cannot link with module with id %u\n", modId);
		ret = RAL_ERROR_MODULE_CANNOT_BE_LINKED_WITH;
		goto out_unlock;
	}
	
	//table layout: [num entries][addresses][jump instructions]
	countSlot = mModuleEntries[modId];
	table = countSlot + 1;
	numEntries = *countSlot;
	
	//record the dependency before handing anything to the client
	ralDependencyAddLocked(clientId, modId);
	
	logt("num entries = %u\n", numEntries);
	clientTab->dispatchTable = thumbEntryptsSupported ? table : table + numEntries;
	clientTab->moduleId = refNo;
	
out_unlock:
	(void)impl_KALMutexRelease(mModulesLock);
	return ret;
}

//Call func(param) on a different stack and return whatever it left in the result registers.
// Naked function: there is no compiler-generated prologue/epilogue, so the asm below is the
// whole function and relies on the arguments arriving in r0 (param), r1 (func), r2 (sp).
// Note the parameter order (param first) - r0 is never touched before the call, so it reaches
// func unchanged as its first argument.
static uint64_t __attribute__((naked)) RALCallWithNewStackGuts(void *param, void *func, void *sp)
{
	asm volatile(
		"mov    r3, sp			\n\t"	//remember the caller's stack pointer
		"mov    sp, r2			\n\t"	//switch to the new stack
		"push	{r3, lr}		\n\t"	//save old sp and our return address on the NEW stack
		"blx    r1				\n\t"	//call func; r0 still holds param
		"pop    {r2, r3}		\n\t"	//r2 = old sp, r3 = return address (r0/r1 keep func's result)
		"mov    sp, r2			\n\t"	//switch back to the original stack
		"bx     r3				\n\t"	//return to our caller
	);
	
	//make gcc happy
	// (nothing below is reached at runtime: the asm above returns via "bx r3")
	(void)param;
	(void)func;
	(void)sp;
	return 0;
}

//Call func(param) with the stack pointer set to spVal and return its 64-bit result.
// When built with USE_XRAM_FOR_DYNHEAP, a requested stack that lies inside the
// [ROMRAM_BASE, ROMRAM_BASE + ROMRAM_SIZE) range is not used; instead a fixed 8192-byte
// stack is allocated from the kernel heap for the duration of the call and freed afterwards.
uint64_t DALEXPORT impl_RALCallWithNewStack(void *func, void *param, void *spVal)
{
	uint64_t result;
	
	#ifdef USE_XRAM_FOR_DYNHEAP
		
		uint32_t replacementStackBytes = 8192;	//so what?
		uintptr_t requestedSp = (uintptr_t)spVal;
		char *replacementStack = NULL;
		
		if (requestedSp >= ROMRAM_BASE && (requestedSp - ROMRAM_BASE) < ROMRAM_SIZE) {
			
			replacementStack = kheapAllocEx(replacementStackBytes, MEM_USABLE_AS_STACK | MEM_FAST | MEM_NO_OS_HEAP);
			
			if (!replacementStack)
				fatal("%s: cannot alloc", __func__);
			else
				spVal = replacementStack + replacementStackBytes;	//stack pointer starts at the top of the block
		}
		
	#endif
	
	logt("%s\n", __func__);
	
	//note the argument order: the guts routine takes param first, then func
	result = RALCallWithNewStackGuts(param, func, spVal);
	
	#ifdef USE_XRAM_FOR_DYNHEAP
		
		//only free what we allocated ourselves
		if (replacementStack)
			kheapFree(replacementStack);
		
	#endif
	
	return result;
}