#include <string.h>
#include "printf.h"
#include "msio.h"
#include "cpu.h"

/*

DMA usage in DMA1 unit:
	strm0: used to enable SPI1 on BS1 start
	strm1: used to set GPIO to function mode to stop signalling interrupt at start of BS1
	strm2: SPI3->CRC unit for write TPC
	strm3: tx buffer -> SPI1 for read TPC
	strm4: SPI2-> rx buffer for write TPC
	strm5: used to enable SPI2 on BS1 start
	strm6: used to enable SPI3 on BS1 start
	strm7: used for display data
	
	
	
	BS on A0
	
	SPI	 	nCS(BS.d2)		SCK			MISO(MSIO)
	1	 	A15(5)  		A5(5)		B4(5)
	2	 	B12(5)  		A9(5)		B14(5)
	3	 	A4(6)	  		B3(6)		C11(6)
	extra	C10				A10			B15
	pins
	
	B15 is used to signal INT
	A0 is used as EXTI0 (start processing code), also used to trigger DMAs that RX things
	C10 is used as EXTI10 (report tpc done) and to read BS status
	
	i am not sure why i wired A10 into SCK...
*/


//number of entries in the receive ring; mRxBufIdxW / mRxBufIdxR wrap at this value
#define NUM_RX_BUFS			3

//receive ring: each time RX is re-armed, DMA1 stream 4 is pointed at mRxBuffers[mRxBufIdxW] with a 512-item count
static volatile uint8_t mRxBuffers[NUM_RX_BUFS][512] __attribute__((section(".uncached")));
//register file, followed by 4 extra bytes that are reached through mRegWindow (see the macro below)
static volatile uint8_t mRegs[MS_NUM_REGS + 4] __attribute__((section(".uncached")));
//output buffers handed to callers by msioGetOutBuffer(); this file does not touch their contents
static uint8_t mOutBuffer[MSIO_OUT_BUFFER_CT][MSIO_OUT_BUFER_SZ] __attribute__((section(".uncached")));

//ring bookkeeping:
//	mRxBufIdxW    - slot being filled, advanced by msioPrvTpcSuccess() (interrupt context)
//	mRxBufIdxR    - slot being consumed, advanced by msioReleaseBuf()
//	mRxBufNumUsed - count of completed-but-unreleased slots (incremented in msioPrvTpcSuccess(), atomically decremented in msioReleaseBuf())
//	mDoneTpc[i]   - TPC that completed into slot i
static volatile uint8_t mRxBufIdxW, mRxBufIdxR, mRxBufNumUsed, mDoneTpc[NUM_RX_BUFS];
//per-slot item count recorded by the write-TPC path (514 minus what DMA1 stream 2 had left)
static volatile uint16_t mBytesRxd[NUM_RX_BUFS];


//mTpcInProgress: read TPC currently being answered (set in EXTI0 handler, consumed in EXTI15_10 handler)
//mTpc: TPC that is about to be recorded into mDoneTpc[] by msioPrvTpcSuccess()
static volatile uint8_t mTpcInProgress, mTpc;			//0x00 while none, 0xff if invalid
//last category value accepted via msioCategoryChanged(); compared against mRegs[MS_REG_NO_CATEGORY] in msioPoll()
static uint8_t mCurCategory;

//describes one block of data queued to answer a future read TPC; data == NULL means "nothing queued"
struct ReadDataInfo {
	const void *data;
	uint32_t len;		//512 is really max, but having this as u32 makes asm easier
};

//data queued for MS_RD_LDATA and MS_RD_SDATA respectively; consumed (data set to NULL) by the EXTI0 handler
static struct ReadDataInfo mLongReadDataInfo;
static struct ReadDataInfo mShortReadDataInfo;

//the 4 bytes past the register file: [0] read start, [1] read length, [2] write start, [3] write length
//(that is the order in which the MS_SET_RW_REG_ADRS handling stores them)
#define mRegWindow		(mRegs + MS_NUM_REGS)


/*
 * Debug trace hook. Callers throughout this file pass single marker bytes here.
 * Tracing is currently compiled out, so the function does nothing; the two
 * commented-out lines are the UART4 output path that can be re-enabled.
 */
void __attribute__((section(".ramcode"))) dtx(uint_fast8_t val)
{
	(void)val;		//argument is deliberately ignored while tracing is disabled
//	while (!(UART4->ISR & USART_ISR_TXFT));
//	UART4->TDR = val;
}

/*
 * (Re)arm everything needed to receive the next TPC.
 *
 * Sequence, in the order the hardware is touched:
 *  - stop DMA1 streams 0..6 used by this driver and clear all DMA1 flags
 *  - re-arm the four single-item streams (0, 5, 6, 1); msioInit() points them at
 *    SPI1/2/3->CR1 and GPIOB->MODER, and the file header describes them as firing at BS1 start
 *  - put SPI1 back into its TPC-receive configuration and SPI2/SPI3 into data-receive
 *    configuration with RX DMA requests enabled
 *  - arm stream 4 (SPI2 -> current rx buffer) and stream 2 (SPI3 -> CRC unit), reset the CRC unit
 *  - clear and unmask EXTI line 0, make PB15 a plain output again, forget any TPC state
 *
 * Called from interrupt context (via msioPrvTpcSuccess) and from msioEnable().
 */
static void __attribute__((section(".ramcode"))) msioPrvConfigForRx(void)
{
	//we often do this after a reg/data read and before userspace has a chance to cancel irqs so we might spuriously signal an irq when it should be cleared
	//this is considered ok (for now?)
	
	dtx('c');
	
	//request disable of every stream we own, then clear all low/high interrupt flags of DMA1
	DMA1_Stream0->CR = 0;
	DMA1_Stream5->CR = 0;
	DMA1_Stream6->CR = 0;
	DMA1_Stream1->CR = 0;
	DMA1_Stream4->CR = 0;
	DMA1_Stream2->CR = 0;
	DMA1_Stream3->CR = 0;
	DMA1->LIFCR = -1;
	DMA1->HIFCR = -1;
	
	//each single-shot stream: wait until the disable took effect, then re-arm for exactly one memory-to-peripheral item
	while (DMA1_Stream0->CR & DMA_SxCR_EN);
	DMA1_Stream0->NDTR = 1;
	DMA1_Stream0->CR = DMA_SxCR_PL | DMA_SxCR_MSIZE_1 | DMA_SxCR_PSIZE_1 | DMA_SxCR_DIR_0 | DMA_SxCR_EN;
	
	while (DMA1_Stream5->CR & DMA_SxCR_EN);
	DMA1_Stream5->NDTR = 1;
	DMA1_Stream5->CR = DMA_SxCR_PL | DMA_SxCR_MSIZE_1 | DMA_SxCR_PSIZE_1 | DMA_SxCR_DIR_0 | DMA_SxCR_EN;
	
	while (DMA1_Stream6->CR & DMA_SxCR_EN);
	DMA1_Stream6->NDTR = 1;
	DMA1_Stream6->CR = DMA_SxCR_PL | DMA_SxCR_MSIZE_1 | DMA_SxCR_PSIZE_1 | DMA_SxCR_DIR_0 | DMA_SxCR_EN;
	
	while (DMA1_Stream1->CR & DMA_SxCR_EN);
	DMA1_Stream1->NDTR = 1;
	DMA1_Stream1->CR = DMA_SxCR_PL | DMA_SxCR_MSIZE_1 | DMA_SxCR_PSIZE_1 | DMA_SxCR_DIR_0 | DMA_SxCR_EN;
	
	//SPI1 receives the TPC itself: disabled here (stream 0 enables it), DSIZE field = 3
	//NOTE(review): DSIZE = 3 presumably selects 4-bit frames, matching the "first nibble" wording in the EXTI0 handler - confirm against the SPI chapter
	SPI1->CR1 = 0;
	SPI1->IFCR = -1;
	SPI1->CR2 = 0;
	SPI1->CFG1 = 3 << SPI_CFG1_DSIZE_Pos;
	
	//SPI2 receives write-TPC payload into RAM via DMA (DSIZE field = 7, RX DMA requests on)
	SPI2->CR1 = 0;
	SPI2->CR2 = 0xfffefffe;
	SPI2->CFG1 = (7 << SPI_CFG1_DSIZE_Pos) | SPI_CFG1_RXDMAEN;	
	SPI2->IFCR = -1;
	
	//SPI3 is configured identically; its RX DMA feeds the CRC unit instead of RAM
	SPI3->CR1 = 0;
	SPI3->CR2 = 0xfffefffe;
	SPI3->CFG1 = (7 << SPI_CFG1_DSIZE_Pos) | SPI_CFG1_RXDMAEN;	
	SPI3->IFCR = -1;
	
	//payload stream: up to 512 items into the rx buffer currently selected for writing
	while (DMA1_Stream4->CR & DMA_SxCR_EN);
	DMA1_Stream4->NDTR = 512;
	DMA1_Stream4->M0AR = (uintptr_t)mRxBuffers[mRxBufIdxW];
	DMA1_Stream4->CR = DMA_SxCR_PL | DMA_SxCR_MINC | DMA_SxCR_EN;
	
	//CRC stream: no memory increment, so every item lands on the one address set up in msioInit() (CRC->DR)
	//514 is two more than the payload stream's 512 - presumably room for the two trailing CRC bytes
	while (DMA1_Stream2->CR & DMA_SxCR_EN);
	DMA1_Stream2->NDTR = 514;
	DMA1_Stream2->CR = DMA_SxCR_PL | DMA_SxCR_EN;
	
	//restart the CRC calculation for the next payload
	CRC->CR = CRC_CR_POLYSIZE_0 | CRC_CR_RESET;
	
	//the read-data TX stream is not re-armed here (the EXTI0 handler does that per TPC); just make sure it has really stopped
	while (DMA1_Stream3->CR & DMA_SxCR_EN);
	
	//drop any stale EXTI0 event, read back so the write has landed, then leave only line 0 unmasked
	//(this is a plain assignment, so line 10 is masked again here as well)
	EXTI->PR1 = 1;
	(void)EXTI->PR1;
	EXTI->IMR1 = 1;
	
	//PB15 mode field := 01. unsigned constants are used because 3 << 30 does not fit in a signed int (undefined behaviour)
	GPIOB->MODER = (GPIOB->MODER &~ (3UL << (2 * 15))) | (1UL << (2 * 15));	//int signal as needed
	
	mTpcInProgress = 0;
	mTpc = 0;
	
	dtx('d');
}

/*
 * Finish the TPC that was just handled and re-arm the receiver.
 *
 * success == false: nothing is recorded, the receiver is simply re-armed.
 * success == true:  mTpc is stored for the slot at mRxBufIdxW, the write index
 *                   advances (wrapping at NUM_RX_BUFS), the used-slot count is
 *                   bumped and MSIO_IRQn is pended so the consumer gets to run.
 *
 * fatal() is called when the used-slot count already equalled NUM_RX_BUFS
 * before this completion was added.
 */
static void __attribute__((section(".ramcode"))) msioPrvTpcSuccess(bool success)
{
	dtx(0xab);
	
	if (!success) {
		
		dtx(0xff);
		msioPrvConfigForRx();
		return;
	}
	
	dtx(0xaa);
	dtx(mRxBufIdxW);
	
	//remember what completed into this slot, then step to the next slot
	mDoneTpc[mRxBufIdxW] = mTpc;
	if (NUM_RX_BUFS == ++mRxBufIdxW)
		mRxBufIdxW = 0;
	
	//compare the count as it was BEFORE this completion was added
	const uint8_t usedBefore = mRxBufNumUsed++;
	
	if (usedBefore == NUM_RX_BUFS)
		fatal("too many buffers unserviced\n");
	
	dtx(mRxBufNumUsed);
	
	//wake the consumer, then get ready for the next TPC
	NVIC_SetPendingIRQ(MSIO_IRQn);
	msioPrvConfigForRx();
}

/*
 * Drive the INT signal on PB15 (see the pin table in the file header).
 *
 * requestingInt: true writes bit 15 of GPIOB->BSRR, false writes bit 31 (16 + 15).
 * In the STM32 BSRR layout the low half sets a pin and the high half resets it.
 *
 * The constants are unsigned: 1 << 31 on a signed int is undefined behaviour.
 */
void __attribute__((used, section(".ramcode"))) msioSignalIrq(bool requestingInt)		//only do this while not in command
{
	GPIOB->BSRR = requestingInt ? (1UL << 15) : (1UL << (16 + 15));
}

/*
 * Interrupt handler shared by EXTI lines 10..15; only line 10 is acted on.
 *
 * Line 10 is unmasked by the EXTI0 handler once it has started answering a read
 * TPC. When it fires, the line is masked and acknowledged, all three SPI units
 * are switched off, the TPC that was in progress becomes the completed one and
 * the normal completion path (record + re-arm) runs.
 */
void __attribute__((used, section(".ramcode"))) EXTI15_10_IRQHandler(void)
{
	const uint32_t line10 = 1 << 10;
	
	//do not ask
	if (!(EXTI->PR1 & line10))
		return;
	
	//one-shot: mask the line, clear the pending bit, read back so the clear has taken effect
	EXTI->IMR1 &=~ line10;
	EXTI->PR1 = line10;
	(void)EXTI->PR1;
	
	dtx('j');
	
	SPI2->CR1 = 0;
	SPI3->CR1 = 0;
	SPI1->CR1 = 0;
	
	//the read TPC we were answering is now the one to report
	mTpc = mTpcInProgress;
	dtx('k');
	
	msioPrvTpcSuccess(true);
}

/*
 * EXTI line 0 interrupt (BS rising edge): identify the TPC being sent and handle it.
 *
 * SPI1 delivers the TPC as two values; the second must be the bitwise complement
 * of the first in its low 4 bits, i.e. (first ^ second) == 0x0f.
 *
 * Write TPC (bit 3 of the first value set):
 *   SPI2/SPI3 + DMA have already been armed by msioPrvConfigForRx(), so the payload
 *   lands in the current rx buffer while the CRC unit checks it. This handler waits
 *   for the data phase to end (watching PC10), verifies the CRC residue is zero,
 *   applies the TPC's effect on the register file and completes the TPC.
 *
 * Read TPC:
 *   picks the data to send, queues the ACK on SPI2, arms DMA1 stream 3 + SPI1 to
 *   transmit it and unmasks EXTI line 10; EXTI15_10_IRQHandler completes the TPC.
 *
 * Every failure path re-arms the receiver through msioPrvTpcSuccess(false). That
 * includes a check-value mismatch on a read TPC: EXTI0 is masked at the end of
 * this handler and EXTI10 is not armed in that case, so without the re-arm no
 * further TPC would ever be handled.
 */
void __attribute__((used, section(".ramcode"))) EXTI0_IRQHandler(void)
{
	//we get here on BS going up. by the time we are running here, BS.d2 has already gone up, spi is recieving the TPC, if all is well
	bool doResetup = false, success = false;
	uint_fast8_t tpc, tpcCheck;
	
	//dtx('n');
	
//	SPI2->CR1 = SPI_CR1_SPE;	//early enough to do this
//	SPI3->CR1 = SPI_CR1_SPE;	//early enough to do this
	
	while (!(SPI1->SR & SPI_SR_RXP));		//wait for a TPC type (first nibble)

	//TPC RXed
	tpc = *(volatile uint8_t*)&SPI1->RXDR;
	
	if (tpc & 0x08) {		//write tpc
		
		volatile uint8_t *rxb;
		uint32_t nRxed;
		
		dtx(tpc);
		
		//second value must be the complement of the first, else this was not a valid TPC
		while (!(SPI1->SR & SPI_SR_RXP));
		tpcCheck = *(volatile uint8_t*)&SPI1->RXDR;
		if ((tpcCheck ^ tpc) != 0x0f) {
			doResetup = true;
			goto tpc_check_fail;
		}
		
		//for various reasons we need to go to 16 bit mode here...do not ask...
		SPI1->CR1 = 0;
		SPI1->CFG1 = (15 << SPI_CFG1_DSIZE_Pos);
		SPI1->CR1 = SPI_CR1_HDDIR | SPI_CR1_SPE;
		*(volatile uint16_t*)&SPI1->TXDR = 0x0055;	//give us max time to pause, we can only write one thing into the buffer at this point
		*(volatile uint16_t*)&SPI1->TXDR = 0x5555;	//we can now write another
		
		//follow PC10 (documented in the file header as "read BS status") through a low and then a high level
		dtx(0x34);
		while (GPIOC->IDR & (1 << 10));		//wait for it to go down as sometimes this is a bit extended (DSP devices)
		while (!(GPIOC->IDR & (1 << 10)));
		dtx(0x35);
		
		//data phase is over: stop both receivers
		SPI2->CR1 = 0;
		SPI3->CR1 = 0;
		
		//CRC is already READY (thanks to spi3)
		//a nonzero value here is treated as a bad transfer
		if (CRC->DR) {
			
			//fail - reconfigure all
			SPI1->CR1 = 0;
			dtx(0xde);
			doResetup = true;
			goto tpc_check_fail;	
		}
		
		//number of items the CRC stream moved; the length checks below (6 for 4 address bytes, 3 for 1 command byte)
		//suggest this count includes two trailing CRC bytes
		nRxed = 514 - DMA1_Stream2->NDTR;
		dtx(nRxed);
		dtx(512 - DMA1_Stream4->NDTR);
		
		mTpc = tpc;
		
		//wait for ack/nak to be accepted
		while (GPIOC->IDR & (1 << 10));
		SPI1->CR1 &=~ SPI_CR1_HDDIR;
		dtx(0x56);
		
		mBytesRxd[mRxBufIdxW] = nRxed;
		rxb = mRxBuffers[mRxBufIdxW];
		
		switch (tpc) {
			
			case MS_SET_RW_REG_ADRS:
				if (nRxed >= 6) {
					
					uint_fast8_t rS = rxb[0], rL = rxb[1], wS = rxb[2], wL = rxb[3];
					
					//only accept windows that lie entirely inside the register file; otherwise keep the old ones
					if (rS < MS_NUM_REGS && MS_NUM_REGS - rS >= rL && wS < MS_NUM_REGS && MS_NUM_REGS - wS >= wL) {
						
						mRegWindow[0] = rS;
						mRegWindow[1] = rL;
						mRegWindow[2] = wS;
						mRegWindow[3] = wL;
					}
				}
				break;
			
			case MS_WR_REG: {
				
				//copy the full write window from the rx buffer; the window was bounds-checked when it was set
				//note the copy length is the window length, not nRxed
				volatile uint8_t *dst = mRegs + mRegWindow[2], *end = dst + mRegWindow[3];
				volatile uint8_t *src = rxb;
				
				while (dst != end)
					*dst++ = *src++;
				
				break;
			}
			
			case MS_WR_SDATA:
			case MS_WR_LDATA:
				//payload stays in the rx buffer for the consumer (msioPoll)
				break;
			
			case MS_SET_CMD:
				if (nRxed != 3)
					mRegs[MS_REG_NO_INT] = INT_VAL_INVAL_CMD;
				else
					mRegs[MS_REG_NO_INT] &=~ INT_VAL_CMD_DONE;
				break;
		}
		
		success = true;
		doResetup = true;
	}
	else {					//read tpc
		
		uint_fast16_t readLen;
		const void *readData;
		
		//choose what to send; queued long/short data is consumed (pointer cleared) as soon as it is picked
		switch (tpc) {
			
			case MS_RD_REG:
				readData = (uint8_t*)(mRegs + mRegWindow[0]);
				readLen = mRegWindow[1];
				break;
			
			case MS_GET_INT:
				readData = (uint8_t*)(mRegs + MS_REG_NO_INT);
				readLen = 1;
				break;
			
			case MS_RD_LDATA:
			
				if (mLongReadDataInfo.data) {
					
					readData = mLongReadDataInfo.data;
					mLongReadDataInfo.data = NULL;
					readLen = mLongReadDataInfo.len;
				}
				else {
					//nothing queued: give up on this TPC and re-arm
					SPI1->CR1 &=~ SPI_CR1_SPE;
					doResetup = true;
					goto tpc_check_fail;
				}
				break;
			
			case MS_RD_SDATA:
			
				if (mShortReadDataInfo.data) {
					
					readData = mShortReadDataInfo.data;
					mShortReadDataInfo.data = NULL;
					readLen = mShortReadDataInfo.len;
				}
				else {
					
					//nothing queued: give up on this TPC and re-arm
					SPI1->CR1 &=~ SPI_CR1_SPE;
					doResetup = true;
					goto tpc_check_fail;
				}
				break;
			
			default:
				//unknown read TPC
				doResetup = true;
				SPI1->CR1 &=~ SPI_CR1_SPE;
				goto tpc_check_fail;
		}
		
		SPI2->CFG1 = (3 << SPI_CFG1_DSIZE_Pos);
		SPI2->TXDR = 0xAAAAAA00;		//get it ready in case of read
		SPI2->CR1 = SPI_CR1_SPE | SPI_CR1_HDDIR;	//send ACK! (after this we have enough time to finish the setup of the rest of this)
		
		//XXX: the above assertion will not be true if we got here late and we are already in BS1
		//in that case, we'll send ACK now, go to BS2 and not yet have data ready...
		//this should not happen
		
		DMA1_Stream3->NDTR = readLen;
		DMA1_Stream3->M0AR = (uintptr_t)readData;
		
		while (!(SPI1->SR & SPI_SR_RXP));
		tpcCheck = *(volatile uint8_t*)&SPI1->RXDR;
		if ((tpcCheck ^ tpc) != 0x0f) {
			//cancel the ACK we were about to send or are already sending
			
			dtx(tpc);
			dtx(0xe0);
			dtx(tpcCheck);
			SPI2->CR1 = 0;
			
			//re-arm the receiver like every other failure path does; EXTI0 gets masked below and
			//EXTI10 has not been armed, so nothing else would bring the interface back
			doResetup = true;
			goto tpc_check_fail;
		}
		
		
		//reset/enable crc?
		DMA1_Stream3->CR = DMA_SxCR_PL | DMA_SxCR_MINC | DMA_SxCR_DIR_0 | DMA_SxCR_EN | DMA_SxCR_TCIE;
		
		//reconfigure SPI1 from TPC reception to data transmission: transfer size = readLen, CRC enabled, TX fed by DMA
		SPI1->CR1 = 0;
		SPI1->CR2 = readLen;
		SPI1->CFG1 = (7 << SPI_CFG1_DSIZE_Pos) | (15 << SPI_CFG1_CRCSIZE_Pos) | SPI_CFG1_CRCEN | SPI_CFG1_TXDMAEN;
		SPI1->CR1 = SPI_CR1_HDDIR | SPI_CR1_SPE;
		
		//completion is reported by EXTI line 10 (handled in EXTI15_10_IRQHandler)
		//IMR1 is assigned, not OR-ed, so line 0 is masked by this write too
		EXTI->PR1 = 1 << 10;
		EXTI->IMR1 = 1 << 10;
		
		
		dtx(tpc);
		mTpcInProgress = tpc;
	}

tpc_check_fail:
	
	//mask and acknowledge EXTI0; msioPrvConfigForRx() unmasks it again when the receiver is re-armed
	EXTI->IMR1 &=~ 1;
	EXTI->PR1 = 1;
	(void)EXTI->PR1;
	dtx(0x59);
	
	if (doResetup)
		msioPrvTpcSuccess(success);
}

/*
 * One-time setup of the register file and of every peripheral this driver uses
 * (SYSCFG/EXTI routing, SPI1..3, CRC unit, DMAMUX1 request generators and DMA1
 * stream addresses). Nothing is armed here; msioEnable() does that.
 */
void msioInit(void)
{
	//source words for the single-item DMA transfers; they live in .uncached like the other DMA-visible data in this file
	static volatile uint32_t  __attribute__((section(".uncached"))) mSpiEnableCR1, __attribute__((section(".uncached"))) mGpiobModerVal;
	
	//register file defaults to 0xff everywhere, then the read/write windows get their initial values
	memset((void*)mRegs, 0xff, sizeof(mRegs));
	mRegWindow[0] = 0x00;	//read window start
	mRegWindow[1] = 0x1f;	//read window length
	mRegWindow[2] = 0x10;	//write window start
	mRegWindow[3] = 0x0f;	//write window length
	
	//value that DMA streams 0/5/6 copy into SPIx->CR1 to enable the SPI units
	mSpiEnableCR1 = SPI_CR1_SPE;

	SYSCFG->EXTICR[0] = (SYSCFG->EXTICR[0] &~ SYSCFG_EXTICR1_EXTI0_Msk) | SYSCFG_EXTICR1_EXTI0_PA;		//EXTI0 is PA0
	SYSCFG->EXTICR[2] = (SYSCFG->EXTICR[2] &~ SYSCFG_EXTICR3_EXTI10_Msk) | SYSCFG_EXTICR3_EXTI10_PC;	//EXTI10 is PC10
	
	mRegs[0] = 0;
	mRegs[MS_REG_NO_STA0] = 0x11;	//write-locked and a flag that all MSs set (0x10)
	mRegs[MS_REG_NO_STA1] = 0x00;
	mRegs[MS_REG_NO_CATEGORY] = mCurCategory = MS_CATEGORY_STORAGE;
	mRegs[MS_REG_NO_SYSCFG] = 0x80;
	
	NVIC_EnableIRQ(EXTI0_IRQn);
	
	NVIC_EnableIRQ(EXTI15_10_IRQn);
	
	EXTI->RTSR1 |= EXTI_RTSR1_TR0;	//trigger on EXTI0 rising edge
	EXTI->FTSR1 |= EXTI_FTSR1_TR10;	//trigger on EXTI10 falling edge
	
	//SPI1 will accept TPC with nCS active high
	//SPI2 will TX/RX data with nCS low   (see UDRCFG)
	//SPI1 will send data in BS3 with nCS active high
	// SPI3 shadows SPI2 for data RX to provide CRC unit with data while SPI2 writes it to RAM
	
	//on BS going up:
	// enable SPI1, clear buffers
	
	//SPI1 differs from SPI2/SPI3 only by SSIOP (the "nCS active high" mentioned above)
	SPI1->CR1 = 0;
	SPI1->CRCPOLY = 0x18005;
	SPI1->CFG2 = SPI_CFG2_SSIOP | SPI_CFG2_COMM_1 | SPI_CFG2_COMM_0;
	
	SPI2->CR1 = 0;
	SPI2->CRCPOLY = 0x18005;
	SPI2->CFG2 = SPI_CFG2_COMM_1 | SPI_CFG2_COMM_0;
	
	//SPI3 gets no CRC polynomial: its received data is checked by the standalone CRC unit instead
	SPI3->CR1 = 0;
	SPI3->CFG2 = SPI_CFG2_COMM_1 | SPI_CFG2_COMM_0;
	
	//standalone CRC unit: initial value 0, polynomial 0x8005
	CRC->INIT = 0;
	CRC->POL = 0x8005;
	
	//four request generators, all with trigger input 6 and identical polarity, one per single-item DMA stream
	//NOTE(review): input 6 is presumably the EXTI0 (BS) signal, as the file header says A0 "triggers DMAs" - confirm against the DMAMUX1 trigger table
	DMAMUX1_RequestGenerator0->RGCR = DMAMUX_RGxCR_GPOL_0 | DMAMUX_RGxCR_GE | 6;
	DMAMUX1_RequestGenerator1->RGCR = DMAMUX_RGxCR_GPOL_0 | DMAMUX_RGxCR_GE | 6;
	DMAMUX1_RequestGenerator2->RGCR = DMAMUX_RGxCR_GPOL_0 | DMAMUX_RGxCR_GE | 6;
	DMAMUX1_RequestGenerator3->RGCR = DMAMUX_RGxCR_GPOL_0 | DMAMUX_RGxCR_GE | 6;
	DMAMUX1_Channel0->CCR = 1;	//request source is generator output 0
	DMAMUX1_Channel1->CCR = 2;	//request source is generator output 1
	DMAMUX1_Channel5->CCR = 3;	//request source is generator output 2
	DMAMUX1_Channel6->CCR = 4;	//request source is generator output 3
	
	//triggering two DMA from one trigger does not reliably work...
	
	DMAMUX1_Channel3->CCR = 38;		//spi1 tx
	DMAMUX1_Channel4->CCR = 39;		//spi2 rx
	DMAMUX1_Channel2->CCR = 61;		//spi3 rx
	
	//streams 0/5/6: write SPI_CR1_SPE into SPI1/SPI2/SPI3->CR1
	DMA1_Stream0->PAR = (uintptr_t)&SPI1->CR1;
	DMA1_Stream0->M0AR = (uintptr_t)&mSpiEnableCR1;
	DMA1_Stream5->PAR = (uintptr_t)&SPI2->CR1;
	DMA1_Stream5->M0AR = (uintptr_t)&mSpiEnableCR1;
	DMA1_Stream6->PAR = (uintptr_t)&SPI3->CR1;
	DMA1_Stream6->M0AR = (uintptr_t)&mSpiEnableCR1;
	
	//stream 1: restore GPIOB->MODER to the value it has right now (i.e. before msioPrvConfigForRx() turns PB15 into a plain output)
	mGpiobModerVal = GPIOB->MODER;
	DMA1_Stream1->PAR = (uintptr_t)&GPIOB->MODER;
	DMA1_Stream1->M0AR = (uintptr_t)&mGpiobModerVal;
		
	//stream 3: read data -> SPI1 (memory address is set per TPC in the EXTI0 handler)
	DMA1_Stream3->PAR = (uintptr_t)&SPI1->TXDR;
	
	//stream 4: SPI2 -> rx buffer (memory address set on each re-arm); stream 2: SPI3 -> CRC data register
	DMA1_Stream4->PAR = (uintptr_t)&SPI2->RXDR;
	DMA1_Stream2->PAR = (uintptr_t)&SPI3->RXDR;
	DMA1_Stream2->M0AR = (uintptr_t)&CRC->DR;
	
	//start with the INT signal deasserted
	msioSignalIrq(false);
}

/*
 * Start accepting TPCs: arms the DMA streams, SPI units and the EXTI0 interrupt
 * by running the same re-arm routine that is used after every TPC.
 */
void msioEnable(void)
{
	msioPrvConfigForRx();
}

/*
 * Give the caller direct access to the register file.
 * Returns a pointer to its first byte; the storage is volatile because the
 * interrupt handlers in this file read and write it.
 */
volatile uint8_t* __attribute__((section(".ramcode")))  msioGetRegs(void)
{
	return &mRegs[0];
}

/*
 * Return the oldest completed rx slot to the ring: atomically decrement the
 * used-slot count, then advance the read index (wrapping at NUM_RX_BUFS).
 *
 * The count is also incremented from interrupt context (msioPrvTpcSuccess), hence
 * the exclusive load/store pair instead of a plain read-modify-write.
 *
 * NOTE(review): there is no check that a slot is actually in use; calling this with
 * mRxBufNumUsed == 0 would wrap the 8-bit count. Callers presumably only release after
 * a successful msioPoll() - confirm.
 */
void __attribute__((section(".ramcode"))) msioReleaseBuf(void)
{
	uint32_t newNumBufsUsed, fail;
	
	dtx('p');
	
	//retry until the exclusive store succeeds (strexb writes 0 to "fail" on success)
	//the "1:" label inside the asm is not branched to; the retry loop is the C do/while
	do {
		asm volatile(
			"1:							\n\t"
			"	ldrexb	%0, [%2]		\n\t"
			"	subs	%0, #1			\n\t"
			"	strexb	%1, %0, [%2]	\n\t"
			:"=&r"(newNumBufsUsed), "=&r"(fail)
			:"r"(&mRxBufNumUsed)
			:"cc", "memory"
		);
	} while (fail);
	
	dtx(newNumBufsUsed);
	
	//the read index is only touched here, so no atomics are needed for it
	if (++mRxBufIdxR == NUM_RX_BUFS)
		mRxBufIdxR = 0;
}

/*
 * Look at the oldest completed TPC without consuming it.
 *
 * tpcP:  if non-NULL, receives the completed TPC code
 * dataP: if non-NULL, receives a pointer to the rx buffer of that slot
 * lenP:  if non-NULL, receives the item count recorded for that slot
 *
 * Returns false when no slot is in use; otherwise returns whether the recorded
 * TPC code is nonzero. The slot stays owned by the caller until msioReleaseBuf().
 *
 * For MS_WR_REG this is also where a host write to the category register is
 * noticed: msioCategoryChanged() is asked whether to accept it, and the register
 * is put back to the current category if it refuses.
 */
bool __attribute__((section(".ramcode"))) msioPoll(uint8_t *tpcP, const void **dataP, uint16_t *lenP)
{
	uint32_t tpc;
	
	dtx('q');
	dtx(mRxBufNumUsed);
	
	if (!mRxBufNumUsed)
		return false;
	
	tpc = mDoneTpc[mRxBufIdxR];
	if (tpc == MS_WR_REG) {		//this might want to be done in interrupt context?
		
		//read the register exactly once: the EXTI0 handler may rewrite it at any time, and the value
		//we compare, the value we offer to msioCategoryChanged() and the value we adopt must be the same one
		uint8_t requestedCategory = mRegs[MS_REG_NO_CATEGORY];
		
		if (requestedCategory != mCurCategory) {
			
			if (msioCategoryChanged(requestedCategory))
				mCurCategory = requestedCategory;
			else
				mRegs[MS_REG_NO_CATEGORY] = mCurCategory;
		}
	}
	
	if (tpcP)
		*tpcP = tpc;
	
	if (dataP)
		*dataP = (uint8_t*)mRxBuffers[mRxBufIdxR];
	
	if (lenP)
		*lenP = mBytesRxd[mRxBufIdxR];
	
	return !!tpc;
}

/*
 * Report whether data queued for MS_RD_LDATA is still waiting to be sent
 * (the EXTI0 handler clears the pointer when it picks the data up).
 */
bool __attribute__((used, section(".ramcode"))) msioHaveLongDataToTx(void)
{
	return mLongReadDataInfo.data != NULL;
}

/*
 * Report whether data queued for MS_RD_SDATA is still waiting to be sent
 * (the EXTI0 handler clears the pointer when it picks the data up).
 */
bool __attribute__((used, section(".ramcode"))) msioHaveShortDataToTx(void)
{
	return mShortReadDataInfo.data != NULL;
}

/*
 * Queue data to be sent in reply to the next MS_RD_LDATA.
 *
 * data: bytes to send; only the pointer is stored, the bytes are not copied
 * len:  number of bytes to send
 *
 * The new data always replaces whatever was queued. Returns true if nothing was
 * queued before the call, false if previously queued data was overwritten.
 */
bool __attribute__((used, section(".ramcode"))) msioProvideLongReadData(const void *data, uint_fast16_t len)
{
	const bool slotWasEmpty = (mLongReadDataInfo.data == NULL);
	
	//length first, pointer second: the pointer is what the consumer tests
	mLongReadDataInfo.len = len;
	mLongReadDataInfo.data = data;
	
	return slotWasEmpty;
}

/*
 * Queue data to be sent in reply to the next MS_RD_SDATA.
 *
 * data: bytes to send; only the pointer is stored, the bytes are not copied
 * len:  number of bytes to send
 *
 * The new data always replaces whatever was queued. Returns true if nothing was
 * queued before the call, false if previously queued data was overwritten.
 */
bool __attribute__((used, section(".ramcode"))) msioProvideShortReadData(const void *data, uint_fast16_t len)
{
	const bool slotWasEmpty = (mShortReadDataInfo.data == NULL);
	
	//length first, pointer second: the pointer is what the consumer tests
	mShortReadDataInfo.len = len;
	mShortReadDataInfo.data = data;
	
	return slotWasEmpty;
}

/*
 * Get one of the output buffers.
 *
 * bufIdx: index of the buffer, valid values are 0 .. (number of rows in mOutBuffer) - 1
 *
 * Returns a pointer to the start of that buffer, or NULL when bufIdx is out of
 * range (previously an out-of-range index produced a pointer outside the array).
 */
void* __attribute__((used, section(".ramcode"))) msioGetOutBuffer(uint_fast8_t bufIdx)
{
	if (bufIdx >= sizeof(mOutBuffer) / sizeof(mOutBuffer[0]))
		return NULL;
	
	return mOutBuffer[bufIdx];
}
