/*
 * Copyright 2013 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */

#ifdef INCLUDE_PROC
process(PROC_MEMX, #memx_init, #memx_recv)
#endif

/******************************************************************************
 * MEMX data segment
 *****************************************************************************/
#ifdef INCLUDE_DATA
.equ #memx_opcode 0
.equ #memx_header 2
.equ #memx_length 4
.equ #memx_func   8

#define handler(cmd,hdr,len,func) /*
*/	.b16 MEMX_##cmd /*
*/	.b16 hdr /*
*/	.b16 len /*
*/      .b16 0 /*
*/	.b32 func

memx_func_head:
handler(ENTER , 0x0000, 0x0000, #memx_func_enter)
memx_func_next:
handler(LEAVE , 0x0000, 0x0000, #memx_func_leave)
handler(WR32  , 0x0000, 0x0002, #memx_func_wr32)
handler(WAIT  , 0x0004, 0x0000, #memx_func_wait)
handler(DELAY , 0x0001, 0x0000, #memx_func_delay)
handler(VBLANK, 0x0001, 0x0000, #memx_func_wait_vblank)
handler(TRAIN , 0x0000, 0x0000, #memx_func_train)
memx_func_tail:

.equ #memx_func_size #memx_func_next - #memx_func_head
.equ #memx_func_num (#memx_func_tail - #memx_func_head) / #memx_func_size

memx_ts_start:
.b32 0
memx_ts_end:
.b32 0

memx_data_head:
.skip 0x0800
memx_data_tail:

memx_train_head:
.skip 0x0100
memx_train_tail:
#endif

/******************************************************************************
 * MEMX code segment
 *****************************************************************************/
#ifdef INCLUDE_CODE
// description
//
// $r15 - current (memx)
// $r4  - packet length
// $r3  - opcode desciption
// $r0  - zero
memx_func_enter:
#if NVKM_PPWR_CHIPSET == GT215
	mov $r8 0x1610
	nv_rd32($r7, $r8)
	imm32($r6, 0xfffffffc)
	and $r7 $r6
	mov $r6 0x2
	or $r7 $r6
	nv_wr32($r8, $r7)
#else
	mov $r6 0x001620
	imm32($r7, ~0x00000aa2);
	nv_rd32($r8, $r6)
	and $r8 $r7
	nv_wr32($r6, $r8)

	imm32($r7, ~0x00000001)
	nv_rd32($r8, $r6)
	and $r8 $r7
	nv_wr32($r6, $r8)

	mov $r6 0x0026f0
	nv_rd32($r8, $r6)
	and $r8 $r7
	nv_wr32($r6, $r8)
#endif

	mov $r6 NV_PPWR_OUTPUT_SET_FB_PAUSE
	nv_iowr(NV_PPWR_OUTPUT_SET, $r6)
	memx_func_enter_wait:
		nv_iord($r6, NV_PPWR_OUTPUT)
		and $r6 NV_PPWR_OUTPUT_FB_PAUSE
		bra z #memx_func_enter_wait

	nv_iord($r6, NV_PPWR_TIMER_LOW)
	st b32 D[$r0 + #memx_ts_start] $r6
	ret

// description
//
// $r15 - current (memx)
// $r4  - packet length
// $r3  - opcode desciption
// $r0  - zero
memx_func_leave:
	nv_iord($r6, NV_PPWR_TIMER_LOW)
	st b32 D[$r0 + #memx_ts_end] $r6

	mov $r6 NV_PPWR_OUTPUT_CLR_FB_PAUSE
	nv_iowr(NV_PPWR_OUTPUT_CLR, $r6)
	memx_func_leave_wait:
		nv_iord($r6, NV_PPWR_OUTPUT)
		and $r6 NV_PPWR_OUTPUT_FB_PAUSE
		bra nz #memx_func_leave_wait

#if NVKM_PPWR_CHIPSET == GT215
	mov $r8 0x1610
	nv_rd32($r7, $r8)
	imm32($r6, 0xffffffcc)
	and $r7 $r6
	nv_wr32($r8, $r7)
#else
	mov $r6 0x0026f0
	imm32($r7, 0x00000001)
	nv_rd32($r8, $r6)
	or $r8 $r7
	nv_wr32($r6, $r8)

	mov $r6 0x001620
	nv_rd32($r8, $r6)
	or $r8 $r7
	nv_wr32($r6, $r8)

	imm32($r7, 0x00000aa2);
	nv_rd32($r8, $r6)
	or $r8 $r7
	nv_wr32($r6, $r8)
#endif
	ret

#if NVKM_PPWR_CHIPSET < GF119
// description
//
// $r15 - current (memx)
// $r4  - packet length
//	+00: head to wait for vblank on
// $r3  - opcode desciption
// $r0  - zero
memx_func_wait_vblank:
	ld b32 $r6 D[$r1 + 0x00]
	cmp b32 $r6 0x0
	bra z #memx_func_wait_vblank_head0
	cmp b32 $r6 0x1
	bra z #memx_func_wait_vblank_head1
	bra #memx_func_wait_vblank_fini

	memx_func_wait_vblank_head1:
	mov $r7 0x20
	bra #memx_func_wait_vblank_0

	memx_func_wait_vblank_head0:
	mov $r7 0x8

	memx_func_wait_vblank_0:
		nv_iord($r6, NV_PPWR_INPUT)
		and $r6 $r7
		bra nz #memx_func_wait_vblank_0

	memx_func_wait_vblank_1:
		nv_iord($r6, NV_PPWR_INPUT)
		and $r6 $r7
		bra z #memx_func_wait_vblank_1

	memx_func_wait_vblank_fini:
	add b32 $r1 0x4
	ret

#else

// XXX: currently no-op
//
// $r15 - current (memx)
// $r4  - packet length
//	+00: head to wait for vblank on
// $r3  - opcode desciption
// $r0  - zero
memx_func_wait_vblank:
	add b32 $r1 0x4
	ret

#endif

// description
//
// $r15 - current (memx)
// $r4  - packet length
//	+00*n: addr
//	+04*n: data
// $r3  - opcode desciption
// $r0  - zero
memx_func_wr32:
	ld b32 $r6 D[$r1 + 0x00]
	ld b32 $r5 D[$r1 + 0x04]
	add b32 $r1 0x08
	nv_wr32($r6, $r5)
	sub b32 $r4 0x02
	bra nz #memx_func_wr32
	ret

// description
//
// $r15 - current (memx)
// $r4  - packet length
//	+00: addr
//	+04: mask
//	+08: data
//	+0c: timeout (ns)
// $r3  - opcode desciption
// $r0  - zero
memx_func_wait:
	nv_iord($r8, NV_PPWR_TIMER_LOW)
	ld b32 $r14 D[$r1 + 0x00]
	ld b32 $r13 D[$r1 + 0x04]
	ld b32 $r12 D[$r1 + 0x08]
	ld b32 $r11 D[$r1 + 0x0c]
	add b32 $r1 0x10
	call(wait)
	ret

// description
//
// $r15 - current (memx)
// $r4  - packet length
//	+00: time (ns)
// $r3  - opcode desciption
// $r0  - zero
memx_func_delay:
	ld b32 $r14 D[$r1 + 0x00]
	add b32 $r1 0x04
	call(nsec)
	ret

// description
//
// $r15 - current (memx)
// $r4  - packet length
// $r3  - opcode desciption
// $r0  - zero
memx_func_train:
#if NVKM_PPWR_CHIPSET == GT215
// $r5 - outer loop counter
// $r6 - inner loop counter
// $r7 - entry counter (#memx_train_head + $r7)
	mov $r5 0x3
	mov $r7 0x0

// Read random memory to wake up... things
	imm32($r9, 0x700000)
	nv_rd32($r8,$r9)
	mov $r14 0x2710
	call(nsec)

	memx_func_train_loop_outer:
		mulu $r8 $r5 0x101
		sethi $r8 0x02000000
		imm32($r9, 0x1111e0)
		nv_wr32($r9, $r8)
		push $r5

		mov $r6 0x0
		memx_func_train_loop_inner:
			mov $r8 0x1111
			mulu $r9 $r6 $r8
			shl b32 $r8 $r9 0x10
			or $r8 $r9
			imm32($r9, 0x100720)
			nv_wr32($r9, $r8)

			imm32($r9, 0x100080)
			nv_rd32($r8, $r9)
			or $r8 $r8 0x20
			nv_wr32($r9, $r8)

			imm32($r9, 0x10053c)
			imm32($r8, 0x80003002)
			nv_wr32($r9, $r8)

			imm32($r14, 0x100560)
			imm32($r13, 0x80000000)
			add b32 $r12 $r13 0
			imm32($r11, 0x001e8480)
			call(wait)

			// $r5 - inner inner loop counter
			// $r9 - result
			mov $r5 0
			imm32($r9, 0x8300ffff)
			memx_func_train_loop_4x:
				imm32($r10, 0x100080)
				nv_rd32($r8, $r10)
				imm32($r11, 0xffffffdf)
				and $r8 $r11
				nv_wr32($r10, $r8)

				imm32($r10, 0x10053c)
				imm32($r8, 0x80003002)
				nv_wr32($r10, $r8)

				imm32($r14, 0x100560)
				imm32($r13, 0x80000000)
				mov b32 $r12 $r13
				imm32($r11, 0x00002710)
				call(wait)

				nv_rd32($r13, $r14)
				and $r9 $r9 $r13

				add b32 $r5 1
				cmp b16 $r5 0x4
				bra l #memx_func_train_loop_4x

			add b32 $r10 $r7 #memx_train_head
			st b32 D[$r10 + 0] $r9
			add b32 $r6 1
			add b32 $r7 4

			cmp b16 $r6 0x10
			bra l #memx_func_train_loop_inner

		pop $r5
		add b32 $r5 1
		cmp b16 $r5 7
		bra l #memx_func_train_loop_outer

#endif
	ret

// description
//
// $r15 - current (memx)
// $r14 - sender process name
// $r13 - message (exec)
// $r12 - head of script
// $r11 - tail of script
// $r0  - zero
memx_exec:
	push $r14
	push $r13
	mov b32 $r1 $r12
	mov b32 $r2 $r11

	memx_exec_next:
		// fetch the packet header
		ld b32 $r3 D[$r1]
		add b32 $r1 4
		extr $r4 $r3 16:31
		extr $r3 $r3 0:15

		// execute the opcode handler
		sub b32 $r3 1
		mulu $r3 #memx_func_size
		ld b32 $r5 D[$r3 + #memx_func_head + #memx_func]
		call $r5

		// keep going, if we haven't reached the end
		cmp b32 $r1 $r2
		bra l #memx_exec_next

	// send completion reply
	ld b32 $r11 D[$r0 + #memx_ts_start]
	ld b32 $r12 D[$r0 + #memx_ts_end]
	sub b32 $r12 $r11
	nv_iord($r11, NV_PPWR_INPUT)
	pop $r13
	pop $r14
	call(send)
	ret

// description
//
// $r15 - current (memx)
// $r14 - sender process name
// $r13 - message
// $r12 - data0
// $r11 - data1
// $r0  - zero
memx_info:
	cmp b16 $r12 0x1
	bra e #memx_info_train

	memx_info_data:
	mov $r12 #memx_data_head
	mov $r11 #memx_data_tail - #memx_data_head
	bra #memx_info_send

	memx_info_train:
	mov $r12 #memx_train_head
	mov $r11 #memx_train_tail - #memx_train_head

	memx_info_send:
	call(send)
	ret

// description
//
// $r15 - current (memx)
// $r14 - sender process name
// $r13 - message
// $r12 - data0
// $r11 - data1
// $r0  - zero
memx_recv:
	cmp b32 $r13 MEMX_MSG_EXEC
	bra e #memx_exec
	cmp b32 $r13 MEMX_MSG_INFO
	bra e #memx_info
	ret

// description
//
// $r15 - current (memx)
// $r0  - zero
memx_init:
	ret
#endif