Wren6991 · May 29, 2021 16:54
diff --git a/amo.S b/amo.S
 // Opcode value that handle_illegal_instr compares against the opcode field
 // of the trapping instruction to decide whether to enter handle_amo.
 #define OPCODE_AMO  0x2f

 // funct5 values (instruction bits 31:27, extracted with `srli a1, a0, 27`)
 // that handle_amo uses to pick the emulation routine for each AMO.
 #define FUNCT5_AMOSWAP 0x01
 #define FUNCT5_AMOADD  0x00
 #define FUNCT5_AMOXOR  0x04
 #define FUNCT5_AMOAND  0x0c
 #define FUNCT5_AMOOR   0x08
 #define FUNCT5_AMOMIN  0x10
 #define FUNCT5_AMOMAX  0x14
 #define FUNCT5_AMOMINU 0x18
 #define FUNCT5_AMOMAXU 0x1c

 // handle_illegal_instr: entry point for emulating an instruction that trapped.
 //
 // Spills x0..x31 into a 128-byte frame below the interrupted sp (register xn
 // lives at offset 4*n), reads the trapping instruction through mepc and jumps
 // to handle_amo when its 7-bit opcode is OPCODE_AMO. Anything else falls
 // through to bad_instr, which skips the instruction.
 //
 // In:  mepc = address of the trapping instruction
 //      sp   = interrupted stack pointer (128 bytes below it are overwritten)
 // Out (to handle_amo): a0 = 32-bit instruction word, sp = base of the frame
 .global handle_illegal_instr
 handle_illegal_instr:
 	// Spill all registers to stack, so we can index them by register number.
 	// The interrupted sp goes in first: -120 from the old sp is offset 8
 	// (the x2 slot) once sp has been lowered by 128.
 	sw sp, -120(sp)
 	addi sp, sp, -128
 	sw x0 , 0  (sp)
 	sw x1 , 4  (sp)
 	// skip sp
 	sw x3 , 12 (sp)
 	sw x4 , 16 (sp)
 	sw x5 , 20 (sp)
 	sw x6 , 24 (sp)
 	sw x7 , 28 (sp)
 	sw x8 , 32 (sp)
 	sw x9 , 36 (sp)
 	sw x10, 40 (sp)
 	sw x11, 44 (sp)
 	sw x12, 48 (sp)
 	sw x13, 52 (sp)
 	sw x14, 56 (sp)
 	sw x15, 60 (sp)
 	sw x16, 64 (sp)
 	sw x17, 68 (sp)
 	sw x18, 72 (sp)
 	sw x19, 76 (sp)
 	sw x20, 80 (sp)
 	sw x21, 84 (sp)
 	sw x22, 88 (sp)
 	sw x23, 92 (sp)
 	sw x24, 96 (sp)
 	sw x25, 100(sp)
 	sw x26, 104(sp)
 	sw x27, 108(sp)
 	sw x28, 112(sp)
 	sw x29, 116(sp)
 	sw x30, 120(sp)
 	sw x31, 124(sp)

 	// Get instruction in a0 without performing any unaligned accesses (LSB of
 	// mepc is always 0), then check its opcode (7 LSBs) to see if it's one
 	// we can emulate
 	csrr a1, mepc
 	lhu a0, (a1)              // low halfword  -> a0[15:0]
 	lhu a2, 2(a1)             // high halfword -> a2[15:0]
 	slli a2, a2, 16
 	or a0, a0, a2             // a0 = full 32-bit instruction word
 	andi a1, a0, 0x7f         // a1 = opcode, instruction bits 6:0

 	li a4, OPCODE_AMO
 	beq a1, a4, handle_amo
 	// All matches fell through, we don't know how to handle this. Just NOP...

 	// Instruction handled, we can restore all the integer state from the
 	// stack. If the emulated instruction wrote to a register, it did so by
 	// modifying this spilled register frame. Note we need to return to the
 	// instruction *after* the one we handled, so increment mepc.
 	//
 	// bad_instr shares this path: an instruction we cannot emulate is skipped
 	// in exactly the same way, with no register modified.
 	// NOTE(review): the +4 assumes a 32-bit instruction; a 16-bit encoding
 	// would need +2 -- confirm whether that case can reach this handler.
 restore_integer_regs_then_mret:
 bad_instr:
 	// a0 is free to use as scratch here; it is reloaded from its slot below.
 	csrr a0, mepc
 	addi a0, a0, 4
 	csrw mepc, a0
 	// x0 is deliberately not reloaded, so anything stored to slot 0 is dropped.
 	lw x1 , 4  (sp)
 	// Leave SP til last
 	lw x3 , 12 (sp)
 	lw x4 , 16 (sp)
 	lw x5 , 20 (sp)
 	lw x6 , 24 (sp)
 	lw x7 , 28 (sp)
 	lw x8 , 32 (sp)
 	lw x9 , 36 (sp)
 	lw x10, 40 (sp)
 	lw x11, 44 (sp)
 	lw x12, 48 (sp)
 	lw x13, 52 (sp)
 	lw x14, 56 (sp)
 	lw x15, 60 (sp)
 	lw x16, 64 (sp)
 	lw x17, 68 (sp)
 	lw x18, 72 (sp)
 	lw x19, 76 (sp)
 	lw x20, 80 (sp)
 	lw x21, 84 (sp)
 	lw x22, 88 (sp)
 	lw x23, 92 (sp)
 	lw x24, 96 (sp)
 	lw x25, 100(sp)
 	lw x26, 104(sp)
 	lw x27, 108(sp)
 	lw x28, 112(sp)
 	lw x29, 116(sp)
 	lw x30, 120(sp)
 	lw x31, 124(sp)
 	// Reloading sp from its own slot both restores it and releases the frame.
 	lw sp , 8  (sp)
 	mret

 // handle_amo: decode an AMO-opcode instruction and jump to its routine.
 //
 // In:  a0 = instruction word
 //      sp = base of the spilled register frame (xn at offset 4*n)
 // Out (to the AMO routines):
 //      a2 = value of rs1 (the memory address)
 //      a3 = value of rs2 (the operand)
 //      a7 = comparison selector, set only for the min/max variants:
 //           a7[3:2] = care-mask, a7[1:0] = expected values, both ordered
 //           {sltu, slt} for the comparison (rs2 value < memory value)
 // A funct5 that matches none of the FUNCT5_* constants goes to bad_instr.
 handle_amo:
 	// Get rs1, rs2 in a2, a3. Shifting by (field position - 2) leaves the
 	// register number already multiplied by 4, i.e. its byte offset in the
 	// frame; the mask then keeps just those five bits.
 	srli a2, a0, 15 - 2
 	andi a2, a2, 0x1f << 2
 	add a2, a2, sp
 	lw a2, (a2)
 	srli a3, a0, 20 - 2
 	andi a3, a3, 0x1f << 2
 	add a3, a3, sp
 	lw a3, (a3)

 	// Decode correct AMO routine or fall through if no match.
 	// a1 starts as funct5; each addi re-biases it so that it is zero exactly
 	// when funct5 equals the constant tested by the following beqz.
 	srli a1, a0, 27

 	addi a1, a1, 0              - FUNCT5_AMOSWAP
 	beqz a1, amoswap
 	addi a1, a1, FUNCT5_AMOSWAP - FUNCT5_AMOADD
 	beqz a1, amoadd
 	addi a1, a1, FUNCT5_AMOADD  - FUNCT5_AMOXOR
 	beqz a1, amoxor
 	addi a1, a1, FUNCT5_AMOXOR  - FUNCT5_AMOAND
 	beqz a1, amoand
 	addi a1, a1, FUNCT5_AMOAND  - FUNCT5_AMOOR
 	beqz a1, amoor
 	addi a1, a1, FUNCT5_AMOOR   - FUNCT5_AMOMIN
 	li a7, 0b0101 // care slt,  expect slt true   (store rs2 if rs2 <  mem, signed)
 	beqz a1, amo_minmax
 	addi a1, a1, FUNCT5_AMOMIN  - FUNCT5_AMOMAX
 	li a7, 0b0100 // care slt,  expect slt false  (store rs2 if rs2 >= mem, signed)
 	beqz a1, amo_minmax
 	addi a1, a1, FUNCT5_AMOMAX  - FUNCT5_AMOMINU
 	li a7, 0b1010 // care sltu, expect sltu true  (store rs2 if rs2 <  mem, unsigned)
 	beqz a1, amo_minmax
 	addi a1, a1, FUNCT5_AMOMINU - FUNCT5_AMOMAXU
 	li a7, 0b1000 // care sltu, expect sltu false (store rs2 if rs2 >= mem, unsigned)
 	beqz a1, amo_minmax
 	j bad_instr


 // AMO routines.
 // Inputs:  a2 = rs1 value (address), a3 = rs2 value (operand).
 // Output:  a4 = value for rd (the word read from memory).

 // The add/and/or/xor variants differ only in the ALU instruction between
 // the load-reserved and the store-conditional, so a single template emits
 // all four loops. Each one retries until sc.w writes zero to a5, then
 // jumps to amo_done.
 .macro amo_rmw_loop label, op
 \label:
 	lr.w a4, (a2)
 	\op a5, a4, a3
 	sc.w a5, a5, (a2)
 	bnez a5, \label
 	j amo_done
 .endm

 amo_rmw_loop amoadd, add

 amo_rmw_loop amoand, and

 amo_rmw_loop amoor, or

 amo_rmw_loop amoxor, xor

 // The template is only needed here; drop it so the name stays local.
 .purgem amo_rmw_loop

 // amo_minmax: shared LR/SC loop for the four min/max AMOs.
 //
 // Swap based on expected value combination of slt, sltu for current and new
 // value. Avoid branches in critical section, so we don't lose local progress
 // guarantee. For the same reason only base-ISA integer instructions are
 // used between lr.w and sc.w (no mul).
 //
 // In:  a2 = address, a3 = operand (rs2 value)
 //      a7[1:0] are expected values for {sltu, slt}
 //      a7[3:2] are care-mask for {sltu, slt}
 //      where slt/sltu are the results of (a3 < loaded value)
 // Out: a4 = value loaded from memory (result for rd)
 // Scratch: a1, a5, a6, t0, t1
 amo_minmax:
 	lr.w a4, (a2)
 	slt t0, a3, a4
 	sltu t1, a3, a4
 	slli t1, t1, 1
 	or t0, t0, t1             // t0[1:0] = {sltu, slt}
 	xor t0, t0, a7            // flag every bit that differs from its expected value
 	srli t1, a7, 2            // t1[1:0] = care-mask
 	and t0, t0, t1            // keep only the mismatches we care about
 	seqz a5, t0               // a5 = 1 if the selected comparison matched, else 0
 	// set a5 to a3 if comparison true, else a4:
 	//   a5 = a4 ^ ((a3 ^ a4) & -a5), with -a5 being all-ones or zero
 	xor a6, a3, a4
 	neg a5, a5
 	and a5, a5, a6
 	xor a5, a5, a4
 	sc.w a1, a5, (a2)
 	bnez a1, amo_minmax
 	j amo_done

 // amoswap: LR/SC retry loop that stores a3 to the word at (a2).
 // In:  a2 = address (rs1 value), a3 = value to store (rs2 value)
 // Out: a4 = word read from (a2) by the lr.w (result for rd)
 amoswap:
 	lr.w a4, (a2)
 	sc.w a5, a3, (a2)
 	// Retry from the load while sc.w leaves a nonzero status in a5
 	bnez a5, amoswap
 	// fall-thru to amo_done

 // amo_done: common tail of every AMO routine. Stores the rd result (a4)
 // into the rd slot of the spilled register frame, so the restore sequence
 // loads it into the destination register.
 // In: a0 = instruction word, a4 = result for rd, sp = base of the frame
 amo_done:
 	// a3 = rd * 4: shifting by (7 - 2) leaves instruction bits 11:7 already
 	// scaled to a byte offset, and the mask keeps just those five bits
 	srli a3, a0, 7 - 2
 	andi a3, a3, 0x1f << 2
 	add a3, a3, sp
 	sw a4, (a3)
 	j restore_integer_regs_then_mret
	// Opcode value that handle_illegal_instr compares against the opcode field
	// of the trapping instruction to decide whether to enter handle_amo.
	#define OPCODE_AMO 0x2f

	// funct5 values (instruction bits 31:27, extracted with `srli a1, a0, 27`)
	// that handle_amo uses to pick the emulation routine for each AMO.
	#define FUNCT5_AMOSWAP 0x01
	#define FUNCT5_AMOADD 0x00
	#define FUNCT5_AMOXOR 0x04
	#define FUNCT5_AMOAND 0x0c
	#define FUNCT5_AMOOR 0x08
	#define FUNCT5_AMOMIN 0x10
	#define FUNCT5_AMOMAX 0x14
	#define FUNCT5_AMOMINU 0x18
	#define FUNCT5_AMOMAXU 0x1c

	// handle_illegal_instr: entry point for emulating an instruction that trapped.
	//
	// Spills x0..x31 into a 128-byte frame below the interrupted sp (register xn
	// lives at offset 4*n), reads the trapping instruction through mepc and jumps
	// to handle_amo when its 7-bit opcode is OPCODE_AMO. Anything else falls
	// through to bad_instr, which skips the instruction.
	//
	// In:  mepc = address of the trapping instruction
	//      sp   = interrupted stack pointer (128 bytes below it are overwritten)
	// Out (to handle_amo): a0 = 32-bit instruction word, sp = base of the frame
	.global handle_illegal_instr
	handle_illegal_instr:
	// Spill all registers to stack, so we can index them by register number.
	// The interrupted sp goes in first: -120 from the old sp is offset 8
	// (the x2 slot) once sp has been lowered by 128.
	sw sp, -120(sp)
	addi sp, sp, -128
	sw x0 , 0 (sp)
	sw x1 , 4 (sp)
	// skip sp
	sw x3 , 12 (sp)
	sw x4 , 16 (sp)
	sw x5 , 20 (sp)
	sw x6 , 24 (sp)
	sw x7 , 28 (sp)
	sw x8 , 32 (sp)
	sw x9 , 36 (sp)
	sw x10, 40 (sp)
	sw x11, 44 (sp)
	sw x12, 48 (sp)
	sw x13, 52 (sp)
	sw x14, 56 (sp)
	sw x15, 60 (sp)
	sw x16, 64 (sp)
	sw x17, 68 (sp)
	sw x18, 72 (sp)
	sw x19, 76 (sp)
	sw x20, 80 (sp)
	sw x21, 84 (sp)
	sw x22, 88 (sp)
	sw x23, 92 (sp)
	sw x24, 96 (sp)
	sw x25, 100(sp)
	sw x26, 104(sp)
	sw x27, 108(sp)
	sw x28, 112(sp)
	sw x29, 116(sp)
	sw x30, 120(sp)
	sw x31, 124(sp)

	// Get instruction in a0 without performing any unaligned accesses (LSB of
	// mepc is always 0), then check its opcode (7 LSBs) to see if it's one
	// we can emulate
	csrr a1, mepc
	lhu a0, (a1)              // low halfword  -> a0[15:0]
	lhu a2, 2(a1)             // high halfword -> a2[15:0]
	slli a2, a2, 16
	or a0, a0, a2             // a0 = full 32-bit instruction word
	andi a1, a0, 0x7f         // a1 = opcode, instruction bits 6:0

	li a4, OPCODE_AMO
	beq a1, a4, handle_amo
	// All matches fell through, we don't know how to handle this. Just NOP...

	// Instruction handled, we can restore all the integer state from the
	// stack. If the emulated instruction wrote to a register, it did so by
	// modifying this spilled register frame. Note we need to return to the
	// instruction after the one we handled, so increment mepc.
	//
	// bad_instr shares this path: an instruction we cannot emulate is skipped
	// in exactly the same way, with no register modified.
	// NOTE(review): the +4 assumes a 32-bit instruction; a 16-bit encoding
	// would need +2 -- confirm whether that case can reach this handler.
	restore_integer_regs_then_mret:
	bad_instr:
	// a0 is free to use as scratch here; it is reloaded from its slot below.
	csrr a0, mepc
	addi a0, a0, 4
	csrw mepc, a0
	// x0 is deliberately not reloaded, so anything stored to slot 0 is dropped.
	lw x1 , 4 (sp)
	// Leave SP til last
	lw x3 , 12 (sp)
	lw x4 , 16 (sp)
	lw x5 , 20 (sp)
	lw x6 , 24 (sp)
	lw x7 , 28 (sp)
	lw x8 , 32 (sp)
	lw x9 , 36 (sp)
	lw x10, 40 (sp)
	lw x11, 44 (sp)
	lw x12, 48 (sp)
	lw x13, 52 (sp)
	lw x14, 56 (sp)
	lw x15, 60 (sp)
	lw x16, 64 (sp)
	lw x17, 68 (sp)
	lw x18, 72 (sp)
	lw x19, 76 (sp)
	lw x20, 80 (sp)
	lw x21, 84 (sp)
	lw x22, 88 (sp)
	lw x23, 92 (sp)
	lw x24, 96 (sp)
	lw x25, 100(sp)
	lw x26, 104(sp)
	lw x27, 108(sp)
	lw x28, 112(sp)
	lw x29, 116(sp)
	lw x30, 120(sp)
	lw x31, 124(sp)
	// Reloading sp from its own slot both restores it and releases the frame.
	lw sp , 8 (sp)
	mret

	// handle_amo: decode an AMO-opcode instruction and jump to its routine.
	//
	// In:  a0 = instruction word
	//      sp = base of the spilled register frame (xn at offset 4*n)
	// Out (to the AMO routines):
	//      a2 = value of rs1 (the memory address)
	//      a3 = value of rs2 (the operand)
	//      a7 = comparison selector, set only for the min/max variants:
	//           a7[3:2] = care-mask, a7[1:0] = expected values, both ordered
	//           {sltu, slt} for the comparison (rs2 value < memory value)
	// A funct5 that matches none of the FUNCT5_* constants goes to bad_instr.
	handle_amo:
	// Get rs1, rs2 in a2, a3. Shifting by (field position - 2) leaves the
	// register number already multiplied by 4, i.e. its byte offset in the
	// frame; the mask then keeps just those five bits.
	srli a2, a0, 15 - 2
	andi a2, a2, 0x1f << 2
	add a2, a2, sp
	lw a2, (a2)
	srli a3, a0, 20 - 2
	andi a3, a3, 0x1f << 2
	add a3, a3, sp
	lw a3, (a3)

	// Decode correct AMO routine or fall through if no match.
	// a1 starts as funct5; each addi re-biases it so that it is zero exactly
	// when funct5 equals the constant tested by the following beqz.
	srli a1, a0, 27

	addi a1, a1, 0 - FUNCT5_AMOSWAP
	beqz a1, amoswap
	addi a1, a1, FUNCT5_AMOSWAP - FUNCT5_AMOADD
	beqz a1, amoadd
	addi a1, a1, FUNCT5_AMOADD - FUNCT5_AMOXOR
	beqz a1, amoxor
	addi a1, a1, FUNCT5_AMOXOR - FUNCT5_AMOAND
	beqz a1, amoand
	addi a1, a1, FUNCT5_AMOAND - FUNCT5_AMOOR
	beqz a1, amoor
	addi a1, a1, FUNCT5_AMOOR - FUNCT5_AMOMIN
	li a7, 0b0101 // care slt,  expect slt true   (store rs2 if rs2 <  mem, signed)
	beqz a1, amo_minmax
	addi a1, a1, FUNCT5_AMOMIN - FUNCT5_AMOMAX
	li a7, 0b0100 // care slt,  expect slt false  (store rs2 if rs2 >= mem, signed)
	beqz a1, amo_minmax
	addi a1, a1, FUNCT5_AMOMAX - FUNCT5_AMOMINU
	li a7, 0b1010 // care sltu, expect sltu true  (store rs2 if rs2 <  mem, unsigned)
	beqz a1, amo_minmax
	addi a1, a1, FUNCT5_AMOMINU - FUNCT5_AMOMAXU
	li a7, 0b1000 // care sltu, expect sltu false (store rs2 if rs2 >= mem, unsigned)
	beqz a1, amo_minmax
	j bad_instr


	// AMO routines.
	// Inputs:  a2 = rs1 value (address), a3 = rs2 value (operand).
	// Output:  a4 = value for rd (the word read from memory).

	// The add/and/or/xor variants differ only in the ALU instruction between
	// the load-reserved and the store-conditional, so a single template emits
	// all four loops. Each one retries until sc.w writes zero to a5, then
	// jumps to amo_done.
	.macro amo_rmw_loop label, op
	\label:
	lr.w a4, (a2)
	\op a5, a4, a3
	sc.w a5, a5, (a2)
	bnez a5, \label
	j amo_done
	.endm

	amo_rmw_loop amoadd, add

	amo_rmw_loop amoand, and

	amo_rmw_loop amoor, or

	amo_rmw_loop amoxor, xor

	// The template is only needed here; drop it so the name stays local.
	.purgem amo_rmw_loop

	// amo_minmax: shared LR/SC loop for the four min/max AMOs.
	//
	// Swap based on expected value combination of slt, sltu for current and new
	// value. Avoid branches in critical section, so we don't lose local progress
	// guarantee. For the same reason only base-ISA integer instructions are
	// used between lr.w and sc.w (no mul).
	//
	// In:  a2 = address, a3 = operand (rs2 value)
	//      a7[1:0] are expected values for {sltu, slt}
	//      a7[3:2] are care-mask for {sltu, slt}
	//      where slt/sltu are the results of (a3 < loaded value)
	// Out: a4 = value loaded from memory (result for rd)
	// Scratch: a1, a5, a6, t0, t1
	amo_minmax:
	lr.w a4, (a2)
	slt t0, a3, a4
	sltu t1, a3, a4
	slli t1, t1, 1
	or t0, t0, t1             // t0[1:0] = {sltu, slt}
	xor t0, t0, a7            // flag every bit that differs from its expected value
	srli t1, a7, 2            // t1[1:0] = care-mask
	and t0, t0, t1            // keep only the mismatches we care about
	seqz a5, t0               // a5 = 1 if the selected comparison matched, else 0
	// set a5 to a3 if comparison true, else a4:
	//   a5 = a4 ^ ((a3 ^ a4) & -a5), with -a5 being all-ones or zero
	xor a6, a3, a4
	neg a5, a5
	and a5, a5, a6
	xor a5, a5, a4
	sc.w a1, a5, (a2)
	bnez a1, amo_minmax
	j amo_done

	// amoswap: LR/SC retry loop that stores a3 to the word at (a2).
	// In:  a2 = address (rs1 value), a3 = value to store (rs2 value)
	// Out: a4 = word read from (a2) by the lr.w (result for rd)
	amoswap:
	lr.w a4, (a2)
	sc.w a5, a3, (a2)
	// Retry from the load while sc.w leaves a nonzero status in a5
	bnez a5, amoswap
	// fall-thru to amo_done

	// amo_done: common tail of every AMO routine. Stores the rd result (a4)
	// into the rd slot of the spilled register frame, so the restore sequence
	// loads it into the destination register.
	// In: a0 = instruction word, a4 = result for rd, sp = base of the frame
	amo_done:
	// a3 = rd * 4: shifting by (7 - 2) leaves instruction bits 11:7 already
	// scaled to a byte offset, and the mask keeps just those five bits
	srli a3, a0, 7 - 2
	andi a3, a3, 0x1f << 2
	add a3, a3, sp
	sw a4, (a3)
	j restore_integer_regs_then_mret