diff --git a/README.md b/README.md index 4651d63..84f3c6d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ # RPU Basic RISC-V CPU implementation in VHDL. -This is a RV32I ISA CPU implementation, based off of my TPU CPU design. It is very simple, is missing several features, but can run rv32i-compiled GCC toolchain binaries at over 200MHz on a Digilent Arty S7-50 board, built with Xilinx Spartan 7 tools. Can also boot Zephyr given correct SoC environment and invalid emulation handling of multiply/divide/mod M-extension instruction via invalid instruction trap. +This is a RV32IMZcsr ISA CPU implementation, based off of my TPU CPU design. It is very simple, but has run rv32i-compiled GCC toolchain binaries at over 200MHz on a Digilent Arty S7-50 board, built with Xilinx Spartan 7 tools. + +When used in the ArtyS7-RPU-SoC, it can run DooM timedemo3 at ~8fps, and boot operating systems such as Zephyr RTOS. Please let me know if you are using any of the RPU design in your own projects! I am contactable on twitter @domipheus. 
diff --git a/tests/rpu_core_tb.vhd b/tests/rpu_core_tb.vhd index 4c7d0c0..b586370 100644 --- a/tests/rpu_core_tb.vhd +++ b/tests/rpu_core_tb.vhd @@ -64,11 +64,15 @@ architecture Behavioral of rpu_core_tb is MEM_I_dataReady : IN std_logic ; - O_DBG:out std_logic_vector(XLEN32M1 downto 0) + O_DBG:out std_logic_vector(63 downto 0) ); END COMPONENT; + signal CLK12MHZ : STD_LOGIC := '0'; + signal CLKTIME : STD_LOGIC := '0'; + constant I_CLKTIME_period : time := 8 ns; + signal cEng_core : std_logic := '0'; signal I_reset : std_logic := '1'; signal I_halt : std_logic := '0'; @@ -79,7 +83,9 @@ architecture Behavioral of rpu_core_tb is signal MEM_O_data_swizzed : std_logic_vector(31 downto 0) := (others => '0'); - signal O_int_ack : std_logic; + signal O_int_ack : std_logic:= '0'; + signal O_int_ack_stable_12mhz : std_logic:= '0'; + signal O_int_ack_stable_12mhz_core : std_logic:= '0'; signal MEM_O_cmd : std_logic := '0'; signal MEM_O_we : std_logic := '0'; @@ -91,7 +97,7 @@ architecture Behavioral of rpu_core_tb is signal MEM_I_data_raw : std_logic_vector(31 downto 0) := (others => '0'); -- Clock period definitions - constant I_clk_period : time := 10 ns; + constant I_clk_period : time := 5 ns; signal MEM_readyState: integer := 0; @@ -131,40 +137,438 @@ architecture Behavioral of rpu_core_tb is signal MEM_DATA_OUT_BRAM_3: std_logic_vector(BWIDTHM1 downto 0):= (others => '0'); - signal O_DBG: std_logic_vector(XLEN32M1 downto 0); + signal O_DBG: std_logic_vector(63 downto 0); + constant mmio_addr_mtime_lo: STD_LOGIC_VECTOR( 31 downto 0) := X"4400bff8"; + constant mmio_addr_mtime_hi: STD_LOGIC_VECTOR( 31 downto 0) := X"4400bffc"; + signal gcsr_mtime_lo: STD_LOGIC_VECTOR( 31 downto 0) := (others => '0'); + signal gcsr_mtime_hi: STD_LOGIC_VECTOR( 31 downto 0) := (others => '0'); - type rom_type is array (0 to 31) + signal memcontroller_reset_count: integer := 100000; + + signal count12MHz: std_logic_vector(63 downto 0) := X"0000000000000000"; + + constant 
mmio_addr_mtimecmp0_lo: STD_LOGIC_VECTOR( 31 downto 0) := X"44004000"; + constant mmio_addr_mtimecmp0_hi: STD_LOGIC_VECTOR( 31 downto 0) := X"44004004"; + -- constant mmio_addr_mtimecmp0_lo: STD_LOGIC_VECTOR( 31 downto 0) := X"00000004"; + -- constant mmio_addr_mtimecmp0_hi: STD_LOGIC_VECTOR( 31 downto 0) := X"00000008"; + signal gcsr_timer_initialized : STD_LOGIC:='0'; + signal gcsr_mtimecmp0_lo: STD_LOGIC_VECTOR( 31 downto 0) := X"0000000c";--X"07270E00";--20s 0E4E1C00"; --?10 seconds of 12mhz counter?"; --(others => '0'); + signal gcsr_mtimecmp0_hi: STD_LOGIC_VECTOR( 31 downto 0) := (others => '0'); + signal gcsr_mtimecmp0_stable: STD_LOGIC_VECTOR( 63 downto 0) := (others => '0'); + + signal gcsr_mtimecmp0_lo_written: STD_LOGIC := '0'; + signal gcsr_mtimecmp0_hi_written: STD_LOGIC := '0'; + signal gcsr_mtimecmp_irq_reset: STD_LOGIC := '0'; + signal gcsr_mtimecmp_irq_reset_stable: STD_LOGIC := '0'; + signal gcsr_mtimecmp_irq_en: STD_LOGIC := '0'; ---- + signal gcsr_mtimecmp_irq_en_stable: STD_LOGIC := '0'; ------ + signal gcsr_mtimecmp_irq: STD_LOGIC := '0'; + + signal gcsr_mtimecmp_irq_served: std_logic := '0'; + signal plic_int : std_logic := '0'; + + type rom_type is array (0 to 16384) of std_logic_vector(31 downto 0); - - constant ROM: rom_type:=( - X"00008137", -- lui sp,0x8 - X"ffc10113", -- addi sp,sp,-4 # 7ffc <_end+0x7fd0> - X"c00015f3", -- csrrw a1,cycle,zero - X"c8001673", -- csrrw a2,cycleh,zero - X"f13016f3", -- csrrw a3,mimpid,zero - X"30101773", -- csrrw a4,misa,zero - X"c02017f3", -- csrrw a5,instret,zer - X"c8201873", -- csrrw a6,instreth,ze - X"c00018f3", -- csrrw a7,cycle,zero - X"c8001973", -- csrrw s2,cycleh,zero - X"400019f3", -- csrrw s3,0x400,zero - X"40069a73", -- csrrw s4,0x400,a3 - X"40001af3", -- csrrw s5,0x400,zero - X"40011b73", -- csrrw s6,0x400,sp - X"40001bf3", -- csrrw s7,0x400,zero - X"40073c73", -- csrrc s8,0x400,a4 - X"40001cf3", -- csrrw s9,0x400,zero - X"40111d73", -- csrrw s10,0x401,sp - X"40101df3", -- csrrw s11,0x401,zero 
- X"40172e73", -- csrrs t3,0x401,a4 - X"40101ef3", -- csrrw t4,0x401,zero + signal ROM2: rom_type :=(others => X"00000000"); + signal ROM3: rom_type :=(others => X"00000000"); + signal ROM: rom_type:=( + X"00000097", --auipc ra,0x0 + X"14408093", --addi ra,ra,324 # 10000230 <_trap_handler> + X"30509ff3", --csrrw t6,mtvec,ra + X"00002197", --auipc gp,0x2 + X"f0818193", --addi gp,gp,-248 # 10002000 + X"00002117", --auipc sp,0x2 + X"f1010113", --addi sp,sp,-240 # 10002010 + X"00002097", --auipc ra,0x2 + X"f1808093", --addi ra,ra,-232 # 10002020 + X"00500293", --li t0,5 + X"00600313", --li t1,6 + X"0001a203", --lw tp,0(gp) + X"00412023", --sw tp,0(sp) + X"0011a203", --lw tp,1(gp) + X"00412223", --sw tp,4(sp) + X"0021a203", --lw tp,2(gp) + X"00412423", --sw tp,8(sp) + X"0031a203", --lw tp,3(gp) + X"00412623", --sw tp,12(sp) + X"00002197", --auipc gp,0x2 + X"ecc18193", --addi gp,gp,-308 # 10002004 + X"00002117", --auipc sp,0x2 + X"ef810113", --addi sp,sp,-264 # 10002038 + X"00002097", --auipc ra,0x2 + X"f1008093", --addi ra,ra,-240 # 10002058 + X"00500293", --li t0,5 + X"00600313", --li t1,6 + X"00019203", --lh tp,0(gp) + X"00412023", --sw tp,0(sp) + X"00119203", --lh tp,1(gp) + X"00412223", --sw tp,4(sp) + X"00219203", --lh tp,2(gp) + X"00412423", --sw tp,8(sp) + X"00319203", --lh tp,3(gp) + X"00412623", --sw tp,12(sp) + X"0001d203", --lhu tp,0(gp) + X"00412823", --sw tp,16(sp) + X"0011d203", --lhu tp,1(gp) + X"00412a23", --sw tp,20(sp) + X"0021d203", --lhu tp,2(gp) + X"00412c23", --sw tp,24(sp) + X"0031d203", --lhu tp,3(gp) + X"00412e23", --sw tp,28(sp) + X"00002117", --auipc sp,0x2 + X"ee010113", --addi sp,sp,-288 # 10002078 + X"00002097", --auipc ra,0x2 + X"ee808093", --addi ra,ra,-280 # 10002088 + X"00000313", --li t1,0 + X"9999a2b7", --lui t0,0x9999a + X"99928293", --addi t0,t0,-1639 # 99999999 <_end+0x89997795> + X"00512023", --sw t0,0(sp) + X"00512223", --sw t0,4(sp) + X"00512423", --sw t0,8(sp) + X"00512623", --sw t0,12(sp) + X"00612023", --sw t1,0(sp) + 
X"00410113", --addi sp,sp,4 + X"006120a3", --sw t1,1(sp) + X"00410113", --addi sp,sp,4 + X"00612123", --sw t1,2(sp) + X"00410113", --addi sp,sp,4 + X"006121a3", --sw t1,3(sp) + X"00002117", --auipc sp,0x2 + X"ec010113", --addi sp,sp,-320 # 100020a0 + X"00002097", --auipc ra,0x2 + X"ec808093", --addi ra,ra,-312 # 100020b0 + X"00000313", --li t1,0 + X"9999a2b7", --lui t0,0x9999a + X"99928293", --addi t0,t0,-1639 # 99999999 <_end+0x89997795> + X"00512023", --sw t0,0(sp) + X"00512223", --sw t0,4(sp) + X"00512423", --sw t0,8(sp) + X"00512623", --sw t0,12(sp) + X"00611023", --sh t1,0(sp) + X"00410113", --addi sp,sp,4 + X"006110a3", --sh t1,1(sp) + X"00410113", --addi sp,sp,4 + X"00611123", --sh t1,2(sp) + X"00410113", --addi sp,sp,4 + X"006111a3", --sh t1,3(sp) + X"305f9073", --csrw mtvec,t6 + X"02c0006f", --j 10000258 + + + X"34102f73", --csrr t5,mepc + X"004f0f13", --addi t5,t5,4 + X"341f1073", --csrw mepc,t5 + X"34302f73", --csrr t5,mtval + X"003f7f13", --andi t5,t5,3 + X"01e0a023", --sw t5,0(ra) + X"34202f73", --csrr t5,mcause + X"01e0a223", --sw t5,4(ra) + X"00808093", --addi ra,ra,8 + X"30200073", --mret + + + X"00100193", --li gp,1 + X"00002f17", --auipc t5,0x2 + X"e64f0f13", --addi t5,t5,-412 # 100020c0 + X"000f2103", --lw sp,0(t5) + X"00412083", --lw ra,4(sp) + X"00812283", --lw t0,8(sp) + X"00c12303", --lw t1,12(sp) + X"01012383", --lw t2,16(sp) + X"01412403", --lw s0,20(sp) + X"01812483", --lw s1,24(sp) + X"01c12503", --lw a0,28(sp) + X"02012583", --lw a1,32(sp) + X"02412603", --lw a2,36(sp) + X"02812683", --lw a3,40(sp) + X"02c12703", --lw a4,44(sp) + X"03012783", --lw a5,48(sp) + X"03412803", --lw a6,52(sp) + X"03812883", --lw a7,56(sp) + X"03c12903", --lw s2,60(sp) + X"04012983", --lw s3,64(sp) + X"04412a03", --lw s4,68(sp) + X"04812a83", --lw s5,72(sp) + X"04c12b03", --lw s6,76(sp) + X"05012b83", --lw s7,80(sp) + X"05412c03", --lw s8,84(sp) + X"05812c83", --lw s9,88(sp) + X"05c12d03", --lw s10,92(sp) + X"06012d83", --lw s11,96(sp) + X"06412e03", --lw 
t3,100(sp) + X"06812e83", --lw t4,104(sp) + X"06c12f03", --lw t5,108(sp) + X"07012f83", --lw t6,112(sp) + X"08010113", --addi sp,sp,128 + X"0000006f", -- j 00 +-- X"00008067", --ret + + + X"00002f17", --auipc t5,0x2 + X"de0f0f13", --addi t5,t5,-544 # 100020c0 + X"000f2103", --lw sp,0(t5) + X"00412083", --lw ra,4(sp) + X"00812283", --lw t0,8(sp) + X"00c12303", --lw t1,12(sp) + X"01012383", --lw t2,16(sp) + X"01412403", --lw s0,20(sp) + X"01812483", --lw s1,24(sp) + X"01c12503", --lw a0,28(sp) + X"02012583", --lw a1,32(sp) + X"02412603", --lw a2,36(sp) + X"02812683", --lw a3,40(sp) + X"02c12703", --lw a4,44(sp) + X"03012783", --lw a5,48(sp) + X"03412803", --lw a6,52(sp) + X"03812883", --lw a7,56(sp) + X"03c12903", --lw s2,60(sp) + X"04012983", --lw s3,64(sp) + X"04412a03", --lw s4,68(sp) + X"04812a83", --lw s5,72(sp) + X"04c12b03", --lw s6,76(sp) + X"05012b83", --lw s7,80(sp) + X"05412c03", --lw s8,84(sp) + X"05812c83", --lw s9,88(sp) + X"05c12d03", --lw s10,92(sp) + X"06012d83", --lw s11,96(sp) + X"06412e03", --lw t3,100(sp) + X"06812e83", --lw t4,104(sp) + X"06c12f03", --lw t5,108(sp) + X"07012f83", --lw t6,112(sp) + X"08010113", --addi sp,sp,128 + X"0000006f", -- j 00 + + -- X"00008067", --ret + -- X"c0001073", --unimp +-- +-- X"00000097", --auipc ra,0x0 +-- X"20808093", --addi ra,ra,520 # 100002f4 <_trap_handler> +-- X"30509ff3", --csrrw t6,mtvec,ra +-- X"30127073", --csrci misa,4 +-- X"00002097", --auipc ra,0x2 +-- X"f0408093", --addi ra,ra,-252 # 10002000 +-- X"11111137", --lui sp,0x11111 +-- X"11110113", --addi sp,sp,273 # 11111111 <_end+0x110ef0d> +-- X"00a0006f", --j 10000116 +-- X"00000113", --li sp,0 +-- X"00002097", --auipc ra,0x2 +-- X"ef808093", --addi ra,ra,-264 # 1000200c +-- X"22222137", --lui sp,0x22222 +-- X"22210113", --addi sp,sp,546 # 22222222 <_end+0x1222001e> +-- X"00000217", --auipc tp,0x0 +-- X"01120213", --addi tp,tp,17 # 10000135 +-- X"00020067", --jr tp # 0 <_start-0x10000000> +-- X"00000113", --li sp,0 +-- X"0020a023", --sw sp,0(ra) +-- 
X"00408093", --addi ra,ra,4 +-- X"33333137", --lui sp,0x33333 +-- X"33310113", --addi sp,sp,819 # 33333333 <_end+0x2333112f> +-- X"00000217", --auipc tp,0x0 +-- X"01020213", --addi tp,tp,16 # 10000154 +-- X"00120067", --jr 1(tp) # 0 <_start-0x10000000> +-- X"00000113", --li sp,0 +-- X"0020a023", --sw sp,0(ra) +-- X"00408093", --addi ra,ra,4 +-- X"44444137", --lui sp,0x44444 +-- X"44410113", --addi sp,sp,1092 # 44444444 <_end+0x34442240> +-- X"00000217", --auipc tp,0x0 +-- X"01420213", --addi tp,tp,20 # 10000178 +-- X"ffd20067", --jr -3(tp) # 0 <_start-0x10000000> +-- X"00000113", --li sp,0 +-- X"0020a023", --sw sp,0(ra) +-- X"00408093", --addi ra,ra,4 +-- X"00002097", --auipc ra,0x2 +-- X"e9c08093", --addi ra,ra,-356 # 10002018 +-- X"55555137", --lui sp,0x55555 +-- X"55510113", --addi sp,sp,1365 # 55555555 <_end+0x45553351> +-- X"00000217", --auipc tp,0x0 +-- X"01220213", --addi tp,tp,18 # 1000019e +-- X"00020067", --jr tp # 0 <_start-0x10000000> +-- X"00000113", --li sp,0 +-- X"66666137", --lui sp,0x66666 +-- X"66610113", --addi sp,sp,1638 # 66666666 <_end+0x56664462> +-- X"00000217", --auipc tp,0x0 +-- X"01320213", --addi tp,tp,19 # 100001b7 +-- X"00020067", --jr tp # 0 <_start-0x10000000> +-- X"00000113", --li sp,0 +-- X"77777137", --lui sp,0x77777 +-- X"77710113", --addi sp,sp,1911 # 77777777 <_end+0x67775573> +-- X"00000217", --auipc tp,0x0 +-- X"01020213", --addi tp,tp,16 # 100001cc +-- X"00220067", --jr 2(tp) # 0 <_start-0x10000000> +-- X"00000113", --li sp,0 +-- X"88889137", --lui sp,0x88889 +-- X"88810113", --addi sp,sp,-1912 # 88888888 <_end+0x78886684> +-- X"00000217", --auipc tp,0x0 +-- X"01020213", --addi tp,tp,16 # 100001e4 +-- X"00320067", --jr 3(tp) # 0 <_start-0x10000000> +-- X"00000113", --li sp,0 +-- X"00002097", --auipc ra,0x2 +-- X"e6408093", --addi ra,ra,-412 # 10002048 +-- X"00500293", --li t0,5 +-- X"00600313", --li t1,6 +-- X"00628763", --beq t0,t1,10000202 +-- X"9999a137", --lui sp,0x9999a +-- X"99910113", --addi sp,sp,-1639 # 99999999 
<_end+0x89997795> +-- X"00000013", --nop +-- X"00000013", --nop +-- X"00528563", --beq t0,t0,10000212 +-- X"00000113", --li sp,0 +-- X"00002097", --auipc ra,0x2 +-- X"e4408093", --addi ra,ra,-444 # 10002054 +-- X"00500293", --li t0,5 +-- X"00600313", --li t1,6 +-- X"00529763", --bne t0,t0,1000022e +-- X"aaaab137", --lui sp,0xaaaab +-- X"aaa10113", --addi sp,sp,-1366 # aaaaaaaa <_end+0x9aaa88a6> +-- X"00000013", --nop +-- X"00000013", --nop +-- X"00629563", --bne t0,t1,1000023e +-- X"00000113", --li sp,0 +-- X"00002097", --auipc ra,0x2 +-- X"e2408093", --addi ra,ra,-476 # 10002060 +-- X"00500293", --li t0,5 +-- X"00600313", --li t1,6 +-- X"00534763", --blt t1,t0,1000025a +-- X"bbbbc137", --lui sp,0xbbbbc +-- X"bbb10113", --addi sp,sp,-1093 # bbbbbbbb <_end+0xabbb99b7> +-- X"00000013", --nop +-- X"00000013", --nop +-- X"0062c563", --blt t0,t1,1000026a +-- X"00000113", --li sp,0 +-- X"00002097", --auipc ra,0x2 +-- X"e0408093", --addi ra,ra,-508 # 1000206c +-- X"00500293", --li t0,5 +-- X"00600313", --li t1,6 +-- X"00536763", --bltu t1,t0,10000286 +-- X"ccccd137", --lui sp,0xccccd +-- X"ccc10113", --addi sp,sp,-820 # cccccccc <_end+0xbcccaac8> +-- X"00000013", --nop +-- X"00000013", --nop +-- X"0062e563", --bltu t0,t1,10000296 +-- X"00000113", --li sp,0 +-- X"00002097", --auipc ra,0x2 +-- X"de408093", --addi ra,ra,-540 # 10002078 +-- X"00500293", --li t0,5 +-- X"00600313", --li t1,6 +-- X"0062d763", --bge t0,t1,100002b2 +-- X"dddde137", --lui sp,0xdddde +-- X"ddd10113", --addi sp,sp,-547 # dddddddd <_end+0xcdddbbd9> +-- X"00000013", --nop +-- X"00000013", --nop +-- X"00535563", --bge t1,t0,100002c2 +-- X"00000113", --li sp,0 +-- X"00002097", --auipc ra,0x2 +-- X"dc408093", --addi ra,ra,-572 # 10002084 +-- X"00500293", --li t0,5 +-- X"00600313", --li t1,6 +-- X"0062f763", --bgeu t0,t1,100002de +-- X"eeeef137", --lui sp,0xeeeef +-- X"eee10113", --addi sp,sp,-274 # eeeeeeee <_end+0xdeeeccea> +-- X"00000013", --nop +-- X"00000013", --nop +-- X"00537563", --bgeu 
t1,t0,100002ee +-- X"00000113", --li sp,0 +-- X"305f9073", --csrw mtvec,t6 +-- X"0300006f", --j 10000320 +-- +-- --<_trap_handler>: +-- X"34302f73", --csrr t5,mtval +-- X"ffef0f13", --addi t5,t5,-2 +-- X"341f1073", --csrw mepc,t5 +-- X"34302f73", --csrr t5,mtval +-- X"003f7f13", --andi t5,t5,3 +-- X"01e0a023", --sw t5,0(ra) +-- X"34202f73", --csrr t5,mcause +-- X"01e0a223", --sw t5,4(ra) +-- X"0020a423", --sw sp,8(ra) +-- X"00c08093", --addi ra,ra,12 +-- X"30200073", --mret +-- +-- -- : +-- X"00100193", --li gp,1 +-- X"00100f13", -- li t5,1 +-- X"00100e93", -- li t4,1 +-- X"03df0eb3", -- mul t4,t5,t4 +-- + + X"0000006f", -- j 00 + + X"06300513", -- 0 li a0,99 0 + X"00a00693", -- 4 li a3,10 + X"02d57733", -- 8 remu a4,a0,a3 + X"00f605b3", -- c add a1,a2,a5 + X"00178793", -- 10 addi a5,a5,1 + X"02d55533", -- 14 divu a0,a0,a3 + X"03070713", -- 18 addi a4,a4,48 + + + + --X"00812423", -- sw s0,8(sp) + -- X"00112623", -- sw ra,12(sp) + --X"00048413", -- mv s0,s1 + --X"00048793", -- mv a5,s1 + --X"40960633", -- sub a2,a2,s1 + X"00a00693", -- li a3,10 + X"02d57733", -- remu a4,a0,a3 + X"00f605b3", -- add a1,a2,a5 + X"00178793", -- addi a5,a5,1 + X"02d55533", -- divu a0,a0,a3 + X"03070713", -- addi a4,a4,48 + X"fee78fa3", -- sb a4,-1(a5) + X"fe0514e3", -- bnez a0,20bbc + -- 00000000 : + X"0000006f", -- j 00 - X"0000006f", -- infloop others => X"00000000"); + + signal I_hart0_int0_coreclk_stable : STD_LOGIC := '0'; + signal O_hart0_int_ack0_coreclk_stable : STD_LOGIC := '0'; + signal hart0_int_ack0_external : STD_LOGIC := '0'; + signal int_was_inactive: STD_LOGIC := '0'; + + signal count12MHz_stable: STD_LOGIC_VECTOR(63 downto 0) := (others => '0'); BEGIN + + I_int <= gcsr_mtimecmp_irq; - +process(CLK12MHZ) +begin + if rising_edge(CLK12MHZ) then + count12MHz <= std_logic_vector(unsigned(count12MHz) + 1); + end if; +end process; + +process (cEng_core) +begin +if rising_edge(cEng_core) then + count12MHz_stable <= count12MHz; +end if; +end process; + + +process(cEng_core) 
+begin + if rising_edge(cEng_core) then + if gcsr_mtimecmp_irq_en = '1' then + if count12MHz_stable >= (gcsr_mtimecmp0_hi & gcsr_mtimecmp0_lo) then + gcsr_mtimecmp_irq <= '1'; + gcsr_mtimecmp_irq_en <= '0'; + end if; + else + if gcsr_mtimecmp_irq_reset = '1' then + gcsr_mtimecmp_irq_en <= '1'; + end if; + if gcsr_mtimecmp_irq = '1' and O_int_ack = '1' then + gcsr_mtimecmp_irq <= '0'; + end if; + end if; + end if; +end process; + + I_int_data <= EXCEPTION_INT_MACHINE_TIMER; + -- The O_we signal can sustain too long. Clamp it to only when O_cmd is active. MEM_WE <= MEM_O_cmd and MEM_O_we; @@ -182,13 +586,17 @@ BEGIN MEM_ANY_CS <= MEM_CS_BRAM_1 or MEM_CS_BRAM_2 or MEM_CS_BRAM_3; -- select the correct data to send to cpu - MEM_I_data_raw <= INT_DATA when O_int_ack = '1' - else MEM_DATA_OUT_BRAM_1 when MEM_CS_BRAM_1 = '1' + MEM_I_data_raw <= + MEM_DATA_OUT_BRAM_1 when MEM_CS_BRAM_1 = '1' else MEM_DATA_OUT_BRAM_2 when MEM_CS_BRAM_2 = '1' else MEM_DATA_OUT_BRAM_3 when MEM_CS_BRAM_3 = '1' - else IO_DATA; - - MEM_I_data <= ROM(to_integer(unsigned( MEM_64KB_ADDR(15 downto 2) ))); + else X"91a1b1c1";--IO_DATA; + + MEM_DATA_OUT_BRAM_1 <= ROM(to_integer(unsigned( MEM_64KB_ADDR(15 downto 2)and "01" & X"fff" )));--and "00"&X"03F" ))); + MEM_DATA_OUT_BRAM_2 <= ROM2(to_integer(unsigned( MEM_64KB_ADDR(15 downto 2)and "01" & X"fff" )));--and "00"&X"03F" ))); + MEM_DATA_OUT_BRAM_3 <= ROM3(to_integer(unsigned( MEM_64KB_ADDR(15 downto 2)and "01" & X"fff" )));--and "00"&X"03F" ))); + + MEM_I_data <= MEM_I_data_raw; @@ -199,6 +607,16 @@ BEGIN cEng_core <= '1'; wait for I_clk_period/2; end process; + +CLKTIME_clk: process +begin + CLKTIME <= '0'; + wait for I_CLKTIME_period/2; + CLKTIME <= '1'; + wait for I_CLKTIME_period/2; +end process; + + CLK12MHZ <= CLKTIME; core0: core PORT MAP ( I_clk => cEng_core, @@ -226,10 +644,56 @@ BEGIN MEM_proc: process(cEng_core) begin if rising_edge(cEng_core) then + if gcsr_mtimecmp_irq_en = '1' and gcsr_mtimecmp_irq_reset = '1' then + 
gcsr_mtimecmp_irq_reset <= '0'; + end if; + if MEM_readyState = SOC_CtlState_Ready then if MEM_O_cmd = '1' then - + -- system memory maps + if MEM_O_addr = X"f0009000" and MEM_O_we = '1' then + -- onboard leds + IO_LEDS <= "0000" & MEM_O_data( 3 downto 0); + end if; + if MEM_O_addr = X"f0009000" and MEM_O_we = '0' then + -- onboard leds + IO_DATA <= X"000000" & IO_LEDS; + end if; + + if MEM_O_addr = mmio_addr_mtime_lo and MEM_O_we = '0' then + IO_DATA <= count12MHz_stable(31 downto 0); + end if; + + if MEM_O_addr = mmio_addr_mtime_hi and MEM_O_we = '0' then + IO_DATA <= count12MHz_stable(63 downto 32); + end if; + + + if MEM_O_addr = mmio_addr_mtimecmp0_lo and MEM_O_we = '1' then---1 + gcsr_mtimecmp0_lo <= MEM_O_data; + gcsr_mtimecmp0_lo_written <= '1'; + end if; + + if MEM_O_addr = mmio_addr_mtimecmp0_hi and MEM_O_we = '1' then---1 + gcsr_mtimecmp0_hi <= MEM_O_data; + if gcsr_mtimecmp0_lo_written = '1' then + --gcsr_mtimecmp0_hi_written <= '1'; + gcsr_mtimecmp_irq_reset <= '1'; + gcsr_mtimecmp0_lo_written <= '0'; + --gcsr_mtimecmp0_hi_written <= '0'; + end if; + end if; + + + if MEM_O_addr = mmio_addr_mtimecmp0_lo and MEM_O_we = '0' then + IO_DATA <= (gcsr_mtimecmp0_lo); + end if; + + if MEM_O_addr = mmio_addr_mtimecmp0_hi and MEM_O_we = '0' then + IO_DATA <= (gcsr_mtimecmp0_hi); + end if; + MEM_I_ready <= '0'; MEM_I_dataReady <= '0'; @@ -237,7 +701,15 @@ BEGIN -- DDR3 request, or immediate command? MEM_readyState <= SOC_CtlState_IMM_WriteCmdComplete; - + if (MEM_CS_BRAM_1 = '1') then + ROM(to_integer(unsigned( MEM_64KB_ADDR(15 downto 2)))) <= MEM_O_data; + end if; + if (MEM_CS_BRAM_2 = '1') then + ROM2(to_integer(unsigned( MEM_64KB_ADDR(15 downto 2)))) <= MEM_O_data; + end if; + if (MEM_CS_BRAM_3 = '1') then + ROM3(to_integer(unsigned( MEM_64KB_ADDR(15 downto 2)))) <= MEM_O_data; + end if; else -- DDR3 request, or immediate command? 
@@ -246,8 +718,7 @@ BEGIN end if; elsif MEM_readyState >= 1 then - - + -- Immediate commands do not cross clock domains and complete immediately if MEM_readyState = SOC_CtlState_IMM_ReadCmdComplete then MEM_I_ready <= '1'; @@ -260,6 +731,8 @@ BEGIN MEM_readyState <= SOC_CtlState_Ready; end if; + + end if; end if; end process; @@ -269,10 +742,13 @@ BEGIN stim_proc: process begin -- hold reset state for 100 ns. - wait for 100 ns; + wait for 20 ns; + memcontroller_reset_count <= 0; I_reset <= '0'; - + + wait; +-- end process; diff --git a/tests/tb_alu_int32_div.vhd b/tests/tb_alu_int32_div.vhd new file mode 100644 index 0000000..a93b935 --- /dev/null +++ b/tests/tb_alu_int32_div.vhd @@ -0,0 +1,168 @@ +---------------------------------------------------------------------------------- +-- Company: +-- Engineer: +-- +-- Create Date: 12.11.2018 22:51:11 +-- Design Name: +-- Module Name: rpu_core_tb - Behavioral +-- Project Name: +-- Target Devices: +-- Tool Versions: +-- Description: +-- +-- Dependencies: +-- +-- Revision: +-- Revision 0.01 - File Created +-- Additional Comments: +-- +---------------------------------------------------------------------------------- + + +library IEEE; +use IEEE.STD_LOGIC_1164.ALL; + +-- Uncomment the following library declaration if using +-- arithmetic functions with Signed or Unsigned values +use IEEE.NUMERIC_STD.ALL; + +-- Uncomment the following library declaration if instantiating +-- any Xilinx leaf cells in this code. 
+--library UNISIM; +--use UNISIM.VComponents.all; + +library work; +use work.constants.all; + +entity alu_int32_div_tb is +-- Port ( ); +end alu_int32_div_tb; + +architecture Behavioral of alu_int32_div_tb is + + + -- The RPU core definition + component alu_int32_div is + Port ( + I_clk : in STD_LOGIC; + I_exec : in STD_LOGIC; + I_dividend : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_divisor : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_op : in STD_LOGIC_VECTOR (1 downto 0); + O_dataResult : out STD_LOGIC_VECTOR (XLEN32M1 downto 0); + O_done : out STD_LOGIC; + O_int : out std_logic + ); + end component; + + signal I_clk : std_logic := '0'; + signal I_exec : std_logic := '0'; + signal I_dividend : std_logic_vector(31 downto 0) := (others => '0'); + signal I_divisor : std_logic_vector(31 downto 0) := (others => '0'); + signal I_op : std_logic_vector(1 downto 0) := (others => '0'); + signal O_dataResult : std_logic_vector(31 downto 0) := (others => '0'); + signal O_done : std_logic := '0'; + signal O_int : std_logic := '0'; + + + -- Clock period definitions + constant I_clk_period : time := 10 ns; +BEGIN + + -- Instantiate the Unit Under Test (UUT) + uut: alu_int32_div PORT MAP ( + I_clk => I_clk, + I_exec => I_exec, + I_dividend => I_dividend, + I_divisor => I_divisor, + I_op => I_op, + O_dataResult => O_dataResult, + O_done => O_done, + O_int => O_int + ); + + -- Clock process definitions + I_clk_process :process + begin + I_clk <= '0'; + wait for I_clk_period/2; + I_clk <= '1'; + wait for I_clk_period/2; + end process; + + + -- Stimulus process + stim_proc: process + begin + -- hold reset state for 100 ns. 
+ wait for 100 ns; + + wait for I_clk_period*10; + -- insert stimulus here + + I_dividend <= X"ffffffff"; + I_divisor <= X"00000000"; + I_op <= ALU_INT32_DIV_OP_DIVU; + I_exec <= '1'; + wait for I_clk_period; + I_exec <= '0'; + + wait for I_clk_period*500; + + + I_dividend <= X"0000000a"; + I_divisor <= X"0000000a"; + I_op <= ALU_INT32_DIV_OP_REMU; + I_exec <= '1'; + wait for I_clk_period; + I_exec <= '0'; + + wait for I_clk_period*500; + + I_dividend <= X"00001001"; + I_divisor <= X"00000111"; + I_op <= ALU_INT32_DIV_OP_REM; + I_exec <= '1'; + wait for I_clk_period; + I_exec <= '0'; + + wait for I_clk_period*500; + + I_dividend <= X"ffff0001"; + I_divisor <= X"00000111"; + I_op <= ALU_INT32_DIV_OP_DIV; + I_exec <= '1'; + wait for I_clk_period; + I_exec <= '0'; + + wait for I_clk_period*500; + + +-- I_dividend <= X"ffff0001"; +-- I_divisor <= X"00000111"; +-- I_op <= ALU_INT32_DIV_OP_DIVU; +-- I_exec <= '1'; +-- wait for I_clk_period; +-- I_exec <= '0'; +-- wait for I_clk_period*500; + + I_dividend <= X"00011101"; + I_divisor <= X"00000001"; + I_op <= ALU_INT32_DIV_OP_DIV; + I_exec <= '1'; + wait for I_clk_period; + I_exec <= '0'; + + wait for I_clk_period*500; + + I_dividend <= X"00010001"; + I_divisor <= X"00000111"; + I_op <= ALU_INT32_DIV_OP_DIV; + I_exec <= '1'; + wait for I_clk_period; + I_exec <= '0'; + wait; + end process; + + +end Behavioral; diff --git a/vhdl/alu_int32_div.vhd b/vhdl/alu_int32_div.vhd new file mode 100644 index 0000000..af8049a --- /dev/null +++ b/vhdl/alu_int32_div.vhd @@ -0,0 +1,183 @@ +---------------------------------------------------------------------------------- +-- Project Name: RISC-V CPU +-- Description: ALU unit for 32-bit integer division ops +-- +---------------------------------------------------------------------------------- +-- Copyright 2020 Colin Riley +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. 
+-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +---------------------------------------------------------------------------------- + +library IEEE; +use IEEE.STD_LOGIC_1164.all; + +-- Uncomment the following library declaration if using +-- arithmetic functions with Signed or Unsigned values +use IEEE.NUMERIC_STD.all; +library work; +use work.constants.all; + +entity alu_int32_div is + port ( + I_clk : in STD_LOGIC; + I_exec : in STD_LOGIC; + I_dividend : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_divisor : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_op : in STD_LOGIC_VECTOR (1 downto 0); + O_dataResult : out STD_LOGIC_VECTOR (XLEN32M1 downto 0); + O_done : out STD_LOGIC; + O_int : out std_logic + ); +end alu_int32_div; + +architecture Behavioral of alu_int32_div is + signal s_done : std_logic := '0'; + signal s_int : std_logic := '0'; + signal s_op : std_logic_vector(1 downto 0) := (others => '0'); + signal s_result : std_logic_vector(XLEN32M1 downto 0) := (others => '0'); + signal s_outsign : std_logic := '0'; + signal s_ur : unsigned(XLEN32M1 downto 0) := (others => '0'); + + signal s_i : integer := 0; + signal s_N : unsigned(XLEN32M1 downto 0) := (others => '0'); + signal s_D : unsigned(XLEN32M1 downto 0) := (others => '0'); + signal s_R : unsigned(XLEN32M1 downto 0) := (others => '0'); + signal s_Q : unsigned(XLEN32M1 downto 0) := (others => '0'); + constant STATE_IDLE : integer := 0; + constant STATE_INFLIGHTU : integer := 1; + constant STATE_COMPLETE : integer := 2; + + signal s_state : integer := 0; +begin + + process (I_clk) + begin + if rising_edge(I_clk) then + if s_state = 
STATE_IDLE then + s_done <= '0'; + if I_exec = '1' then + s_op <= I_op; + s_done <= '0'; + + if (I_divisor = X"00000000") then + s_state <= STATE_COMPLETE; + s_Q <= X"ffffffff"; + + if I_dividend(31) = '1' then + s_R <= unsigned(-signed(I_dividend)); + else + s_R <= unsigned(I_dividend); + end if; + + if (I_op = ALU_INT32_DIV_OP_DIV) or (I_op = ALU_INT32_DIV_OP_DIVU) then + s_outsign <= '0'; + else + s_outsign <= I_dividend(31); + end if; + + elsif (I_divisor = X"00000001") and (I_op = ALU_INT32_DIV_OP_DIV) then + s_state <= STATE_COMPLETE; + s_R <= X"00000000"; + if I_dividend(31) = '1' then + s_Q <= unsigned(-signed(I_dividend)); + else + s_Q <= unsigned(I_dividend); + end if; + s_outsign <= I_dividend(31); + + else + if I_op(ALU_INT32_DIV_OP_UNSIGNED_BIT) = '1' then + s_state <= STATE_INFLIGHTU; + s_N <= unsigned(I_dividend); + s_D <= unsigned(I_divisor); + s_ur <= X"00000000"; + s_Q <= X"00000000"; + s_R <= X"00000000"; + + s_i <= 31; + s_outsign <= '0'; + else + s_state <= STATE_INFLIGHTU; + + if (I_op = ALU_INT32_DIV_OP_DIV) then + s_outsign <= I_dividend(31) xor I_divisor(31); + else + s_outsign <= I_dividend(31); + end if; + + if I_dividend(31) = '1' then + s_N <= unsigned(-signed(I_dividend)); + else + s_N <= unsigned(I_dividend); + end if; + + if I_divisor(31) = '1' then + s_D <= unsigned(-signed(I_divisor)); + else + s_D <= unsigned(I_divisor); + end if; + + s_ur <= X"00000000"; + + s_Q <= X"00000000"; + s_R <= X"00000000"; + + s_i <= 31; + + end if; + end if; + end if; + + + elsif s_state = STATE_INFLIGHTU then + -- binary integer long division loop + if (s_R(30 downto 0) & s_N(s_i)) >= s_D then + s_R <= (s_R(30 downto 0) & s_N(s_i)) - s_D; + s_Q(s_i) <= '1'; + else + s_R <= s_R(30 downto 0) & s_N(s_i); + end if; + + if s_i = 0 then + s_state <= STATE_COMPLETE; + else + s_i <= s_i - 1; + end if; + + + elsif s_state = STATE_COMPLETE then + + if (s_op = ALU_INT32_DIV_OP_DIV) or (s_op = ALU_INT32_DIV_OP_DIVU) then + if (s_outsign = '1') then + s_result <= 
std_logic_vector(-signed(std_logic_vector(s_Q))); + else + s_result <= std_logic_vector(s_Q); + end if; + else + if (s_outsign = '1') then + s_result <= std_logic_vector(-signed(std_logic_vector(s_R))); + else + s_result <= std_logic_vector(s_R); + end if; + end if; + + s_done <= '1'; + s_state <= STATE_IDLE; + end if; + end if; + end process; + + O_dataResult <= s_result; + O_done <= s_done; + O_int <= s_int; +end Behavioral; \ No newline at end of file diff --git a/vhdl/constants.vhd b/vhdl/constants.vhd index b77f09d..48507e8 100644 --- a/vhdl/constants.vhd +++ b/vhdl/constants.vhd @@ -149,6 +149,24 @@ constant F7_OP_OR: std_logic_vector(6 downto 0) := "0000000"; constant F3_OP_AND: std_logic_vector(2 downto 0) := "111"; constant F7_OP_AND: std_logic_vector(6 downto 0) := "0000000"; +-- RV32M Extension +constant F7_OP_M_EXT: std_logic_vector(6 downto 0) := "0000001"; +constant F3_OP_M_MUL: std_logic_vector(2 downto 0) := "000"; +constant F3_OP_M_MULH: std_logic_vector(2 downto 0) := "001"; +constant F3_OP_M_MULHSU: std_logic_vector(2 downto 0) := "010"; +constant F3_OP_M_MULHU: std_logic_vector(2 downto 0) := "011"; +constant F3_OP_M_DIV: std_logic_vector(2 downto 0) := "100"; +constant F3_OP_M_DIVU: std_logic_vector(2 downto 0) := "101"; +constant F3_OP_M_REM: std_logic_vector(2 downto 0) := "110"; +constant F3_OP_M_REMU: std_logic_vector(2 downto 0) := "111"; + +-- bit 0 of the OP definitions denote unsigned ops; same as above +constant ALU_INT32_DIV_OP_UNSIGNED_BIT: integer := 0; +constant ALU_INT32_DIV_OP_DIV: std_logic_vector(1 downto 0) := "00"; +constant ALU_INT32_DIV_OP_DIVU: std_logic_vector(1 downto 0) := "01"; +constant ALU_INT32_DIV_OP_REM: std_logic_vector(1 downto 0) := "10"; +constant ALU_INT32_DIV_OP_REMU: std_logic_vector(1 downto 0) := "11"; + constant F3_MISCMEM_FENCE: std_logic_vector(2 downto 0) := "000"; constant F3_MISCMEM_FENCEI: std_logic_vector(2 downto 0) := "001"; @@ -262,6 +280,8 @@ constant CSR_OP_IMM_CLEAR_WR: std_logic_vector(4 
downto 0) := "11100"; constant CSR_OP_IMM_CLEAR_W: std_logic_vector(4 downto 0) := "11101"; constant CSR_OP_IMM_CLEAR_R: std_logic_vector(4 downto 0) := "11110"; + + end constants; package body constants is diff --git a/vhdl/control_unit.vhd b/vhdl/control_unit.vhd index c81dd25..202769a 100644 --- a/vhdl/control_unit.vhd +++ b/vhdl/control_unit.vhd @@ -18,191 +18,286 @@ -- limitations under the License. ---------------------------------------------------------------------------------- library IEEE; -use IEEE.STD_LOGIC_1164.ALL; +use IEEE.STD_LOGIC_1164.all; library work; use work.constants.all; entity control_unit is - Port ( - I_clk : in STD_LOGIC; - I_reset : in STD_LOGIC; - I_halt: in STD_LOGIC; - I_aluop : in STD_LOGIC_VECTOR (6 downto 0); - + port ( + I_clk : in STD_LOGIC; + I_reset : in STD_LOGIC; + I_halt : in STD_LOGIC; + I_aluop : in STD_LOGIC_VECTOR (6 downto 0); + -- interrupts - I_int_enabled: in std_logic; - I_int: in STD_LOGIC; - O_int_ack: out STD_LOGIC; - I_int_mem_data: in STD_LOGIC_VECTOR(XLENM1 downto 0); - O_idata: out STD_LOGIC_VECTOR(XLENM1 downto 0); - O_set_idata:out STD_LOGIC; - O_set_ipc: out STD_LOGIC; - O_set_irpc: out STD_LOGIC; - O_instTick: out STD_LOGIC; - - -- mem controller state and control - I_ready: in STD_LOGIC; - O_execute: out STD_LOGIC; - I_dataReady: in STD_LOGIC; - - O_state : out STD_LOGIC_VECTOR (6 downto 0) + I_int_enabled : in std_logic; + I_int : in STD_LOGIC; + O_int_ack : out STD_LOGIC; + I_int_mem_data : in STD_LOGIC_VECTOR(XLENM1 downto 0); + O_idata : out STD_LOGIC_VECTOR(XLENM1 downto 0); + O_set_idata : out STD_LOGIC; + O_set_ipc : out STD_LOGIC; + O_set_irpc : out STD_LOGIC; + O_instTick : out STD_LOGIC; + -- mem controller state and control + I_misalignment : in STD_LOGIC; + I_ready : in STD_LOGIC; + O_execute : out STD_LOGIC; + I_dataReady : in STD_LOGIC; + + -- alu stall input + I_aluWait : in STD_LOGIC; + I_aluMultiCy : in STD_LOGIC; + O_state : out STD_LOGIC_VECTOR (6 downto 0) ); end control_unit;
architecture Behavioral of control_unit is - signal s_state: STD_LOGIC_VECTOR(6 downto 0) := "0000001"; - - signal mem_ready: std_logic; - signal mem_execute: std_logic:='0'; - signal mem_dataReady: std_logic; - - signal mem_cycles : integer := 0; - - signal next_s_state: STD_LOGIC_VECTOR(6 downto 0) := "0000001"; - - signal interrupt_state: STD_LOGIC_VECTOR(2 downto 0) := "000"; - signal interrupt_ack: STD_LOGIC := '0'; - signal interrupt_was_inactive: STD_LOGIC := '1'; - signal set_idata: STD_LOGIC := '0'; - signal set_ipc: STD_LOGIC := '0'; - signal instTick: STD_LOGIC := '0'; + signal s_state : STD_LOGIC_VECTOR(6 downto 0) := "0000001"; + + signal mem_ready : std_logic; + signal mem_execute : std_logic := '0'; + signal mem_dataReady : std_logic; + + signal mem_cycles : integer := 0; + + signal next_s_state : STD_LOGIC_VECTOR(6 downto 0) := "0000001"; + + signal interrupt_state : STD_LOGIC_VECTOR(2 downto 0) := "000"; + signal interrupt_ack : STD_LOGIC := '0'; + signal interrupt_was_inactive : STD_LOGIC := '1'; + signal set_idata : STD_LOGIC := '0'; + signal set_ipc : STD_LOGIC := '0'; + signal instTick : STD_LOGIC := '0'; + signal s_hasWaited : STD_LOGIC := '0'; + + signal s_check_alignint : integer := 0; begin - O_execute <= mem_execute; - mem_ready <= I_ready; - mem_dataReady <= I_dataReady; - O_int_ack <= interrupt_ack; - O_set_idata <= set_idata; - O_set_irpc <= set_idata; - O_set_ipc <= set_ipc; - O_instTick <= instTick; + O_execute <= mem_execute; + mem_ready <= I_ready; + mem_dataReady <= I_dataReady; + O_int_ack <= interrupt_ack; + O_set_idata <= set_idata; + O_set_irpc <= set_idata; + O_set_ipc <= set_ipc; + O_instTick <= instTick; + + process (I_clk) + begin + if rising_edge(I_clk) and I_halt = '0' then + + if I_reset = '1' then + s_state <= "0000001"; + next_s_state <= "0000001"; + mem_cycles <= 0; + mem_execute <= '0'; + -- also clear the ALU-wait flag and the alignment-stall counter so a reset never resumes a stale stall + s_hasWaited <= '0'; + s_check_alignint <= 0; + interrupt_was_inactive <= '1'; + interrupt_ack <= '0'; + interrupt_state <= "000"; + set_ipc <= '0'; + O_idata <= X"00000000"; +
set_idata <= '0'; + instTick <= '0'; + else + case s_state is + --------------------------- + -- FETCH + when "0000001" => -- fetch + + if s_check_alignint /= 0 then + -- If we've seen an alignment hint we need to stall here for s_check_alignint + -- cycles, checking for an interrupt each time. When it's 0 we give up. + if I_int_enabled = '1' and interrupt_was_inactive = '1' and I_int = '1' then + interrupt_ack <= '1'; + interrupt_was_inactive <= '0'; + interrupt_state <= "001"; + next_s_state <= "0000001"; --F + s_state <= "1000000"; --S + s_check_alignint <= 0; + else + s_check_alignint <= s_check_alignint - 1; + end if; + else - process(I_clk) - begin - if rising_edge(I_clk) and I_halt = '0' then - if I_reset = '1' then - s_state <= "0000001"; - next_s_state <= "0000001"; - mem_cycles <= 0; - mem_execute <= '0'; - interrupt_was_inactive <= '1'; - interrupt_ack <= '0'; - interrupt_state <= "000"; - set_ipc <= '0'; - O_idata <= X"00000000"; - set_idata <= '0'; - instTick <= '0'; - else - case s_state is - when "0000001" => -- fetch - if I_int = '0' then - interrupt_was_inactive <= '1'; - end if; - instTick <= '0'; - if mem_cycles = 0 and mem_ready = '1' then - mem_execute <= '1'; - mem_cycles <= 1; - - elsif mem_cycles = 1 then - mem_execute <= '0'; - mem_cycles <= 2; - - elsif mem_cycles = 2 then - if mem_dataReady = '1' then - mem_cycles <= 0; - s_state <= "0000010"; - end if; - end if; - when "0000010" => --- decode - if I_int = '0' then - interrupt_was_inactive <= '1'; - end if; - s_state <= "0001000"; --E "0000100"; --R - when "0000100" => -- read -- DEPRECATED STAGE - s_state <= "0001000"; --E - when "0001000" => -- execute - if I_int = '0' then - interrupt_was_inactive <= '1'; - end if; - --MEM/WB - -- if it's not a memory alu op, goto writeback - if (I_aluop(6 downto 2) = OPCODE_LOAD or - I_aluop(6 downto 2) = OPCODE_STORE) then - s_state <= "0010000"; -- MEM - else - s_state <= "0100000"; -- WB - end if; - when "0010000" => -- mem - if I_int = '0' then - 
interrupt_was_inactive <= '1'; - end if; - -- sometimes memory can be busy, if so we need to relook here - if mem_cycles = 0 and mem_ready = '1' then - mem_execute <= '1'; - mem_cycles <= 1; - - elsif mem_cycles = 1 then - mem_execute <= '0'; - -- if it's a write, go through - if I_aluop(6 downto 2) = OPCODE_STORE then - mem_cycles <= 0; - s_state <= "0100000"; -- WB - elsif mem_dataReady = '1' then - -- if read, wait for data - mem_cycles <= 0; - s_state <= "0100000"; -- WB - end if; - end if; - when "0100000" => -- writeback - -- check interrupt? - if I_int_enabled='1' and interrupt_was_inactive = '1' and I_int = '1' then - interrupt_ack <= '1'; - interrupt_was_inactive <= '0'; - interrupt_state <= "001"; - next_s_state <= "0000001"; --F - s_state <= "1000000"; --F - else if I_int = '0' then interrupt_was_inactive <= '1'; end if; - s_state <= "0000001"; --F - end if; - instTick <= '1'; - when "1000000" => -- stalls - if I_int = '0' then - interrupt_was_inactive <= '1'; - end if; - instTick <= '0'; - -- interrupt stall - if interrupt_state = "001" then - -- give a cycle of latency - -- set PC to interrupt vector. 
- - set_ipc <= '1'; + instTick <= '0'; + if mem_cycles = 0 and mem_ready = '1' then + mem_execute <= '1'; + mem_cycles <= 1; + + elsif mem_cycles = 1 then + mem_execute <= '0'; + mem_cycles <= 2; + + elsif mem_cycles = 2 then + mem_execute <= '0'; + if mem_dataReady = '1' then + mem_cycles <= 0; + s_state <= "0000010"; + end if; + end if; + + end if; + + --------------------------- + -- DECODE + when "0000010" => --- decode + if I_int = '0' then + interrupt_was_inactive <= '1'; + end if; + s_hasWaited <= '0'; + s_state <= "0001000"; --E "0000100"; --R + + + --------------------------- + -- EXECUTE + when "0001000" => -- execute + if I_int = '0' then + interrupt_was_inactive <= '1'; + end if; + --MEM/WB + -- if it's not a memory alu op, goto writeback + if (I_aluop(6 downto 2) = OPCODE_LOAD or + I_aluop(6 downto 2) = OPCODE_STORE) then + s_state <= "0010000"; -- MEM + + -- -- mem load short cut + -- ISSUE - this fails to take into account the type of request, sizing, address correctly + -- and therefore needs removed for compliance to pass. + -- if I_misalignment = '0' and mem_cycles = 0 and mem_ready = '1' then + -- mem_execute <= '1'; + -- mem_cycles <= 1; + -- end if; + + else + if I_aluWait = '0' then + if I_aluMultiCy = '1' then + if s_hasWaited = '1' then + s_state <= "0100000"; -- WB + end if; + else + s_state <= "0100000"; -- WB + end if; + s_hasWaited <= '1'; + end if; + end if; + + --------------------------- + -- MEMORY + when "0010000" => -- mem + if I_int = '0' then + interrupt_was_inactive <= '1'; + end if; + + -- alignment traps here are tricky. + -- if we see the misalignment hint, wait 6 cycles and then test interrupt stall. + -- if no interrupt we need to re-run the stage. 
+ + if I_misalignment = '1' and s_check_alignint = 0 then + s_check_alignint <= 6; + elsif s_check_alignint > 0 then + if I_int_enabled = '1' and interrupt_was_inactive = '1' and I_int = '1' then + interrupt_ack <= '1'; + interrupt_was_inactive <= '0'; + interrupt_state <= "001"; + next_s_state <= "0000001"; --F + s_state <= "1000000"; --S + s_check_alignint <= 0; + else + s_check_alignint <= s_check_alignint - 1; + end if; + + else + if mem_cycles = 0 and mem_ready = '1' then + + mem_execute <= '1'; + mem_cycles <= 1; + + elsif mem_cycles = 1 then + mem_execute <= '0'; + -- if it's a write, go through + if I_aluop(6 downto 2) = OPCODE_STORE then + mem_cycles <= 0; + s_state <= "0100000"; -- WB + elsif mem_dataReady = '1' then + -- if read, wait for data + mem_cycles <= 0; + s_state <= "0100000"; -- WB + end if; + end if; + end if; + + + --------------------------- + -- WRITEBACK + when "0100000" => -- writeback + -- check interrupt? + if I_int_enabled = '1' and interrupt_was_inactive = '1' and I_int = '1' then + interrupt_ack <= '1'; + interrupt_was_inactive <= '0'; + interrupt_state <= "001"; + next_s_state <= "0000001"; --F + s_state <= "1000000"; --S + else + if I_int = '0' then + interrupt_was_inactive <= '1'; + end if; + if I_misalignment = '1' and s_check_alignint = 0 then + s_check_alignint <= 3; + end if; + + -- misalign interrupts take a while to propagate + -- this signal short cuts to ensure we can catch any misalignments before fetch. + + s_state <= "0000001"; --F + + -- if the mem system is ready, shortcut the fetch + -- at this point, the next PC/Branch should be set. + -- need to ensure the sizing is correct.
+ if I_misalignment = '0' and mem_cycles = 0 and mem_ready = '1' then -- shortcut + mem_execute <= '1'; -- shortcut + mem_cycles <= 2; -- shortcut + end if; -- shortcut + + end if; + instTick <= '1'; + when "1000000" => -- stalls + if I_int = '0' then + interrupt_was_inactive <= '1'; + end if; + instTick <= '0'; + -- interrupt stall + if interrupt_state = "001" then + -- give a cycle of latency + -- set PC to interrupt vector. + + set_ipc <= '1'; interrupt_state <= "101"; - - -- interrupt_ack <= '0'; + elsif interrupt_state = "101" then set_ipc <= '0'; interrupt_ack <= '0'; - interrupt_state <= "111"; - elsif interrupt_state = "111" then - interrupt_state <= "000"; - s_state <= "0000001"; --F - end if; - when "1001000" => - -- alu 1 cycle stall - s_state <= "0100000"; -- WB - when others => - s_state <= "0000001"; - end case; - end if; - end if; - end process; - - O_state <= s_state; -end Behavioral; - - + interrupt_state <= "111"; + elsif interrupt_state = "111" then + interrupt_state <= "000"; + s_state <= "0000001"; --F + end if; + when "1001000" => + -- alu 1 cycle stall + s_state <= "0100000"; -- WB + when others => + s_state <= "0000001"; + end case; + end if; + end if; + end process; + O_state <= s_state; +end Behavioral; \ No newline at end of file diff --git a/vhdl/core.vhd b/vhdl/core.vhd index 122d236..99df5dc 100644 --- a/vhdl/core.vhd +++ b/vhdl/core.vhd @@ -21,231 +21,228 @@ -- limitations under the License. 
---------------------------------------------------------------------------------- library IEEE; -use IEEE.STD_LOGIC_1164.ALL; - +use IEEE.STD_LOGIC_1164.all; + library work; use work.constants.all; - + entity core is - Port ( - I_clk : in STD_LOGIC; - I_reset : in STD_LOGIC; - I_halt : in STD_LOGIC; - + port ( + I_clk : in STD_LOGIC; + I_reset : in STD_LOGIC; + I_halt : in STD_LOGIC; + -- External Interrupt interface - I_int_data: in STD_LOGIC_VECTOR(31 downto 0); - I_int: in STD_LOGIC; - O_int_ack: out STD_LOGIC; - + I_int_data : in STD_LOGIC_VECTOR(31 downto 0); + I_int : in STD_LOGIC; + O_int_ack : out STD_LOGIC; + -- memory interface - MEM_I_ready : IN std_logic; - MEM_O_cmd : OUT std_logic; - MEM_O_we : OUT std_logic; + MEM_I_ready : in std_logic; + MEM_O_cmd : out std_logic; + MEM_O_we : out std_logic; -- fixme: this is not a true byteEnable and so is confusing. -- Will be fixed when memory swizzling is brought core-size - MEM_O_byteEnable : OUT std_logic_vector(1 downto 0); - MEM_O_addr : OUT std_logic_vector(XLEN32M1 downto 0); - MEM_O_data : OUT std_logic_vector(XLEN32M1 downto 0); - MEM_I_data : IN std_logic_vector(XLEN32M1 downto 0); - MEM_I_dataReady : IN std_logic - + MEM_O_byteEnable : out std_logic_vector(1 downto 0); + MEM_O_addr : out std_logic_vector(XLEN32M1 downto 0); + MEM_O_data : out std_logic_vector(XLEN32M1 downto 0); + MEM_I_data : in std_logic_vector(XLEN32M1 downto 0); + MEM_I_dataReady : in std_logic + ; -- This debug output contains some internal state for debugging - O_halted: OUT std_logic; - O_DBG:out std_logic_vector(63 downto 0) - ); + O_halted : out std_logic; + O_DBG : out std_logic_vector(63 downto 0) + ); end core; architecture Behavioral of core is - COMPONENT pc_unit - PORT( - I_clk : IN std_logic; - I_nPC : IN std_logic_vector(XLENM1 downto 0); - I_nPCop : IN std_logic_vector(1 downto 0); - I_intVec: IN std_logic; - O_PC : OUT std_logic_vector(XLENM1 downto 0) + component pc_unit + port ( + I_clk : in std_logic; + I_nPC : 
in std_logic_vector(XLENM1 downto 0); + I_nPCop : in std_logic_vector(1 downto 0); + I_intVec : in std_logic; + O_PC : out std_logic_vector(XLENM1 downto 0) ); - END COMPONENT; - - COMPONENT control_unit - PORT ( - I_clk : in STD_LOGIC; - I_halt: in STD_LOGIC; - I_reset : in STD_LOGIC; - I_aluop : in STD_LOGIC_VECTOR (6 downto 0); - O_state : out STD_LOGIC_VECTOR (6 downto 0); - - I_int: in STD_LOGIC; - O_int_ack: out STD_LOGIC; - - I_int_enabled: in STD_LOGIC; - I_int_mem_data: in STD_LOGIC_VECTOR(XLENM1 downto 0); - O_idata: out STD_LOGIC_VECTOR(XLENM1 downto 0); - O_set_idata:out STD_LOGIC; - O_set_ipc: out STD_LOGIC; - O_set_irpc: out STD_LOGIC; - O_instTick: out STD_LOGIC; - - I_ready: in STD_LOGIC; - O_execute: out STD_LOGIC; - I_dataReady: in STD_LOGIC - ); - END COMPONENT; - - - COMPONENT decoder_RV32 - PORT( - I_clk : IN std_logic; - I_en : IN std_logic; - I_dataInst : IN std_logic_vector(31 downto 0); - O_selRS1 : OUT std_logic_vector(4 downto 0); - O_selRS2 : OUT std_logic_vector(4 downto 0); - O_selD : OUT std_logic_vector(4 downto 0); - O_dataIMM : OUT std_logic_vector(31 downto 0); - O_regDwe : OUT std_logic; - O_aluOp : OUT std_logic_vector(6 downto 0); - O_aluFunc : OUT std_logic_vector(15 downto 0); - O_memOp : out STD_LOGIC_VECTOR(4 downto 0); - O_csrOP : out STD_LOGIC_VECTOR(4 downto 0); - O_csrAddr : out STD_LOGIC_VECTOR(11 downto 0); - O_trapExit: out STD_LOGIC; - O_int : out STD_LOGIC; - O_int_data : out STD_LOGIC_VECTOR (31 downto 0); - I_int_ack: in STD_LOGIC - ); - END COMPONENT; - - - component alu_RV32I is - Port ( I_clk : in STD_LOGIC; - I_en : in STD_LOGIC; - I_dataA : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); - I_dataB : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); - I_dataDwe : in STD_LOGIC; - I_aluop : in STD_LOGIC_VECTOR (4 downto 0); - I_aluFunc : in STD_LOGIC_VECTOR (15 downto 0); - I_PC : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); - I_epc : in STD_LOGIC_VECTOR (XLENM1 downto 0); - I_dataIMM : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); - 
O_dataResult : out STD_LOGIC_VECTOR (XLEN32M1 downto 0); - O_branchTarget : out STD_LOGIC_VECTOR (XLEN32M1 downto 0); - O_dataWriteReg : out STD_LOGIC; - O_lastPC: out STD_LOGIC_VECTOR(XLEN32M1 downto 0); - O_shouldBranch : out std_logic - - ); end component; - - COMPONENT register_set - PORT( - I_clk : IN std_logic; - I_en : IN std_logic; - I_dataD : IN std_logic_vector(31 downto 0); - I_selRS1 : IN std_logic_vector(4 downto 0); - I_selRS2 : IN std_logic_vector(4 downto 0); - I_selD : IN std_logic_vector(4 downto 0); - I_we : IN std_logic; - O_dataA : OUT std_logic_vector(31 downto 0); - O_dataB : OUT std_logic_vector(31 downto 0) - ); - END COMPONENT; - - COMPONENT csr_unit - PORT ( - I_clk : in STD_LOGIC; - I_en : in STD_LOGIC; - - I_dataIn : in STD_LOGIC_VECTOR(XLENM1 downto 0); - O_dataOut : out STD_LOGIC_VECTOR(XLENM1 downto 0); - - I_csrOp : in STD_LOGIC_VECTOR (4 downto 0); - I_csrAddr : in STD_LOGIC_VECTOR (11 downto 0); - - -- This unit can raise exceptions - O_int : out STD_LOGIC; - O_int_data : out STD_LOGIC_VECTOR (31 downto 0); - - I_instRetTick : in STD_LOGIC; - - - -- interrupt handling causes many data dependencies - -- mcause has a fast path in from other units - I_int_cause: in STD_LOGIC_VECTOR (XLENM1 downto 0); - I_int_pc: in STD_LOGIC_VECTOR (XLENM1 downto 0); - -- We need to know when an interrupt occurs as to perform the - -- relevant csr modifications. Same with exit. 
- I_int_entry: IN STD_LOGIC; - I_int_exit: IN STD_LOGIC; - -- Currently just feeds machine level CSR values - O_csr_status : out STD_LOGIC_VECTOR (XLENM1 downto 0); - O_csr_cause : out STD_LOGIC_VECTOR (XLENM1 downto 0); - O_csr_ie : out STD_LOGIC_VECTOR (XLENM1 downto 0); - O_csr_tvec : out STD_LOGIC_VECTOR (XLENM1 downto 0); - O_csr_epc : out STD_LOGIC_VECTOR (XLENM1 downto 0) - ); - END COMPONENT; - - COMPONENT lint_unit - PORT ( - I_clk : in STD_LOGIC; - I_reset : in STD_LOGIC; - I_nextPc : in STD_LOGIC_VECTOR (31 downto 0); - I_enMask : in STD_LOGIC_VECTOR (3 downto 0); - I_pc : in STD_LOGIC_VECTOR (31 downto 0); - I_int0 : in STD_LOGIC; - I_int_data0 : in STD_LOGIC_VECTOR (31 downto 0); - O_int0_ack: out STD_LOGIC; - I_int1 : in STD_LOGIC; - I_int_data1 : in STD_LOGIC_VECTOR (31 downto 0); - O_int1_ack: out STD_LOGIC; - I_int2 : in STD_LOGIC; - I_int_data2 : in STD_LOGIC_VECTOR (31 downto 0); - O_int2_ack: out STD_LOGIC; - I_int3 : in STD_LOGIC; - I_int_data3 : in STD_LOGIC_VECTOR (31 downto 0); - O_int3_ack: out STD_LOGIC; - O_int : out STD_LOGIC; - O_int_data : out STD_LOGIC_VECTOR (31 downto 0); - O_int_epc : out STD_LOGIC_VECTOR (31 downto 0) - ); - END COMPONENT; - - COMPONENT mem_controller - PORT( - I_clk : IN std_logic; - I_reset : IN std_logic; - O_ready : OUT std_logic; - I_execute : IN std_logic; - I_dataWe : IN std_logic; - I_address : IN std_logic_vector(XLENM1 downto 0); - I_data : IN std_logic_vector(XLENM1 downto 0); - I_dataByteEn : IN std_logic_vector(1 downto 0); - I_signExtend : in STD_LOGIC; - O_data : OUT std_logic_vector(XLENM1 downto 0); - O_dataReady : OUT std_logic; - MEM_I_ready : IN std_logic; - MEM_O_cmd : OUT std_logic; - MEM_O_we : OUT std_logic; - MEM_O_byteEnable : OUT std_logic_vector(1 downto 0); - MEM_O_addr : OUT std_logic_vector(XLENM1 downto 0); - MEM_O_data : OUT std_logic_vector(XLENM1 downto 0); - MEM_I_data : IN std_logic_vector(XLENM1 downto 0); - MEM_I_dataReady : IN std_logic - ); - END COMPONENT; + component 
control_unit + port ( + I_clk : in STD_LOGIC; + I_halt : in STD_LOGIC; + I_reset : in STD_LOGIC; + I_aluop : in STD_LOGIC_VECTOR (6 downto 0); + O_state : out STD_LOGIC_VECTOR (6 downto 0); - + I_int : in STD_LOGIC; + O_int_ack : out STD_LOGIC; + + I_int_enabled : in STD_LOGIC; + I_int_mem_data : in STD_LOGIC_VECTOR(XLENM1 downto 0); + O_idata : out STD_LOGIC_VECTOR(XLENM1 downto 0); + O_set_idata : out STD_LOGIC; + O_set_ipc : out STD_LOGIC; + O_set_irpc : out STD_LOGIC; + O_instTick : out STD_LOGIC; + + I_misalignment : in STD_LOGIC; + I_ready : in STD_LOGIC; + O_execute : out STD_LOGIC; + I_dataReady : in STD_LOGIC; + I_aluMultiCy : in STD_LOGIC; + I_aluWait : in STD_LOGIC + ); + end component; + component decoder_RV32 + port ( + I_clk : in std_logic; + I_en : in std_logic; + I_dataInst : in std_logic_vector(31 downto 0); + O_selRS1 : out std_logic_vector(4 downto 0); + O_selRS2 : out std_logic_vector(4 downto 0); + O_selD : out std_logic_vector(4 downto 0); + O_dataIMM : out std_logic_vector(31 downto 0); + O_regDwe : out std_logic; + O_aluOp : out std_logic_vector(6 downto 0); + O_aluFunc : out std_logic_vector(15 downto 0); + O_memOp : out STD_LOGIC_VECTOR(4 downto 0); + O_csrOP : out STD_LOGIC_VECTOR(4 downto 0); + O_csrAddr : out STD_LOGIC_VECTOR(11 downto 0); + O_trapExit : out STD_LOGIC; + O_multycyAlu : out STD_LOGIC; + O_int : out STD_LOGIC; + O_int_data : out STD_LOGIC_VECTOR (31 downto 0); + I_int_ack : in STD_LOGIC + ); + end component; + component alu_RV32I is + port ( + I_clk : in STD_LOGIC; + I_en : in STD_LOGIC; + I_dataA : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_dataB : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_dataDwe : in STD_LOGIC; + I_aluop : in STD_LOGIC_VECTOR (4 downto 0); + I_aluFunc : in STD_LOGIC_VECTOR (15 downto 0); + I_PC : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_epc : in STD_LOGIC_VECTOR (XLENM1 downto 0); + I_dataIMM : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_clear : in STD_LOGIC; + O_dataResult : out 
STD_LOGIC_VECTOR (XLEN32M1 downto 0); + O_branchTarget : out STD_LOGIC_VECTOR (XLEN32M1 downto 0); + O_dataWriteReg : out STD_LOGIC; + O_lastPC : out STD_LOGIC_VECTOR(XLEN32M1 downto 0); + O_shouldBranch : out std_logic; + O_wait : out std_logic + + ); + end component; + + component register_set + port ( + I_clk : in std_logic; + I_en : in std_logic; + I_dataD : in std_logic_vector(31 downto 0); + I_selRS1 : in std_logic_vector(4 downto 0); + I_selRS2 : in std_logic_vector(4 downto 0); + I_selD : in std_logic_vector(4 downto 0); + I_we : in std_logic; + O_dataA : out std_logic_vector(31 downto 0); + O_dataB : out std_logic_vector(31 downto 0) + ); + end component; + + component csr_unit + port ( + I_clk : in STD_LOGIC; + I_en : in STD_LOGIC; + + I_dataIn : in STD_LOGIC_VECTOR(XLENM1 downto 0); + O_dataOut : out STD_LOGIC_VECTOR(XLENM1 downto 0); + + I_csrOp : in STD_LOGIC_VECTOR (4 downto 0); + I_csrAddr : in STD_LOGIC_VECTOR (11 downto 0); + + -- This unit can raise exceptions + O_int : out STD_LOGIC; + O_int_data : out STD_LOGIC_VECTOR (31 downto 0); + + I_instRetTick : in STD_LOGIC; + -- interrupt handling causes many data dependencies + -- mcause has a fast path in from other units + I_int_cause : in STD_LOGIC_VECTOR (XLENM1 downto 0); + I_int_pc : in STD_LOGIC_VECTOR (XLENM1 downto 0); + I_int_mtval : in STD_LOGIC_VECTOR (XLENM1 downto 0); + -- We need to know when an interrupt occurs as to perform the + -- relevant csr modifications. Same with exit. 
+ I_int_entry : in STD_LOGIC; + I_int_exit : in STD_LOGIC; + + -- Currently just feeds machine level CSR values + O_csr_status : out STD_LOGIC_VECTOR (XLENM1 downto 0); + O_csr_cause : out STD_LOGIC_VECTOR (XLENM1 downto 0); + O_csr_ie : out STD_LOGIC_VECTOR (XLENM1 downto 0); + O_csr_tvec : out STD_LOGIC_VECTOR (XLENM1 downto 0); + O_csr_epc : out STD_LOGIC_VECTOR (XLENM1 downto 0) + ); + end component; + + component lint_unit + port ( + I_clk : in STD_LOGIC; + I_reset : in STD_LOGIC; + I_nextPc : in STD_LOGIC_VECTOR (31 downto 0); + I_enMask : in STD_LOGIC_VECTOR (3 downto 0); + I_pc : in STD_LOGIC_VECTOR (31 downto 0); + I_int0 : in STD_LOGIC; + I_int_data0 : in STD_LOGIC_VECTOR (31 downto 0); + O_int0_ack : out STD_LOGIC; + I_int1 : in STD_LOGIC; + I_int_data1 : in STD_LOGIC_VECTOR (31 downto 0); + O_int1_ack : out STD_LOGIC; + I_int2 : in STD_LOGIC; + I_int_data2 : in STD_LOGIC_VECTOR (31 downto 0); + O_int2_ack : out STD_LOGIC; + I_int3 : in STD_LOGIC; + I_int_data3 : in STD_LOGIC_VECTOR (31 downto 0); + O_int3_ack : out STD_LOGIC; + O_int : out STD_LOGIC; + O_int_data : out STD_LOGIC_VECTOR (31 downto 0); + O_int_epc : out STD_LOGIC_VECTOR (31 downto 0) + ); + end component; + + component mem_controller + port ( + I_clk : in std_logic; + I_reset : in std_logic; + O_ready : out std_logic; + I_execute : in std_logic; + I_dataWe : in std_logic; + I_address : in std_logic_vector(XLENM1 downto 0); + I_data : in std_logic_vector(XLENM1 downto 0); + I_dataByteEn : in std_logic_vector(1 downto 0); + I_signExtend : in STD_LOGIC; + O_data : out std_logic_vector(XLENM1 downto 0); + O_dataReady : out std_logic; + MEM_I_ready : in std_logic; + MEM_O_cmd : out std_logic; + MEM_O_we : out std_logic; + MEM_O_byteEnable : out std_logic_vector(1 downto 0); + MEM_O_addr : out std_logic_vector(XLENM1 downto 0); + MEM_O_data : out std_logic_vector(XLENM1 downto 0); + MEM_I_data : in std_logic_vector(XLENM1 downto 0); + MEM_I_dataReady : in std_logic + ); + end component; signal 
state : std_logic_vector(6 downto 0) := (others => '0'); - - - signal pcop: std_logic_vector(1 downto 0); - signal in_pc: std_logic_vector(XLENM1 downto 0); - - signal aluFunc: std_logic_vector(15 downto 0); - signal memOp: std_logic_vector(4 downto 0); - - signal branchTarget:std_logic_vector(XLENM1 downto 0) := (others => '0'); - + signal pcop : std_logic_vector(1 downto 0); + signal in_pc : std_logic_vector(XLENM1 downto 0); + + signal aluFunc : std_logic_vector(15 downto 0); + signal memOp : std_logic_vector(4 downto 0); + signal branchTarget : std_logic_vector(XLENM1 downto 0) := (others => '0'); + signal instruction : std_logic_vector(XLENM1 downto 0) := (others => '0'); signal dataA : std_logic_vector(XLENM1 downto 0) := (others => '0'); signal dataB : std_logic_vector(XLENM1 downto 0) := (others => '0'); @@ -255,26 +252,26 @@ architecture Behavioral of core is signal selRS1 : std_logic_vector(4 downto 0) := (others => '0'); signal selRS2 : std_logic_vector(4 downto 0) := (others => '0'); signal selD : std_logic_vector(4 downto 0) := (others => '0'); - signal dataregWrite: std_logic := '0'; + signal dataregWrite : std_logic := '0'; signal dataResult : std_logic_vector(XLENM1 downto 0) := (others => '0'); + signal latchedDataResult : std_logic_vector(XLENM1 downto 0) := (others => '0'); signal dataWriteReg : std_logic := '0'; signal shouldBranch : std_logic := '0'; signal memMode : std_logic := '0'; signal ram_req_size : std_logic := '0'; - - - signal decoder_int: STD_LOGIC; - signal decoder_int_data: STD_LOGIC_VECTOR(XLENM1 downto 0); - signal decoder_int_ack: STD_LOGIC := '0'; - signal decoder_trap_exit: STD_LOGIC := '0'; - - - signal reg_en: std_logic := '0'; - signal reg_we: std_logic := '0'; - - signal registerWriteData : std_logic_vector(XLENM1 downto 0) := (others=>'0'); - signal alu_or_csr_output : std_logic_vector(XLENM1 downto 0) := (others=>'0'); - + signal alu_wait : std_logic := '0'; + signal alutobemulticycle : std_logic := '0'; + + signal 
decoder_int : STD_LOGIC; + signal decoder_int_data : STD_LOGIC_VECTOR(XLENM1 downto 0); + signal decoder_int_ack : STD_LOGIC := '0'; + signal decoder_trap_exit : STD_LOGIC := '0'; + signal reg_en : std_logic := '0'; + signal reg_we : std_logic := '0'; + + signal registerWriteData : std_logic_vector(XLENM1 downto 0) := (others => '0'); + signal alu_or_csr_output : std_logic_vector(XLENM1 downto 0) := (others => '0'); + signal en_fetch : std_logic := '0'; signal en_decode : std_logic := '0'; signal en_alu : std_logic := '0'; @@ -282,41 +279,41 @@ architecture Behavioral of core is signal en_memory : std_logic := '0'; signal en_regwrite : std_logic := '0'; signal en_stall : std_logic := '0'; - + signal PC : std_logic_vector(XLENM1 downto 0) := (others => '0'); signal PC_at_int : std_logic_vector(XLENM1 downto 0) := (others => '0'); signal lastPC_dec : std_logic_vector(XLENM1 downto 0) := (others => '0'); signal lastPC_alu : std_logic_vector(XLENM1 downto 0) := (others => '0'); signal nextPC_stall : std_logic_vector(XLENM1 downto 0) := (others => '0'); - - signal memctl_ready : std_logic; - signal memctl_execute : std_logic := '0'; - signal memctl_dataWe : std_logic; - signal memctl_address : std_logic_vector(XLENM1 downto 0); - signal memctl_in_data : std_logic_vector(XLENM1 downto 0); - signal memctl_dataByteEn : std_logic_vector(1 downto 0); - signal memctl_out_data : std_logic_vector(XLENM1 downto 0) := (others => '0'); - signal memctl_dataReady : std_logic := '0'; + signal mtval : std_logic_vector(XLENM1 downto 0) := (others => '0'); + signal memctl_ready : std_logic; + signal memctl_execute : std_logic := '0'; + signal memctl_dataWe : std_logic; + signal memctl_address : std_logic_vector(XLENM1 downto 0); + signal memctl_in_data : std_logic_vector(XLENM1 downto 0); + signal memctl_dataByteEn : std_logic_vector(1 downto 0); + signal memctl_out_data : std_logic_vector(XLENM1 downto 0) := (others => '0'); + signal memctl_dataReady : std_logic := '0'; signal 
memctl_size : std_logic_vector(1 downto 0); - signal memctl_signExtend: std_logic := '0'; - - signal PCintVec: STD_LOGIC := '0'; - - signal int_idata: STD_LOGIC_VECTOR(XLENM1 downto 0); - signal int_set_idata: STD_LOGIC; - signal int_enabled: std_logic := '1'; --'0'; - signal int_set_irpc: STD_LOGIC; - - signal I_int_entry: STD_LOGIC := '0'; - signal I_int_exit: STD_LOGIC := '0'; - - signal csru_int: STD_LOGIC; - signal csru_int_data: STD_LOGIC_VECTOR(XLENM1 downto 0); - signal csru_int_ack: STD_LOGIC := '0'; - + signal memctl_signExtend : std_logic := '0'; + + signal PCintVec : STD_LOGIC := '0'; + + signal int_idata : STD_LOGIC_VECTOR(XLENM1 downto 0); + signal int_set_idata : STD_LOGIC; + signal int_enabled : std_logic := '1'; + signal int_set_irpc : STD_LOGIC; + + signal I_int_entry : STD_LOGIC := '0'; + signal I_int_exit : STD_LOGIC := '0'; + + signal csru_int : STD_LOGIC; + signal csru_int_data : STD_LOGIC_VECTOR(XLENM1 downto 0); + signal csru_int_ack : STD_LOGIC := '0'; + signal csru_dataIn : STD_LOGIC_VECTOR(XLENM1 downto 0); signal csru_dataOut : STD_LOGIC_VECTOR(XLENM1 downto 0); - + signal csru_csrOp : STD_LOGIC_VECTOR (4 downto 0); signal csru_csrAddr : STD_LOGIC_VECTOR (11 downto 0); @@ -325,293 +322,345 @@ architecture Behavioral of core is -- Some CSRs are needed in various places easily, so they are distributed signal csr_status : STD_LOGIC_VECTOR(XLENM1 downto 0); signal csr_tvec : STD_LOGIC_VECTOR(XLENM1 downto 0); - signal csr_cause : STD_LOGIC_VECTOR (XLENM1 downto 0); - signal csr_ie : STD_LOGIC_VECTOR (XLENM1 downto 0); - signal csr_epc : STD_LOGIC_VECTOR (XLENM1 downto 0); - - - signal core_clock:STD_LOGIC := '0'; + signal csr_cause : STD_LOGIC_VECTOR (XLENM1 downto 0); + signal csr_ie : STD_LOGIC_VECTOR (XLENM1 downto 0); + signal csr_epc : STD_LOGIC_VECTOR (XLENM1 downto 0); + signal core_clock : STD_LOGIC := '0'; + + signal lint_reset : STD_LOGIC := '0'; + + signal misalign_hint : STD_LOGIC := '0'; -- a signal that is early for use by the 
control unit to stop the next fetch + signal misalign_branch_hint : STD_LOGIC := '0'; -- a signal that is early for use by the control unit to stop the next fetch + signal misalign_mem_hint : STD_LOGIC := '0'; -- a signal that is early for use by the control unit + + signal misalign_int : STD_LOGIC := '0'; + signal misalign_int_data : STD_LOGIC_VECTOR(XLENM1 downto 0) := (others => '0'); + signal misalign_int_ack : STD_LOGIC := '0'; + signal lint_int : STD_LOGIC; + signal lint_int_data : STD_LOGIC_VECTOR(XLENM1 downto 0); + + signal lint_enable_mask : STD_LOGIC_VECTOR (3 downto 0) := (others => '0'); - signal lint_reset: STD_LOGIC := '0'; - signal lint_nothing: STD_LOGIC := '0'; - signal lint_nothing_data: STD_LOGIC_VECTOR(XLENM1 downto 0):= (others => '0'); - signal lint_int: STD_LOGIC; - signal lint_int_data: STD_LOGIC_VECTOR(XLENM1 downto 0); - - signal lint_enable_mask : STD_LOGIC_VECTOR (3 downto 0):= (others => '0'); - signal external_int_ack : STD_LOGIC := '0'; - - signal dbg_data_line: STD_LOGIC_VECTOR(XLENM1 downto 0); - - signal is_illegal :std_logic:='0'; - - signal should_halt: STD_LOGIC := '0'; + + signal dbg_data_line : STD_LOGIC_VECTOR(XLENM1 downto 0); + + signal is_illegal : std_logic := '0'; + + signal should_halt : STD_LOGIC := '0'; begin - - should_halt <= I_halt; + + should_halt <= I_halt; O_halted <= should_halt; - core_clock <= I_clk; - - memctl: mem_controller PORT MAP ( - I_clk => I_clk, - I_reset => I_reset, - - O_ready => memctl_ready, - I_execute => memctl_execute, - I_dataWe => memctl_dataWe, - I_address => memctl_address, - I_data => memctl_in_data, - I_dataByteEn => memctl_dataByteEn, - I_signExtend => memctl_signExtend, - O_data => memctl_out_data, - O_dataReady => memctl_dataReady, - - MEM_I_ready => MEM_I_ready, - MEM_O_cmd => MEM_O_cmd, - MEM_O_we => MEM_O_we, - MEM_O_byteEnable => MEM_O_byteEnable, - MEM_O_addr => MEM_O_addr, - MEM_O_data => MEM_O_data, - MEM_I_data => MEM_I_data, - MEM_I_dataReady => MEM_I_dataReady - ); + 
core_clock <= I_clk; - pcunit: pc_unit Port map ( - I_clk => core_clock, - I_nPC => in_pc, - I_nPCop => pcop, - I_intVec => PCintVec, - O_PC => PC - ); + memctl : mem_controller port map( + I_clk => I_clk, + I_reset => I_reset, - control: control_unit PORT MAP ( - I_clk => core_clock, - I_reset => I_reset, - I_halt => should_halt, - I_aluop => aluop, - - I_int => lint_int, - O_int_ack => lint_reset, - I_int_enabled => int_enabled, - I_int_mem_data=> lint_int_data, - O_idata=> int_idata, - O_set_idata=> int_set_idata, - O_set_ipc=> PCintVec, - O_set_irpc => int_set_irpc, - O_instTick => csru_instRetTick, - I_ready => memctl_ready, - O_execute => memctl_execute, - I_dataReady => memctl_dataReady, - O_state => state - ); - - decoder: decoder_RV32 PORT MAP ( - I_clk => core_clock, - I_en => en_decode, - I_dataInst => instruction, - O_selRS1 => selRS1, - O_selRS2 => selRS2, - O_selD => selD, - O_dataIMM => dataIMM, - O_regDwe => dataDwe, - O_aluOp => aluOp, - O_aluFunc => aluFunc, - O_memOp => memOp, - O_csrOp => csru_csrOp, - O_csrAddr => csru_csrAddr, - O_trapExit => decoder_trap_exit, - -- This unit can raise exceptions - O_int => decoder_int, - O_int_data => decoder_int_data, - I_int_ack => decoder_int_ack - ); - - alu: alu_RV32I PORT MAP ( - I_clk => core_clock, - I_en => en_alu, - I_dataA => dataA, - I_dataB => dataB, - I_dataDwe => dataDwe, - I_aluop => aluop(6 downto 2), - I_aluFunc => aluFunc, - I_PC => PC, - I_epc => csr_epc, - I_dataIMM => dataIMM, - O_dataResult => dataResult, - O_branchTarget => branchTarget, - O_dataWriteReg => dataWriteReg, - O_lastPC => lastPC_alu, - O_shouldBranch => shouldBranch - ); - - reg: register_set PORT MAP ( - I_clk => core_clock, - I_en => reg_en, - I_dataD => registerWriteData, - O_dataA => dataA, - O_dataB => dataB, - I_selRS1 => selRS1, - I_selRS2 => selRS2, - I_selD => selD, - I_we => reg_we - ); - - csru: csr_unit PORT MAP ( + O_ready => memctl_ready, + I_execute => memctl_execute, + I_dataWe => memctl_dataWe, + I_address 
=> memctl_address, + I_data => memctl_in_data, + I_dataByteEn => memctl_dataByteEn, + I_signExtend => memctl_signExtend, + O_data => memctl_out_data, + O_dataReady => memctl_dataReady, + + MEM_I_ready => MEM_I_ready, + MEM_O_cmd => MEM_O_cmd, + MEM_O_we => MEM_O_we, + MEM_O_byteEnable => MEM_O_byteEnable, + MEM_O_addr => MEM_O_addr, + MEM_O_data => MEM_O_data, + MEM_I_data => MEM_I_data, + MEM_I_dataReady => MEM_I_dataReady + ); + + pcunit : pc_unit port map( + I_clk => core_clock, + I_nPC => in_pc, + I_nPCop => pcop, + I_intVec => PCintVec, + O_PC => PC + ); + + control : control_unit port map( + I_clk => core_clock, + I_reset => I_reset, + I_halt => should_halt, + I_aluop => aluop, + + I_int => lint_int, + O_int_ack => lint_reset, + I_int_enabled => int_enabled, + I_int_mem_data => lint_int_data, + O_idata => int_idata, + O_set_idata => int_set_idata, + O_set_ipc => PCintVec, + O_set_irpc => int_set_irpc, + O_instTick => csru_instRetTick, + I_misalignment => misalign_hint, + I_ready => memctl_ready, + O_execute => memctl_execute, + I_dataReady => memctl_dataReady, + I_aluWait => alu_wait, + I_aluMultiCy => alutobemulticycle, + O_state => state + + ); + + decoder : decoder_RV32 port map( + I_clk => core_clock, + I_en => en_decode, + I_dataInst => instruction, + O_selRS1 => selRS1, + O_selRS2 => selRS2, + O_selD => selD, + O_dataIMM => dataIMM, + O_regDwe => dataDwe, + O_aluOp => aluOp, + O_aluFunc => aluFunc, + O_memOp => memOp, + O_csrOp => csru_csrOp, + O_csrAddr => csru_csrAddr, + O_trapExit => decoder_trap_exit, + O_multycyAlu => alutobemulticycle, + -- This unit can raise exceptions + O_int => decoder_int, + O_int_data => decoder_int_data, + I_int_ack => decoder_int_ack + ); + + alu : alu_RV32I port map( + I_clk => core_clock, + I_en => en_alu, + I_dataA => dataA, + I_dataB => dataB, + I_dataDwe => dataDwe, + I_aluop => aluop(6 downto 2), + I_aluFunc => aluFunc, + I_PC => PC, + I_epc => csr_epc, + I_dataIMM => dataIMM, + I_clear => misalign_int, + 
O_dataResult => dataResult, + O_branchTarget => branchTarget, + O_dataWriteReg => dataWriteReg, + O_lastPC => lastPC_alu, + O_shouldBranch => shouldBranch, + O_wait => alu_wait + ); + + reg : register_set port map( + I_clk => core_clock, + I_en => reg_en, + I_dataD => registerWriteData, + O_dataA => dataA, + O_dataB => dataB, + I_selRS1 => selRS1, + I_selRS2 => selRS2, + I_selD => selD, + I_we => reg_we + ); + + csru : csr_unit port map( I_clk => core_clock, I_en => en_csru, - + I_dataIn => csru_dataIn, O_dataOut => csru_dataOut, - + I_csrOp => csru_csrOp, I_csrAddr => csru_csrAddr, - + -- This unit can raise exceptions O_int => csru_int, O_int_data => csru_int_data, --I_int_ack => csru_int_ack, - + I_instRetTick => csru_instRetTick, - + I_int_cause => lint_int_data, I_int_pc => PC_at_int, - + I_int_mtval => mtval, + I_int_entry => I_int_entry, I_int_exit => I_int_exit, - + O_csr_status => csr_status, O_csr_tvec => csr_tvec, O_csr_cause => csr_cause, O_csr_ie => csr_ie, O_csr_epc => csr_epc ); - - lint: lint_unit PORT MAP ( + + lint : lint_unit port map( I_clk => core_clock, I_reset => lint_reset, I_nextPc => nextPC_stall, - + I_enMask => lint_enable_mask, I_pc => lastPC_dec, - + I_int0 => decoder_int, I_int_data0 => decoder_int_data, O_int0_ack => decoder_int_ack, - + I_int1 => csru_int, I_int_data1 => csru_int_data, O_int1_ack => csru_int_ack, - + I_int2 => I_int, I_int_data2 => I_int_data, O_int2_ack => external_int_ack, - - - I_int3 => lint_nothing, - I_int_data3 => lint_nothing_data, - + I_int3 => misalign_int, -- this should be used for misaligned jump and misaligned memory op + I_int_data3 => misalign_int_data, + O_int3_ack => misalign_int_ack, + O_int => lint_int, O_int_data => lint_int_data--, - -- O_int_epc => PC_at_int + -- O_int_epc => PC_at_int ); -O_int_ack <= external_int_ack; + O_int_ack <= external_int_ack; + + + state_latcher : process (core_clock) + begin + if rising_edge(core_clock) then + if en_decode = '1' then + lastPC_dec <= PC; + end if; + 
if state(6) = '1' then + nextPC_stall <= PC; + end if; + if state(0) = '1' then + instruction <= memctl_out_data; + end if; + end if; + end process; - state_latcher: process(core_clock) - begin - if rising_edge(core_clock) then - if en_decode = '1' then - lastPC_dec <= PC; - end if; - if state(6) = '1' then - nextPC_stall <= PC; - end if; - end if; - end process; - -- Register file controls - reg_en <= en_decode or en_regwrite; - reg_we <= dataWriteReg and en_regwrite; - + reg_en <= en_decode or en_regwrite; + reg_we <= dataWriteReg and en_regwrite;-- and not misalign_mem_hint; + -- These are the pipeline stage enable bits - en_fetch <= state(0); - en_decode <= state(1); - en_alu <= state(3); - en_csru <= state(3) when (aluop(6 downto 2) = OPCODE_SYSTEM and aluFunc(2 downto 0) /= "000") else '0'; - en_memory <= state(4); - en_regwrite <= state(5); - en_stall <= state(6); - - -- This decides what the next PC should be - pcop <= PCU_OP_RESET when I_reset = '1' else - PCU_OP_ASSIGN when shouldBranch = '1' and state(5) = '1' else - PCU_OP_INC when shouldBranch = '0' and state(5) = '1' else - PCU_OP_ASSIGN when PCintvec = '1' else - PCU_OP_NOP; - - -- this is lint interrupt enable for consuming the interrupt - -- unused/external/crsu/decoder - -- Only accept external on ALU stage to prevent issues with externals taking decode int's in fetch cycles - -- externals are also programmable via csr register bit - lint_enable_mask <= '0' & (csr_status(3)and state(3)) & '1' & '1'; - + en_fetch <= state(0); + en_decode <= state(1); + en_alu <= state(3); + en_csru <= state(3) when (aluop(6 downto 2) = OPCODE_SYSTEM and aluFunc(2 downto 0) /= "000") else '0'; + en_memory <= state(4); + en_regwrite <= state(5); + en_stall <= state(6); + + -- This decides what the next PC should be + pcop <= PCU_OP_RESET when I_reset = '1' else + PCU_OP_ASSIGN when shouldBranch = '1' and state(5) = '1' else + PCU_OP_INC when shouldBranch = '0' and state(5) = '1' else + PCU_OP_ASSIGN when PCintvec = 
'1' else + PCU_OP_NOP; + + -- this is lint interrupt enable for consuming the interrupt + -- misalignment/external/crsu/decoder + -- Only accept external on ALU stage to prevent issues with externals taking decode int's in fetch cycles + -- externals are also programmable via csr register bit + lint_enable_mask <= '1' & (csr_status(3)and state(3)) & '1' & '1'; -- interrupts are controlled by mstatus.mie - this is proper control unit acceptance int_enabled <= '1' when (lint_int_data(31) = '0' and lint_int = '1') else csr_status(3); - - PC_at_int <= branchTarget when (shouldBranch = '1' and lint_int_data(31) = '1' and state(6) = '1' and lint_int = '1') else PC when (lint_int_data(31) = '1' and lint_int = '1') else lastPC_dec; + PC_at_int <= branchTarget when (shouldBranch = '1' and lint_int_data(31) = '1' and state(6) = '1' and lint_int = '1') else PC when (lint_int_data(31) = '1' and lint_int = '1') else lastPC_dec; + + -- This tries to find misaligned access issues and forward data to the LINT + -- theres a hacky thing here in that we ignore misaligned memory ops if the + -- address has first 4 bits set; as this is the mmio space, and I've got some + -- misaligned legacy devices/code in various places + -- additionally, misaligned traps can't handle the latency that the LINT incurs whilst + -- dealing with priorities, so we have hint signals to insert dummy "int stalls" into the pipeline. 
+ misalign_branch_hint <= lint_enable_mask(3) when (I_reset = '0' and misalign_int = '0' and en_regwrite = '1' and shouldBranch = '1' and branchTarget(1 downto 0) /= "00") else '0'; + misalign_mem_hint <= lint_enable_mask(3) when (I_reset = '0' and en_memory = '1' and memctl_address(31 downto 28) /= X"F" and ((memctl_dataByteEn = F2_MEM_LS_SIZE_H and memctl_address(0) = '1') or (memctl_dataByteEn = F2_MEM_LS_SIZE_W and memctl_address(1 downto 0) /= "00"))) else '0'; + misalign_hint <= misalign_branch_hint or misalign_mem_hint; + + misalign_int_finder : process (core_clock) + begin + if rising_edge(core_clock) then + if I_reset = '0' and misalign_int = '0' and en_regwrite = '1' and shouldBranch = '1' and branchTarget(1 downto 0) /= "00" then + -- jump misalign + misalign_int <= lint_enable_mask(3); + misalign_int_data <= EXCEPTION_INSTRUCTION_ADDR_MISALIGNED; + mtval <= branchTarget; + + elsif I_reset = '0' and misalign_int = '0' and en_memory = '1' and memctl_dataByteEn = F2_MEM_LS_SIZE_H and memctl_address(0) = '1' and memctl_address(31 downto 28) /= X"F" then -- dont misalign trap on MMIO (Fxxxxxx addr) + -- half load misalign + misalign_int <= lint_enable_mask(3); + if memctl_dataWe = '0' then + misalign_int_data <= EXCEPTION_LOAD_ADDRESS_MISALIGNED; + else + misalign_int_data <= EXCEPTION_STORE_AMO_ADDRESS_MISALIGNED; + end if; + mtval <= memctl_address; + + elsif I_reset = '0' and misalign_int = '0' and en_memory = '1' and memctl_dataByteEn = F2_MEM_LS_SIZE_W and memctl_address(1 downto 0) /= "00" and memctl_address(31 downto 28) /= X"F" then -- dont misalign trap on MMIO (Fxxxxxx addr) + -- word load misalign + misalign_int <= lint_enable_mask(3); + if memctl_dataWe = '0' then + misalign_int_data <= EXCEPTION_LOAD_ADDRESS_MISALIGNED; + else + misalign_int_data <= EXCEPTION_STORE_AMO_ADDRESS_MISALIGNED; + end if; + mtval <= memctl_address; + + elsif misalign_int = '1' and misalign_int_ack = '1' then + misalign_int <= '0'; + end if; + end if; + end process; + 
-- On Interrupt service entry, CSRs need some maintenance. -- We need to strobe the CSR unit on this event. I_int_entry <= PCintvec; -- To detect exit, we strobe using the ALU enable with the decoder trap request bit I_int_exit <= decoder_trap_exit and en_alu; - + -- The input PC is just always the branch target output from ALU -- todo: tvec needs modified for vectored exceptions - in_pc <= csr_tvec when PCintvec = '1' else branchTarget; - - -- input data from the register file, or use immediate if the OP specifies it - csru_dataIn <= dataIMM when csru_csrOp(CSR_OP_BITS_IMM) = '1' else dataA; - - --dbg_data_line can be used to aid debugging cpu issues using trace dumps. + in_pc <= csr_tvec when PCintvec = '1' else branchTarget; + + -- input data from the register file, or use immediate if the OP specifies it + csru_dataIn <= dataIMM when csru_csrOp(CSR_OP_BITS_IMM) = '1' else dataA; + + --dbg_data_line can be used to aid debugging cpu issues using trace dumps. --dbg_data_line <= csr_tvec when memctl_execute = '1' else csru_dataIn when en_csru = '1' else registerWriteData when state(5) = '1' else X"000000" & "000" & selD when state(3) = '1' else instruction when state(1)='1' else memctl_address; - --dbg_data_line <= memctl_address when memctl_execute = '1' else MEM_I_data; - dbg_data_line <= X"ABCDEF01" when (decoder_int_data = EXCEPTION_INSTRUCTION_ILLEGAL and X"00000010" = csr_epc ) else csru_dataIn when en_csru = '1' else registerWriteData when state(5) = '1' else X"000000" & "000" & selD when state(3) = '1' else instruction when state(1)='1' else memctl_address; - --dbg_data_line <= PC_at_int;--registerWriteData when state(5) = '1' else X"000000" & "000" & selD when state(3) = '1' else instruction when state(1)='1' else csr_epc when ( lint_reset = '1') else memctl_address; - - is_illegal <= '1' when decoder_int_data = EXCEPTION_INSTRUCTION_ILLEGAL else '0'; - - -- The debug output just allows some internal state to be visible outside the core black box + 
--dbg_data_line <= memctl_address when memctl_execute = '1' else MEM_I_data; + dbg_data_line <= X"ABCDEF01" when (decoder_int_data = EXCEPTION_INSTRUCTION_ILLEGAL and X"00000010" = csr_epc) else csru_dataIn when en_csru = '1' else registerWriteData when state(5) = '1' else X"000000" & "000" & selD when state(3) = '1' else instruction when state(1) = '1' else memctl_address; + --dbg_data_line <= PC_at_int;--registerWriteData when state(5) = '1' else X"000000" & "000" & selD when state(3) = '1' else instruction when state(1)='1' else csr_epc when ( lint_reset = '1') else memctl_address; + + is_illegal <= '1' when decoder_int_data = EXCEPTION_INSTRUCTION_ILLEGAL else '0'; + + -- The debug output just allows some internal state to be visible outside the core black box -- byte 1 - memctrl&dataready -- byte 2 - dataWriteReg, int_en, lint_reset, lint_int, interrupt_type_ decoder and csru_int -- byte 3 - aluop -- byte 4 - state -- uint32 - data - O_DBG <= "0000" & "00" & memctl_dataReady & MEM_I_dataReady & - -- dataWriteReg & int_enabled & lint_reset & lint_int & lint_int_data(31) & PCintvec & decoder_int & decoder_int_ack &--&csru_int & --I_int & -- - dataWriteReg & int_enabled & lint_reset & lint_int & I_int & external_int_ack & decoder_int & decoder_int_ack &--&csru_int & --I_int & -- - is_illegal & "00" & aluop(6 downto 2) & - "0" & state & - dbg_data_line; - - -- Below statements are for memory interface use. 
- memctl_address <= dataResult when en_memory = '1' else PC; - ram_req_size <= memMode when en_memory = '1' else '0'; - memctl_dataByteEn <= memctl_size when en_memory = '1' else F2_MEM_LS_SIZE_W; - memctl_in_data <= dataB; - memctl_dataWe <= '1' when en_memory = '1' and memOp(4 downto 3) = "11" else '0'; - memctl_size <= memOp(1 downto 0); - memctl_signExtend <= not memOp(2); - - -- This chooses to write registers with memory data or ALU/csr data - registerWriteData <= memctl_out_data when memOp(4 downto 3) = "10" else dataB when (aluop(6 downto 2) = OPCODE_STORE ) else csru_dataOut when (aluop(6 downto 2) = OPCODE_SYSTEM and aluFunc(2 downto 0) /= "000") else dataResult; - - -- The instructions are delivered from memctl - -- FIXME: The instruction needs LATCHED. Any change to data input at a certain time - -- can confuse the pipeline and get it into an inconsistent state. - instruction <= memctl_out_data; - -end Behavioral; + O_DBG <= "0000" & "0" & memctl_execute & memctl_ready & memctl_dataReady & --alutobemulticycle & alu_wait & -- + -- dataWriteReg & int_enabled & lint_reset & lint_int & lint_int_data(31) & PCintvec & decoder_int & decoder_int_ack &--&csru_int & --I_int & -- + dataWriteReg & int_enabled & lint_reset & lint_int & I_int & external_int_ack & decoder_int & decoder_int_ack & --&csru_int & --I_int & -- + is_illegal & "00" & aluop(6 downto 2) & + "0" & state & + dbg_data_line; + -- Below statements are for memory interface use. 
+ memctl_address <= dataResult when en_memory = '1' else PC; + ram_req_size <= memMode when en_memory = '1' else '0'; + memctl_dataByteEn <= memctl_size when en_memory = '1' else F2_MEM_LS_SIZE_W; + memctl_in_data <= dataB; + memctl_dataWe <= '1' when en_memory = '1' and memOp(4 downto 3) = "11" else '0'; + memctl_size <= memOp(1 downto 0); + memctl_signExtend <= not memOp(2); + + -- This chooses to write registers with memory data or ALU/csr data + registerWriteData <= memctl_out_data when memOp(4 downto 3) = "10" else dataB when (aluop(6 downto 2) = OPCODE_STORE) else csru_dataOut when (aluop(6 downto 2) = OPCODE_SYSTEM and aluFunc(2 downto 0) /= "000") else dataResult; +end Behavioral; \ No newline at end of file diff --git a/vhdl/csr_unit.vhd b/vhdl/csr_unit.vhd index 16ad9aa..bbffcab 100644 --- a/vhdl/csr_unit.vhd +++ b/vhdl/csr_unit.vhd @@ -18,120 +18,118 @@ -- limitations under the License. ---------------------------------------------------------------------------------- library IEEE; -use IEEE.STD_LOGIC_1164.ALL; -use IEEE.NUMERIC_STD.ALL; +use IEEE.STD_LOGIC_1164.all; +use IEEE.NUMERIC_STD.all; library work; use work.constants.all; entity csr_unit is - Port ( I_clk : in STD_LOGIC; - I_en : in STD_LOGIC; - I_dataIn : in STD_LOGIC_VECTOR (XLENM1 downto 0); - O_dataOut : out STD_LOGIC_VECTOR (XLENM1 downto 0); - I_csrOp : in STD_LOGIC_VECTOR (4 downto 0); - I_csrAddr : in STD_LOGIC_VECTOR (11 downto 0); - O_int : out STD_LOGIC; - O_int_data : out STD_LOGIC_VECTOR (31 downto 0); - I_instRetTick : in STD_LOGIC; - - -- interrupt handling causes many data dependencies - -- mcause has a fast path in from other units - I_int_cause: in STD_LOGIC_VECTOR (XLENM1 downto 0); - I_int_pc: in STD_LOGIC_VECTOR (XLENM1 downto 0); - -- We need to know when an interrupt occurs as to perform the - -- relevant csr modifications. Same with exit. 
- I_int_entry: IN STD_LOGIC; - I_int_exit: IN STD_LOGIC; - - -- Currently just feeds machine level CSR values - O_csr_status : out STD_LOGIC_VECTOR (XLENM1 downto 0); - O_csr_cause : out STD_LOGIC_VECTOR (XLENM1 downto 0); - O_csr_ie : out STD_LOGIC_VECTOR (XLENM1 downto 0); - O_csr_tvec : out STD_LOGIC_VECTOR (XLENM1 downto 0); - O_csr_epc : out STD_LOGIC_VECTOR (XLENM1 downto 0) - ); + port ( + I_clk : in STD_LOGIC; + I_en : in STD_LOGIC; + I_dataIn : in STD_LOGIC_VECTOR (XLENM1 downto 0); + O_dataOut : out STD_LOGIC_VECTOR (XLENM1 downto 0); + I_csrOp : in STD_LOGIC_VECTOR (4 downto 0); + I_csrAddr : in STD_LOGIC_VECTOR (11 downto 0); + O_int : out STD_LOGIC; + O_int_data : out STD_LOGIC_VECTOR (31 downto 0); + I_instRetTick : in STD_LOGIC; + + -- interrupt handling causes many data dependencies + -- mcause has a fast path in from other units + I_int_cause : in STD_LOGIC_VECTOR (XLENM1 downto 0); + I_int_pc : in STD_LOGIC_VECTOR (XLENM1 downto 0); + I_int_mtval : in STD_LOGIC_VECTOR (XLENM1 downto 0); + -- We need to know when an interrupt occurs as to perform the + -- relevant csr modifications. Same with exit. 
+ I_int_entry : in STD_LOGIC; + I_int_exit : in STD_LOGIC; + + -- Currently just feeds machine level CSR values + O_csr_status : out STD_LOGIC_VECTOR (XLENM1 downto 0); + O_csr_cause : out STD_LOGIC_VECTOR (XLENM1 downto 0); + O_csr_ie : out STD_LOGIC_VECTOR (XLENM1 downto 0); + O_csr_tvec : out STD_LOGIC_VECTOR (XLENM1 downto 0); + O_csr_epc : out STD_LOGIC_VECTOR (XLENM1 downto 0) + ); end csr_unit; architecture Behavioral of csr_unit is -constant CSR_ADDR_USTATUS: STD_LOGIC_VECTOR (11 downto 0) := X"000"; -constant CSR_ADDR_UIE: STD_LOGIC_VECTOR (11 downto 0) := X"004"; -constant CSR_ADDR_UTVEC: STD_LOGIC_VECTOR (11 downto 0) := X"005"; + constant CSR_ADDR_USTATUS : STD_LOGIC_VECTOR (11 downto 0) := X"000"; + constant CSR_ADDR_UIE : STD_LOGIC_VECTOR (11 downto 0) := X"004"; + constant CSR_ADDR_UTVEC : STD_LOGIC_VECTOR (11 downto 0) := X"005"; -constant CSR_ADDR_USCRATCH: STD_LOGIC_VECTOR (11 downto 0) := X"040"; -constant CSR_ADDR_UEPC: STD_LOGIC_VECTOR (11 downto 0) := X"041"; -constant CSR_ADDR_UCAUSE: STD_LOGIC_VECTOR (11 downto 0) := X"042"; -constant CSR_ADDR_UTVAL: STD_LOGIC_VECTOR (11 downto 0) := X"043"; -constant CSR_ADDR_UIP: STD_LOGIC_VECTOR (11 downto 0) := X"044"; + constant CSR_ADDR_USCRATCH : STD_LOGIC_VECTOR (11 downto 0) := X"040"; + constant CSR_ADDR_UEPC : STD_LOGIC_VECTOR (11 downto 0) := X"041"; + constant CSR_ADDR_UCAUSE : STD_LOGIC_VECTOR (11 downto 0) := X"042"; + constant CSR_ADDR_UTVAL : STD_LOGIC_VECTOR (11 downto 0) := X"043"; + constant CSR_ADDR_UIP : STD_LOGIC_VECTOR (11 downto 0) := X"044"; -constant CSR_ADDR_CYCLE: STD_LOGIC_VECTOR (11 downto 0) := X"C00"; -constant CSR_ADDR_TIME: STD_LOGIC_VECTOR (11 downto 0) := X"C01"; -constant CSR_ADDR_INSTRET: STD_LOGIC_VECTOR (11 downto 0) := X"C02"; + constant CSR_ADDR_CYCLE : STD_LOGIC_VECTOR (11 downto 0) := X"C00"; + constant CSR_ADDR_TIME : STD_LOGIC_VECTOR (11 downto 0) := X"C01"; + constant CSR_ADDR_INSTRET : STD_LOGIC_VECTOR (11 downto 0) := X"C02"; -constant CSR_ADDR_CYCLEH: 
STD_LOGIC_VECTOR (11 downto 0) := X"C80"; -constant CSR_ADDR_TIMEH: STD_LOGIC_VECTOR (11 downto 0) := X"C81"; -constant CSR_ADDR_INSTRETH: STD_LOGIC_VECTOR (11 downto 0) := X"C82"; + constant CSR_ADDR_CYCLEH : STD_LOGIC_VECTOR (11 downto 0) := X"C80"; + constant CSR_ADDR_TIMEH : STD_LOGIC_VECTOR (11 downto 0) := X"C81"; + constant CSR_ADDR_INSTRETH : STD_LOGIC_VECTOR (11 downto 0) := X"C82"; + constant CSR_ADDR_TEST_400 : STD_LOGIC_VECTOR (11 downto 0) := X"400"; + constant CSR_ADDR_TEST_401 : STD_LOGIC_VECTOR (11 downto 0) := X"401"; + constant CSR_ADDR_MSTATUS : STD_LOGIC_VECTOR (11 downto 0) := X"300"; + constant CSR_ADDR_MISA : STD_LOGIC_VECTOR (11 downto 0) := X"301"; + constant CSR_ADDR_MEDELEG : STD_LOGIC_VECTOR (11 downto 0) := X"302"; + constant CSR_ADDR_MIDELEG : STD_LOGIC_VECTOR (11 downto 0) := X"303"; + constant CSR_ADDR_MIE : STD_LOGIC_VECTOR (11 downto 0) := X"304"; + constant CSR_ADDR_MTVEC : STD_LOGIC_VECTOR (11 downto 0) := X"305"; + constant CSR_ADDR_MCOUNTEREN : STD_LOGIC_VECTOR (11 downto 0) := X"306"; -constant CSR_ADDR_TEST_400: STD_LOGIC_VECTOR (11 downto 0) := X"400"; -constant CSR_ADDR_TEST_401: STD_LOGIC_VECTOR (11 downto 0) := X"401"; + constant CSR_ADDR_MSCRATCH : STD_LOGIC_VECTOR (11 downto 0) := X"340"; + constant CSR_ADDR_MEPC : STD_LOGIC_VECTOR (11 downto 0) := X"341"; + constant CSR_ADDR_MCAUSE : STD_LOGIC_VECTOR (11 downto 0) := X"342"; + constant CSR_ADDR_MTVAL : STD_LOGIC_VECTOR (11 downto 0) := X"343"; + constant CSR_ADDR_MIP : STD_LOGIC_VECTOR (11 downto 0) := X"344"; -constant CSR_ADDR_MSTATUS: STD_LOGIC_VECTOR (11 downto 0) := X"300"; -constant CSR_ADDR_MISA: STD_LOGIC_VECTOR (11 downto 0) := X"301"; -constant CSR_ADDR_MEDELEG: STD_LOGIC_VECTOR (11 downto 0) := X"302"; -constant CSR_ADDR_MIDELEG: STD_LOGIC_VECTOR (11 downto 0) := X"303"; -constant CSR_ADDR_MIE: STD_LOGIC_VECTOR (11 downto 0) := X"304"; -constant CSR_ADDR_MTVEC: STD_LOGIC_VECTOR (11 downto 0) := X"305"; -constant CSR_ADDR_MCOUNTEREN: STD_LOGIC_VECTOR (11 
downto 0) := X"306"; + constant CSR_ADDR_MCYCLE : STD_LOGIC_VECTOR (11 downto 0) := X"B00"; + constant CSR_ADDR_MINSTRET : STD_LOGIC_VECTOR (11 downto 0) := X"B02"; -constant CSR_ADDR_MSCRATCH: STD_LOGIC_VECTOR (11 downto 0) := X"340"; -constant CSR_ADDR_MEPC: STD_LOGIC_VECTOR (11 downto 0) := X"341"; -constant CSR_ADDR_MCAUSE: STD_LOGIC_VECTOR (11 downto 0) := X"342"; -constant CSR_ADDR_MTVAL: STD_LOGIC_VECTOR (11 downto 0) := X"343"; -constant CSR_ADDR_MIP: STD_LOGIC_VECTOR (11 downto 0) := X"344"; + constant CSR_ADDR_MCYCLEH : STD_LOGIC_VECTOR (11 downto 0) := X"B80"; + constant CSR_ADDR_MINSTRETH : STD_LOGIC_VECTOR (11 downto 0) := X"B82"; -constant CSR_ADDR_MCYCLE: STD_LOGIC_VECTOR (11 downto 0) := X"B00"; -constant CSR_ADDR_MINSTRET: STD_LOGIC_VECTOR (11 downto 0) := X"B02"; + constant CSR_ADDR_MVENDORID : STD_LOGIC_VECTOR (11 downto 0) := X"F11"; + constant CSR_ADDR_MARCHID : STD_LOGIC_VECTOR (11 downto 0) := X"F12"; + constant CSR_ADDR_MIMPID : STD_LOGIC_VECTOR (11 downto 0) := X"F13"; + constant CSR_ADDR_MHARDID : STD_LOGIC_VECTOR (11 downto 0) := X"F14"; + signal csr_cycles : STD_LOGIC_VECTOR(63 downto 0) := (others => '0'); + signal csr_instret : STD_LOGIC_VECTOR(63 downto 0) := (others => '0'); -constant CSR_ADDR_MCYCLEH: STD_LOGIC_VECTOR (11 downto 0) := X"B80"; -constant CSR_ADDR_MINSTRETH: STD_LOGIC_VECTOR (11 downto 0) := X"B82"; + signal csr_mstatus : STD_LOGIC_VECTOR (XLENM1 downto 0) := X"00000000"; + signal csr_mie : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); + signal csr_mtvec : STD_LOGIC_VECTOR (XLENM1 downto 0) := X"00000004"; + signal csr_mscratch : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); + signal csr_mepc : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); + signal csr_mcause : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); + signal csr_mtval : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); + signal csr_mip : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); -constant CSR_ADDR_MVENDORID: 
STD_LOGIC_VECTOR (11 downto 0) := X"F11"; -constant CSR_ADDR_MARCHID: STD_LOGIC_VECTOR (11 downto 0) := X"F12"; -constant CSR_ADDR_MIMPID: STD_LOGIC_VECTOR (11 downto 0) := X"F13"; -constant CSR_ADDR_MHARDID: STD_LOGIC_VECTOR (11 downto 0) := X"F14"; + signal csr_vexrisc_irq_mask : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); + signal csr_vexrisc_irq_pending : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); --- Will allow some other CSRS to make for easier running of third party sw -constant CSR_ADDR_VEXRISC_IRQ_MASK: STD_LOGIC_VECTOR (11 downto 0) := X"bc0"; -constant CSR_ADDR_VEXRISC_IRQ_PENDING: STD_LOGIC_VECTOR (11 downto 0) := X"fc0"; + signal curr_csr_value : STD_LOGIC_VECTOR(XLENM1 downto 0) := (others => '0'); + signal next_csr_value : STD_LOGIC_VECTOR(XLENM1 downto 0) := (others => '0'); -signal csr_cycles: STD_LOGIC_VECTOR(63 downto 0) := (others => '0'); -signal csr_instret: STD_LOGIC_VECTOR(63 downto 0) := (others => '0'); + signal test0_CSR : STD_LOGIC_VECTOR(XLENM1 downto 0) := X"FEFbbEF0"; + signal test1_CSR : STD_LOGIC_VECTOR(XLENM1 downto 0) := X"FEFbbEF1"; -signal csr_mstatus : STD_LOGIC_VECTOR (XLENM1 downto 0) := X"00000000";-- X"00001800"; -- MIE default 1 -signal csr_mie : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); -signal csr_mtvec : STD_LOGIC_VECTOR (XLENM1 downto 0) := X"00000004";-- X"00000010"; -signal csr_mscratch : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); -signal csr_mepc : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); -signal csr_mcause : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); -signal csr_mtval : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); -signal csr_mip : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); + signal csr_op : STD_LOGIC_VECTOR(4 downto 0) := (others => '0'); + signal opState : integer := 0; -signal csr_vexrisc_irq_mask : STD_LOGIC_VECTOR (XLENM1 downto 0) := (others => '0'); -signal csr_vexrisc_irq_pending : STD_LOGIC_VECTOR (XLENM1 downto 
0) := (others => '0'); - -signal curr_csr_value: STD_LOGIC_VECTOR(XLENM1 downto 0) := (others=> '0'); -signal next_csr_value: STD_LOGIC_VECTOR(XLENM1 downto 0) := (others=> '0'); - -signal test0_CSR: STD_LOGIC_VECTOR(XLENM1 downto 0) := X"FEFbbEF0"; -signal test1_CSR: STD_LOGIC_VECTOR(XLENM1 downto 0) := X"FEFbbEF1"; - -signal csr_op: STD_LOGIC_VECTOR(4 downto 0) := (others=>'0'); -signal opState: integer := 0; - -signal raise_int: std_logic := '0'; + signal raise_int : std_logic := '0'; + constant STEP_READ_OR_IDLE : integer := 0; + constant STEP_MODIFY : integer := 1; + constant STEP_WRITE : integer := 2; begin O_int <= raise_int; @@ -141,45 +139,45 @@ begin O_csr_cause <= csr_mcause; O_csr_ie <= csr_mie; O_csr_epc <= csr_mepc; - + O_dataOut <= curr_csr_value; - cycles: process (I_clk) + cycles : process (I_clk) begin if rising_edge(I_clk) then csr_cycles <= std_logic_vector(unsigned(csr_cycles) + 1); end if; end process; - - instret: process (I_clk) + + instret : process (I_clk) begin - if rising_edge(I_clk) and I_instRetTick='1' then + if rising_edge(I_clk) and I_instRetTick = '1' then csr_instret <= std_logic_vector(unsigned(csr_instret) + 1); end if; end process; - - protection: process (I_clk, I_en) + + protection : process (I_clk, I_en) begin - if rising_edge(I_clk) then + if rising_edge(I_clk) then if (I_csrAddr(CSR_ADDR_ACCESS_BIT_START downto CSR_ADDR_ACCESS_BIT_END) = CSR_ADDR_ACCESS_READONLY) and - (I_csrOp(CSR_OP_BITS_WRITTEN) = '1') then - --todo: raise exception - raise_int <= '1'; + (I_csrOp(CSR_OP_BITS_WRITTEN) = '1') then + --todo: raise exception + raise_int <= '1'; else - raise_int <= '0'; + raise_int <= '0'; end if; end if; end process; - + -- Read data is available next cycle, with an additional cycle before another op can be processed -- Write to CSR occurs 3 cycles later. -- cycle 1: read of existing csr available -- cycle 2: update value calculates (whole write, set/clear bit read-modify-write) -- cycle 3: actual write to csr occurs. 
- datamain: process (I_clk, I_en) + datamain : process (I_clk, I_en) begin if rising_edge(I_clk) then - + if I_int_entry = '1' then -- on entry: -- mstatus.mpie = mstatus.mie @@ -188,9 +186,10 @@ begin csr_mstatus(3) <= '0'; -- mstatus.mpp = current privilege mode csr_mstatus(12 downto 11) <= "11"; - + csr_mcause <= I_int_cause; csr_mepc <= I_int_pc; + csr_mtval <= I_int_mtval; elsif I_int_exit = '1' then -- privilege set to mstatus.mpp @@ -198,9 +197,10 @@ begin csr_mstatus(3) <= csr_mstatus(7); csr_mstatus(7) <= '1'; csr_mstatus(12 downto 11) <= "00"; + + -- interrupt data changes take all priority - -- interrupt data changes take all priority - elsif I_en = '1' and opState = 0 then + elsif I_en = '1' and opState = STEP_READ_OR_IDLE then csr_op <= I_csrOp; case I_csrAddr is when CSR_ADDR_MVENDORID => @@ -208,12 +208,12 @@ begin when CSR_ADDR_MARCHID => curr_csr_value <= X"00000000"; when CSR_ADDR_MIMPID => - curr_csr_value <= X"52505530"; -- "RPU0" + curr_csr_value <= X"52505531"; -- "RPU1" when CSR_ADDR_MHARDID => curr_csr_value <= X"00000000"; when CSR_ADDR_MISA => - curr_csr_value <= X"40000080"; -- XLEN 32, RV32I - + curr_csr_value <= X"40001100"; -- XLEN 32, RV32IM + when CSR_ADDR_MSTATUS => curr_csr_value <= csr_mstatus; when CSR_ADDR_MTVEC => @@ -223,46 +223,40 @@ begin when CSR_ADDR_MIP => curr_csr_value <= csr_mip; when CSR_ADDR_MCAUSE => - curr_csr_value <= csr_mcause; + curr_csr_value <= csr_mcause; when CSR_ADDR_MEPC => - curr_csr_value <= csr_mepc; - - when CSR_ADDR_VEXRISC_IRQ_PENDING => - curr_csr_value <= csr_vexrisc_irq_pending; - when CSR_ADDR_VEXRISC_IRQ_MASK => - curr_csr_value <= csr_vexrisc_irq_mask; - + curr_csr_value <= csr_mepc; + when CSR_ADDR_MTVAL => + curr_csr_value <= csr_mtval; + when CSR_ADDR_MSCRATCH => + curr_csr_value <= csr_mscratch; + when CSR_ADDR_CYCLE => curr_csr_value <= csr_cycles(31 downto 0); when CSR_ADDR_CYCLEH => curr_csr_value <= csr_cycles(63 downto 32); - + when CSR_ADDR_INSTRET => curr_csr_value <= csr_instret(31 
downto 0); when CSR_ADDR_INSTRETH => curr_csr_value <= csr_instret(63 downto 32); - + when CSR_ADDR_MCYCLE => curr_csr_value <= csr_cycles(31 downto 0); when CSR_ADDR_MCYCLEH => curr_csr_value <= csr_cycles(63 downto 32); - + when CSR_ADDR_MINSTRET => curr_csr_value <= csr_instret(31 downto 0); when CSR_ADDR_MINSTRETH => curr_csr_value <= csr_instret(63 downto 32); - - when CSR_ADDR_TEST_400 => - curr_csr_value <= test0_CSR; - when CSR_ADDR_TEST_401 => - curr_csr_value <= test1_CSR; - when others => + when others => -- raise exception for unsupported CSR end case; - opState <= 1; - - elsif opState = 1 then + opState <= STEP_MODIFY; + + elsif opState = STEP_MODIFY then -- update stage for sets, clears and writes case csr_op(3 downto 2) is when CSR_MAINOP_WR => @@ -273,43 +267,35 @@ begin next_csr_value <= curr_csr_value and (not I_dataIn); when others => end case; - + if I_csrOp(CSR_OP_BITS_WRITTEN) = '1' then - opState <= 2; - else - opState <= 0; + opState <= STEP_WRITE; + else + opState <= STEP_READ_OR_IDLE; end if; - - elsif opState = 2 then + + elsif opState = STEP_WRITE then -- write stage - opState <= 0; + opState <= STEP_READ_OR_IDLE; case I_csrAddr is - when CSR_ADDR_TEST_400 => - test0_CSR <= next_csr_value; - when CSR_ADDR_TEST_401 => - test1_CSR <= next_csr_value; - - when CSR_ADDR_MSTATUS => - csr_mstatus <= next_csr_value; - when CSR_ADDR_MTVEC => - csr_mtvec <= next_csr_value; - when CSR_ADDR_MIE => - csr_mie <= next_csr_value; - when CSR_ADDR_MIP => - csr_mip <= next_csr_value; - when CSR_ADDR_MEPC => - csr_mepc <= next_csr_value; - - - when CSR_ADDR_VEXRISC_IRQ_PENDING => - csr_vexrisc_irq_pending <= next_csr_value; - when CSR_ADDR_VEXRISC_IRQ_MASK => - csr_vexrisc_irq_mask <= next_csr_value; + + when CSR_ADDR_MSTATUS => + csr_mstatus <= next_csr_value; + when CSR_ADDR_MTVEC => + csr_mtvec <= next_csr_value; + when CSR_ADDR_MIE => + csr_mie <= next_csr_value; + when CSR_ADDR_MIP => + csr_mip <= next_csr_value; + when CSR_ADDR_MEPC => + csr_mepc <= 
next_csr_value; + when CSR_ADDR_MSCRATCH => + csr_mscratch <= next_csr_value; when others => end case; end if; - end if; - + end if; + end process; -end Behavioral; +end Behavioral; \ No newline at end of file diff --git a/vhdl/lint_unit.vhd b/vhdl/lint_unit.vhd index 2612364..447002f 100644 --- a/vhdl/lint_unit.vhd +++ b/vhdl/lint_unit.vhd @@ -18,48 +18,49 @@ -- limitations under the License. ---------------------------------------------------------------------------------- library IEEE; -use IEEE.STD_LOGIC_1164.ALL; -use IEEE.NUMERIC_STD.ALL; +use IEEE.STD_LOGIC_1164.all; +use IEEE.NUMERIC_STD.all; library work; use work.constants.all; entity lint_unit is - Port ( I_clk : in STD_LOGIC; - I_reset : in STD_LOGIC; - I_nextPc : in STD_LOGIC_VECTOR (31 downto 0); - I_pc : in STD_LOGIC_VECTOR (31 downto 0); - I_enMask : in STD_LOGIC_VECTOR (3 downto 0); - I_int0 : in STD_LOGIC; - I_int_data0 : in STD_LOGIC_VECTOR (31 downto 0); - O_int0_ack: out STD_LOGIC; - I_int1 : in STD_LOGIC; - I_int_data1 : in STD_LOGIC_VECTOR (31 downto 0); - O_int1_ack: out STD_LOGIC; - I_int2 : in STD_LOGIC; - I_int_data2 : in STD_LOGIC_VECTOR (31 downto 0); - O_int2_ack: out STD_LOGIC; - I_int3 : in STD_LOGIC; - I_int_data3 : in STD_LOGIC_VECTOR (31 downto 0); - O_int3_ack: out STD_LOGIC; - O_int : out STD_LOGIC; - O_int_data : out STD_LOGIC_VECTOR (31 downto 0); - O_int_epc : out STD_LOGIC_VECTOR (31 downto 0) - ); + port ( + I_clk : in STD_LOGIC; + I_reset : in STD_LOGIC; + I_nextPc : in STD_LOGIC_VECTOR (31 downto 0); + I_pc : in STD_LOGIC_VECTOR (31 downto 0); + I_enMask : in STD_LOGIC_VECTOR (3 downto 0); + I_int0 : in STD_LOGIC; + I_int_data0 : in STD_LOGIC_VECTOR (31 downto 0); + O_int0_ack : out STD_LOGIC; + I_int1 : in STD_LOGIC; + I_int_data1 : in STD_LOGIC_VECTOR (31 downto 0); + O_int1_ack : out STD_LOGIC; + I_int2 : in STD_LOGIC; + I_int_data2 : in STD_LOGIC_VECTOR (31 downto 0); + O_int2_ack : out STD_LOGIC; + I_int3 : in STD_LOGIC; + I_int_data3 : in STD_LOGIC_VECTOR (31 downto 
0); + O_int3_ack : out STD_LOGIC; + O_int : out STD_LOGIC; + O_int_data : out STD_LOGIC_VECTOR (31 downto 0); + O_int_epc : out STD_LOGIC_VECTOR (31 downto 0) + ); end lint_unit; architecture Behavioral of lint_unit is -signal actual_int: std_logic := '0'; -signal actual_int_data: std_logic_vector (31 downto 0) := X"00000000"; -signal actual_int_epc: std_logic_vector (31 downto 0) := X"00000000"; + signal actual_int : std_logic := '0'; + signal actual_int_data : std_logic_vector (31 downto 0) := X"00000000"; + signal actual_int_epc : std_logic_vector (31 downto 0) := X"00000000"; -signal int0_ack: std_logic := '0'; -signal int1_ack: std_logic := '0'; -signal int2_ack: std_logic := '0'; -signal int3_ack: std_logic := '0'; + signal int0_ack : std_logic := '0'; + signal int1_ack : std_logic := '0'; + signal int2_ack : std_logic := '0'; + signal int3_ack : std_logic := '0'; -signal reset_counter: integer := 0; + signal reset_counter : integer := 0; begin @@ -71,47 +72,45 @@ begin O_int1_ack <= int1_ack; O_int2_ack <= int2_ack; O_int3_ack <= int3_ack; - - -- This simply filters one of the 4 int sources to a single one in - -- decreasing priority, latching the data until a reset. 
- arb: process (I_clk) - begin - if rising_edge(I_clk) then - if I_reset = '1' then - reset_counter <= 1; - int0_ack <= '0'; - int1_ack <= '0'; - int2_ack <= '0'; - int3_ack <= '0'; - elsif reset_counter = 1 then - reset_counter <= 2; - elsif reset_counter = 2 then - reset_counter <= 3; - elsif reset_counter = 3 then - actual_int <= '0'; - reset_counter <= 0; - elsif reset_counter = 0 and actual_int = '0' then - - if I_enMask(0) = '1' and I_int0 = '1' and int0_ack = '0' then - actual_int <= '1'; - actual_int_data <= I_int_data0; - int0_ack <= '1'; - elsif I_enMask(1) = '1' and I_int1 = '1' and int1_ack = '0'then - actual_int <= '1'; - actual_int_data <= I_int_data1; - int1_ack <= '1'; - elsif I_enMask(2) = '1' and I_int2 = '1' and int2_ack = '0' then - actual_int <= '1'; - actual_int_data <= I_int_data2; - int2_ack <= '1'; - elsif I_enMask(3) = '1' and I_int3 = '1' and int3_ack = '0'then - actual_int <= '1'; - actual_int_data <= I_int_data3; - int3_ack <= '1'; - end if; + + -- This simply filters one of the 4 int sources to a single one in + -- decreasing priority, latching the data until a reset. 
+ arb : process (I_clk) + begin + if rising_edge(I_clk) then + if I_reset = '1' then + reset_counter <= 1; + int0_ack <= '0'; + int1_ack <= '0'; + int2_ack <= '0'; + int3_ack <= '0'; + elsif reset_counter = 1 then + reset_counter <= 2; + elsif reset_counter = 2 then + reset_counter <= 3; + elsif reset_counter = 3 then + actual_int <= '0'; + reset_counter <= 0; + elsif reset_counter = 0 and actual_int = '0' then + + if I_enMask(0) = '1' and I_int0 = '1' and int0_ack = '0' then + actual_int <= '1'; + actual_int_data <= I_int_data0; + int0_ack <= '1'; + elsif I_enMask(1) = '1' and I_int1 = '1' and int1_ack = '0'then + actual_int <= '1'; + actual_int_data <= I_int_data1; + int1_ack <= '1'; + elsif I_enMask(2) = '1' and I_int2 = '1' and int2_ack = '0' then + actual_int <= '1'; + actual_int_data <= I_int_data2; + int2_ack <= '1'; + elsif I_enMask(3) = '1' and I_int3 = '1' and int3_ack = '0'then + actual_int <= '1'; + actual_int_data <= I_int_data3; + int3_ack <= '1'; + end if; + end if; end if; - end if; - end process; - - -end Behavioral; + end process; +end Behavioral; \ No newline at end of file diff --git a/vhdl/mem_controller.vhd b/vhdl/mem_controller.vhd index 7b481e6..99f6bb3 100644 --- a/vhdl/mem_controller.vhd +++ b/vhdl/mem_controller.vhd @@ -21,113 +21,107 @@ -- limitations under the License. 
---------------------------------------------------------------------------------- library IEEE; -use IEEE.STD_LOGIC_1164.ALL; +use IEEE.STD_LOGIC_1164.all; use ieee.numeric_std.all; library work; use work.constants.all; - - entity mem_controller is - Port ( - I_clk : in STD_LOGIC; + port ( + I_clk : in STD_LOGIC; I_reset : in STD_LOGIC; - + O_ready : out STD_LOGIC; - I_execute: in STD_LOGIC; - I_dataWe : in STD_LOGIC; - I_address : in STD_LOGIC_VECTOR (XLENM1 downto 0); - I_data : in STD_LOGIC_VECTOR (XLENM1 downto 0); + I_execute : in STD_LOGIC; + I_dataWe : in STD_LOGIC; + I_address : in STD_LOGIC_VECTOR (XLENM1 downto 0); + I_data : in STD_LOGIC_VECTOR (XLENM1 downto 0); I_dataByteEn : in STD_LOGIC_VECTOR(1 downto 0); I_signExtend : in STD_LOGIC; - O_data : out STD_LOGIC_VECTOR (XLENM1 downto 0); - O_dataReady: out STD_LOGIC; - - MEM_I_ready: in STD_LOGIC; - MEM_O_cmd: out STD_LOGIC; - MEM_O_we : out STD_LOGIC; + O_data : out STD_LOGIC_VECTOR (XLENM1 downto 0); + O_dataReady : out STD_LOGIC; + + MEM_I_ready : in STD_LOGIC; + MEM_O_cmd : out STD_LOGIC; + MEM_O_we : out STD_LOGIC; MEM_O_byteEnable : out STD_LOGIC_VECTOR (1 downto 0); - MEM_O_addr : out STD_LOGIC_VECTOR (XLENM1 downto 0); - MEM_O_data : out STD_LOGIC_VECTOR (XLENM1 downto 0); - MEM_I_data : in STD_LOGIC_VECTOR (XLENM1 downto 0); + MEM_O_addr : out STD_LOGIC_VECTOR (XLENM1 downto 0); + MEM_O_data : out STD_LOGIC_VECTOR (XLENM1 downto 0); + MEM_I_data : in STD_LOGIC_VECTOR (XLENM1 downto 0); MEM_I_dataReady : in STD_LOGIC ); end mem_controller; architecture Behavioral of mem_controller is - signal we : std_logic := '0'; - signal addr : STD_LOGIC_VECTOR (XLENM1 downto 0) := X"00000000"; - signal indata: STD_LOGIC_VECTOR (XLENM1 downto 0) := X"00000000"; - signal outdata: STD_LOGIC_VECTOR (XLENM1 downto 0) := X"00000000"; - - signal byteEnable: STD_LOGIC_VECTOR ( 1 downto 0) := "11"; - signal cmd : STD_LOGIC := '0'; - signal state: integer := 0; - - signal ready: STD_LOGIC := '0'; - + signal we : 
std_logic := '0'; + signal addr : STD_LOGIC_VECTOR (XLENM1 downto 0) := X"00000000"; + signal indata : STD_LOGIC_VECTOR (XLENM1 downto 0) := X"00000000"; + signal outdata : STD_LOGIC_VECTOR (XLENM1 downto 0) := X"00000000"; + + signal byteEnable : STD_LOGIC_VECTOR (1 downto 0) := "11"; + signal cmd : STD_LOGIC := '0'; + signal state : integer := 0; + + signal ready : STD_LOGIC := '0'; + begin - process (I_clk, I_execute) - begin - if rising_edge(I_clk) then - if I_reset = '1' then - we <= '0'; - cmd <= '0'; - state <= 0; + process (I_clk, I_execute) + begin + if rising_edge(I_clk) then + if I_reset = '1' then + we <= '0'; + cmd <= '0'; + state <= 0; O_dataReady <= '0'; - elsif state = 0 and I_execute = '1' and MEM_I_ready = '1' then - we <= I_dataWe; - addr <= I_address; - indata <= I_data; - byteEnable <= I_dataByteEn; - cmd <= '1'; - O_dataReady <= '0'; - outdata <= X"ABCDEFEE"; - if I_dataWe = '0' then - state <= 3;-- read - else - state <= 2;-- write - end if; - elsif state = 3 then - cmd <= '0'; - state <= 1; - elsif state = 1 then - cmd <= '0'; - if MEM_I_dataReady = '1' then - O_dataReady <= '1'; - -- sign extend, if required - if I_signExtend = '1' then - if I_dataByteEn = F2_MEM_LS_SIZE_W then - outdata <= MEM_I_data; - elsif I_dataByteEn = F2_MEM_LS_SIZE_H then - outdata <= std_logic_vector(resize(signed(MEM_I_data(15 downto 0)), XLEN)); - elsif I_dataByteEn = F2_MEM_LS_SIZE_B then - outdata <= std_logic_vector(resize(signed(MEM_I_data(7 downto 0)), XLEN)); - end if; - else - outdata <= MEM_I_data; - end if; - state <= 2; - end if; - elsif state = 2 then - cmd <= '0'; - state <= 0; - O_dataReady <= '0'; - end if; - end if; - end process; - - O_data <= outdata; - O_ready <= ( MEM_I_ready and not I_execute ) when state = 0 else '0'; - - MEM_O_cmd <= cmd; - MEM_O_byteEnable <= byteEnable; - MEM_O_data <= indata; - MEM_O_addr <= addr; - MEM_O_we <= we; + elsif state = 0 and I_execute = '1' and MEM_I_ready = '1' then + we <= I_dataWe; + addr <= I_address; + 
indata <= I_data; + byteEnable <= I_dataByteEn; + cmd <= '1'; + O_dataReady <= '0'; + outdata <= X"ABCDEFEE"; + if I_dataWe = '0' then + state <= 1;-- read + else + state <= 2;-- write + end if; + elsif state = 1 then + cmd <= '0'; + if MEM_I_dataReady = '1' then + O_dataReady <= '1'; + -- sign extend, if required + if I_signExtend = '1' then + if I_dataByteEn = F2_MEM_LS_SIZE_W then + outdata <= MEM_I_data; + elsif I_dataByteEn = F2_MEM_LS_SIZE_H then + outdata <= std_logic_vector(resize(signed(MEM_I_data(15 downto 0)), XLEN)); + elsif I_dataByteEn = F2_MEM_LS_SIZE_B then + outdata <= std_logic_vector(resize(signed(MEM_I_data(7 downto 0)), XLEN)); + end if; + else + outdata <= MEM_I_data; + end if; + state <= 2; + end if; + elsif state = 2 then + cmd <= '0'; + state <= 0; + O_dataReady <= '0'; + end if; + end if; + end process; -end Behavioral; + O_data <= outdata; + O_ready <= (MEM_I_ready and not I_execute) when state = 0 else '0'; + MEM_O_cmd <= cmd; + MEM_O_byteEnable <= byteEnable; + MEM_O_data <= indata; + MEM_O_addr <= addr; + MEM_O_we <= we; + +end Behavioral; \ No newline at end of file diff --git a/vhdl/unit_alu_RV32_I.vhd b/vhdl/unit_alu_RV32_I.vhd index 82070c3..3900192 100644 --- a/vhdl/unit_alu_RV32_I.vhd +++ b/vhdl/unit_alu_RV32_I.vhd @@ -18,226 +18,346 @@ -- limitations under the License. 
---------------------------------------------------------------------------------- library IEEE; -use IEEE.STD_LOGIC_1164.ALL; +use IEEE.STD_LOGIC_1164.all; -use IEEE.NUMERIC_STD.ALL; +use IEEE.NUMERIC_STD.all; library work; use work.constants.all; entity alu_RV32I is - Port ( - I_clk : in STD_LOGIC; - I_en : in STD_LOGIC; - I_dataA : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); - I_dataB : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + port ( + I_clk : in STD_LOGIC; + I_en : in STD_LOGIC; + I_dataA : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_dataB : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); I_dataDwe : in STD_LOGIC; - I_aluop : in STD_LOGIC_VECTOR (4 downto 0); + I_aluop : in STD_LOGIC_VECTOR (4 downto 0); I_aluFunc : in STD_LOGIC_VECTOR (15 downto 0); I_PC : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); I_epc : in STD_LOGIC_VECTOR (XLENM1 downto 0); - I_dataIMM : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); - O_dataResult : out STD_LOGIC_VECTOR (XLEN32M1 downto 0); - O_branchTarget : out STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_dataIMM : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_clear : in STD_LOGIC; + O_dataResult : out STD_LOGIC_VECTOR (XLEN32M1 downto 0); + O_branchTarget : out STD_LOGIC_VECTOR (XLEN32M1 downto 0); O_dataWriteReg : out STD_LOGIC; - O_lastPC: out STD_LOGIC_VECTOR(XLEN32M1 downto 0); - O_shouldBranch : out std_logic + O_lastPC : out STD_LOGIC_VECTOR(XLEN32M1 downto 0); + O_shouldBranch : out std_logic; + O_wait : out std_logic ); end alu_RV32I; architecture Behavioral of alu_RV32I is -- The internal register for results of operations. 
-- 32 bit + carry/overflow - + signal s_aluFunc : STD_LOGIC_VECTOR (15 downto 0) := (others => '0'); signal s_branchTarget : STD_LOGIC_VECTOR (XLEN32M1 downto 0) := (others => '0'); - signal s_result: STD_LOGIC_VECTOR(XLEN32M1+2 downto 0) := (others => '0'); - signal s_shouldBranch: STD_LOGIC := '0'; - signal s_lastPC: STD_LOGIC_VECTOR(XLEN32M1 downto 0) := (others => '0'); -begin - process (I_clk, I_en) - begin - if rising_edge(I_clk) and I_en = '1' then - s_lastPC <= I_PC; - O_dataWriteReg <= I_dataDwe; - case I_aluop is - when OPCODE_OPIMM => - s_shouldBranch <= '0'; - case I_aluFunc(2 downto 0) is - when F3_OPIMM_ADDI => - s_result(31 downto 0) <= std_logic_vector(signed( I_dataA) + signed( I_dataIMM)); - - when F3_OPIMM_XORI => - s_result(31 downto 0) <= I_dataA xor I_dataIMM; - - when F3_OPIMM_ORI => - s_result(31 downto 0) <= I_dataA or I_dataIMM; - - when F3_OPIMM_ANDI => - s_result(31 downto 0) <= I_dataA and I_dataIMM; - - when F3_OPIMM_SLTI => - if signed(I_dataA) < signed(I_dataIMM) then - s_result(31 downto 0) <= X"00000001"; - else - s_result(31 downto 0) <= X"00000000"; - end if; - - when F3_OPIMM_SLTIU => - if unsigned(I_dataA) < unsigned(I_dataIMM) then - s_result(31 downto 0) <= X"00000001"; - else - s_result(31 downto 0) <= X"00000000"; - end if; - - when F3_OPIMM_SLLI => - s_result(31 downto 0) <= std_logic_vector(shift_left(unsigned(I_dataA), to_integer(unsigned(I_dataIMM(4 downto 0))))); - - when F3_OPIMM_SRLI => - case I_aluFunc(9 downto 3) is - when F7_OPIMM_SRLI => - s_result(31 downto 0) <= std_logic_vector(shift_right(unsigned(I_dataA), to_integer(unsigned(I_dataIMM(4 downto 0))))); - when F7_OPIMM_SRAI => - s_result(31 downto 0) <= std_logic_vector(shift_right(signed(I_dataA), to_integer(unsigned(I_dataIMM(4 downto 0))))); - when others=> - end case; - when others => - end case; - - when OPCODE_OP => - case I_aluFunc(9 downto 0) is - when F7_OP_ADD & F3_OP_ADD => - s_result(31 downto 0) <= std_logic_vector(signed( I_dataA) + signed( 
I_dataB)); - - when F7_OP_SUB & F3_OP_SUB => - s_result(31 downto 0) <= std_logic_vector(signed( I_dataA) - signed( I_dataB)); - - when F7_OP_SLT & F3_OP_SLT => - if signed(I_dataA) < signed(I_dataB) then - s_result(31 downto 0) <= X"00000001"; - else - s_result(31 downto 0) <= X"00000000"; - end if; - - when F7_OP_SLTU & F3_OP_SLTU => - if unsigned(I_dataA) < unsigned(I_dataB) then - s_result(31 downto 0) <= X"00000001"; - else - s_result(31 downto 0) <= X"00000000"; - end if; - - when F7_OP_XOR & F3_OP_XOR => - s_result(31 downto 0) <= I_dataA xor I_dataB; - - when F7_OP_OR & F3_OP_OR => - s_result(31 downto 0) <= I_dataA or I_dataB; - - when F7_OP_AND & F3_OP_AND => - s_result(31 downto 0) <= I_dataA and I_dataB; - - when F7_OP_SLL & F3_OP_SLL => - s_result(31 downto 0) <= std_logic_vector(shift_left(unsigned(I_dataA), to_integer(unsigned(I_dataB(4 downto 0))))); - - when F7_OP_SRL & F3_OP_SRL => - s_result(31 downto 0) <= std_logic_vector(shift_right(unsigned(I_dataA), to_integer(unsigned(I_dataB(4 downto 0))))); - - when F7_OP_SRA & F3_OP_SRA => - s_result(31 downto 0) <= std_logic_vector(shift_right(signed(I_dataA), to_integer(unsigned(I_dataB(4 downto 0))))); - - when others=> - s_result <= "00" & X"CDC1FEF1"; - end case; - - s_shouldBranch <= '0'; - - when OPCODE_LOAD | OPCODE_STORE => - s_shouldBranch <= '0'; - s_result(31 downto 0) <= std_logic_vector(signed( I_dataA) + signed( I_dataIMM)); - - when OPCODE_JALR => - s_branchTarget <= std_logic_vector(signed( I_dataA) + signed( I_dataIMM)); - s_shouldBranch <= '1'; - s_result(31 downto 0) <= std_logic_vector(signed( I_PC) + 4); - - when OPCODE_JAL => - s_branchTarget <= std_logic_vector(signed( I_PC) + signed( I_dataIMM)); - s_shouldBranch <= '1'; - s_result(31 downto 0) <= std_logic_vector(signed( I_PC) + 4); - - when OPCODE_SYSTEM => - if I_aluFunc(9 downto 0) = F7_PRIVOP_MRET&F3_PRIVOP then - s_branchTarget <= I_epc; - s_shouldBranch <= '1'; - s_result(31 downto 0) <= std_logic_vector(signed( I_PC) + 
4); - elsif I_aluFunc(2 downto 0) /= F3_PRIVOP then - -- do not branch on CSR unit work - s_shouldBranch <= '0'; - end if; - when OPCODE_LUI => - s_shouldBranch <= '0'; - s_result(31 downto 0) <= I_dataIMM; - - when OPCODE_AUIPC => - s_shouldBranch <= '0'; - s_result(31 downto 0) <= std_logic_vector( signed( I_PC) + signed( I_dataIMM)); - - when OPCODE_BRANCH => - s_branchTarget <= std_logic_vector(signed( I_PC) + signed( I_dataIMM)); - case I_aluFunc(2 downto 0) is - when F3_BRANCH_BEQ => - if I_dataA = I_dataB then - s_shouldBranch <= '1'; - else - s_shouldBranch <= '0'; - end if; - - when F3_BRANCH_BNE => - if I_dataA /= I_dataB then - s_shouldBranch <= '1'; - else - s_shouldBranch <= '0'; - end if; - - when F3_BRANCH_BLT => - if signed(I_dataA) < signed(I_dataB) then - s_shouldBranch <= '1'; - else - s_shouldBranch <= '0'; - end if; - - when F3_BRANCH_BGE => - if signed(I_dataA) >= signed(I_dataB) then - s_shouldBranch <= '1'; - else - s_shouldBranch <= '0'; - end if; - - when F3_BRANCH_BLTU => - if unsigned(I_dataA) < unsigned(I_dataB) then - s_shouldBranch <= '1'; - else - s_shouldBranch <= '0'; - end if; - - when F3_BRANCH_BGEU => - if unsigned(I_dataA) >= unsigned(I_dataB) then - s_shouldBranch <= '1'; - else - s_shouldBranch <= '0'; - end if; - - when others => - end case; - - when others => - s_result <= "00" & X"CDCDFEFE"; - end case; - end if; - end process; - - O_dataResult <= s_result(XLEN32M1 downto 0); - O_shouldBranch <= s_shouldBranch; - O_branchTarget <= s_branchTarget; - O_lastPC <= s_lastPC; - + signal s_result : STD_LOGIC_VECTOR(63 downto 0) := (others => '0'); + signal s_resultms : STD_LOGIC_VECTOR(63 downto 0) := (others => '0'); + signal s_resultmu : STD_LOGIC_VECTOR(63 downto 0) := (others => '0'); + signal s_resultmsu : STD_LOGIC_VECTOR(65 downto 0) := (others => '0'); -- result has 66 bits to accommodate mulhsu with its additional-bit-fakery + signal s_shouldBranch : STD_LOGIC := '0'; + signal s_lastPC : STD_LOGIC_VECTOR(XLEN32M1 downto
0) := (others => '0'); + signal s_wait : std_logic := '0'; + component alu_int32_div is + port ( + I_clk : in STD_LOGIC; + I_exec : in STD_LOGIC; + I_dividend : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_divisor : in STD_LOGIC_VECTOR (XLEN32M1 downto 0); + I_op : in STD_LOGIC_VECTOR (1 downto 0); + O_dataResult : out STD_LOGIC_VECTOR (XLEN32M1 downto 0); + O_done : out STD_LOGIC; + O_int : out std_logic + ); + end component; + signal s_div_exec : std_logic := '0'; + signal s_div_dividend : std_logic_vector(31 downto 0) := (others => '0'); + signal s_div_divisor : std_logic_vector(31 downto 0) := (others => '0'); + signal s_div_op : std_logic_vector(1 downto 0) := (others => '0'); + signal s_div_dataResult : std_logic_vector(31 downto 0) := (others => '0'); + signal s_div_done : std_logic := '0'; + signal s_div_int : std_logic := '0'; + constant DIVUNIT_STATE_IDLE : integer := 0; + constant DIVUNIT_STATE_INFLIGHT : integer := 1; + constant DIVUNIT_STATE_COMPLETE : integer := 2; + + constant MUL_STATE_IDLE : integer := 0; + constant MUL_STATE_COMPLETE : integer := 2; + + signal s_mul_state : integer := 0; + signal s_divunit_state : integer := 0; + +begin + div_rem_unit : alu_int32_div port map( + I_clk => I_clk, + I_exec => s_div_exec, + I_dividend => s_div_dividend, + I_divisor => s_div_divisor, + I_op => s_div_op, + O_dataResult => s_div_dataResult, + O_done => s_div_done, + O_int => s_div_int + ); + + s_div_dividend <= I_dataA; + s_div_divisor <= I_dataB; + + process (I_clk, I_en) + begin + if rising_edge(I_clk) then + if I_clear = '1' and I_en = '0' then + s_branchTarget <= X"00000000"; + s_result <= X"0000000000000000"; + + elsif I_en = '1' then + s_lastPC <= I_PC; + O_dataWriteReg <= I_dataDwe; + s_aluFunc <= I_aluFunc; + case I_aluop is + when OPCODE_OPIMM => + s_wait <= '0'; + s_shouldBranch <= '0'; + case I_aluFunc(2 downto 0) is + when F3_OPIMM_ADDI => + s_result(31 downto 0) <= std_logic_vector(signed(I_dataA) + signed(I_dataIMM)); + + when F3_OPIMM_XORI 
=> + s_result(31 downto 0) <= I_dataA xor I_dataIMM; + + when F3_OPIMM_ORI => + s_result(31 downto 0) <= I_dataA or I_dataIMM; + + when F3_OPIMM_ANDI => + s_result(31 downto 0) <= I_dataA and I_dataIMM; + + when F3_OPIMM_SLTI => + if signed(I_dataA) < signed(I_dataIMM) then + s_result(31 downto 0) <= X"00000001"; + else + s_result(31 downto 0) <= X"00000000"; + end if; + + when F3_OPIMM_SLTIU => + if unsigned(I_dataA) < unsigned(I_dataIMM) then + s_result(31 downto 0) <= X"00000001"; + else + s_result(31 downto 0) <= X"00000000"; + end if; + + when F3_OPIMM_SLLI => + s_result(31 downto 0) <= std_logic_vector(shift_left(unsigned(I_dataA), to_integer(unsigned(I_dataIMM(4 downto 0))))); + + when F3_OPIMM_SRLI => + case I_aluFunc(9 downto 3) is + when F7_OPIMM_SRLI => + s_result(31 downto 0) <= std_logic_vector(shift_right(unsigned(I_dataA), to_integer(unsigned(I_dataIMM(4 downto 0))))); + when F7_OPIMM_SRAI => + s_result(31 downto 0) <= std_logic_vector(shift_right(signed(I_dataA), to_integer(unsigned(I_dataIMM(4 downto 0))))); + when others => + end case; + when others => + end case; + + when OPCODE_OP => + + if I_aluFunc(9 downto 3) = F7_OP_M_EXT then + + if I_aluFunc(2) = '0' then -- mul ops + if s_mul_state = MUL_STATE_IDLE then + s_resultms(63 downto 0) <= std_logic_vector(signed(I_dataA) * signed(I_dataB)); + s_resultmu(63 downto 0) <= std_logic_vector(unsigned(I_dataA) * unsigned(I_dataB)); + s_resultmsu(65 downto 0) <= std_logic_vector(signed(I_dataA(31) & I_dataA) * signed('0' & I_dataB)); + + s_wait <= '0'; -- there is _always_ a 1 cycle additional wait for a multicycle alu, so immediately flag complete + s_mul_state <= MUL_STATE_COMPLETE; + + elsif s_mul_state = MUL_STATE_COMPLETE then + + if I_aluFunc(2 downto 0) = F3_OP_M_MUL then + s_result(31 downto 0) <= s_resultms(31 downto 0); + + elsif I_aluFunc(2 downto 0) = F3_OP_M_MULH then + s_result(31 downto 0) <= s_resultms(63 downto 32); + + elsif I_aluFunc(2 downto 0) = F3_OP_M_MULHU then + s_result(31 
downto 0) <= s_resultmu(63 downto 32); + + elsif I_aluFunc(2 downto 0) = F3_OP_M_MULHSU then + s_result(31 downto 0) <= s_resultmsu(63 downto 32); + end if; + + s_mul_state <= MUL_STATE_IDLE; + + end if; + else + -- div & rem + if s_divunit_state = DIVUNIT_STATE_IDLE then + s_div_exec <= '1'; + s_div_op <= I_aluFunc(1 downto 0); + s_divunit_state <= DIVUNIT_STATE_INFLIGHT; + + s_wait <= '1'; -- stall the cpu until done + elsif s_divunit_state = DIVUNIT_STATE_INFLIGHT then + s_div_exec <= '0'; + + if s_div_done = '1' then + s_divunit_state <= DIVUNIT_STATE_COMPLETE; + s_wait <= '0'; + end if; + elsif s_divunit_state = DIVUNIT_STATE_COMPLETE then + + s_divunit_state <= DIVUNIT_STATE_IDLE; + s_result(31 downto 0) <= s_div_dataResult; + end if; + + end if; + else + s_wait <= '0'; + case I_aluFunc(9 downto 0) is + when F7_OP_ADD & F3_OP_ADD => + s_result(31 downto 0) <= std_logic_vector(signed(I_dataA) + signed(I_dataB)); + + when F7_OP_SUB & F3_OP_SUB => + s_result(31 downto 0) <= std_logic_vector(signed(I_dataA) - signed(I_dataB)); + + when F7_OP_SLT & F3_OP_SLT => + if signed(I_dataA) < signed(I_dataB) then + s_result(31 downto 0) <= X"00000001"; + else + s_result(31 downto 0) <= X"00000000"; + end if; + + when F7_OP_SLTU & F3_OP_SLTU => + if unsigned(I_dataA) < unsigned(I_dataB) then + s_result(31 downto 0) <= X"00000001"; + else + s_result(31 downto 0) <= X"00000000"; + end if; + + when F7_OP_XOR & F3_OP_XOR => + s_result(31 downto 0) <= I_dataA xor I_dataB; + + when F7_OP_OR & F3_OP_OR => + s_result(31 downto 0) <= I_dataA or I_dataB; + + when F7_OP_AND & F3_OP_AND => + s_result(31 downto 0) <= I_dataA and I_dataB; + + when F7_OP_SLL & F3_OP_SLL => + s_result(31 downto 0) <= std_logic_vector(shift_left(unsigned(I_dataA), to_integer(unsigned(I_dataB(4 downto 0))))); + + when F7_OP_SRL & F3_OP_SRL => + s_result(31 downto 0) <= std_logic_vector(shift_right(unsigned(I_dataA), to_integer(unsigned(I_dataB(4 downto 0))))); + + when F7_OP_SRA & F3_OP_SRA => + s_result(31 
downto 0) <= std_logic_vector(shift_right(signed(I_dataA), to_integer(unsigned(I_dataB(4 downto 0))))); + + when others => + s_result <= X"00000000" & X"CDC1FEF1"; + end case; + end if; + s_shouldBranch <= '0'; + + when OPCODE_LOAD | OPCODE_STORE => + s_wait <= '0'; + s_shouldBranch <= '0'; + s_result(31 downto 0) <= std_logic_vector(signed(I_dataA) + signed(I_dataIMM)); + + when OPCODE_JALR => + s_wait <= '0'; + s_branchTarget <= std_logic_vector(signed(I_dataA) + signed(I_dataIMM)) and X"FFFFFFFE"; -- jalr clears the lowest bit + s_shouldBranch <= '1'; + s_result(31 downto 0) <= std_logic_vector(signed(I_PC) + 4); + + when OPCODE_JAL => + s_wait <= '0'; + s_branchTarget <= std_logic_vector(signed(I_PC) + signed(I_dataIMM)); + s_shouldBranch <= '1'; + s_result(31 downto 0) <= std_logic_vector(signed(I_PC) + 4); + + when OPCODE_SYSTEM => + s_wait <= '0'; + if I_aluFunc(9 downto 0) = F7_PRIVOP_MRET & F3_PRIVOP then + s_branchTarget <= I_epc; + s_shouldBranch <= '1'; + s_result(31 downto 0) <= std_logic_vector(signed(I_PC) + 4); + elsif I_aluFunc(2 downto 0) /= F3_PRIVOP then + -- do not branch on CSR unit work + s_shouldBranch <= '0'; + end if; + when OPCODE_LUI => + s_wait <= '0'; + s_shouldBranch <= '0'; + s_result(31 downto 0) <= I_dataIMM; + + when OPCODE_AUIPC => + s_wait <= '0'; + s_shouldBranch <= '0'; + s_result(31 downto 0) <= std_logic_vector(signed(I_PC) + signed(I_dataIMM)); + + when OPCODE_BRANCH => + s_wait <= '0'; + s_branchTarget <= std_logic_vector(signed(I_PC) + signed(I_dataIMM)); + case I_aluFunc(2 downto 0) is + when F3_BRANCH_BEQ => + if I_dataA = I_dataB then + s_shouldBranch <= '1'; + else + s_shouldBranch <= '0'; + end if; + + when F3_BRANCH_BNE => + if I_dataA /= I_dataB then + s_shouldBranch <= '1'; + else + s_shouldBranch <= '0'; + end if; + + when F3_BRANCH_BLT => + if signed(I_dataA) < signed(I_dataB) then + s_shouldBranch <= '1'; + else + s_shouldBranch <= '0'; + end if; + + when F3_BRANCH_BGE => + if signed(I_dataA) >= signed(I_dataB) 
then + s_shouldBranch <= '1'; + else + s_shouldBranch <= '0'; + end if; + + when F3_BRANCH_BLTU => + if unsigned(I_dataA) < unsigned(I_dataB) then + s_shouldBranch <= '1'; + else + s_shouldBranch <= '0'; + end if; + + when F3_BRANCH_BGEU => + if unsigned(I_dataA) >= unsigned(I_dataB) then + s_shouldBranch <= '1'; + else + s_shouldBranch <= '0'; + end if; + + when others => + end case; + + when others => + s_result <= X"00000000" & X"CDCDFEFE"; + end case; + end if; + end if; + end process; + + O_wait <= s_wait; + + O_dataResult <= s_result(XLEN32M1 downto 0); + O_shouldBranch <= s_shouldBranch; + O_branchTarget <= s_branchTarget; + O_lastPC <= s_lastPC; + end Behavioral; \ No newline at end of file diff --git a/vhdl/unit_decoder_RV32I.vhd b/vhdl/unit_decoder_RV32I.vhd index 823a5bf..bdf1297 100644 --- a/vhdl/unit_decoder_RV32I.vhd +++ b/vhdl/unit_decoder_RV32I.vhd @@ -18,41 +18,44 @@ -- limitations under the License. ---------------------------------------------------------------------------------- library IEEE; -use IEEE.STD_LOGIC_1164.ALL; -use IEEE.NUMERIC_STD.ALL; +use IEEE.STD_LOGIC_1164.all; +use IEEE.NUMERIC_STD.all; library work; use work.constants.all; entity decoder_RV32 is - Port ( - I_clk : in STD_LOGIC; - I_en : in STD_LOGIC; - I_dataInst : in STD_LOGIC_VECTOR (31 downto 0); -- Instruction to be decoded - O_selRS1 : out STD_LOGIC_VECTOR (4 downto 0); -- Selection out for regrs1 - O_selRS2 : out STD_LOGIC_VECTOR (4 downto 0); -- Selection out for regrs2 - O_selD : out STD_LOGIC_VECTOR (4 downto 0); -- Selection out for regD - O_dataIMM : out STD_LOGIC_VECTOR (31 downto 0); -- Immediate value out - O_regDwe : out STD_LOGIC; -- RegD wrtite enable - O_aluOp : out STD_LOGIC_VECTOR (6 downto 0); -- ALU opcode - O_aluFunc : out STD_LOGIC_VECTOR (15 downto 0); -- ALU function - O_memOp : out STD_LOGIC_VECTOR(4 downto 0); -- Memory operation - O_csrOP : out STD_LOGIC_VECTOR(4 downto 0); -- CSR operations - O_csrAddr : out STD_LOGIC_VECTOR(11 downto 0); -- CSR 
address - O_trapExit: out STD_LOGIC; -- request to exit trap handler - O_int : out STD_LOGIC; -- is there a trap? - O_int_data : out STD_LOGIC_VECTOR (31 downto 0); -- trap descriptor - I_int_ack: in STD_LOGIC -- our int is now being serviced + port ( + I_clk : in STD_LOGIC; + I_en : in STD_LOGIC; + I_dataInst : in STD_LOGIC_VECTOR (31 downto 0); -- Instruction to be decoded + O_selRS1 : out STD_LOGIC_VECTOR (4 downto 0); -- Selection out for regrs1 + O_selRS2 : out STD_LOGIC_VECTOR (4 downto 0); -- Selection out for regrs2 + O_selD : out STD_LOGIC_VECTOR (4 downto 0); -- Selection out for regD + O_dataIMM : out STD_LOGIC_VECTOR (31 downto 0); -- Immediate value out + O_regDwe : out STD_LOGIC; -- RegD write enable + O_aluOp : out STD_LOGIC_VECTOR (6 downto 0); -- ALU opcode + O_aluFunc : out STD_LOGIC_VECTOR (15 downto 0); -- ALU function + O_memOp : out STD_LOGIC_VECTOR(4 downto 0); -- Memory operation + O_csrOP : out STD_LOGIC_VECTOR(4 downto 0); -- CSR operations + O_csrAddr : out STD_LOGIC_VECTOR(11 downto 0); -- CSR address + O_trapExit : out STD_LOGIC; -- request to exit trap handler + O_multycyAlu : out STD_LOGIC; -- is this a multi-cycle alu op? + O_int : out STD_LOGIC; -- is there a trap?
+ O_int_data : out STD_LOGIC_VECTOR (31 downto 0);-- trap descriptor + I_int_ack : in STD_LOGIC -- our int is now being serviced ); end decoder_RV32; architecture Behavioral of decoder_RV32 is - signal s_trapExit: STD_LOGIC := '0'; - signal s_csrOP : STD_LOGIC_VECTOR(4 downto 0) := (others=> '0'); - signal s_csrAddr : STD_LOGIC_VECTOR(11 downto 0) := (others=> '0'); + signal s_trapExit : STD_LOGIC := '0'; + signal s_csrOP : STD_LOGIC_VECTOR(4 downto 0) := (others => '0'); + signal s_csrAddr : STD_LOGIC_VECTOR(11 downto 0) := (others => '0'); signal s_int : STD_LOGIC := '0'; - signal s_intdata: STD_LOGIC_VECTOR(31 downto 0) := (others=> '0'); + signal s_intdata : STD_LOGIC_VECTOR(31 downto 0) := (others => '0'); + signal s_multicy : std_logic := '0'; begin + O_multycyAlu <= s_multicy; O_int <= s_int; O_int_data <= s_intdata; O_csrOP <= s_csrOP; @@ -60,222 +63,240 @@ begin O_trapExit <= s_trapExit; -- Register selects for reads are async - O_selRS1 <= I_dataInst(R1_START downto R1_END); - O_selRS2 <= I_dataInst(R2_START downto R2_END); - - process (I_clk, I_en) - begin + O_selRS1 <= I_dataInst(R1_START downto R1_END); + O_selRS2 <= I_dataInst(R2_START downto R2_END); - if rising_edge(I_clk) then - if I_en = '1' then - - O_selD <= I_dataInst(RD_START downto RD_END); - - O_aluOp <= I_dataInst(OPCODE_START downto OPCODE_END); - - O_aluFunc <= "000000" & I_dataInst(FUNCT7_START downto FUNCT7_END) - & I_dataInst(FUNCT3_START downto FUNCT3_END); + process (I_clk, I_en) + begin - case I_dataInst(OPCODE_START downto OPCODE_END_2) is - when OPCODE_LUI => - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '0'; - O_regDwe <= '1'; - O_memOp <= "00000"; - O_dataIMM <= I_dataInst(IMM_U_START downto IMM_U_END) - & "000000000000"; - when OPCODE_AUIPC => - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '0'; - O_regDwe <= '1'; - O_memOp <= "00000"; - O_dataIMM <= I_dataInst(IMM_U_START downto IMM_U_END) - & "000000000000"; - when OPCODE_JAL => - s_trapExit <= '0'; - s_csrOP <= 
"00000"; - s_int <= '0'; - if I_dataInst(RD_START downto RD_END) = "00000" then - O_regDwe <= '0'; - else - O_regDwe <= '1'; - end if; - O_memOp <= "00000"; - if I_dataInst(IMM_U_START) = '1' then - O_dataIMM <= "111111111111" & I_dataInst(19 downto 12) & I_dataInst(20) & I_dataInst(30 downto 21) & '0'; - else - O_dataIMM <= "000000000000" & I_dataInst(19 downto 12) & I_dataInst(20) & I_dataInst(30 downto 21) & '0'; - end if; - when OPCODE_JALR => - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '0'; - if I_dataInst(RD_START downto RD_END) = "00000" then - O_regDwe <= '0'; - else - O_regDwe <= '1'; - end if; - O_memOp <= "00000"; - if I_dataInst(IMM_U_START) = '1' then - O_dataIMM <= X"FFFF" & "1111" & I_dataInst(IMM_I_START downto IMM_I_END); - else - O_dataIMM <= X"0000" & "0000" & I_dataInst(IMM_I_START downto IMM_I_END); - end if; - when OPCODE_OPIMM => - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '0'; - O_regDwe <= '1'; - O_memOp <= "00000"; - if I_dataInst(IMM_U_START) = '1' then - O_dataIMM <= X"FFFF" & "1111" & I_dataInst(IMM_I_START downto IMM_I_END); - else - O_dataIMM <= X"0000" & "0000" & I_dataInst(IMM_I_START downto IMM_I_END); - end if; - - when OPCODE_OP => - if I_dataInst(FUNCT7_START downto FUNCT7_END) = "0000001" then - -- RV M EXTENSION - NOT SUPPORTED! - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '1'; - s_intdata <= EXCEPTION_INSTRUCTION_ILLEGAL; - O_regDwe <= '0'; - O_memOp <= "00000"; - else - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '0'; - O_regDwe <= '1'; - O_memOp <= "00000"; - end if; - when OPCODE_LOAD => - -- Load's opcode is all 0s - but the first two bits of the word should be '11' - -- we check this here, because if we do not, null instructions will be treated as loads... 
- if I_dataInst(1 downto 0) = "11" then - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '0'; - O_regDwe <= '1'; - O_memOp <= "10" & I_dataInst(FUNCT3_START downto FUNCT3_END); - if I_dataInst(IMM_U_START) = '1' then - O_dataIMM <= X"FFFF" & "1111" & I_dataInst(IMM_I_START downto IMM_I_END); - else - O_dataIMM <= X"0000" & "0000" & I_dataInst(IMM_I_START downto IMM_I_END); - end if; - else - -- likely a null instruction - fault! - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '1'; --------------- - s_intdata <= EXCEPTION_INSTRUCTION_ILLEGAL; - O_memOp <= "00000"; - O_regDwe <= '0'; - O_dataIMM <= I_dataInst(IMM_I_START downto IMM_S_B_END) - & "0000000"; - end if; - when OPCODE_STORE => - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '0'; - O_regDwe <= '0'; - O_memOp <= "11" & I_dataInst(FUNCT3_START downto FUNCT3_END); - if I_dataInst(IMM_U_START) = '1' then - O_dataIMM <= X"FFFF" & "1111" & I_dataInst(IMM_S_A_START downto IMM_S_A_END) & I_dataInst(IMM_S_B_START downto IMM_S_B_END); - else - O_dataIMM <= X"0000" & "0000" & I_dataInst(IMM_S_A_START downto IMM_S_A_END) & I_dataInst(IMM_S_B_START downto IMM_S_B_END); - end if; - when OPCODE_BRANCH => - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '0'; - O_regDwe <= '0'; - O_memOp <= "00000"; - if I_dataInst(IMM_U_START) = '1' then - O_dataIMM <= X"FFFF" & "1111" & I_dataInst(7) & I_dataInst(30 downto 25) & I_dataInst(11 downto 8) & '0'; - else - O_dataIMM <= X"0000" & "0000" & I_dataInst(7) & I_dataInst(30 downto 25) & I_dataInst(11 downto 8) & '0'; - end if; - when OPCODE_MISCMEM => - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '0'; - O_regDwe <= '0'; - O_memOp <= "01000"; - O_dataIMM <= I_dataInst; - when OPCODE_SYSTEM => - O_memOp <= "00000"; - if I_dataInst(FUNCT3_START downto FUNCT3_END) = F3_PRIVOP then - -- ECALL or EBREAK - case I_dataInst(IMM_I_START downto IMM_I_END) is - when IMM_I_SYSTEM_ECALL => - -- raise trap, save pc, perform requiredCSR operations - s_trapExit <= '0'; - 
s_csrOP <= "00000"; - s_int <= '1'; - O_regDwe <= '0'; - s_intdata <= EXCEPTION_ENVIRONMENT_CALL_FROM_MMODE; - --todo: Priv level needs checked as to mask this to user/supervisor/machine level - when IMM_I_SYSTEM_EBREAK => - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '1'; - s_intdata <= EXCEPTION_BREAKPOINT; - O_regDwe <= '0'; - when F7_PRIVOP_MRET & R2_PRIV_RET => - s_trapExit <= '1'; - s_csrOP <= "00000"; - s_int <= '0'; - O_regDwe <= '0'; - -- return from interrupt. implement as a branch - alu will branch to epc. - when others => - end case; - else - s_trapExit <= '0'; - s_int <= '0'; - -- CSR - -- The immediate output is the zero-extended R1 value for Imm-form CSR ops - O_dataIMM <= X"000000" & "000" & I_dataInst(R1_START downto R1_END); - - -- The 12bit immediate in the instruction forms the csr address. - s_csrAddr <= I_dataInst(IMM_I_START downto IMM_I_END); - - -- is there a destination? if not, CSR is not read - if I_dataInst(RD_START downto RD_END) = "00000" then - s_csrOP(0) <= '0'; - O_regDwe <= '0'; - else - O_regDwe <= '1'; - s_csrOP(0) <= '1'; - end if; - - -- is there source data? 
if not, CSR value is not written - if I_dataInst(R1_START downto R1_END) = "00000" then - s_csrOP(1) <= '0'; - else - s_csrOP(1) <= '1'; - end if; - - s_csrOp(4 downto 2) <= I_dataInst(FUNCT3_START downto FUNCT3_END); + if rising_edge(I_clk) then + if I_en = '1' then - end if; - when others => - s_trapExit <= '0'; - s_csrOP <= "00000"; - s_int <= '1'; --------------- - s_intdata <= EXCEPTION_INSTRUCTION_ILLEGAL; - O_memOp <= "00000"; - O_regDwe <= '0'; - O_dataIMM <= I_dataInst(IMM_I_START downto IMM_S_B_END) - & "0000000"; - end case; - elsif I_int_ack = '1' then - s_int <= '0'; - end if; - end if; - end process; + O_selD <= I_dataInst(RD_START downto RD_END); -end Behavioral; + O_aluOp <= I_dataInst(OPCODE_START downto OPCODE_END); + O_aluFunc <= "000000" & I_dataInst(FUNCT7_START downto FUNCT7_END) + & I_dataInst(FUNCT3_START downto FUNCT3_END); + + case I_dataInst(OPCODE_START downto OPCODE_END_2) is + when OPCODE_LUI => + s_multicy <= '0'; + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '0'; + O_regDwe <= '1'; + O_memOp <= "00000"; + O_dataIMM <= I_dataInst(IMM_U_START downto IMM_U_END) + & "000000000000"; + + when OPCODE_AUIPC => + s_multicy <= '0'; + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '0'; + O_regDwe <= '1'; + O_memOp <= "00000"; + O_dataIMM <= I_dataInst(IMM_U_START downto IMM_U_END) + & "000000000000"; + + when OPCODE_JAL => + s_multicy <= '0'; + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '0'; + if I_dataInst(RD_START downto RD_END) = "00000" then + O_regDwe <= '0'; + else + O_regDwe <= '1'; + end if; + O_memOp <= "00000"; + if I_dataInst(IMM_U_START) = '1' then + O_dataIMM <= "111111111111" & I_dataInst(19 downto 12) & I_dataInst(20) & I_dataInst(30 downto 21) & '0'; + else + O_dataIMM <= "000000000000" & I_dataInst(19 downto 12) & I_dataInst(20) & I_dataInst(30 downto 21) & '0'; + end if; + + when OPCODE_JALR => + s_multicy <= '0'; + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '0'; + if I_dataInst(RD_START downto 
RD_END) = "00000" then + O_regDwe <= '0'; + else + O_regDwe <= '1'; + end if; + O_memOp <= "00000"; + if I_dataInst(IMM_U_START) = '1' then + O_dataIMM <= X"FFFF" & "1111" & I_dataInst(IMM_I_START downto IMM_I_END); + else + O_dataIMM <= X"0000" & "0000" & I_dataInst(IMM_I_START downto IMM_I_END); + end if; + + when OPCODE_OPIMM => + s_multicy <= '0'; + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '0'; + O_regDwe <= '1'; + O_memOp <= "00000"; + if I_dataInst(IMM_U_START) = '1' then + O_dataIMM <= X"FFFF" & "1111" & I_dataInst(IMM_I_START downto IMM_I_END); + else + O_dataIMM <= X"0000" & "0000" & I_dataInst(IMM_I_START downto IMM_I_END); + end if; + + when OPCODE_OP => + s_trapExit <= '0'; + s_csrOP <= "00000"; + O_memOp <= "00000"; + + -- M based extension ops are multicycle, otherwise they are single-cycle + if (I_dataInst(FUNCT7_START downto FUNCT7_END) = F7_OP_M_EXT) then + s_multicy <= '1'; + else + s_multicy <= '0'; + end if; + + s_int <= '0'; + O_regDwe <= '1'; + + when OPCODE_LOAD => + s_multicy <= '0'; + -- Load's opcode is all 0s - but the first two bits of the word should be '11' + -- we check this here, because if we do not, null instructions will be treated as loads... + if I_dataInst(1 downto 0) = "11" then + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '0'; + O_regDwe <= '1'; + O_memOp <= "10" & I_dataInst(FUNCT3_START downto FUNCT3_END); + if I_dataInst(IMM_U_START) = '1' then + O_dataIMM <= X"FFFF" & "1111" & I_dataInst(IMM_I_START downto IMM_I_END); + else + O_dataIMM <= X"0000" & "0000" & I_dataInst(IMM_I_START downto IMM_I_END); + end if; + else + -- likely a null instruction - fault! 
+ s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '1'; --------------- + s_intdata <= EXCEPTION_INSTRUCTION_ILLEGAL; + O_memOp <= "00000"; + O_regDwe <= '0'; + O_dataIMM <= I_dataInst(IMM_I_START downto IMM_S_B_END) + & "0000000"; + end if; + + when OPCODE_STORE => + s_multicy <= '0'; + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '0'; + O_regDwe <= '0'; + O_memOp <= "11" & I_dataInst(FUNCT3_START downto FUNCT3_END); + if I_dataInst(IMM_U_START) = '1' then + O_dataIMM <= X"FFFF" & "1111" & I_dataInst(IMM_S_A_START downto IMM_S_A_END) & I_dataInst(IMM_S_B_START downto IMM_S_B_END); + else + O_dataIMM <= X"0000" & "0000" & I_dataInst(IMM_S_A_START downto IMM_S_A_END) & I_dataInst(IMM_S_B_START downto IMM_S_B_END); + end if; + + when OPCODE_BRANCH => + s_multicy <= '0'; + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '0'; + O_regDwe <= '0'; + O_memOp <= "00000"; + if I_dataInst(IMM_U_START) = '1' then + O_dataIMM <= X"FFFF" & "1111" & I_dataInst(7) & I_dataInst(30 downto 25) & I_dataInst(11 downto 8) & '0'; + else + O_dataIMM <= X"0000" & "0000" & I_dataInst(7) & I_dataInst(30 downto 25) & I_dataInst(11 downto 8) & '0'; + end if; + + when OPCODE_MISCMEM => + s_multicy <= '0'; + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '0'; + O_regDwe <= '0'; + O_memOp <= "01000"; + O_dataIMM <= I_dataInst; + + when OPCODE_SYSTEM => + s_multicy <= '0'; + O_memOp <= "00000"; + if I_dataInst(FUNCT3_START downto FUNCT3_END) = F3_PRIVOP then + -- ECALL or EBREAK + case I_dataInst(IMM_I_START downto IMM_I_END) is + when IMM_I_SYSTEM_ECALL => + -- raise trap, save pc, perform requiredCSR operations + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '1'; + O_regDwe <= '0'; + s_intdata <= EXCEPTION_ENVIRONMENT_CALL_FROM_MMODE; + --todo: Priv level needs checked as to mask this to user/supervisor/machine level + when IMM_I_SYSTEM_EBREAK => + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '1'; + s_intdata <= EXCEPTION_BREAKPOINT; + O_regDwe <= '0'; + when 
F7_PRIVOP_MRET & R2_PRIV_RET => + s_trapExit <= '1'; + s_csrOP <= "00000"; + s_int <= '0'; + O_regDwe <= '0'; + -- return from interrupt. implement as a branch - alu will branch to epc. + when others => + end case; + else + s_trapExit <= '0'; + s_int <= '0'; + -- CSR + -- The immediate output is the zero-extended R1 value for Imm-form CSR ops + O_dataIMM <= X"000000" & "000" & I_dataInst(R1_START downto R1_END); + + -- The 12bit immediate in the instruction forms the csr address. + s_csrAddr <= I_dataInst(IMM_I_START downto IMM_I_END); + + -- is there a destination? if not, CSR is not read + if I_dataInst(RD_START downto RD_END) = "00000" then + s_csrOP(0) <= '0'; + O_regDwe <= '0'; + else + O_regDwe <= '1'; + s_csrOP(0) <= '1'; + end if; + + -- is there source data? if not, CSR value is not written + -- is it's CSRRS/CSRRC/CSRRSI/CSRRCI ONLY! I.E (Func3 and 010) != 0 + if (I_dataInst(FUNCT3_END + 1) = '1') and I_dataInst(R1_START downto R1_END) = "00000" then + s_csrOP(1) <= '0'; + else + s_csrOP(1) <= '1'; + end if; + + s_csrOp(4 downto 2) <= I_dataInst(FUNCT3_START downto FUNCT3_END); + + end if; + when others => + s_multicy <= '0'; + s_trapExit <= '0'; + s_csrOP <= "00000"; + s_int <= '1'; --------------- + s_intdata <= EXCEPTION_INSTRUCTION_ILLEGAL; + O_memOp <= "00000"; + O_regDwe <= '0'; + O_dataIMM <= I_dataInst(IMM_I_START downto IMM_S_B_END) + & "0000000"; + end case; + elsif I_int_ack = '1' then + s_int <= '0'; + end if; + end if; + end process; + +end Behavioral; \ No newline at end of file