diff --git a/core/alu.sv b/core/alu.sv index 0c52db558..98a267540 100644 --- a/core/alu.sv +++ b/core/alu.sv @@ -63,7 +63,12 @@ module alu logic [ 31:0] aes32esmi_gen; logic [ 63:0] aes64es_gen; logic [ 63:0] aes64esm_gen; - + logic [ 31:0] aes32dsi_gen; + logic [ 31:0] aes32dsmi_gen; + logic [ 63:0] sr_inv; + logic [ 63:0] aes64ds_gen; + logic [ 63:0] aes64dsm_gen; + logic [ 63:0] aes64im_gen; // logic [ 31:0] tmp1; // logic [ 31:0] tmp2; // logic [ 31:0] tmp3; @@ -310,13 +315,20 @@ module alu end assign aes32esi_gen = (fu_data_i.operand_a ^ ({24'b0, aes_sbox_fwd((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))} << {orig_instr_aes[5:4], 3'b000}) | ({24'b0, aes_sbox_fwd((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))} >> (32 - {orig_instr_aes[5:4], 3'b000}))); assign aes32esmi_gen = fu_data_i.operand_a ^ ((aes_mixcolumn_fwd({24'h000000, aes_sbox_fwd((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))}) << {orig_instr_aes[5:4], 3'b000}) | (aes_mixcolumn_fwd({24'h000000, aes_sbox_fwd((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))}) >> (32 - {orig_instr_aes[5:4], 3'b000}))); + assign aes32dsi_gen = (fu_data_i.operand_a ^ ({24'b0, aes_sbox_inv((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))} << {orig_instr_aes[5:4], 3'b000}) | ({24'b0, aes_sbox_inv((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))} >> (32 - {orig_instr_aes[5:4], 3'b000}))); + assign aes32dsmi_gen = fu_data_i.operand_a ^ ((aes_mixcolumn_inv({24'h000000, aes_sbox_inv((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))}) << {orig_instr_aes[5:4], 3'b000}) | (aes_mixcolumn_inv({24'h000000, aes_sbox_inv((fu_data_i.operand_b >> {orig_instr_aes[5:4], 3'b000}[7:0]))}) >> (32 - {orig_instr_aes[5:4], 3'b000}))); end else if (CVA6Cfg.IS_XLEN64) begin // Shift rows step assign sr = {fu_data_i.operand_a[31:24], fu_data_i.operand_b[55:48], fu_data_i.operand_b[15:8], fu_data_i.operand_a[39:32], fu_data_i.operand_b[63:56], fu_data_i.operand_b[23:16], fu_data_i.operand_a[47:40], fu_data_i.operand_a[7:0]}; + assign sr_inv = {fu_data_i.operand_b[31:24], fu_data_i.operand_b[55:48], fu_data_i.operand_a[15:8], fu_data_i.operand_a[39:32], fu_data_i.operand_a[63:56], fu_data_i.operand_b[23:16], fu_data_i.operand_b[47:40], fu_data_i.operand_a[7:0]}; // AES64 encryption results assign aes64es_gen = {aes_sbox_fwd(sr[63:56]), aes_sbox_fwd(sr[55:48]), aes_sbox_fwd(sr[47:40]), aes_sbox_fwd(sr[39:32]), aes_sbox_fwd(sr[31:24]), aes_sbox_fwd(sr[23:16]), aes_sbox_fwd(sr[15:8]), aes_sbox_fwd(sr[7:0])}; assign aes64esm_gen = {aes_mixcolumn_fwd(aes64es_gen[63:32]), aes_mixcolumn_fwd(aes64es_gen[31:0])}; + assign aes64ds_gen = {aes_sbox_inv(sr_inv[63:56]), aes_sbox_inv(sr_inv[55:48]), aes_sbox_inv(sr_inv[47:40]), aes_sbox_inv(sr_inv[39:32]), aes_sbox_inv(sr_inv[31:24]), aes_sbox_inv(sr_inv[23:16]), aes_sbox_inv(sr_inv[15:8]), aes_sbox_inv(sr_inv[7:0])}; + assign aes64dsm_gen = {aes_mixcolumn_inv(aes64ds_gen[63:32]), aes_mixcolumn_inv(aes64ds_gen[31:0])}; + assign aes64im_gen = {aes_mixcolumn_inv(fu_data_i.operand_a[63:32]), aes_mixcolumn_inv(fu_data_i.operand_a[31:0])}; + // AES Key Schedule results assign aes64ks2_gen = {(fu_data_i.operand_a[63:32] ^ fu_data_i.operand_b[31:0] ^ fu_data_i.operand_b[63:32]), (fu_data_i.operand_a[63:32] ^ fu_data_i.operand_b[31:0])}; // assign tmp1 = fu_data_i.operand_a[63:32]; // assign rc = orig_instr_aes[3:0]; @@ -443,6 +455,11 @@ module alu if (fu_data_i.operation == AES32ESMI && CVA6Cfg.IS_XLEN32) result_o = aes32esmi_gen; if (fu_data_i.operation == AES64ES && CVA6Cfg.IS_XLEN64) result_o = aes64es_gen; if (fu_data_i.operation == AES64ESM && CVA6Cfg.IS_XLEN64) result_o = aes64esm_gen; + if (fu_data_i.operation == AES32DSI && CVA6Cfg.IS_XLEN32) result_o = aes32dsi_gen; + if (fu_data_i.operation == AES32DSMI && CVA6Cfg.IS_XLEN32) result_o = aes32dsmi_gen; + if (fu_data_i.operation == AES64DS && CVA6Cfg.IS_XLEN64) result_o = aes64ds_gen; + if (fu_data_i.operation == AES64DSM && CVA6Cfg.IS_XLEN64) result_o = aes64dsm_gen; + if (fu_data_i.operation == AES64IM && CVA6Cfg.IS_XLEN64) result_o = aes64im_gen; //if (fu_data_i.operation == AES64KS1I && CVA6Cfg.IS_XLEN64) result_o = aes64ks1i_gen; if (fu_data_i.operation == AES64KS2 && CVA6Cfg.IS_XLEN64) result_o = aes64ks2_gen; end diff --git a/core/decoder.sv b/core/decoder.sv index d0af25731..49c0baf7f 100644 --- a/core/decoder.sv +++ b/core/decoder.sv @@ -835,6 +835,26 @@ module decoder if (CVA6Cfg.ZKN) instruction_o.op = ariane_pkg::AES32ESMI; // aes32esmi else illegal_instr_bm = 1'b1; end + {7'b0010101, 3'b000}, {7'b0110101, 3'b000}, {7'b1010101, 3'b000}, {7'b1110101, 3'b000}: begin + if (CVA6Cfg.ZKN) instruction_o.op = ariane_pkg::AES32DSI; // aes32dsi + else illegal_instr_bm = 1'b1; + end + {7'b0010111, 3'b000}, {7'b0110111, 3'b000}, {7'b1010111, 3'b000}, {7'b1110111, 3'b000}: begin + if (CVA6Cfg.ZKN) instruction_o.op = ariane_pkg::AES32DSMI; // aes32dsmi + else illegal_instr_bm = 1'b1; + end + { + 7'b001_1101, 3'b000 + } : begin + if (CVA6Cfg.ZKN) instruction_o.op = ariane_pkg::AES64DS; // aes64ds + else illegal_instr_bm = 1'b1; + end + { + 7'b001_1111, 3'b000 + } : begin + if (CVA6Cfg.ZKN) instruction_o.op = ariane_pkg::AES64DSM; // aes64dsm + else illegal_instr_bm = 1'b1; + end default: begin illegal_instr_bm = 1'b1; end @@ -977,6 +997,8 @@ module decoder instruction_o.op = ariane_pkg::ZIP; // else if (CVA6Cfg.ZKN && instr.instr[31:24] == 8'b00110001) // instruction_o.op = ariane_pkg::AES64KS1I; + else if (CVA6Cfg.ZKN && instr.instr[31:20] == 12'b001100000000) + instruction_o.op = ariane_pkg::AES64IM; else illegal_instr_bm = 1'b1; end 3'b101: begin diff --git a/core/include/ariane_pkg.sv b/core/include/ariane_pkg.sv index 6a2995c8a..b46ef8f6f 100644 --- a/core/include/ariane_pkg.sv +++ b/core/include/ariane_pkg.sv @@ -505,6 +505,12 @@ package ariane_pkg; AES32ESMI, AES64ES, AES64ESM, + // AES Decryption instructions + AES32DSI, + AES32DSMI, + AES64DS, + AES64DSM, + AES64IM, // AES Key-Schedule instructions //AES64KS1I, AES64KS2 @@ -914,4 +920,82 @@ package ariane_pkg; default: aes_decode_rcon = 32'h00000000; endcase endfunction + // AES Sbox Inverse + function [7:0] aes_sbox_inv(input [7:0] si); + case (si) + 8'h00: aes_sbox_inv = 8'h52; 8'h01: aes_sbox_inv = 8'h09; 8'h02: aes_sbox_inv = 8'h6a; 8'h03: aes_sbox_inv = 8'hd5; 8'h04: aes_sbox_inv = 8'h30; 8'h05: aes_sbox_inv = 8'h36; + 8'h06: aes_sbox_inv = 8'ha5; 8'h07: aes_sbox_inv = 8'h38; 8'h08: aes_sbox_inv = 8'hbf; 8'h09: aes_sbox_inv = 8'h40; 8'h0a: aes_sbox_inv = 8'ha3; 8'h0b: aes_sbox_inv = 8'h9e; + 8'h0c: aes_sbox_inv = 8'h81; 8'h0d: aes_sbox_inv = 8'hf3; 8'h0e: aes_sbox_inv = 8'hd7; 8'h0f: aes_sbox_inv = 8'hfb; 8'h10: aes_sbox_inv = 8'h7c; 8'h11: aes_sbox_inv = 8'he3; + 8'h12: aes_sbox_inv = 8'h39; 8'h13: aes_sbox_inv = 8'h82; 8'h14: aes_sbox_inv = 8'h9b; 8'h15: aes_sbox_inv = 8'h2f; 8'h16: aes_sbox_inv = 8'hff; 8'h17: aes_sbox_inv = 8'h87; + 8'h18: aes_sbox_inv = 8'h34; 8'h19: aes_sbox_inv = 8'h8e; 8'h1a: aes_sbox_inv = 8'h43; 8'h1b: aes_sbox_inv = 8'h44; 8'h1c: aes_sbox_inv = 8'hc4; 8'h1d: aes_sbox_inv = 8'hde; + 8'h1e: aes_sbox_inv = 8'he9; 8'h1f: aes_sbox_inv = 8'hcb; 8'h20: aes_sbox_inv = 8'h54; 8'h21: aes_sbox_inv = 8'h7b; 8'h22: aes_sbox_inv = 8'h94; 8'h23: aes_sbox_inv = 8'h32; + 8'h24: aes_sbox_inv = 8'ha6; 8'h25: aes_sbox_inv = 8'hc2; 8'h26: aes_sbox_inv = 8'h23; 8'h27: aes_sbox_inv = 8'h3d; 8'h28: aes_sbox_inv = 8'hee; 8'h29: aes_sbox_inv = 8'h4c; + 8'h2a: aes_sbox_inv = 8'h95; 8'h2b: aes_sbox_inv = 8'h0b; 8'h2c: aes_sbox_inv = 8'h42; 8'h2d: aes_sbox_inv = 8'hfa; 8'h2e: aes_sbox_inv = 8'hc3; 8'h2f: aes_sbox_inv = 8'h4e; + 8'h30: aes_sbox_inv = 8'h08; 8'h31: aes_sbox_inv = 8'h2e; 8'h32: aes_sbox_inv = 8'ha1; 8'h33: aes_sbox_inv = 8'h66; 8'h34: aes_sbox_inv = 8'h28; 8'h35: aes_sbox_inv = 8'hd9; + 8'h36: aes_sbox_inv = 8'h24; 8'h37: aes_sbox_inv = 8'hb2; 8'h38: aes_sbox_inv = 8'h76; 8'h39: aes_sbox_inv = 8'h5b; 8'h3a: aes_sbox_inv = 8'ha2; 8'h3b: aes_sbox_inv = 8'h49; + 8'h3c: aes_sbox_inv = 8'h6d; 8'h3d: aes_sbox_inv = 8'h8b; 8'h3e: aes_sbox_inv = 8'hd1; 8'h3f: aes_sbox_inv = 8'h25; 8'h40: aes_sbox_inv = 8'h72; 8'h41: aes_sbox_inv = 8'hf8; + 8'h42: aes_sbox_inv = 8'hf6; 8'h43: aes_sbox_inv = 8'h64; 8'h44: aes_sbox_inv = 8'h86; 8'h45: aes_sbox_inv = 8'h68; 8'h46: aes_sbox_inv = 8'h98; 8'h47: aes_sbox_inv = 8'h16; + 8'h48: aes_sbox_inv = 8'hd4; 8'h49: aes_sbox_inv = 8'ha4; 8'h4a: aes_sbox_inv = 8'h5c; 8'h4b: aes_sbox_inv = 8'hcc; 8'h4c: aes_sbox_inv = 8'h5d; 8'h4d: aes_sbox_inv = 8'h65; + 8'h4e: aes_sbox_inv = 8'hb6; 8'h4f: aes_sbox_inv = 8'h92; 8'h50: aes_sbox_inv = 8'h6c; 8'h51: aes_sbox_inv = 8'h70; 8'h52: aes_sbox_inv = 8'h48; 8'h53: aes_sbox_inv = 8'h50; + 8'h54: aes_sbox_inv = 8'hfd; 8'h55: aes_sbox_inv = 8'hed; 8'h56: aes_sbox_inv = 8'hb9; 8'h57: aes_sbox_inv = 8'hda; 8'h58: aes_sbox_inv = 8'h5e; 8'h59: aes_sbox_inv = 8'h15; + 8'h5a: aes_sbox_inv = 8'h46; 8'h5b: aes_sbox_inv = 8'h57; 8'h5c: aes_sbox_inv = 8'ha7; 8'h5d: aes_sbox_inv = 8'h8d; 8'h5e: aes_sbox_inv = 8'h9d; 8'h5f: aes_sbox_inv = 8'h84; + 8'h60: aes_sbox_inv = 8'h90; 8'h61: aes_sbox_inv = 8'hd8; 8'h62: aes_sbox_inv = 8'hab; 8'h63: aes_sbox_inv = 8'h00; 8'h64: aes_sbox_inv = 8'h8c; 8'h65: aes_sbox_inv = 8'hbc; + 8'h66: aes_sbox_inv = 8'hd3; 8'h67: aes_sbox_inv = 8'h0a; 8'h68: aes_sbox_inv = 8'hf7; 8'h69: aes_sbox_inv = 8'he4; 8'h6a: aes_sbox_inv = 8'h58; 8'h6b: aes_sbox_inv = 8'h05; + 8'h6c: aes_sbox_inv = 8'hb8; 8'h6d: aes_sbox_inv = 8'hb3; 8'h6e: aes_sbox_inv = 8'h45; 8'h6f: aes_sbox_inv = 8'h06; 8'h70: aes_sbox_inv = 8'hd0; 8'h71: aes_sbox_inv = 8'h2c; + 8'h72: aes_sbox_inv = 8'h1e; 8'h73: aes_sbox_inv = 8'h8f; 8'h74: aes_sbox_inv = 8'hca; 8'h75: aes_sbox_inv = 8'h3f; 8'h76: aes_sbox_inv = 8'h0f; 8'h77: aes_sbox_inv = 8'h02; + 8'h78: aes_sbox_inv = 8'hc1; 8'h79: aes_sbox_inv = 8'haf; 8'h7a: aes_sbox_inv = 8'hbd; 8'h7b: aes_sbox_inv = 8'h03; 8'h7c: aes_sbox_inv = 8'h01; 8'h7d: aes_sbox_inv = 8'h13; + 8'h7e: aes_sbox_inv = 8'h8a; 8'h7f: aes_sbox_inv = 8'h6b; 8'h80: aes_sbox_inv = 8'h3a; 8'h81: aes_sbox_inv = 8'h91; 8'h82: aes_sbox_inv = 8'h11; 8'h83: aes_sbox_inv = 8'h41; + 8'h84: aes_sbox_inv = 8'h4f; 8'h85: aes_sbox_inv = 8'h67; 8'h86: aes_sbox_inv = 8'hdc; 8'h87: aes_sbox_inv = 8'hea; 8'h88: aes_sbox_inv = 8'h97; 8'h89: aes_sbox_inv = 8'hf2; + 8'h8a: aes_sbox_inv = 8'hcf; 8'h8b: aes_sbox_inv = 8'hce; 8'h8c: aes_sbox_inv = 8'hf0; 8'h8d: aes_sbox_inv = 8'hb4; 8'h8e: aes_sbox_inv = 8'he6; 8'h8f: aes_sbox_inv = 8'h73; + 8'h90: aes_sbox_inv = 8'h96; 8'h91: aes_sbox_inv = 8'hac; 8'h92: aes_sbox_inv = 8'h74; 8'h93: aes_sbox_inv = 8'h22; 8'h94: aes_sbox_inv = 8'he7; 8'h95: aes_sbox_inv = 8'had; 8'h96: aes_sbox_inv = 8'h35; 8'h97: aes_sbox_inv = 8'h85; + 8'h98: aes_sbox_inv = 8'he2; 8'h99: aes_sbox_inv = 8'hf9; 8'h9a: aes_sbox_inv = 8'h37; 8'h9b: aes_sbox_inv = 8'he8; 8'h9c: aes_sbox_inv = 8'h1c; 8'h9d: aes_sbox_inv = 8'h75; + 8'h9e: aes_sbox_inv = 8'hdf; 8'h9f: aes_sbox_inv = 8'h6e; 8'ha0: aes_sbox_inv = 8'h47; 8'ha1: aes_sbox_inv = 8'hf1; 8'ha2: aes_sbox_inv = 8'h1a; 8'ha3: aes_sbox_inv = 8'h71; + 8'ha4: aes_sbox_inv = 8'h1d; 8'ha5: aes_sbox_inv = 8'h29; 8'ha6: aes_sbox_inv = 8'hc5; 8'ha7: aes_sbox_inv = 8'h89; 8'ha8: aes_sbox_inv = 8'h6f; 8'ha9: aes_sbox_inv = 8'hb7; + 8'haa: aes_sbox_inv = 8'h62; 8'hab: aes_sbox_inv = 8'h0e; 8'hac: aes_sbox_inv = 8'haa; 8'had: aes_sbox_inv = 8'h18; 8'hae: aes_sbox_inv = 8'hbe; 8'haf: aes_sbox_inv = 8'h1b; + 8'hb0: aes_sbox_inv = 8'hfc; 8'hb1: aes_sbox_inv = 8'h56; 8'hb2: aes_sbox_inv = 8'h3e; 8'hb3: aes_sbox_inv = 8'h4b; 8'hb4: aes_sbox_inv = 8'hc6; 8'hb5: aes_sbox_inv = 8'hd2; + 8'hb6: aes_sbox_inv = 8'h79; 8'hb7: aes_sbox_inv = 8'h20; 8'hb8: aes_sbox_inv = 8'h9a; 8'hb9: aes_sbox_inv = 8'hdb; 8'hba: aes_sbox_inv = 8'hc0; 8'hbb: aes_sbox_inv = 8'hfe; + 8'hbc: aes_sbox_inv = 8'h78; 8'hbd: aes_sbox_inv = 8'hcd; 8'hbe: aes_sbox_inv = 8'h5a; 8'hbf: aes_sbox_inv = 8'hf4; 8'hc0: aes_sbox_inv = 8'h1f; 8'hc1: aes_sbox_inv = 8'hdd; + 8'hc2: aes_sbox_inv = 8'ha8; 8'hc3: aes_sbox_inv = 8'h33; 8'hc4: aes_sbox_inv = 8'h88; 8'hc5: aes_sbox_inv = 8'h07; 8'hc6: aes_sbox_inv = 8'hc7; 8'hc7: aes_sbox_inv = 8'h31; + 8'hc8: aes_sbox_inv = 8'hb1; 8'hc9: aes_sbox_inv = 8'h12; 8'hca: aes_sbox_inv = 8'h10; 8'hcb: aes_sbox_inv = 8'h59; 8'hcc: aes_sbox_inv = 8'h27; 8'hcd: aes_sbox_inv = 8'h80; + 8'hce: aes_sbox_inv = 8'hec; 8'hcf: aes_sbox_inv = 8'h5f; 8'hd0: aes_sbox_inv = 8'h60; 8'hd1: aes_sbox_inv = 8'h51; 8'hd2: aes_sbox_inv = 8'h7f; 8'hd3: aes_sbox_inv = 8'ha9; + 8'hd4: aes_sbox_inv = 8'h19; 8'hd5: aes_sbox_inv = 8'hb5; 8'hd6: aes_sbox_inv = 8'h4a; 8'hd7: aes_sbox_inv = 8'h0d; 8'hd8: aes_sbox_inv = 8'h2d; 8'hd9: aes_sbox_inv = 8'he5; + 8'hda: aes_sbox_inv = 8'h7a; 8'hdb: aes_sbox_inv = 8'h9f; 8'hdc: aes_sbox_inv = 8'h93; 8'hdd: aes_sbox_inv = 8'hc9; 8'hde: aes_sbox_inv = 8'h9c; 8'hdf: aes_sbox_inv = 8'hef; + 8'he0: aes_sbox_inv = 8'ha0; 8'he1: aes_sbox_inv = 8'he0; 8'he2: aes_sbox_inv = 8'h3b; 8'he3: aes_sbox_inv = 8'h4d; 8'he4: aes_sbox_inv = 8'hae; 8'he5: aes_sbox_inv = 8'h2a; + 8'he6: aes_sbox_inv = 8'hf5; 8'he7: aes_sbox_inv = 8'hb0; 8'he8: aes_sbox_inv = 8'hc8; 8'he9: aes_sbox_inv = 8'heb; 8'hea: aes_sbox_inv = 8'hbb; 8'heb: aes_sbox_inv = 8'h3c; + 8'hec: aes_sbox_inv = 8'h83; 8'hed: aes_sbox_inv = 8'h53; 8'hee: aes_sbox_inv = 8'h99; 8'hef: aes_sbox_inv = 8'h61; 8'hf0: aes_sbox_inv = 8'h17; 8'hf1: aes_sbox_inv = 8'h2b; + 8'hf2: aes_sbox_inv = 8'h04; 8'hf3: aes_sbox_inv = 8'h7e; 8'hf4: aes_sbox_inv = 8'hba; 8'hf5: aes_sbox_inv = 8'h77; 8'hf6: aes_sbox_inv = 8'hd6; 8'hf7: aes_sbox_inv = 8'h26; + 8'hf8: aes_sbox_inv = 8'he1; 8'hf9: aes_sbox_inv = 8'h69; 8'hfa: aes_sbox_inv = 8'h14; 8'hfb: aes_sbox_inv = 8'h63; 8'hfc: aes_sbox_inv = 8'h55; 8'hfd: aes_sbox_inv = 8'h21; + 8'hfe: aes_sbox_inv = 8'h0c; 8'hff: aes_sbox_inv = 8'h7d; + default: aes_sbox_inv = 8'h00; + endcase + endfunction + // AES MixColumns Inverse + function logic [31:0] aes_mixcolumn_inv(input logic [31:0] x); + aes_mixcolumn_inv = {(gfmul(x[7:0], 4'hB) ^ gfmul(x[15:8], 4'hD) ^ gfmul(x[23:16], 4'h9) ^ gfmul(x[31:24], 4'hE)), + (gfmul(x[7:0], 4'hD) ^ gfmul(x[15:8], 4'h9) ^ gfmul(x[23:16], 4'hE) ^ gfmul(x[31:24], 4'hB)), + (gfmul(x[7:0], 4'h9) ^ gfmul(x[15:8], 4'hE) ^ gfmul(x[23:16], 4'hB) ^ gfmul(x[31:24], 4'hD)), + (gfmul(x[7:0], 4'hE) ^ gfmul(x[15:8], 4'hB) ^ gfmul(x[23:16], 4'hD) ^ gfmul(x[31:24], 4'h9))}; + endfunction + // GF multiplication + function logic [7:0] gfmul(input logic [7:0] x, input logic [3:0] y); + logic [7:0] result, temp; + result = 8'h00; + if (y[0]) result ^= x; + if (y[1]) begin + temp = (x << 1) ^ ((x[7]) ? 8'h1B : 8'h00); + result ^= (x << 1) ^ ((x[7]) ? 8'h1B : 8'h00); + end + if (y[2]) begin + temp = (x << 1) ^ ((x[7]) ? 8'h1B : 8'h00); + temp = (temp << 1) ^ ((temp[7]) ? 8'h1B : 8'h00); + result ^= temp; + end + if (y[3]) begin + temp = (x << 1) ^ ((x[7]) ? 8'h1B : 8'h00); + temp = (temp << 1) ^ ((temp[7]) ? 8'h1B : 8'h00); + temp = (temp << 1) ^ ((temp[7]) ? 8'h1B : 8'h00); + result ^= temp; + end + return result; + endfunction endpackage diff --git a/docs/01_cva6_user/RISCV_Instructions_RVZbkx.rst b/docs/01_cva6_user/RISCV_Instructions_RVZbkx.rst new file mode 100644 index 000000000..f6c85f7f9 --- /dev/null +++ b/docs/01_cva6_user/RISCV_Instructions_RVZbkx.rst @@ -0,0 +1,86 @@ +.. + Copyright (c) 2023 OpenHW Group + Copyright (c) 2023 10xEngineers + + SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +.. Level 1 + ======= + + Level 2 + ------- + + Level 3 + ~~~~~~~ + + Level 4 + ^^^^^^^ + +.. _cva6_riscv_instructions_RV32Zbkx: + +*Applicability of this chapter to configurations:* + +.. csv-table:: + :widths: auto + :align: left + :header: "Configuration", "Implementation" + + "CV32A60AX", "Implemented extension" + "CV64A6_MMU", "Implemented extension" + +============================= +RVZbkx: Crossbar permutation instructions +============================= + +The following instructions comprise the Zbkx extension: + +Xperm instructions +-------------------- +The xperm instructions perform permutation operations on a register. They use indices extracted from rs2 to select data chunks (bytes for xperm8 or nibbles for xperm4) from rs1. The selected data is then placed into the destination register (rd) at positions corresponding to the extracted indices in rs2. If an index in rs2 is out of range, the corresponding chunk in rd is set to 0. + ++-----------+-----------+-----------------------+ +| RV32 | RV64 | Mnemonic | ++===========+===========+=======================+ +| ✔ | ✔ | xperm8 rd, rs1, rs2 | ++-----------+-----------+-----------------------+ +| ✔ | ✔ | xperm4 rd, rs1, rs2 | ++-----------+-----------+-----------------------+ + + +RV32 and RV64 Instructions +~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +- **XPERM8**: Crossbar permutation (bytes) + + **Format**: xperm8 rd, rs1, rs2 + + **Description**: The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8 8-bit elements. The rs2 register contains a vector of XLEN/8 8-bit indexes. The result is each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2 is out of bounds. + + **Pseudocode**: foreach (i from 0 to xlen by 8) { + if (rs2[i*8+:8]<(xlen/8)) + X(rd)[i*8+:8] = rs1[rs2[i*8+:8]*8+:8]; + else + X(rd)[i*8+:8] = 8'b0; + } + + **Invalid values**: NONE + + **Exception raised**: NONE + +- **XPERM4**: Crossbar permutation (nibbles) + + **Format**: xperm4 rd, rs1, rs2 + + **Description**: The xperm4 instruction operates on nibbles. The rs1 register contains a vector of XLEN/4 4-bit elements. The rs2 register contains a vector of XLEN/4 4-bit indexes. The result is each element in rs2 replaced by the indexed element in rs1, or zero if the index into rs2 is out of bounds. + + **Pseudocode**: foreach (i from 0 to xlen by 4) { + if (rs2[i*4+:4]<(xlen/4)) + X(rd)[i*4+:4] = rs1[rs2[i*4+:4]*4+:4]; + else + X(rd)[i*4+:4] = 4'b0; + } + + **Invalid values**: NONE + + **Exception raised**: NONE diff --git a/verif/sim/cva6.py b/verif/sim/cva6.py index 985be86a4..d16ebf888 100644 --- a/verif/sim/cva6.py +++ b/verif/sim/cva6.py @@ -889,7 +889,7 @@ def load_config(args, cwd): args.isa = "rv64gc_zba_zbb_zbs_zbc" elif base in ("cv64a6_imafdc_sv39", "cv64a6_imafdc_sv39_hpdcache", "cv64a6_imafdc_sv39_hpdcache_wb"): args.mabi = "lp64d" - args.isa = "rv64gc_zba_zbb_zbs_zbc_zbkb_zbkx_zkne" + args.isa = "rv64gc_zba_zbb_zbs_zbc_zbkb_zbkx_zkne_zknd" elif base == "cv32a60x": args.mabi = "ilp32" args.isa = "rv32imc_zba_zbb_zbs_zbc" @@ -906,7 +906,7 @@ def load_config(args, cwd): args.isa = "rv32imac" elif base == "cv32a6_imac_sv32": args.mabi = "ilp32" - args.isa = "rv32imac_zbkb_zbkx_zkne" + args.isa = "rv32imac_zbkb_zbkx_zkne_zknd" elif base == "cv32a6_imafc_sv32": args.mabi = "ilp32f" args.isa = "rv32imafc" diff --git a/verif/tests/testlist_riscv-arch-test-cv64a6_imafdc_sv39.yaml b/verif/tests/testlist_riscv-arch-test-cv64a6_imafdc_sv39.yaml index c5d6e2233..5cad103f2 100644 --- a/verif/tests/testlist_riscv-arch-test-cv64a6_imafdc_sv39.yaml +++ b/verif/tests/testlist_riscv-arch-test-cv64a6_imafdc_sv39.yaml @@ -1017,3 +1017,18 @@ testlist: # iterations: 1 # <<: *common_test_config # asm_tests: /riscv-arch-test/riscv-test-suite/rv64i_m/K/src/aes64ks1i-01.S + + - test: rv64i_m-aes64ds-01 + iterations: 1 + <<: *common_test_config + asm_tests: /riscv-arch-test/riscv-test-suite/rv64i_m/K/src/aes64ds-01.S + + - test: rv64i_m-aes64dsm-01 + iterations: 1 + <<: *common_test_config + asm_tests: /riscv-arch-test/riscv-test-suite/rv64i_m/K/src/aes64dsm-01.S + + - test: rv64i_m-aes64im-01 + iterations: 1 + <<: *common_test_config + asm_tests: /riscv-arch-test/riscv-test-suite/rv64i_m/K/src/aes64im-01.S