From 47f8e847f09e7a08d9684962c6b13f55cab61304 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Mar 2023 10:51:04 -0500 Subject: [PATCH 1/5] Renamed ebu signal. --- src/ebu/ebufsmarb.sv | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ebu/ebufsmarb.sv b/src/ebu/ebufsmarb.sv index 11d3eb2b6..1990e8f5d 100644 --- a/src/ebu/ebufsmarb.sv +++ b/src/ebu/ebufsmarb.sv @@ -57,7 +57,7 @@ module ebufsmarb ( logic FinalBeat, FinalBeatD; // Indicates the last beat of a burst logic BeatCntEn; logic [3:0] BeatCount; // Position within a burst transfer - logic CntReset; + logic BeatCntReset; logic [3:0] Threshold; // Number of beats derived from HBURST //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -98,13 +98,13 @@ module ebufsmarb ( // Burst mode logic //////////////////////////////////////////////////////////////////////////////////////////////////// - assign CntReset = NextState == IDLE; + assign BeatCntReset = NextState == IDLE; assign FinalBeat = (BeatCount == Threshold); // Detect when we are waiting on the final access. assign BeatCntEn = (NextState == ARBITRATE) & HREADY; - counter #(4) BeatCounter(HCLK, ~HRESETn | CntReset | FinalBeat, BeatCntEn, BeatCount); + counter #(4) BeatCounter(HCLK, ~HRESETn | BeatCntReset | FinalBeat, BeatCntEn, BeatCount); // Used to store data from data phase of AHB. - flopenr #(1) FinalBeatReg(HCLK, ~HRESETn | CntReset, BeatCntEn, FinalBeat, FinalBeatD); + flopenr #(1) FinalBeatReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, FinalBeat, FinalBeatD); // unlike the bus fsm in lsu/ifu, we need to derive the number of beats from HBURST. 
// HBURST[2:1] Beats From b70ab0fa5aa5ef95df3475db3ad8e623e7e965c5 Mon Sep 17 00:00:00 2001 From: Kevin Kim Date: Fri, 24 Mar 2023 11:52:51 -0700 Subject: [PATCH 2/5] Zero/Sign extend mux in Shifter, Zero extend mux in Bitmanip alu --- src/ieu/alu.sv | 11 ++-------- src/ieu/bmu/bitmanipalu.sv | 7 +++++-- src/ieu/shifter.sv | 43 +++++++++++++++++++------------------- 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/src/ieu/alu.sv b/src/ieu/alu.sv index 15328bb2f..c4e0f3906 100644 --- a/src/ieu/alu.sv +++ b/src/ieu/alu.sv @@ -52,19 +52,12 @@ module alu #(parameter WIDTH=32) ( logic LT, LTU; // Less than, Less than unsigned logic Asign, Bsign; // Sign bits of A, B - // *** explain this part better; possibly move into shifter and BMU? - if (WIDTH == 64) begin - mux3 #(64) extendmux({{32{1'b0}}, A[31:0]}, {{32{A[31]}}, A[31:0]}, A, {~W64, SubArith}, CondExtA); // bottom 32 bits are always A[31:0], so effectively a 32-bit upper mux - end else begin - assign CondExtA = A; - end - // Addition assign CondMaskInvB = SubArith ? ~CondMaskB : CondMaskB; assign {Carry, Sum} = CondShiftA + CondMaskInvB + {{(WIDTH-1){1'b0}}, SubArith}; // Shifts (configurable for rotation) - shifter sh(.A(CondExtA), .Amt(B[`LOG_XLEN-1:0]), .Right(Funct3[2]), .W64, .SubArith, .Y(Shift), .Rotate(BALUControl[2])); + shifter sh(.A, .Amt(B[`LOG_XLEN-1:0]), .Right(Funct3[2]), .W64, .SubArith, .Y(Shift), .Rotate(BALUControl[2])); // Condition code flags are based on subtraction output Sum = A-B. 
// Overflow occurs when the numbers being subtracted have the opposite sign @@ -97,7 +90,7 @@ module alu #(parameter WIDTH=32) ( // Final Result B instruction select mux if (`ZBC_SUPPORTED | `ZBS_SUPPORTED | `ZBA_SUPPORTED | `ZBB_SUPPORTED) begin : bitmanipalu bitmanipalu #(WIDTH) balu(.A, .B, .W64, .BSelect, .ZBBSelect, - .Funct3, .CompFlags, .BALUControl, .CondExtA, .ALUResult, .FullResult, + .Funct3, .CompFlags, .BALUControl, .ALUResult, .FullResult, .CondMaskB, .CondShiftA, .Result); end else begin assign Result = ALUResult; diff --git a/src/ieu/bmu/bitmanipalu.sv b/src/ieu/bmu/bitmanipalu.sv index 1cf1cd084..07c7e5343 100644 --- a/src/ieu/bmu/bitmanipalu.sv +++ b/src/ieu/bmu/bitmanipalu.sv @@ -37,7 +37,6 @@ module bitmanipalu #(parameter WIDTH=32) ( input logic [2:0] Funct3, // Funct3 field of opcode indicates operation to perform input logic [1:0] CompFlags, // Comparator flags input logic [2:0] BALUControl, // ALU Control signals for B instructions in Execute Stage - input logic [WIDTH-1:0] CondExtA, // A Conditional Extend Intermediary Signal input logic [WIDTH-1:0] ALUResult, FullResult, // ALUResult, FullResult signals output logic [WIDTH-1:0] CondMaskB, // B is conditionally masked for ZBS instructions output logic [WIDTH-1:0] CondShiftA, // A is conditionally shifted for ShAdd instructions @@ -50,6 +49,7 @@ module bitmanipalu #(parameter WIDTH=32) ( logic Mask; // Indicates if it is ZBS instruction logic PreShift; // Inidicates if it is sh1add, sh2add, sh3add instruction logic [1:0] PreShiftAmt; // Amount to Pre-Shift A + logic [WIDTH-1:0] CondZextA; // A Conditional Extend Intermediary Signal // Extract control signals from bitmanip ALUControl. 
assign {Mask, PreShift} = BALUControl[1:0]; @@ -62,8 +62,11 @@ module bitmanipalu #(parameter WIDTH=32) ( // 0-3 bit Pre-Shift Mux if (`ZBA_SUPPORTED) begin: zbapreshift + if (WIDTH == 64) begin + mux2 #(64) zextmux(A, {{32{1'b0}}, A[31:0]}, W64, CondZextA); + end else assign CondZextA = A; assign PreShiftAmt = Funct3[2:1] & {2{PreShift}}; - assign CondShiftA = CondExtA << (PreShiftAmt); + assign CondShiftA = CondZextA << (PreShiftAmt); end else begin assign PreShiftAmt = 2'b0; assign CondShiftA = A; diff --git a/src/ieu/shifter.sv b/src/ieu/shifter.sv index 8dbdf88e4..1c5128be4 100644 --- a/src/ieu/shifter.sv +++ b/src/ieu/shifter.sv @@ -40,42 +40,41 @@ module shifter ( logic Sign; // Sign bit for sign extension assign Sign = A[`XLEN-1] & SubArith; // sign bit for sign extension - - if (`ZBB_SUPPORTED) begin: rotfunnel - if (`XLEN==32) begin // rv32 with rotates + if (`XLEN==32) begin // rv32 + if (`ZBB_SUPPORTED) begin: rotfunnel32 //rv32 shifter with rotates always_comb // funnel mux case({Right, Rotate}) 2'b00: z = {A[31:0], 31'b0}; 2'b01: z = {A[31:0], A[31:1]}; 2'b10: z = {{31{Sign}}, A[31:0]}; - 2'b11: z = {A[30:0], A}; + 2'b11: z = {A[30:0], A[31:0]}; endcase - assign amttrunc = Amt; // shift amount - end else begin // rv64 with rotates + end else begin: norotfunnel32 //rv32 shifter without rotates + always_comb // funnel mux + if (Right) z = {{31{Sign}}, A[31:0]}; + else z = {A[31:0], 31'b0}; + end + assign amttrunc = Amt; // shift amount + end else begin // rv64 + logic [`XLEN-1:0] A64; + mux3 #(64) extendmux({{32{1'b0}}, A[31:0]}, {{32{A[31]}}, A[31:0]}, A, {~W64, SubArith}, A64); // bottom 32 bits are always A[31:0], so effectively a 32-bit upper mux + if (`ZBB_SUPPORTED) begin: rotfunnel64 // rv64 shifter with rotates // shifter rotate source select mux logic [`XLEN-1:0] RotA; // rotate source mux2 #(`XLEN) rotmux(A, {A[31:0], A[31:0]}, W64, RotA); // W64 rotatons always_comb // funnel mux case ({Right, Rotate}) - 2'b00: z = {A[63:0],{63'b0}}; - 2'b01: 
z = {RotA, RotA[63:1]}; - 2'b10: z = {{63{Sign}}, A[63:0]}; - 2'b11: z = {RotA[62:0], RotA}; + 2'b00: z = {A64[63:0],{63'b0}}; + 2'b01: z = {RotA[63:0], RotA[63:1]}; + 2'b10: z = {{63{Sign}}, A64[63:0]}; + 2'b11: z = {RotA[62:0], RotA[63:0]}; endcase - assign amttrunc = W64 ? {1'b0, Amt[4:0]} : Amt; // 32- or 64-bit shift - end - end else begin: norotfunnel - if (`XLEN==32) begin:shifter // RV32 + end else begin: norotfunnel64 // rv64 shifter without rotates always_comb // funnel mux - if (Right) z = {{31{Sign}}, A[31:0]}; - else z = {A[31:0], 31'b0}; - assign amttrunc = Amt; // shift amount - end else begin:shifter // RV64 - always_comb // funnel mux - if (Right) z = {{63{Sign}}, A[63:0]}; - else z = {A[63:0], {63'b0}}; - assign amttrunc = W64 ? {1'b0, Amt[4:0]} : Amt; // 32- or 64-bit shift + if (Right) z = {{63{Sign}}, A64[63:0]}; + else z = {A64[63:0], {63'b0}}; end + assign amttrunc = W64 ? {1'b0, Amt[4:0]} : Amt; // 32- or 64-bit shift end // Opposite offset for right shifts From b518177a45061492f51dbdc1a57b896dacef77ac Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Mar 2023 15:01:38 -0500 Subject: [PATCH 3/5] Updated EBU to replace tabs with spaces. --- src/ebu/ahbcacheinterface.sv | 12 +++--- src/ebu/ahbinterface.sv | 34 ++++++++-------- src/ebu/buscachefsm.sv | 26 ++++++------ src/ebu/busfsm.sv | 22 +++++------ src/ebu/controllerinputstage.sv | 32 +++++++-------- src/ebu/ebu.sv | 2 - src/ebu/ebufsmarb.sv | 70 ++++++++++++++++----------------- 7 files changed, 96 insertions(+), 102 deletions(-) diff --git a/src/ebu/ahbcacheinterface.sv b/src/ebu/ahbcacheinterface.sv index 7278f4f93..b30a15096 100644 --- a/src/ebu/ahbcacheinterface.sv +++ b/src/ebu/ahbcacheinterface.sv @@ -35,7 +35,7 @@ module ahbcacheinterface #( parameter LINELEN, // Number of bits in cacheline parameter LLENPOVERAHBW // Number of AHB beats in a LLEN word. AHBW cannot be larger than LLEN. 
(implementation limitation) )( - input logic HCLK, HRESETn, + input logic HCLK, HRESETn, // bus interface controls input logic HREADY, // AHB peripheral ready output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ @@ -56,7 +56,7 @@ module ahbcacheinterface #( input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch output logic CacheBusAck, // Handshack to $ indicating bus transaction completed output logic [LINELEN-1:0] FetchBuffer, // Register to hold beats of cache line as the arrive from bus - output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase + output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr // uncached interface @@ -76,10 +76,10 @@ module ahbcacheinterface #( logic [`PA_BITS-1:0] LocalHADDR; // Address after selecting between cached and uncached operation logic [AHBWLOGBWPL-1:0] BeatCountDelayed; // Beat within the cache line in the second (Data) cache stage logic CaptureEn; // Enable updating the Fetch buffer with valid data from HRDATA - logic [`AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. For cache request all 1s + logic [`AHBW/8-1:0] BusByteMaskM; // Byte enables within a word. 
For cache request all 1s logic [`AHBW-1:0] PreHWDATA; // AHB Address phase write data - genvar index; + genvar index; // fetch buffer is made of BEATSPERLINE flip-flops for (index = 0; index < BEATSPERLINE; index++) begin:fetchbuffer @@ -100,7 +100,7 @@ module ahbcacheinterface #( logic [`AHBW-1:0] AHBWordSets [(LLENPOVERAHBW)-1:0]; genvar index; for (index = 0; index < LLENPOVERAHBW; index++) begin:readdatalinesetsmux - assign AHBWordSets[index] = CacheReadDataWordM[(index*`AHBW)+`AHBW-1: (index*`AHBW)]; + assign AHBWordSets[index] = CacheReadDataWordM[(index*`AHBW)+`AHBW-1: (index*`AHBW)]; end assign CacheReadDataWordAHB = AHBWordSets[BeatCount[$clog2(LLENPOVERAHBW)-1:0]]; end else assign CacheReadDataWordAHB = CacheReadDataWordM[`AHBW-1:0]; @@ -118,5 +118,5 @@ module ahbcacheinterface #( buscachefsm #(BeatCountThreshold, AHBWLOGBWPL) AHBBuscachefsm( .HCLK, .HRESETn, .Flush, .BusRW, .Stall, .BusCommitted, .BusStall, .CaptureEn, .SelBusBeat, .CacheBusRW, .CacheBusAck, .BeatCount, .BeatCountDelayed, - .HREADY, .HTRANS, .HWRITE, .HBURST); + .HREADY, .HTRANS, .HWRITE, .HBURST); endmodule diff --git a/src/ebu/ahbinterface.sv b/src/ebu/ahbinterface.sv index ff50f54f5..579791032 100644 --- a/src/ebu/ahbinterface.sv +++ b/src/ebu/ahbinterface.sv @@ -32,29 +32,28 @@ module ahbinterface #( parameter LSU = 0 // 1: LSU bus width is `XLEN, 0: IFU bus width is 32 bits )( - input logic HCLK, HRESETn, + input logic HCLK, HRESETn, // bus interface - input logic HREADY, // AHB peripheral ready - output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ - output logic HWRITE, // AHB 0: Read operation 1: Write operation - input logic [`XLEN-1:0] HRDATA, // AHB read data - output logic [`XLEN-1:0] HWDATA, // AHB write data - output logic [`XLEN/8-1:0] HWSTRB, // AHB byte mask + input logic HREADY, // AHB peripheral ready + output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ + output logic HWRITE, // AHB 0: Read operation 1: Write 
operation + input logic [`XLEN-1:0] HRDATA, // AHB read data + output logic [`XLEN-1:0] HWDATA, // AHB write data + output logic [`XLEN/8-1:0] HWSTRB, // AHB byte mask // lsu/ifu interface - input logic Stall, // Core pipeline is stalled - input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting - input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write - input logic [`XLEN/8-1:0] ByteMask, // Bytes enables within a word - input logic [`XLEN-1:0] WriteData, // IEU write data for a store - output logic BusStall, // Bus is busy with an in flight memory operation - output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt + input logic Stall, // Core pipeline is stalled + input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting + input logic [1:0] BusRW, // Memory operation read/write control: 10: read, 01: write + input logic [`XLEN/8-1:0] ByteMask, // Bytes enables within a word + input logic [`XLEN-1:0] WriteData, // IEU write data for a store + output logic BusStall, // Bus is busy with an in flight memory operation + output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt output logic [(LSU ? `XLEN : 32)-1:0] FetchBuffer // Register to hold HRDATA after arriving from the bus ); - logic CaptureEn; - - localparam LEN = (LSU ? `XLEN : 32); // 32 bits for IFU, XLEN for LSU + logic CaptureEn; + localparam LEN = (LSU ? 
`XLEN : 32); // 32 bits for IFU, XLEN for LSU flopen #(LEN) fb(.clk(HCLK), .en(CaptureEn), .d(HRDATA[LEN-1:0]), .q(FetchBuffer)); @@ -70,4 +69,5 @@ module ahbinterface #( busfsm busfsm(.HCLK, .HRESETn, .Flush, .BusRW, .BusCommitted, .Stall, .BusStall, .CaptureEn, .HREADY, .HTRANS, .HWRITE); + endmodule diff --git a/src/ebu/buscachefsm.sv b/src/ebu/buscachefsm.sv index 2f3e99228..508a49ff2 100644 --- a/src/ebu/buscachefsm.sv +++ b/src/ebu/buscachefsm.sv @@ -81,15 +81,15 @@ module buscachefsm #( else CurrState <= #1 NextState; always_comb begin - case(CurrState) - ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; - else if (HREADY & CacheBusRW[0]) NextState = CACHE_WRITEBACK; - else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; - else NextState = ADR_PHASE; - DATA_PHASE: if(HREADY) NextState = MEM3; - else NextState = DATA_PHASE; - MEM3: if(Stall) NextState = MEM3; - else NextState = ADR_PHASE; + case(CurrState) + ADR_PHASE: if (HREADY & |BusRW) NextState = DATA_PHASE; + else if (HREADY & CacheBusRW[0]) NextState = CACHE_WRITEBACK; + else if (HREADY & CacheBusRW[1]) NextState = CACHE_FETCH; + else NextState = ADR_PHASE; + DATA_PHASE: if(HREADY) NextState = MEM3; + else NextState = DATA_PHASE; + MEM3: if(Stall) NextState = MEM3; + else NextState = ADR_PHASE; CACHE_FETCH: if(HREADY & FinalBeatCount & CacheBusRW[0]) NextState = CACHE_WRITEBACK; else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; @@ -98,8 +98,8 @@ module buscachefsm #( else if(HREADY & FinalBeatCount & CacheBusRW[1]) NextState = CACHE_FETCH; else if(HREADY & FinalBeatCount & ~|CacheBusRW) NextState = ADR_PHASE; else NextState = CACHE_WRITEBACK; - default: NextState = ADR_PHASE; - endcase + default: NextState = ADR_PHASE; + endcase end // IEU, LSU, and IFU controls @@ -117,8 +117,8 @@ module buscachefsm #( assign CacheAccess = CurrState == CACHE_FETCH | CurrState == CACHE_WRITEBACK; assign BusStall = 
(CurrState == ADR_PHASE & ((|BusRW) | (|CacheBusRW))) | - //(CurrState == DATA_PHASE & ~BusRW[0]) | // *** replace the next line with this. Fails uart test but i think it's a test problem not a hardware problem. - (CurrState == DATA_PHASE) | + //(CurrState == DATA_PHASE & ~BusRW[0]) | // *** replace the next line with this. Fails uart test but i think it's a test problem not a hardware problem. + (CurrState == DATA_PHASE) | (CurrState == CACHE_FETCH & ~HREADY) | (CurrState == CACHE_WRITEBACK & ~HREADY); assign BusCommitted = CurrState != ADR_PHASE; diff --git a/src/ebu/busfsm.sv b/src/ebu/busfsm.sv index 019708a3d..de1dd7583 100644 --- a/src/ebu/busfsm.sv +++ b/src/ebu/busfsm.sv @@ -57,20 +57,20 @@ module busfsm ( else CurrState <= #1 NextState; always_comb begin - case(CurrState) - ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE; - else NextState = ADR_PHASE; - DATA_PHASE: if(HREADY) NextState = MEM3; - else NextState = DATA_PHASE; - MEM3: if(Stall) NextState = MEM3; - else NextState = ADR_PHASE; - default: NextState = ADR_PHASE; - endcase + case(CurrState) + ADR_PHASE: if(HREADY & |BusRW) NextState = DATA_PHASE; + else NextState = ADR_PHASE; + DATA_PHASE: if(HREADY) NextState = MEM3; + else NextState = DATA_PHASE; + MEM3: if(Stall) NextState = MEM3; + else NextState = ADR_PHASE; + default: NextState = ADR_PHASE; + endcase end assign BusStall = (CurrState == ADR_PHASE & |BusRW) | -// (CurrState == DATA_PHASE & ~BusRW[0]); // possible optimization here. fails uart test, but i'm not sure the failure is valid. - (CurrState == DATA_PHASE); +// (CurrState == DATA_PHASE & ~BusRW[0]); // possible optimization here. fails uart test, but i'm not sure the failure is valid. 
+ (CurrState == DATA_PHASE); assign BusCommitted = CurrState != ADR_PHASE; diff --git a/src/ebu/controllerinputstage.sv b/src/ebu/controllerinputstage.sv index 681f12bc9..7a6c76bb9 100644 --- a/src/ebu/controllerinputstage.sv +++ b/src/ebu/controllerinputstage.sv @@ -36,26 +36,26 @@ module controllerinputstage #( parameter SAVE_ENABLED = 1 // 1: Save manager inputs if Save = 1, 0: Don't save inputs )( - input logic HCLK, - input logic HRESETn, - input logic Save, // Two or more managers requesting (HTRANS != 00) at the same time. Save the non-granted manager inputs - input logic Restore, // Restore a saved manager inputs when it is finally granted - input logic Disable, // Supress HREADY to the non-granted manager - output logic Request, // This manager is making a request + input logic HCLK, + input logic HRESETn, + input logic Save, // Two or more managers requesting (HTRANS != 00) at the same time. Save the non-granted manager inputs + input logic Restore, // Restore a saved manager inputs when it is finally granted + input logic Disable, // Supress HREADY to the non-granted manager + output logic Request, // This manager is making a request // controller input - input logic [1:0] HTRANSIn, // Manager input. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ - input logic HWRITEIn, // Manager input. AHB 0: Read operation 1: Write operation - input logic [2:0] HSIZEIn, // Manager input. AHB transaction width - input logic [2:0] HBURSTIn, // Manager input. AHB burst length + input logic [1:0] HTRANSIn, // Manager input. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ + input logic HWRITEIn, // Manager input. AHB 0: Read operation 1: Write operation + input logic [2:0] HSIZEIn, // Manager input. AHB transaction width + input logic [2:0] HBURSTIn, // Manager input. AHB burst length input logic [`PA_BITS-1:0] HADDRIn, // Manager input. 
AHB address - output logic HREADYOut, // Indicate to manager the peripherial is not busy and another manager does not have priority + output logic HREADYOut, // Indicate to manager the peripherial is not busy and another manager does not have priority // controller output - output logic [1:0] HTRANSOut, // Aribrated manager transaction. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ - output logic HWRITEOut, // Aribrated manager transaction. AHB 0: Read operation 1: Write operation - output logic [2:0] HSIZEOut, // Aribrated manager transaction. AHB transaction width - output logic [2:0] HBURSTOut, // Aribrated manager transaction. AHB burst length + output logic [1:0] HTRANSOut, // Aribrated manager transaction. AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ + output logic HWRITEOut, // Aribrated manager transaction. AHB 0: Read operation 1: Write operation + output logic [2:0] HSIZEOut, // Aribrated manager transaction. AHB transaction width + output logic [2:0] HBURSTOut, // Aribrated manager transaction. AHB burst length output logic [`PA_BITS-1:0] HADDROut, // Aribrated manager transaction. 
AHB address - input logic HREADYIn // Peripherial ready + input logic HREADYIn // Peripherial ready ); logic HWRITESave; diff --git a/src/ebu/ebu.sv b/src/ebu/ebu.sv index d4e87de2a..17ba080fb 100644 --- a/src/ebu/ebu.sv +++ b/src/ebu/ebu.sv @@ -89,8 +89,6 @@ module ebu ( logic IFUReq; logic LSUReq; - - assign HCLK = clk; assign HRESETn = ~reset; diff --git a/src/ebu/ebufsmarb.sv b/src/ebu/ebufsmarb.sv index 1990e8f5d..ec1a3d674 100644 --- a/src/ebu/ebufsmarb.sv +++ b/src/ebu/ebufsmarb.sv @@ -31,34 +31,33 @@ `include "wally-config.vh" module ebufsmarb ( - input logic HCLK, - input logic HRESETn, + input logic HCLK, + input logic HRESETn, input logic [2:0] HBURST, // AHB burst length - input logic HREADY, + input logic HREADY, - input logic LSUReq, - input logic IFUReq, + input logic LSUReq, + input logic IFUReq, + output logic IFUSave, + output logic IFURestore, + output logic IFUDisable, + output logic IFUSelect, + output logic LSUDisable, + output logic LSUSelect); - output logic IFUSave, - output logic IFURestore, - output logic IFUDisable, - output logic IFUSelect, - output logic LSUDisable, - output logic LSUSelect); - - typedef enum logic [1:0] {IDLE, ARBITRATE} statetype; + typedef enum logic [1:0] {IDLE, ARBITRATE} statetype; statetype CurrState, NextState; - logic both; // Both the LSU and IFU request at the same time - logic IFUReqD; // 1 cycle delayed IFU request. Part of arbitration - logic FinalBeat, FinalBeatD; // Indicates the last beat of a burst - logic BeatCntEn; - logic [3:0] BeatCount; // Position within a burst transfer - logic BeatCntReset; - logic [3:0] Threshold; // Number of beats derived from HBURST + logic both; // Both the LSU and IFU request at the same time + logic IFUReqD; // 1 cycle delayed IFU request. 
Part of arbitration + logic FinalBeat, FinalBeatD; // Indicates the last beat of a burst + logic BeatCntEn; + logic [3:0] BeatCount; // Position within a burst transfer + logic BeatCntReset; + logic [3:0] Threshold; // Number of beats derived from HBURST //////////////////////////////////////////////////////////////////////////////////////////////////// // Aribtration scheme @@ -70,8 +69,8 @@ module ebufsmarb ( flopenl #(.TYPE(statetype)) busreg(HCLK, ~HRESETn, 1'b1, NextState, IDLE, CurrState); always_comb case (CurrState) - IDLE: if (both) NextState = ARBITRATE; - else NextState = IDLE; + IDLE: if (both) NextState = ARBITRATE; + else NextState = IDLE; ARBITRATE: if (HREADY & FinalBeatD & ~(LSUReq & IFUReq)) NextState = IDLE; else NextState = ARBITRATE; default: NextState = IDLE; @@ -100,27 +99,24 @@ module ebufsmarb ( assign BeatCntReset = NextState == IDLE; assign FinalBeat = (BeatCount == Threshold); // Detect when we are waiting on the final access. - assign BeatCntEn = (NextState == ARBITRATE) & HREADY; + // Counting the beats in the EBU is only necessary when both the LSU and IFU request concurrently. + // LSU has priority. HREADY serves double duty during a burst transaction. It indicates when the + // beat completes and when the transaction finishes. However there is nothing external to + // differentiate them. The EBU counts the HREADY beats so it knows when to switch to the IFU's + // request. + assign BeatCntEn = (NextState == ARBITRATE) & HREADY; counter #(4) BeatCounter(HCLK, ~HRESETn | BeatCntReset | FinalBeat, BeatCntEn, BeatCount); // Used to store data from data phase of AHB. flopenr #(1) FinalBeatReg(HCLK, ~HRESETn | BeatCntReset, BeatCntEn, FinalBeat, FinalBeatD); - // unlike the bus fsm in lsu/ifu, we need to derive the number of beats from HBURST. - // HBURST[2:1] Beats - // 00 1 - // 01 4 - // 10 8 - // 11 16 + // unlike the bus fsm in lsu/ifu, we need to derive the number of beats from HBURST, Threshold = num beats - 1. 
+ // HBURST[2:1] Beats threshold + // 00 1 0 + // 01 4 3 + // 10 8 7 + // 11 16 15 always_comb if (HBURST[2:1] == 2'b00) Threshold = 4'b0000; else Threshold = (2 << HBURST[2:1]) - 1; -/* case(HBURST) - 0: Threshold = 4'b0000; - 3: Threshold = 4'b0011; // INCR4 - 5: Threshold = 4'b0111; // INCR8 - 7: Threshold = 4'b1111; // INCR16 - default: Threshold = 4'b0000; // INCR without end. - endcase - end */ endmodule From b5a58502d06c6cba4b3fb7a43bf74d441b67e101 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Mar 2023 15:15:38 -0500 Subject: [PATCH 4/5] Replaced tabs -> spaces cache. --- src/cache/cache.sv | 22 +++++----- src/cache/cachefsm.sv | 76 +++++++++++++++++------------------ src/cache/cacheway.sv | 14 ++----- src/cache/subcachelineread.sv | 6 +-- src/ebu/buscachefsm.sv | 50 +++++++++++------------ src/ebu/ebu.sv | 37 +++++++++-------- 6 files changed, 98 insertions(+), 107 deletions(-) diff --git a/src/cache/cache.sv b/src/cache/cache.sv index 5d4fca7d5..da7f83276 100644 --- a/src/cache/cache.sv +++ b/src/cache/cache.sv @@ -98,9 +98,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE logic CacheEn; logic [CACHEWORDSPERLINE-1:0] MemPAdrDecoded; logic [LINELEN/8-1:0] LineByteMask, DemuxedByteMask, FetchBufferByteSel; - logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; + logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1:0] WordOffsetAddr; - genvar index; + genvar index; ///////////////////////////////////////////////////////////////////////////////////////////// // Read Path @@ -154,9 +154,9 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE // Bus address for fetch, writeback, or flush writeback mux3 #(`PA_BITS) CacheBusAdrMux(.d0({PAdr[`PA_BITS-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), - .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], {OFFSETLEN{1'b0}}}), - .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}), - .s({SelFlush, SelWriteback}), .y(CacheBusAdr)); + .d1({Tag, PAdr[SETTOP-1:OFFSETLEN], 
{OFFSETLEN{1'b0}}}), + .d2({Tag, FlushAdr, {OFFSETLEN{1'b0}}}), + .s({SelFlush, SelWriteback}), .y(CacheBusAdr)); ///////////////////////////////////////////////////////////////////////////////////////////// // Write Path @@ -198,11 +198,11 @@ module cache #(parameter LINELEN, NUMLINES, NUMWAYS, LOGBWPL, WORDLEN, MUXINTE ///////////////////////////////////////////////////////////////////////////////////////////// cachefsm #(READ_ONLY_CACHE) cachefsm(.clk, .reset, .CacheBusRW, .CacheBusAck, - .FlushStage, .CacheRW, .CacheAtomic, .Stall, - .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, - .CacheMiss, .CacheAccess, .SelAdr, - .ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush, - .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, - .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, + .FlushStage, .CacheRW, .CacheAtomic, .Stall, + .CacheHit, .LineDirty, .CacheStall, .CacheCommitted, + .CacheMiss, .CacheAccess, .SelAdr, + .ClearValid, .ClearDirty, .SetDirty, .SetValid, .SelWriteback, .SelFlush, + .FlushAdrCntEn, .FlushWayCntEn, .FlushCntRst, + .FlushAdrFlag, .FlushWayFlag, .FlushCache, .SelFetchBuffer, .InvalidateCache, .CacheEn, .LRUWriteEn); endmodule diff --git a/src/cache/cachefsm.sv b/src/cache/cachefsm.sv index 1edb0b65a..c51257be7 100644 --- a/src/cache/cachefsm.sv +++ b/src/cache/cachefsm.sv @@ -47,7 +47,7 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( output logic [1:0] CacheBusRW, // [1] Read (cache line fetch) or [0] write bus (cache line writeback) // performance counter outputs output logic CacheMiss, // Cache miss - output logic CacheAccess, // Cache access + output logic CacheAccess, // Cache access // cache internals input logic CacheHit, // Exactly 1 way hits @@ -69,21 +69,21 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( output logic CacheEn // Enable the cache memory arrays. 
Disable hold read data constant ); - logic resetDelay; - logic AMO, StoreAMO; - logic AnyUpdateHit, AnyHit; - logic AnyMiss; - logic FlushFlag; + logic resetDelay; + logic AMO, StoreAMO; + logic AnyUpdateHit, AnyHit; + logic AnyMiss; + logic FlushFlag; typedef enum logic [3:0]{STATE_READY, // hit states - // miss states - STATE_FETCH, - STATE_WRITEBACK, - STATE_WRITE_LINE, - STATE_READ_HOLD, // required for back to back reads. structural hazard on writting SRAM - // flush cache - STATE_FLUSH, - STATE_FLUSH_WRITEBACK} statetype; + // miss states + STATE_FETCH, + STATE_WRITEBACK, + STATE_WRITE_LINE, + STATE_READ_HOLD, // required for back to back reads. structural hazard on writting SRAM + // flush cache + STATE_FLUSH, + STATE_FLUSH_WRITEBACK} statetype; statetype CurrState, NextState; @@ -111,26 +111,26 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( always_comb begin NextState = STATE_READY; case (CurrState) - STATE_READY: if(InvalidateCache) NextState = STATE_READY; - else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; - else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH; - else if(AnyMiss & LineDirty) NextState = STATE_WRITEBACK; - else NextState = STATE_READY; - STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; - else NextState = STATE_FETCH; - STATE_WRITE_LINE: NextState = STATE_READ_HOLD; - STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD; - else NextState = STATE_READY; - STATE_WRITEBACK: if(CacheBusAck) NextState = STATE_FETCH; - else NextState = STATE_WRITEBACK; + STATE_READY: if(InvalidateCache) NextState = STATE_READY; + else if(FlushCache & ~READ_ONLY_CACHE) NextState = STATE_FLUSH; + else if(AnyMiss & (READ_ONLY_CACHE | ~LineDirty)) NextState = STATE_FETCH; + else if(AnyMiss & LineDirty) NextState = STATE_WRITEBACK; + else NextState = STATE_READY; + STATE_FETCH: if(CacheBusAck) NextState = STATE_WRITE_LINE; + else NextState = STATE_FETCH; + STATE_WRITE_LINE: NextState = STATE_READ_HOLD; + 
STATE_READ_HOLD: if(Stall) NextState = STATE_READ_HOLD; + else NextState = STATE_READY; + STATE_WRITEBACK: if(CacheBusAck) NextState = STATE_FETCH; + else NextState = STATE_WRITEBACK; // eviction needs a delay as the bus fsm does not correctly handle sending the write command at the same time as getting back the bus ack. - STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; - else if (FlushFlag) NextState = STATE_READ_HOLD; - else NextState = STATE_FLUSH; - STATE_FLUSH_WRITEBACK: if(CacheBusAck & ~FlushFlag) NextState = STATE_FLUSH; - else if(CacheBusAck) NextState = STATE_READ_HOLD; - else NextState = STATE_FLUSH_WRITEBACK; - default: NextState = STATE_READY; + STATE_FLUSH: if(LineDirty) NextState = STATE_FLUSH_WRITEBACK; + else if (FlushFlag) NextState = STATE_READ_HOLD; + else NextState = STATE_FLUSH; + STATE_FLUSH_WRITEBACK: if(CacheBusAck & ~FlushFlag) NextState = STATE_FLUSH; + else if(CacheBusAck) NextState = STATE_READ_HOLD; + else NextState = STATE_FLUSH_WRITEBACK; + default: NextState = STATE_READY; endcase end @@ -156,14 +156,14 @@ module cachefsm #(parameter READ_ONLY_CACHE = 0) ( (CurrState == STATE_READY & AnyMiss & LineDirty); assign SelFlush = (CurrState == STATE_READY & FlushCache) | - (CurrState == STATE_FLUSH) | - (CurrState == STATE_FLUSH_WRITEBACK); + (CurrState == STATE_FLUSH) | + (CurrState == STATE_FLUSH_WRITEBACK); assign FlushAdrCntEn = (CurrState == STATE_FLUSH_WRITEBACK & FlushWayFlag & CacheBusAck) | - (CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty); + (CurrState == STATE_FLUSH & FlushWayFlag & ~LineDirty); assign FlushWayCntEn = (CurrState == STATE_FLUSH & ~LineDirty) | - (CurrState == STATE_FLUSH_WRITEBACK & CacheBusAck); + (CurrState == STATE_FLUSH_WRITEBACK & CacheBusAck); assign FlushCntRst = (CurrState == STATE_FLUSH & FlushFlag & ~LineDirty) | - (CurrState == STATE_FLUSH_WRITEBACK & FlushFlag & CacheBusAck); + (CurrState == STATE_FLUSH_WRITEBACK & FlushFlag & CacheBusAck); // Bus interface controls assign 
CacheBusRW[1] = (CurrState == STATE_READY & AnyMiss & ~LineDirty) | (CurrState == STATE_FETCH & ~CacheBusAck) | diff --git a/src/cache/cacheway.sv b/src/cache/cacheway.sv index da40ab705..d7cc0792d 100644 --- a/src/cache/cacheway.sv +++ b/src/cache/cacheway.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, - OFFSETLEN = 5, INDEXLEN = 9, READ_ONLY_CACHE = 0) ( + OFFSETLEN = 5, INDEXLEN = 9, READ_ONLY_CACHE = 0) ( input logic clk, input logic reset, input logic FlushStage, // Pipeline flush of second stage (prevent writes and bus operations) @@ -86,8 +86,6 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, assign SelNonHit = FlushWayEn | SetValid | SelWriteback; mux2 #(1) seltagmux(VictimWay, FlushWay, SelFlush, SelTag); - //assign SelTag = VictimWay | FlushWay; - //assign SelData = HitWay | FlushWayEn | VictimWayEn; mux2 #(1) selectedwaymux(HitWay, SelTag, SelNonHit , SelData); @@ -95,10 +93,6 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, // Write Enable demux ///////////////////////////////////////////////////////////////////////////////////////////// - // RT: Can we merge these two muxes? This is also shared in cacheLRU. - //mux3 #(1) selectwaymux(HitWay, VictimWay, FlushWay, {SelFlush, SetValid}, SelData); - //mux3 #(1) selecteddatamux(HitWay, VictimWay, FlushWay, {SelFlush, SelNonHit}, SelData); - assign SetValidWay = SetValid & SelData; assign ClearValidWay = ClearValid & SelData; assign SetDirtyWay = SetDirty & SelData; @@ -117,8 +111,6 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, .addr(CacheSet), .dout(ReadTag), .bwe('1), .din(PAdr[`PA_BITS-1:OFFSETLEN+INDEXLEN]), .we(SetValidEN)); - - // AND portion of distributed tag multiplexer assign TagWay = SelTag ? 
ReadTag : '0; // AND part of AOMux assign DirtyWay = SelTag & Dirty & ValidWay; @@ -152,8 +144,8 @@ module cacheway #(parameter NUMLINES=512, LINELEN = 256, TAGLEN = 26, always_ff @(posedge clk) begin // Valid bit array, if (reset) ValidBits <= #1 '0; if(CacheEn) begin - ValidWay <= #1 ValidBits[CacheSet]; - if(InvalidateCache) ValidBits <= #1 '0; + ValidWay <= #1 ValidBits[CacheSet]; + if(InvalidateCache) ValidBits <= #1 '0; else if (SetValidEN | (ClearValidWay & ~FlushStage)) ValidBits[CacheSet] <= #1 SetValidWay; end end diff --git a/src/cache/subcachelineread.sv b/src/cache/subcachelineread.sv index 490618070..58d022a71 100644 --- a/src/cache/subcachelineread.sv +++ b/src/cache/subcachelineread.sv @@ -33,8 +33,8 @@ module subcachelineread #(parameter LINELEN, WORDLEN, parameter MUXINTERVAL )( // The number of bits between mux. Set to 16 for I$ to support compressed. Set to `LLEN for D$ input logic [$clog2(LINELEN/8) - $clog2(MUXINTERVAL/8) - 1 : 0] PAdr, // Physical address - input logic [LINELEN-1:0] ReadDataLine,// Read data of the whole cacheline - output logic [WORDLEN-1:0] ReadDataWord // read data of selected word. + input logic [LINELEN-1:0] ReadDataLine,// Read data of the whole cacheline + output logic [WORDLEN-1:0] ReadDataWord // read data of selected word. 
); localparam WORDSPERLINE = LINELEN/MUXINTERVAL; @@ -50,7 +50,7 @@ module subcachelineread #(parameter LINELEN, WORDLEN, genvar index; for (index = 0; index < WORDSPERLINE; index++) begin:readdatalinesetsmux - assign ReadDataLineSets[index] = ReadDataLinePad[(index*MUXINTERVAL)+WORDLEN-1 : (index*MUXINTERVAL)]; + assign ReadDataLineSets[index] = ReadDataLinePad[(index*MUXINTERVAL)+WORDLEN-1 : (index*MUXINTERVAL)]; end // variable input mux diff --git a/src/ebu/buscachefsm.sv b/src/ebu/buscachefsm.sv index 508a49ff2..c619c9135 100644 --- a/src/ebu/buscachefsm.sv +++ b/src/ebu/buscachefsm.sv @@ -35,33 +35,33 @@ module buscachefsm #( parameter BeatCountThreshold, // Largest beat index parameter AHBWLOGBWPL // Log2 of BEATSPERLINE )( - input logic HCLK, - input logic HRESETn, + input logic HCLK, + input logic HRESETn, // IEU interface - input logic Stall, // Core pipeline is stalled - input logic Flush, // Pipeline stage flush. Prevents bus transaction from starting - input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write - output logic BusStall, // Bus is busy with an in flight memory operation - output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt - - // ahb cache interface locals. - output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA - - // cache interface - input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch - output logic CacheBusAck, // Handshack to $ indicating bus transaction completed + input logic Stall, // Core pipeline is stalled + input logic Flush, // Pipeline stage flush. 
Prevents bus transaction from starting + input logic [1:0] BusRW, // Uncached memory operation read/write control: 10: read, 01: write + output logic BusStall, // Bus is busy with an in flight memory operation + output logic BusCommitted, // Bus is busy with an in flight memory operation and it is not safe to take an interrupt + + // ahb cache interface locals. + output logic CaptureEn, // Enable updating the Fetch buffer with valid data from HRDATA + + // cache interface + input logic [1:0] CacheBusRW, // Cache bus operation, 01: writeback, 10: fetch + output logic CacheBusAck, // Handshack to $ indicating bus transaction completed // lsu interface output logic [AHBWLOGBWPL-1:0] BeatCount, // Beat position within the cache line in the Address Phase output logic [AHBWLOGBWPL-1:0] BeatCountDelayed, // Beat within the cache line in the second (Data) cache stage - output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr + output logic SelBusBeat, // Tells the cache to select the word from ReadData or WriteData from BeatCount rather than PAdr // BUS interface - input logic HREADY, // AHB peripheral ready - output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ - output logic HWRITE, // AHB 0: Read operation 1: Write operation - output logic [2:0] HBURST // AHB burst length + input logic HREADY, // AHB peripheral ready + output logic [1:0] HTRANS, // AHB transaction type, 00: IDLE, 10 NON_SEQ, 11 SEQ + output logic HWRITE, // AHB 0: Read operation 1: Write operation + output logic [2:0] HBURST // AHB burst length ); typedef enum logic [2:0] {ADR_PHASE, DATA_PHASE, MEM3, CACHE_FETCH, CACHE_WRITEBACK} busstatetype; @@ -70,11 +70,11 @@ module buscachefsm #( busstatetype CurrState, NextState; logic [AHBWLOGBWPL-1:0] NextBeatCount; - logic FinalBeatCount; - logic [2:0] LocalBurstType; - logic BeatCntEn; - logic BeatCntReset; - logic CacheAccess; + logic FinalBeatCount; + logic [2:0] 
LocalBurstType; + logic BeatCntEn; + logic BeatCntReset; + logic CacheAccess; always_ff @(posedge HCLK) if (~HRESETn | Flush) CurrState <= #1 ADR_PHASE; @@ -144,7 +144,7 @@ module buscachefsm #( // communication to cache assign CacheBusAck = (CacheAccess & HREADY & FinalBeatCount); assign SelBusBeat = (CurrState == ADR_PHASE & (BusRW[0] | CacheBusRW[0])) | - (CurrState == DATA_PHASE & BusRW[0]) | + (CurrState == DATA_PHASE & BusRW[0]) | (CurrState == CACHE_WRITEBACK) | (CurrState == CACHE_FETCH); diff --git a/src/ebu/ebu.sv b/src/ebu/ebu.sv index 17ba080fb..b045c6aaa 100644 --- a/src/ebu/ebu.sv +++ b/src/ebu/ebu.sv @@ -52,27 +52,26 @@ module ebu ( output logic LSUHREADY, // AHB peripheral. Never gated as LSU always has priority // AHB-Lite external signals - output logic HCLK, HRESETn, - input logic HREADY, // AHB peripheral ready - input logic HRESP, // AHB peripheral response. 0: OK 1: Error - output logic [`PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration - output logic [`AHBW-1:0] HWDATA, // AHB Write data after arbitration - output logic [`XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration - output logic HWRITE, // AHB transaction direction after arbitration - output logic [2:0] HSIZE, // AHB transaction size after arbitration - output logic [2:0] HBURST, // AHB burst length after arbitration - output logic [3:0] HPROT, // AHB protection. Wally does not use - output logic [1:0] HTRANS, // AHB transaction request after arbitration - output logic HMASTLOCK // AHB master lock. Wally does not use + output logic HCLK, HRESETn, + input logic HREADY, // AHB peripheral ready + input logic HRESP, // AHB peripheral response. 
0: OK 1: Error + output logic [`PA_BITS-1:0] HADDR, // AHB address to peripheral after arbitration + output logic [`AHBW-1:0] HWDATA, // AHB Write data after arbitration + output logic [`XLEN/8-1:0] HWSTRB, // AHB byte write enables after arbitration + output logic HWRITE, // AHB transaction direction after arbitration + output logic [2:0] HSIZE, // AHB transaction size after arbitration + output logic [2:0] HBURST, // AHB burst length after arbitration + output logic [3:0] HPROT, // AHB protection. Wally does not use + output logic [1:0] HTRANS, // AHB transaction request after arbitration + output logic HMASTLOCK // AHB master lock. Wally does not use ); - logic LSUDisable; - logic LSUSelect; + logic LSUSelect; logic IFUSave; - logic IFURestore; - logic IFUDisable; - logic IFUSelect; + logic IFURestore; + logic IFUDisable; + logic IFUSelect; logic [`PA_BITS-1:0] IFUHADDROut; logic [1:0] IFUHTRANSOut; @@ -87,7 +86,7 @@ module ebu ( logic LSUHWRITEOut; logic IFUReq; - logic LSUReq; + logic LSUReq; assign HCLK = clk; assign HRESETn = ~reset; @@ -127,7 +126,7 @@ module ebu ( // HRDATA is sent to all controllers at the core level. ebufsmarb ebufsmarb(.HCLK, .HRESETn, .HBURST, .HREADY, .LSUReq, .IFUReq, .IFUSave, - .IFURestore, .IFUDisable, .IFUSelect, .LSUDisable, .LSUSelect); + .IFURestore, .IFUDisable, .IFUSelect, .LSUDisable, .LSUSelect); endmodule From 46b1bca4fc892329ea0d9bbe0e278e0415b61037 Mon Sep 17 00:00:00 2001 From: Ross Thompson Date: Fri, 24 Mar 2023 17:32:25 -0500 Subject: [PATCH 5/5] Fixed all tab/space issues in RTL. 
--- src/fpu/fctrl.sv | 80 +++--- src/fpu/fcvt.sv | 28 +- src/fpu/fdivsqrt/fdivsqrt.sv | 76 +++--- src/fpu/fdivsqrt/fdivsqrtpostproc.sv | 14 +- src/fpu/fdivsqrt/fdivsqrtpreproc.sv | 50 ++-- src/fpu/fdivsqrt/fdivsqrtqsel2.sv | 6 +- src/fpu/fdivsqrt/fdivsqrtqsel4.sv | 22 +- src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv | 20 +- src/fpu/fdivsqrt/fdivsqrtstage2.sv | 34 +-- src/fpu/fdivsqrt/fdivsqrtstage4.sv | 42 +-- src/fpu/fma/fmalza.sv | 4 +- src/fpu/fpu.sv | 222 ++++++++-------- src/fpu/fregfile.sv | 6 +- src/fpu/fsgninj.sv | 54 ++-- src/generic/arrs.sv | 8 +- src/generic/clockgater.sv | 10 +- src/generic/mem/ram1p1rwbe.sv | 63 +++-- src/generic/mem/ram1p1rwbe_64x128.sv | 2 +- src/generic/mem/ram1p1rwbe_64x22.sv | 2 +- src/generic/mem/ram1p1rwbe_64x44.sv | 2 +- src/generic/mem/ram2p1r1wbe.sv | 152 ++++++----- src/generic/mem/ram2p1r1wbe_1024x36.sv | 10 +- src/generic/mem/ram2p1r1wbe_1024x68.sv | 8 +- src/generic/mem/ram2p1r1wbe_128x64.sv | 8 +- src/generic/mem/ram2p1r1wbe_512x64.sv | 8 +- src/generic/mem/ram2p1r1wbe_64x32.sv | 8 +- src/generic/mem/rom1p1r.sv | 94 +++---- src/generic/mem/rom1p1r_128x32.sv | 2 +- src/generic/mem/rom1p1r_128x64.sv | 6 +- src/ieu/bmu/bmuctrl.sv | 8 +- src/ieu/controller.sv | 66 ++--- src/ieu/datapath.sv | 4 +- src/ieu/forward.sv | 2 +- src/ieu/ieu.sv | 34 +-- src/ieu/regfile.sv | 2 +- src/ifu/bpred/RASPredictor.sv | 24 +- src/ifu/bpred/bpred.sv | 88 +++--- src/ifu/bpred/btb.sv | 33 ++- src/ifu/bpred/gshare.sv | 38 +-- src/ifu/bpred/gsharebasic.sv | 20 +- src/ifu/bpred/icpred.sv | 12 +- src/ifu/ifu.sv | 196 +++++++------- src/ifu/irom.sv | 14 +- src/ifu/spill.sv | 37 +-- src/lsu/dtim.sv | 12 +- src/lsu/lrsc.sv | 12 +- src/lsu/lsu.sv | 108 ++++---- src/lsu/subwordread.sv | 34 +-- src/mdu/div.sv | 4 +- src/mdu/mdu.sv | 100 +++---- src/mmu/hptw.sv | 354 ++++++++++++------------- src/mmu/tlb/tlb.sv | 6 +- src/privileged/csr.sv | 14 +- src/privileged/csrc.sv | 90 +++---- src/privileged/csri.sv | 10 +- src/privileged/csrm.sv | 42 +-- 
src/privileged/csrs.sv | 38 +-- src/privileged/csrsr.sv | 2 +- src/privileged/csru.sv | 2 +- src/privileged/privileged.sv | 160 +++++------ src/privileged/trap.sv | 50 ++-- src/uncore/clint_apb.sv | 2 +- src/uncore/plic_apb.sv | 50 ++-- src/uncore/ram_ahb.sv | 45 ++-- src/uncore/uartPC16550D.sv | 146 +++++----- src/uncore/uncore.sv | 106 ++++---- src/wally/wallypipelinedcore.sv | 76 +++--- src/wally/wallypipelinedsoc.sv | 56 ++-- 68 files changed, 1561 insertions(+), 1577 deletions(-) diff --git a/src/fpu/fctrl.sv b/src/fpu/fctrl.sv index 5700e1b6a..be10e8007 100755 --- a/src/fpu/fctrl.sv +++ b/src/fpu/fctrl.sv @@ -31,53 +31,53 @@ module fctrl ( input logic clk, input logic reset, // input control signals - input logic StallE, StallM, StallW, // stall signals - input logic FlushE, FlushM, FlushW, // flush signals - input logic IntDivE, // is inteteger division - input logic [2:0] FRM_REGW, // rounding mode from CSR - input logic [1:0] STATUS_FS, // is FPU enabled? - input logic FDivBusyE, // is the divider busy - // intruction - input logic [31:0] InstrD, // the full instruction - input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision - input logic [6:0] OpD, // bits 6:0 of instruction - input logic [4:0] Rs2D, // bits 24:20 of instruction - input logic [2:0] Funct3D, Funct3E, // bits 14:12 of instruction - may contain rounding mode - // input mux selections - output logic XEnD, YEnD, ZEnD, // enable inputs - output logic XEnE, YEnE, ZEnE, // enable inputs - // opperation mux selections - output logic FCvtIntE, FCvtIntW, // convert to integer opperation - output logic [2:0] FrmM, // FP rounding mode - output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format - output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component - output logic FpLoadStoreM, // FP load or store instruction - output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit - output logic [1:0] FResSelE, FResSelM, 
FResSelW, // Select one of the results that finish in the memory stage + input logic StallE, StallM, StallW, // stall signals + input logic FlushE, FlushM, FlushW, // flush signals + input logic IntDivE, // is inteteger division + input logic [2:0] FRM_REGW, // rounding mode from CSR + input logic [1:0] STATUS_FS, // is FPU enabled? + input logic FDivBusyE, // is the divider busy + // intruction + input logic [31:0] InstrD, // the full instruction + input logic [6:0] Funct7D, // bits 31:25 of instruction - may contain percision + input logic [6:0] OpD, // bits 6:0 of instruction + input logic [4:0] Rs2D, // bits 24:20 of instruction + input logic [2:0] Funct3D, Funct3E, // bits 14:12 of instruction - may contain rounding mode + // input mux selections + output logic XEnD, YEnD, ZEnD, // enable inputs + output logic XEnE, YEnE, ZEnE, // enable inputs + // opperation mux selections + output logic FCvtIntE, FCvtIntW, // convert to integer opperation + output logic [2:0] FrmM, // FP rounding mode + output logic [`FMTBITS-1:0] FmtE, FmtM, // FP format + output logic [2:0] OpCtrlE, OpCtrlM, // Select which opperation to do in each component + output logic FpLoadStoreM, // FP load or store instruction + output logic [1:0] PostProcSelE, PostProcSelM, // select result in the post processing unit + output logic [1:0] FResSelE, FResSelM, FResSelW, // Select one of the results that finish in the memory stage // register control signals - output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable - output logic FWriteIntE, FWriteIntM, // Write to integer register - output logic [4:0] Adr1D, Adr2D, Adr3D, // adresses of each input - output logic [4:0] Adr1E, Adr2E, Adr3E, // adresses of each input + output logic FRegWriteE, FRegWriteM, FRegWriteW, // FP register write enable + output logic FWriteIntE, FWriteIntM, // Write to integer register + output logic [4:0] Adr1D, Adr2D, Adr3D, // adresses of each input + output logic [4:0] Adr1E, Adr2E, Adr3E, // adresses 
of each input // other control signals output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction - output logic FDivStartE, IDivStartE // Start division or squareroot + output logic FDivStartE, IDivStartE // Start division or squareroot ); `define FCTRLW 12 - logic [`FCTRLW-1:0] ControlsD; // control signals - logic FRegWriteD; // FP register write enable - logic FDivStartD; // start division/sqrt - logic FWriteIntD; // integer register write enable - logic [2:0] OpCtrlD; // Select which opperation to do in each component - logic [1:0] PostProcSelD; // select result in the post processing unit - logic [1:0] FResSelD; // Select one of the results that finish in the memory stage - logic [2:0] FrmD, FrmE; // FP rounding mode - logic [`FMTBITS-1:0] FmtD; // FP format - logic [1:0] Fmt; // format - before possible reduction - logic SupportedFmt; // is the format supported - logic FCvtIntD, FCvtIntM; // convert to integer opperation + logic [`FCTRLW-1:0] ControlsD; // control signals + logic FRegWriteD; // FP register write enable + logic FDivStartD; // start division/sqrt + logic FWriteIntD; // integer register write enable + logic [2:0] OpCtrlD; // Select which opperation to do in each component + logic [1:0] PostProcSelD; // select result in the post processing unit + logic [1:0] FResSelD; // Select one of the results that finish in the memory stage + logic [2:0] FrmD, FrmE; // FP rounding mode + logic [`FMTBITS-1:0] FmtD; // FP format + logic [1:0] Fmt; // format - before possible reduction + logic SupportedFmt; // is the format supported + logic FCvtIntD, FCvtIntM; // convert to integer opperation // FPU Instruction Decoder assign Fmt = Funct7D[1:0]; diff --git a/src/fpu/fcvt.sv b/src/fpu/fcvt.sv index 2f121a75a..32ca7542f 100644 --- a/src/fpu/fcvt.sv +++ b/src/fpu/fcvt.sv @@ -30,20 +30,20 @@ `include "wally-config.vh" module fcvt ( - input logic Xs, // input's sign - input logic [`NE-1:0] Xe, // input's exponent - input logic [`NF:0] Xm, // 
input's fraction - input logic [`XLEN-1:0] Int, // integer input - from IEU - input logic [2:0] OpCtrl, // choose which opperation (look below for values) - input logic ToInt, // is fp->int (since it's writting to the integer register) - input logic XZero, // is the input zero - input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) - output logic [`NE:0] Ce, // the calculated expoent - output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by + input logic Xs, // input's sign + input logic [`NE-1:0] Xe, // input's exponent + input logic [`NF:0] Xm, // input's fraction + input logic [`XLEN-1:0] Int, // integer input - from IEU + input logic [2:0] OpCtrl, // choose which opperation (look below for values) + input logic ToInt, // is fp->int (since it's writting to the integer register) + input logic XZero, // is the input zero + input logic [`FMTBITS-1:0] Fmt, // the input's precision (11=quad 01=double 00=single 10=half) + output logic [`NE:0] Ce, // the calculated expoent + output logic [`LOGCVTLEN-1:0] ShiftAmt, // how much to shift by output logic ResSubnormUf,// does the result underflow or is subnormal - output logic Cs, // the result's sign - output logic IntZero, // is the integer zero? - output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder) + output logic Cs, // the result's sign + output logic IntZero, // is the integer zero? + output logic [`CVTLEN-1:0] LzcIn // input to the Leading Zero Counter (priority encoder) ); // OpCtrls: @@ -60,7 +60,7 @@ module fcvt ( logic [`XLEN-1:0] PosInt; // the positive integer input logic [`XLEN-1:0] TrimInt; // integer trimmed to the correct size logic [`NE-2:0] NewBias; // the bias of the final result - logic [`NE-1:0] OldExp; // the old exponent + logic [`NE-1:0] OldExp; // the old exponent logic Signed; // is the opperation with a signed integer? logic Int64; // is the integer 64 bits? 
logic IntToFp; // is the opperation an int->fp conversion? diff --git a/src/fpu/fdivsqrt/fdivsqrt.sv b/src/fpu/fdivsqrt/fdivsqrt.sv index c69618f43..92f64cbdc 100644 --- a/src/fpu/fdivsqrt/fdivsqrt.sv +++ b/src/fpu/fdivsqrt/fdivsqrt.sv @@ -29,49 +29,49 @@ `include "wally-config.vh" module fdivsqrt( - input logic clk, - input logic reset, + input logic clk, + input logic reset, input logic [`FMTBITS-1:0] FmtE, - input logic XsE, - input logic [`NF:0] XmE, YmE, - input logic [`NE-1:0] XeE, YeE, - input logic XInfE, YInfE, - input logic XZeroE, YZeroE, - input logic XNaNE, YNaNE, - input logic FDivStartE, IDivStartE, - input logic StallM, - input logic FlushE, - input logic SqrtE, SqrtM, - input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B - input logic [2:0] Funct3E, Funct3M, - input logic IntDivE, W64E, - output logic DivStickyM, - output logic FDivBusyE, IFDivStartE, FDivDoneE, - output logic [`NE+1:0] QeM, - output logic [`DIVb:0] QmM, - output logic [`XLEN-1:0] FIntDivResultM + input logic XsE, + input logic [`NF:0] XmE, YmE, + input logic [`NE-1:0] XeE, YeE, + input logic XInfE, YInfE, + input logic XZeroE, YZeroE, + input logic XNaNE, YNaNE, + input logic FDivStartE, IDivStartE, + input logic StallM, + input logic FlushE, + input logic SqrtE, SqrtM, + input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // these are the src outputs before the mux choosing between them and PCE to put in srcA/B + input logic [2:0] Funct3E, Funct3M, + input logic IntDivE, W64E, + output logic DivStickyM, + output logic FDivBusyE, IFDivStartE, FDivDoneE, + output logic [`NE+1:0] QeM, + output logic [`DIVb:0] QmM, + output logic [`XLEN-1:0] FIntDivResultM ); // Floating-point division and square root module, with optional integer division and remainder // Computes X/Y, sqrt(X), A/B, or A%B - logic [`DIVb+3:0] WS, WC; // Partial remainder components - logic [`DIVb+3:0] X; // Iterator 
Initial Value (from dividend) - logic [`DIVb-1:0] DPreproc, D; // Iterator Divisor - logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values - logic [`DIVb+1:0] FirstC; // Step tracker - logic Firstun; // Quotient selection - logic WZeroE; // Early termination flag - logic SpecialCaseM; // Divide by zero, square root of negative, etc. - logic DivStartE; // Enable signal for flops during stall - - // Integer div/rem signals - logic BZeroM; // Denominator is zero - logic IntDivM; // Integer operation - logic [`DIVBLEN:0] nE, nM, mM; // Shift amounts - logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor - logic [`XLEN-1:0] AM; // Original Numerator for postprocessor - logic ISpecialCaseE; // Integer div/remainder special cases + logic [`DIVb+3:0] WS, WC; // Partial remainder components + logic [`DIVb+3:0] X; // Iterator Initial Value (from dividend) + logic [`DIVb-1:0] DPreproc, D; // Iterator Divisor + logic [`DIVb:0] FirstU, FirstUM; // Intermediate result values + logic [`DIVb+1:0] FirstC; // Step tracker + logic Firstun; // Quotient selection + logic WZeroE; // Early termination flag + logic SpecialCaseM; // Divide by zero, square root of negative, etc. 
+ logic DivStartE; // Enable signal for flops during stall + + // Integer div/rem signals + logic BZeroM; // Denominator is zero + logic IntDivM; // Integer operation + logic [`DIVBLEN:0] nE, nM, mM; // Shift amounts + logic NegQuotM, ALTBM, AsM, W64M; // Special handling for postprocessor + logic [`XLEN-1:0] AM; // Original Numerator for postprocessor + logic ISpecialCaseE; // Integer div/remainder special cases fdivsqrtpreproc fdivsqrtpreproc( // Preprocessor .clk, .IFDivStartE, .Xm(XmE), .Ym(YmE), .Xe(XeE), .Ye(YeE), @@ -100,4 +100,4 @@ module fdivsqrt( // Int-specific .nM, .mM, .ALTBM, .AsM, .BZeroM, .NegQuotM, .W64M, .RemOpM(Funct3M[1]), .AM, .FIntDivResultM); -endmodule \ No newline at end of file +endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv index b09e9f385..7b92f8c83 100644 --- a/src/fpu/fdivsqrt/fdivsqrtpostproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpostproc.sv @@ -37,7 +37,7 @@ module fdivsqrtpostproc( input logic [`DIVb+1:0] FirstC, input logic SqrtE, input logic Firstun, SqrtM, SpecialCaseM, NegQuotM, - input logic [`XLEN-1:0] AM, + input logic [`XLEN-1:0] AM, input logic RemOpM, ALTBM, BZeroM, AsM, W64M, input logic [`DIVBLEN:0] nM, mM, output logic [`DIVb:0] QmM, @@ -46,11 +46,11 @@ module fdivsqrtpostproc( output logic [`XLEN-1:0] FIntDivResultM ); - logic [`DIVb+3:0] W, Sum, DM; - logic [`DIVb:0] PreQmM; - logic NegStickyM; - logic weq0E, WZeroM; - logic [`XLEN-1:0] IntDivResultM; + logic [`DIVb+3:0] W, Sum, DM; + logic [`DIVb:0] PreQmM; + logic NegStickyM; + logic weq0E, WZeroM; + logic [`XLEN-1:0] IntDivResultM; ////////////////////////// // Execute Stage: Detect early termination for an exact result @@ -134,4 +134,4 @@ module fdivsqrtpostproc( end else assign FIntDivResultM = IntDivResultM[`XLEN-1:0]; end -endmodule \ No newline at end of file +endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv index c5485c26f..9a69085fd 100644 --- 
a/src/fpu/fdivsqrt/fdivsqrtpreproc.sv +++ b/src/fpu/fdivsqrt/fdivsqrtpreproc.sv @@ -29,35 +29,35 @@ `include "wally-config.vh" module fdivsqrtpreproc ( - input logic clk, - input logic IFDivStartE, - input logic [`NF:0] Xm, Ym, - input logic [`NE-1:0] Xe, Ye, + input logic clk, + input logic IFDivStartE, + input logic [`NF:0] Xm, Ym, + input logic [`NE-1:0] Xe, Ye, input logic [`FMTBITS-1:0] Fmt, - input logic Sqrt, - input logic XZeroE, - input logic [2:0] Funct3E, - output logic [`NE+1:0] QeM, - output logic [`DIVb+3:0] X, - output logic [`DIVb-1:0] DPreproc, + input logic Sqrt, + input logic XZeroE, + input logic [2:0] Funct3E, + output logic [`NE+1:0] QeM, + output logic [`DIVb+3:0] X, + output logic [`DIVb-1:0] DPreproc, // Int-specific - input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B - input logic IntDivE, W64E, - output logic ISpecialCaseE, - output logic [`DIVBLEN:0] nE, nM, mM, - output logic NegQuotM, ALTBM, IntDivM, W64M, - output logic AsM, BZeroM, - output logic [`XLEN-1:0] AM + input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // *** these are the src outputs before the mux choosing between them and PCE to put in srcA/B + input logic IntDivE, W64E, + output logic ISpecialCaseE, + output logic [`DIVBLEN:0] nE, nM, mM, + output logic NegQuotM, ALTBM, IntDivM, W64M, + output logic AsM, BZeroM, + output logic [`XLEN-1:0] AM ); - logic [`DIVb-1:0] XPreproc; - logic [`DIVb:0] PreSqrtX; - logic [`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed - logic [`NE+1:0] QeE; // Quotient Exponent (FP only) - logic [`DIVb-1:0] IFNormLenX, IFNormLenD; // Correctly-sized inputs for iterator - logic [`DIVBLEN:0] mE, ell; // Leading zeros of inputs - logic NumerZeroE; // Numerator is zero (X or A) - logic AZeroE, BZeroE; // A or B is Zero for integer division + logic [`DIVb-1:0] XPreproc; + logic [`DIVb:0] PreSqrtX; + logic 
[`DIVb+3:0] DivX, DivXShifted, SqrtX, PreShiftX; // Variations of dividend, to be muxed + logic [`NE+1:0] QeE; // Quotient Exponent (FP only) + logic [`DIVb-1:0] IFNormLenX, IFNormLenD; // Correctly-sized inputs for iterator + logic [`DIVBLEN:0] mE, ell; // Leading zeros of inputs + logic NumerZeroE; // Numerator is zero (X or A) + logic AZeroE, BZeroE; // A or B is Zero for integer division if (`IDIV_ON_FPU) begin:intpreproc // Int Supported logic signedDiv, NegQuotE; diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel2.sv b/src/fpu/fdivsqrt/fdivsqrtqsel2.sv index f18b31f1b..fd0a2a469 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtqsel2.sv @@ -45,11 +45,11 @@ module fdivsqrtqsel2 ( assign g = ps & pc; assign magnitude = ~((ps[2]^pc[2]) & (ps[1]^pc[1]) & - (ps[0]^pc[0])); + (ps[0]^pc[0])); assign sign = (ps[3]^pc[3])^ (ps[2] & pc[2] | ((ps[2]^pc[2]) & - (ps[1]&pc[1] | ((ps[1]^pc[1]) & - (ps[0]&pc[0]))))); + (ps[1]&pc[1] | ((ps[1]^pc[1]) & + (ps[0]&pc[0]))))); // Produce digit = +1, 0, or -1 assign up = magnitude & ~sign; diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel4.sv b/src/fpu/fdivsqrt/fdivsqrtqsel4.sv index 3a8a110f0..7a0db24d8 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtqsel4.sv @@ -32,21 +32,21 @@ module fdivsqrtqsel4 ( input logic [2:0] Dmsbs, input logic [4:0] Smsbs, input logic [7:0] WSmsbs, WCmsbs, - input logic Sqrt, j1, + input logic Sqrt, j1, output logic [3:0] udigit ); - logic [6:0] Wmsbs; - logic [7:0] PreWmsbs; - logic [2:0] A; + logic [6:0] Wmsbs; + logic [7:0] PreWmsbs; + logic [2:0] A; - assign PreWmsbs = WCmsbs + WSmsbs; - assign Wmsbs = PreWmsbs[7:1]; - // D = 0001.xxx... - // Dmsbs = | | + assign PreWmsbs = WCmsbs + WSmsbs; + assign Wmsbs = PreWmsbs[7:1]; + // D = 0001.xxx... + // Dmsbs = | | // W = xxxx.xxx... 
- // Wmsbs = | | + // Wmsbs = | | - logic [3:0] USel4[1023:0]; + logic [3:0] USel4[1023:0]; // Prepopulate selection table; this is constant at compile time always_comb begin @@ -109,5 +109,5 @@ module fdivsqrtqsel4 ( end else A = Dmsbs; // Select quotient digit from lookup table based on A and W - assign udigit = USel4[{A,Wmsbs}]; + assign udigit = USel4[{A,Wmsbs}]; endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv b/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv index 882458106..e508a6d7c 100644 --- a/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv +++ b/src/fpu/fdivsqrt/fdivsqrtqsel4cmp.sv @@ -32,19 +32,19 @@ module fdivsqrtqsel4cmp ( input logic [2:0] Dmsbs, input logic [4:0] Smsbs, input logic [7:0] WSmsbs, WCmsbs, - input logic SqrtE, j1, + input logic SqrtE, j1, output logic [3:0] udigit ); - logic [6:0] Wmsbs; - logic [7:0] PreWmsbs; - logic [2:0] A; + logic [6:0] Wmsbs; + logic [7:0] PreWmsbs; + logic [2:0] A; - assign PreWmsbs = WCmsbs + WSmsbs; - assign Wmsbs = PreWmsbs[7:1]; - // D = 0001.xxx... - // Dmsbs = | | + assign PreWmsbs = WCmsbs + WSmsbs; + assign Wmsbs = PreWmsbs[7:1]; + // D = 0001.xxx... + // Dmsbs = | | // W = xxxx.xxx... 
- // Wmsbs = | | + // Wmsbs = | | logic [6:0] mk2, mk1, mk0, mkm1; logic [6:0] mks2[7:0], mks1[7:0]; @@ -87,5 +87,5 @@ module fdivsqrtqsel4cmp ( else if ($signed(Wmsbs) >= $signed(mk1)) udigit = 4'b0100; // choose 1 else if ($signed(Wmsbs) >= $signed(mk0)) udigit = 4'b0000; // choose 0 else if ($signed(Wmsbs) >= $signed(mkm1)) udigit = 4'b0010; // choose -1 - else udigit = 4'b0001; // choose -2 + else udigit = 4'b0001; // choose -2 endmodule diff --git a/src/fpu/fdivsqrt/fdivsqrtstage2.sv b/src/fpu/fdivsqrt/fdivsqrtstage2.sv index 63ab6c059..53c1711cb 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage2.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage2.sv @@ -31,32 +31,32 @@ /* verilator lint_off UNOPTFLAT */ module fdivsqrtstage2 ( input logic [`DIVb-1:0] D, - input logic [`DIVb+3:0] DBar, - input logic [`DIVb:0] U, UM, - input logic [`DIVb+3:0] WS, WC, + input logic [`DIVb+3:0] DBar, + input logic [`DIVb:0] U, UM, + input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, - input logic SqrtE, - output logic un, + input logic SqrtE, + output logic un, output logic [`DIVb+1:0] CNext, - output logic [`DIVb:0] UNext, UMNext, - output logic [`DIVb+3:0] WSNext, WCNext + output logic [`DIVb:0] UNext, UMNext, + output logic [`DIVb+3:0] WSNext, WCNext ); /* verilator lint_on UNOPTFLAT */ - logic [`DIVb+3:0] Dsel; - logic up, uz; - logic [`DIVb+3:0] F; - logic [`DIVb+3:0] AddIn; - logic [`DIVb+3:0] WSA, WCA; + logic [`DIVb+3:0] Dsel; + logic up, uz; + logic [`DIVb+3:0] F; + logic [`DIVb+3:0] AddIn; + logic [`DIVb+3:0] WSA, WCA; // Qmient Selection logic // Given partial remainder, select digit of +1, 0, or -1 (up, uz, un) // q encoding: - // 1000 = +2 - // 0100 = +1 - // 0000 = 0 - // 0010 = -1 - // 0001 = -2 + // 1000 = +2 + // 0100 = +1 + // 0000 = 0 + // 0010 = -1 + // 0001 = -2 fdivsqrtqsel2 qsel2(WS[`DIVb+3:`DIVb], WC[`DIVb+3:`DIVb], up, uz, un); // Sqrt F generation. 
Extend C, U, UM to Q4.k diff --git a/src/fpu/fdivsqrt/fdivsqrtstage4.sv b/src/fpu/fdivsqrt/fdivsqrtstage4.sv index 007dd18bb..ee92d263b 100644 --- a/src/fpu/fdivsqrt/fdivsqrtstage4.sv +++ b/src/fpu/fdivsqrt/fdivsqrtstage4.sv @@ -30,34 +30,34 @@ module fdivsqrtstage4 ( input logic [`DIVb-1:0] D, - input logic [`DIVb+3:0] DBar, D2, DBar2, - input logic [`DIVb:0] U, UM, - input logic [`DIVb+3:0] WS, WC, + input logic [`DIVb+3:0] DBar, D2, DBar2, + input logic [`DIVb:0] U,UM, + input logic [`DIVb+3:0] WS, WC, input logic [`DIVb+1:0] C, - input logic SqrtE, j1, + input logic SqrtE, j1, output logic [`DIVb+1:0] CNext, - output logic un, - output logic [`DIVb:0] UNext, UMNext, - output logic [`DIVb+3:0] WSNext, WCNext + output logic un, + output logic [`DIVb:0] UNext, UMNext, + output logic [`DIVb+3:0] WSNext, WCNext ); - logic [`DIVb+3:0] Dsel; - logic [3:0] udigit; - logic [`DIVb+3:0] F; - logic [`DIVb+3:0] AddIn; - logic [4:0] Smsbs; - logic [2:0] Dmsbs; - logic [7:0] WCmsbs, WSmsbs; - logic CarryIn; - logic [`DIVb+3:0] WSA, WCA; + logic [`DIVb+3:0] Dsel; + logic [3:0] udigit; + logic [`DIVb+3:0] F; + logic [`DIVb+3:0] AddIn; + logic [4:0] Smsbs; + logic [2:0] Dmsbs; + logic [7:0] WCmsbs, WSmsbs; + logic CarryIn; + logic [`DIVb+3:0] WSA, WCA; // Digit Selection logic // u encoding: - // 1000 = +2 - // 0100 = +1 - // 0000 = 0 - // 0010 = -1 - // 0001 = -2 + // 1000 = +2 + // 0100 = +1 + // 0000 = 0 + // 0010 = -1 + // 0001 = -2 assign Smsbs = U[`DIVb:`DIVb-4]; assign Dmsbs = D[`DIVb-1:`DIVb-3]; assign WCmsbs = WC[`DIVb+3:`DIVb-4]; diff --git a/src/fpu/fma/fmalza.sv b/src/fpu/fma/fmalza.sv index 640e4ed80..59fb3fc3e 100644 --- a/src/fpu/fma/fmalza.sv +++ b/src/fpu/fma/fmalza.sv @@ -32,12 +32,12 @@ module fmalza #(WIDTH) ( input logic [WIDTH-1:0] A, // addend input logic [2*`NF+1:0] Pm, // product - input logic Cin, // carry in + input logic Cin, // carry in input logic sub, // subtraction output logic [$clog2(WIDTH+1)-1:0] SCnt // normalization shift count for the 
positive result ); - logic [WIDTH:0] F; // most significant bit of F indicates leading digit + logic [WIDTH:0] F; // most significant bit of F indicates leading digit logic [WIDTH-1:0] B; // zero-extended product with same size as aligned A logic [WIDTH-1:0] P, G, K; // propagate, generate, kill for each column logic [WIDTH-1:0] Pp1, Gm1, Km1; // propagate shifted right by 1, generate/kill shifted left 1 diff --git a/src/fpu/fpu.sv b/src/fpu/fpu.sv index 8ff36fbfc..32bdfc1ca 100755 --- a/src/fpu/fpu.sv +++ b/src/fpu/fpu.sv @@ -29,40 +29,40 @@ `include "wally-config.vh" module fpu ( - input logic clk, - input logic reset, + input logic clk, + input logic reset, // Hazards - input logic StallE, StallM, StallW, // stall signals (from HZU) - input logic FlushE, FlushM, FlushW, // flush signals (from HZU) - output logic FPUStallD, // Stall the decode stage (To HZU) - output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU) - // CSRs - input logic [1:0] STATUS_FS, // Is floating-point enabled? (From privileged unit) - input logic [2:0] FRM_REGW, // Rounding mode (from CSR) - // Decode stage - input logic [31:0] InstrD, // instruction (from IFU) - // Execute stage - input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations - input logic IntDivE, W64E, // Integer division on FPU - input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU) - input logic [4:0] RdE, // which FP register to write to (from IEU) - output logic FWriteIntE, // integer register write enable (to IEU) - output logic FCvtIntE, // Convert to int (to IEU) - // Memory stage - input logic [2:0] Funct3M, // Funct fields of instruction specify type of operations - input logic [4:0] RdM, // which FP register to write to (from IEU) - output logic FRegWriteM, // FP register write enable (to privileged unit) - output logic FpLoadStoreM, // Fp load instruction? 
(to LSU) - output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU) - output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) - output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to IFU) - output logic [4:0] SetFflagsM, // FPU flags (to privileged unit) - // Writeback stage - input logic [4:0] RdW, // which FP register to write to (from IEU) - input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU) - output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) - output logic FCvtIntW, // select FCvtIntRes (to IEU) - output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU) + input logic StallE, StallM, StallW, // stall signals (from HZU) + input logic FlushE, FlushM, FlushW, // flush signals (from HZU) + output logic FPUStallD, // Stall the decode stage (To HZU) + output logic FDivBusyE, // Is the divide/sqrt unit busy (stall execute stage) (to HZU) + // CSRs + input logic [1:0] STATUS_FS, // Is floating-point enabled? 
(From privileged unit) + input logic [2:0] FRM_REGW, // Rounding mode (from CSR) + // Decode stage + input logic [31:0] InstrD, // instruction (from IFU) + // Execute stage + input logic [2:0] Funct3E, // Funct fields of instruction specify type of operations + input logic IntDivE, W64E, // Integer division on FPU + input logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // Integer input for convert, move, and int div (from IEU) + input logic [4:0] RdE, // which FP register to write to (from IEU) + output logic FWriteIntE, // integer register write enable (to IEU) + output logic FCvtIntE, // Convert to int (to IEU) + // Memory stage + input logic [2:0] Funct3M, // Funct fields of instruction specify type of operations + input logic [4:0] RdM, // which FP register to write to (from IEU) + output logic FRegWriteM, // FP register write enable (to privileged unit) + output logic FpLoadStoreM, // Fp load instruction? (to LSU) + output logic [`FLEN-1:0] FWriteDataM, // Data to be written to memory (to LSU) + output logic [`XLEN-1:0] FIntResM, // data to be written to integer register (to IEU) + output logic IllegalFPUInstrD, // Is the instruction an illegal fpu instruction (to IFU) + output logic [4:0] SetFflagsM, // FPU flags (to privileged unit) + // Writeback stage + input logic [4:0] RdW, // which FP register to write to (from IEU) + input logic [`FLEN-1:0] ReadDataW, // Read data (from LSU) + output logic [`XLEN-1:0] FCvtIntResW, // convert result to to be written to integer register (to IEU) + output logic FCvtIntW, // select FCvtIntRes (to IEU) + output logic [`XLEN-1:0] FIntDivResultW // Result from integer division (to IEU) ); // RISC-V FPU specifics: @@ -70,97 +70,97 @@ module fpu ( // - RISC-V detects underflow after rounding // control signals - logic FRegWriteW; // FP register write enable - logic [2:0] FrmM; // FP rounding mode - logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double - logic FDivStartE, IDivStartE; // Start division or 
squareroot - logic FWriteIntM; // Write to integer register - logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals - logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component - logic [1:0] FResSelE, FResSelM, FResSelW; // Select one of the results that finish in the memory stage - logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit - logic [4:0] Adr1D, Adr2D, Adr3D; // register adresses of each input - logic [4:0] Adr1E, Adr2E, Adr3E; // register adresses of each input - logic XEnD, YEnD, ZEnD; // X, Y, Z inputs used for current operation - logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation - logic FRegWriteE; // Write floating-point register + logic FRegWriteW; // FP register write enable + logic [2:0] FrmM; // FP rounding mode + logic [`FMTBITS-1:0] FmtE, FmtM; // FP precision 0-single 1-double + logic FDivStartE, IDivStartE; // Start division or squareroot + logic FWriteIntM; // Write to integer register + logic [1:0] ForwardXE, ForwardYE, ForwardZE; // forwarding mux control signals + logic [2:0] OpCtrlE, OpCtrlM; // Select which opperation to do in each component + logic [1:0] FResSelE, FResSelM, FResSelW; // Select one of the results that finish in the memory stage + logic [1:0] PostProcSelE, PostProcSelM; // select result in the post processing unit + logic [4:0] Adr1D, Adr2D, Adr3D; // register adresses of each input + logic [4:0] Adr1E, Adr2E, Adr3E; // register adresses of each input + logic XEnD, YEnD, ZEnD; // X, Y, Z inputs used for current operation + logic XEnE, YEnE, ZEnE; // X, Y, Z inputs used for current operation + logic FRegWriteE; // Write floating-point register // regfile signals - logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage - logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage - logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding) - logic [`XLEN-1:0] 
IntSrcXE; // Input 1 to the various units (after forwarding) - logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding) - logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding) + logic [`FLEN-1:0] FRD1D, FRD2D, FRD3D; // Read Data from FP register - decode stage + logic [`FLEN-1:0] FRD1E, FRD2E, FRD3E; // Read Data from FP register - execute stage + logic [`FLEN-1:0] XE; // Input 1 to the various units (after forwarding) + logic [`XLEN-1:0] IntSrcXE; // Input 1 to the various units (after forwarding) + logic [`FLEN-1:0] PreYE, YE; // Input 2 to the various units (after forwarding) + logic [`FLEN-1:0] PreZE, ZE; // Input 3 to the various units (after forwarding) // unpacking signals - logic XsE, YsE, ZsE; // input's sign - execute stage - logic XsM, YsM; // input's sign - memory stage - logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage - logic [`NE-1:0] ZeM; // input's exponent - memory stage - logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage - logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage - logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage - logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage - logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage - logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage - logic XSubnormE; // is the input subnormal - logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage - logic XZeroM, YZeroM; // is the input zero - memory stage - logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage - logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage - logic XExpMaxE; // is the exponent all ones (max value) - logic [`FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. 
Needed for 1-input operations + logic XsE, YsE, ZsE; // input's sign - execute stage + logic XsM, YsM; // input's sign - memory stage + logic [`NE-1:0] XeE, YeE, ZeE; // input's exponent - execute stage + logic [`NE-1:0] ZeM; // input's exponent - memory stage + logic [`NF:0] XmE, YmE, ZmE; // input's significand - execute stage + logic [`NF:0] XmM, YmM, ZmM; // input's significand - memory stage + logic XNaNE, YNaNE, ZNaNE; // is the input a NaN - execute stage + logic XNaNM, YNaNM, ZNaNM; // is the input a NaN - memory stage + logic XSNaNE, YSNaNE, ZSNaNE; // is the input a signaling NaN - execute stage + logic XSNaNM, YSNaNM, ZSNaNM; // is the input a signaling NaN - memory stage + logic XSubnormE; // is the input subnormal + logic XZeroE, YZeroE, ZZeroE; // is the input zero - execute stage + logic XZeroM, YZeroM; // is the input zero - memory stage + logic XInfE, YInfE, ZInfE; // is the input infinity - execute stage + logic XInfM, YInfM, ZInfM; // is the input infinity - memory stage + logic XExpMaxE; // is the exponent all ones (max value) + logic [`FLEN-1:0] XPostBoxE; // X after fixing bad NaN box. 
Needed for 1-input operations // Fma Signals - logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting - logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying - logic [3*`NF+3:0] SmE, SmM; // Sum significand - logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output - logic [`NE+1:0] SeE,SeM; // Sum exponent - logic InvAE, InvAM; // Invert addend - logic AsE, AsM; // Addend sign - logic PsE, PsM; // Product sign - logic SsE, SsM; // Sum sign - logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count + logic FmaAddSubE; // Multiply by 1.0 when adding or subtracting + logic [1:0] FmaZSelE; // Select Z = Y when adding or subtracting, 0 when multiplying + logic [3*`NF+3:0] SmE, SmM; // Sum significand + logic FmaAStickyE, FmaAStickyM; // FMA addend sticky bit output + logic [`NE+1:0] SeE,SeM; // Sum exponent + logic InvAE, InvAM; // Invert addend + logic AsE, AsM; // Addend sign + logic PsE, PsM; // Product sign + logic SsE, SsM; // Sum sign + logic [$clog2(3*`NF+5)-1:0] SCntE, SCntM; // LZA sum leading zero count // Cvt Signals - logic [`NE:0] CeE, CeM; // convert intermediate expoent - logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by - logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal - logic CsE, CsM; // convert result sign - logic IntZeroE, IntZeroM; // is the integer zero? - logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) - logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU) + logic [`NE:0] CeE, CeM; // convert intermediate expoent + logic [`LOGCVTLEN-1:0] CvtShiftAmtE, CvtShiftAmtM; // how much to shift by + logic CvtResSubnormUfE, CvtResSubnormUfM; // does the result underflow or is subnormal + logic CsE, CsM; // convert result sign + logic IntZeroE, IntZeroM; // is the integer zero? 
+ logic [`CVTLEN-1:0] CvtLzcInE, CvtLzcInM; // input to the Leading Zero Counter (priority encoder) + logic [`XLEN-1:0] FCvtIntResM; // fcvt integer result (for IEU) // divide signals - logic [`DIVb:0] QmM; // fdivsqrt signifcand - logic [`NE+1:0] QeM; // fdivsqrt exponent - logic DivStickyM; // fdivsqrt sticky bit - logic FDivDoneE, IFDivStartE; // fdivsqrt control signals - logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU) + logic [`DIVb:0] QmM; // fdivsqrt signifcand + logic [`NE+1:0] QeM; // fdivsqrt exponent + logic DivStickyM; // fdivsqrt sticky bit + logic FDivDoneE, IFDivStartE; // fdivsqrt control signals + logic [`XLEN-1:0] FIntDivResultM; // fdivsqrt integer division result (for IEU) // result and flag signals - logic [`XLEN-1:0] ClassResE; // classify result - logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max) - logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le) - logic CmpNVE; // compare invalid flag (Not Valid) - logic [`FLEN-1:0] SgnResE; // sign injection result - logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move) - logic [`FLEN-1:0] PostProcResM; // Postprocessor output - logic [4:0] PostProcFlgM; // Postprocessor flags - logic PreNVE, PreNVM; // selected flag that is ready in the memory stage - logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result - logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage - logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register + logic [`XLEN-1:0] ClassResE; // classify result + logic [`FLEN-1:0] CmpFpResE; // compare result to FPU (min/max) + logic [`XLEN-1:0] CmpIntResE; // compare result to IEU (eq/lt/le) + logic CmpNVE; // compare invalid flag (Not Valid) + logic [`FLEN-1:0] SgnResE; // sign injection result + logic [`XLEN-1:0] FIntResE; // FPU to IEU E-stage result (classify, compare, move) + logic [`FLEN-1:0] PostProcResM; // Postprocessor output + logic 
[4:0] PostProcFlgM; // Postprocessor flags + logic PreNVE, PreNVM; // selected flag that is ready in the memory stage + logic [`FLEN-1:0] FpResM, FpResW; // FPU preliminary result + logic [`FLEN-1:0] PreFpResE, PreFpResM; // selected result that is ready in the memory stage + logic [`FLEN-1:0] FResultW; // final FP result being written to the FP register // other signals - logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv - logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed - logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed - logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt - logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer - logic mvsgn; // sign bit for extending move + logic [`FLEN-1:0] AlignedSrcAE; // align SrcA from IEU to the floating point format for fmv + logic [`FLEN-1:0] BoxedZeroE; // Zero value for Z for multiplication, with NaN boxing if needed + logic [`FLEN-1:0] BoxedOneE; // One value for Z for multiplication, with NaN boxing if needed + logic StallUnpackedM; // Stall unpacker outputs during multicycle fdivsqrt + logic [`FLEN-1:0] SgnExtXE; // Sign-extended X input for move to integer + logic mvsgn; // sign bit for extending move ////////////////////////////////////////////////////////////////////////////////////////// // Decode Stage: fctrl decoder, read register file @@ -180,7 +180,7 @@ module fpu ( fregfile fregfile (.clk, .reset, .we4(FRegWriteW), .a1(InstrD[19:15]), .a2(InstrD[24:20]), .a3(InstrD[31:27]), .a4(RdW), .wd4(FResultW), - .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); + .rd1(FRD1D), .rd2(FRD2D), .rd3(FRD3D)); // D/E pipeline registers flopenrc #(`FLEN) DEReg1(clk, reset, FlushE, ~StallE, FRD1D, FRD1E); diff --git a/src/fpu/fregfile.sv b/src/fpu/fregfile.sv index 1a5a2eecf..69961a847 100644 --- a/src/fpu/fregfile.sv +++ b/src/fpu/fregfile.sv @@ -29,8 +29,8 @@ 
`include "wally-config.vh" module fregfile ( - input logic clk, reset, - input logic we4, // write enable + input logic clk, reset, + input logic we4, // write enable input logic [4:0] a1, a2, a3, a4, // adresses input logic [`FLEN-1:0] wd4, // write data output logic [`FLEN-1:0] rd1, rd2, rd3 // read data @@ -46,7 +46,7 @@ module fregfile ( always_ff @(negedge clk) // or posedge reset) if (reset) for(i=0; i<32; i++) rf[i] <= 0; - else if (we4) rf[a4] <= wd4; + else if (we4) rf[a4] <= wd4; assign #2 rd1 = rf[a1]; assign #2 rd2 = rf[a2]; diff --git a/src/fpu/fsgninj.sv b/src/fpu/fsgninj.sv index 0db7dc2f3..9ce938709 100755 --- a/src/fpu/fsgninj.sv +++ b/src/fpu/fsgninj.sv @@ -29,43 +29,43 @@ `include "wally-config.vh" module fsgninj ( - input logic Xs, Ys, // X and Y sign bits - input logic [`FLEN-1:0] X, // X - input logic [`FMTBITS-1:0] Fmt, // format - input logic [1:0] OpCtrl, // operation control - output logic [`FLEN-1:0] SgnRes // result + input logic Xs, Ys, // X and Y sign bits + input logic [`FLEN-1:0] X, // X + input logic [`FMTBITS-1:0] Fmt, // format + input logic [1:0] OpCtrl, // operation control + output logic [`FLEN-1:0] SgnRes // result ); - logic ResSgn; // result sign + logic ResSgn; // result sign - // OpCtrl: - // 00 - fsgnj - directly copy over sign value of Y - // 01 - fsgnjn - negate sign value of Y - // 10 - fsgnjx - XOR sign values of X and Y - - // calculate the result's sign - assign ResSgn = (OpCtrl[1] ? Xs : OpCtrl[0]) ^ Ys; - - // format final result based on precision - // - uses NaN-blocking format - // - if there are any unsused bits the most significant bits are filled with 1s - + // OpCtrl: + // 00 - fsgnj - directly copy over sign value of Y + // 01 - fsgnjn - negate sign value of Y + // 10 - fsgnjx - XOR sign values of X and Y + + // calculate the result's sign + assign ResSgn = (OpCtrl[1] ? 
Xs : OpCtrl[0]) ^ Ys; + + // format final result based on precision + // - uses NaN-blocking format + // - if there are any unsused bits the most significant bits are filled with 1s + if (`FPSIZES == 1) - assign SgnRes = {ResSgn, X[`FLEN-2:0]}; + assign SgnRes = {ResSgn, X[`FLEN-2:0]}; else if (`FPSIZES == 2) - assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]}; + assign SgnRes = {~Fmt|ResSgn, X[`FLEN-2:`LEN1], Fmt ? X[`LEN1-1] : ResSgn, X[`LEN1-2:0]}; else if (`FPSIZES == 3) begin - logic [2:0] SgnBits; + logic [2:0] SgnBits; always_comb case (Fmt) `FMT: SgnBits = {ResSgn, X[`LEN1-1], X[`LEN2-1]}; - `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]}; + `FMT1: SgnBits = {1'b1, ResSgn, X[`LEN2-1]}; `FMT2: SgnBits = {2'b11, ResSgn}; default: SgnBits = {3{1'bx}}; endcase - assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]}; - end else if (`FPSIZES == 4) begin - logic [3:0] SgnBits; + assign SgnRes = {SgnBits[2], X[`FLEN-2:`LEN1], SgnBits[1], X[`LEN1-2:`LEN2], SgnBits[0], X[`LEN2-2:0]}; + end else if (`FPSIZES == 4) begin + logic [3:0] SgnBits; always_comb case (Fmt) `Q_FMT: SgnBits = {ResSgn, X[`D_LEN-1], X[`S_LEN-1], X[`H_LEN-1]}; @@ -73,7 +73,7 @@ module fsgninj ( `S_FMT: SgnBits = {2'b11, ResSgn, X[`H_LEN-1]}; `H_FMT: SgnBits = {3'b111, ResSgn}; endcase - assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]}; - end + assign SgnRes = {SgnBits[3], X[`Q_LEN-2:`D_LEN], SgnBits[2], X[`D_LEN-2:`S_LEN], SgnBits[1], X[`S_LEN-2:`H_LEN], SgnBits[0], X[`H_LEN-2:0]}; + end endmodule diff --git a/src/generic/arrs.sv b/src/generic/arrs.sv index 0bb30c96a..5a9cf21f0 100644 --- a/src/generic/arrs.sv +++ b/src/generic/arrs.sv @@ -30,13 +30,13 @@ `include "wally-config.vh" module arrs( - input logic clk, - input logic areset, + input logic clk, + input logic areset, output logic reset ); - logic metaStable; - logic 
resetB; + logic metaStable; + logic resetB; always_ff @(posedge clk , posedge areset) begin if (areset) begin diff --git a/src/generic/clockgater.sv b/src/generic/clockgater.sv index 55f02cff5..c09f98f2c 100644 --- a/src/generic/clockgater.sv +++ b/src/generic/clockgater.sv @@ -27,9 +27,9 @@ `include "wally-config.vh" module clockgater ( - input logic E, - input logic SE, - input logic CLK, + input logic E, + input logic SE, + input logic CLK, output logic ECLK ); @@ -39,10 +39,10 @@ module clockgater ( // VERY IMPORTANT. // This part functionally models a clock gater, but does not necessarily meet the timing constrains a real standard cell would. // Do not use this in synthesis! - logic enable_q; + logic enable_q; always_latch begin if(~CLK) begin - enable_q <= E | SE; + enable_q <= E | SE; end end assign ECLK = enable_q & CLK; diff --git a/src/generic/mem/ram1p1rwbe.sv b/src/generic/mem/ram1p1rwbe.sv index f3c98873e..8905cc551 100644 --- a/src/generic/mem/ram1p1rwbe.sv +++ b/src/generic/mem/ram1p1rwbe.sv @@ -49,39 +49,39 @@ module ram1p1rwbe #(parameter DEPTH=64, WIDTH=44) ( // *************************************************************************** // TRUE SRAM macro // *************************************************************************** - if ((`USE_SRAM == 1) & (WIDTH == 128) & (DEPTH == 64)) begin // Cache data subarray + if ((`USE_SRAM == 1) & (WIDTH == 128) & (DEPTH == 64)) begin // Cache data subarray genvar index; - // 64 x 128-bit SRAM - logic [WIDTH-1:0] BitWriteMask; - for (index=0; index < WIDTH; index++) - assign BitWriteMask[index] = bwe[index/8]; + // 64 x 128-bit SRAM + logic [WIDTH-1:0] BitWriteMask; + for (index=0; index < WIDTH; index++) + assign BitWriteMask[index] = bwe[index/8]; ram1p1rwbe_64x128 sram1A (.CLK(clk), .CEB(~ce), .WEB(~we), - .A(addr), .D(din), - .BWEB(~BitWriteMask), .Q(dout)); + .A(addr), .D(din), + .BWEB(~BitWriteMask), .Q(dout)); end else if ((`USE_SRAM == 1) & (WIDTH == 44) & (DEPTH == 64)) begin // RV64 cache tag 
- genvar index; - // 64 x 44-bit SRAM - logic [WIDTH-1:0] BitWriteMask; - for (index=0; index < WIDTH; index++) - assign BitWriteMask[index] = bwe[index/8]; - ram1p1rwbe_64x44 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we), - .A(addr), .D(din), - .BWEB(~BitWriteMask), .Q(dout)); + genvar index; + // 64 x 44-bit SRAM + logic [WIDTH-1:0] BitWriteMask; + for (index=0; index < WIDTH; index++) + assign BitWriteMask[index] = bwe[index/8]; + ram1p1rwbe_64x44 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we), + .A(addr), .D(din), + .BWEB(~BitWriteMask), .Q(dout)); end else if ((`USE_SRAM == 1) & (WIDTH == 22) & (DEPTH == 64)) begin // RV32 cache tag - genvar index; - // 64 x 22-bit SRAM - logic [WIDTH-1:0] BitWriteMask; - for (index=0; index < WIDTH; index++) - assign BitWriteMask[index] = bwe[index/8]; - ram1p1rwbe_64x22 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we), - .A(addr), .D(din), - .BWEB(~BitWriteMask), .Q(dout)); + genvar index; + // 64 x 22-bit SRAM + logic [WIDTH-1:0] BitWriteMask; + for (index=0; index < WIDTH; index++) + assign BitWriteMask[index] = bwe[index/8]; + ram1p1rwbe_64x22 sram1B (.CLK(clk), .CEB(~ce), .WEB(~we), + .A(addr), .D(din), + .BWEB(~BitWriteMask), .Q(dout)); - // *************************************************************************** - // READ first SRAM model - // *************************************************************************** + // *************************************************************************** + // READ first SRAM model + // *************************************************************************** end else begin: ram integer i; @@ -91,19 +91,18 @@ module ram1p1rwbe #(parameter DEPTH=64, WIDTH=44) ( assign dout = RAM[addrd]; /* // Read - always_ff @(posedge clk) - if(ce) dout <= #1 mem[addr]; */ - + always_ff @(posedge clk) + if(ce) dout <= #1 mem[addr]; */ // Write divided into part for bytes and part for extra msbs - // Questa sim version 2022.3_2 does not allow multiple drivers for RAM when using always_ff. 
- // Therefore these always blocks use the older always @(posedge clk) + // Questa sim version 2022.3_2 does not allow multiple drivers for RAM when using always_ff. + // Therefore these always blocks use the older always @(posedge clk) if(WIDTH >= 8) always @(posedge clk) if (ce & we) for(i = 0; i < WIDTH/8; i++) if(bwe[i]) RAM[addr][i*8 +: 8] <= #1 din[i*8 +: 8]; - + if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8 always @(posedge clk) if (ce & we & bwe[WIDTH/8]) diff --git a/src/generic/mem/ram1p1rwbe_64x128.sv b/src/generic/mem/ram1p1rwbe_64x128.sv index 84a3e74f9..55b1d75b0 100755 --- a/src/generic/mem/ram1p1rwbe_64x128.sv +++ b/src/generic/mem/ram1p1rwbe_64x128.sv @@ -26,7 +26,7 @@ module ram1p1rwbe_64x128( input logic CLK, - input logic CEB, + input logic CEB, input logic WEB, input logic [5:0] A, input logic [127:0] D, diff --git a/src/generic/mem/ram1p1rwbe_64x22.sv b/src/generic/mem/ram1p1rwbe_64x22.sv index 8e0f56306..5e7a4c5cf 100755 --- a/src/generic/mem/ram1p1rwbe_64x22.sv +++ b/src/generic/mem/ram1p1rwbe_64x22.sv @@ -26,7 +26,7 @@ module ram1p1rwbe_64x22( input logic CLK, - input logic CEB, + input logic CEB, input logic WEB, input logic [5:0] A, input logic [21:0] D, diff --git a/src/generic/mem/ram1p1rwbe_64x44.sv b/src/generic/mem/ram1p1rwbe_64x44.sv index 89730a42b..a2c2c81fa 100644 --- a/src/generic/mem/ram1p1rwbe_64x44.sv +++ b/src/generic/mem/ram1p1rwbe_64x44.sv @@ -26,7 +26,7 @@ module ram1p1rwbe_64x44( input logic CLK, - input logic CEB, + input logic CEB, input logic WEB, input logic [5:0] A, input logic [43:0] D, diff --git a/src/generic/mem/ram2p1r1wbe.sv b/src/generic/mem/ram2p1r1wbe.sv index cefd5ab9f..4c72095b6 100644 --- a/src/generic/mem/ram2p1r1wbe.sv +++ b/src/generic/mem/ram2p1r1wbe.sv @@ -44,96 +44,94 @@ module ram2p1r1wbe #(parameter DEPTH=1024, WIDTH=68) ( output logic [WIDTH-1:0] rd1 ); - logic [WIDTH-1:0] mem[DEPTH-1:0]; - localparam SRAMWIDTH = 32; - localparam SRAMNUMSETS = SRAMWIDTH/WIDTH; + logic [WIDTH-1:0] 
mem[DEPTH-1:0]; + localparam SRAMWIDTH = 32; + localparam SRAMNUMSETS = SRAMWIDTH/WIDTH; // *************************************************************************** // TRUE Smem macro // *************************************************************************** - if ((`USE_SRAM == 1) & (WIDTH == 68) & (DEPTH == 1024)) begin - - ram2p1r1wbe_1024x68 memory1(.CLKA(clk), .CLKB(clk), - .CEBA(~ce1), .CEBB(~ce2), - .WEBA('0), .WEBB(~we2), - .AA(ra1), .AB(wa2), - .DA('0), - .DB(wd2), - .BWEBA('0), .BWEBB('1), - .QA(rd1), - .QB()); + if ((`USE_SRAM == 1) & (WIDTH == 68) & (DEPTH == 1024)) begin + + ram2p1r1wbe_1024x68 memory1(.CLKA(clk), .CLKB(clk), + .CEBA(~ce1), .CEBB(~ce2), + .WEBA('0), .WEBB(~we2), + .AA(ra1), .AB(wa2), + .DA('0), + .DB(wd2), + .BWEBA('0), .BWEBB('1), + .QA(rd1), + .QB()); - end else if ((`USE_SRAM == 1) & (WIDTH == 36) & (DEPTH == 1024)) begin - - ram2p1r1wbe_1024x36 memory1(.CLKA(clk), .CLKB(clk), - .CEBA(~ce1), .CEBB(~ce2), - .WEBA('0), .WEBB(~we2), - .AA(ra1), .AB(wa2), - .DA('0), - .DB(wd2), - .BWEBA('0), .BWEBB('1), - .QA(rd1), - .QB()); + end else if ((`USE_SRAM == 1) & (WIDTH == 36) & (DEPTH == 1024)) begin + + ram2p1r1wbe_1024x36 memory1(.CLKA(clk), .CLKB(clk), + .CEBA(~ce1), .CEBB(~ce2), + .WEBA('0), .WEBB(~we2), + .AA(ra1), .AB(wa2), + .DA('0), + .DB(wd2), + .BWEBA('0), .BWEBB('1), + .QA(rd1), + .QB()); - end else if ((`USE_SRAM == 1) & (WIDTH == 2) & (DEPTH == 1024)) begin + end else if ((`USE_SRAM == 1) & (WIDTH == 2) & (DEPTH == 1024)) begin - logic [SRAMWIDTH-1:0] SRAMReadData; - logic [SRAMWIDTH-1:0] SRAMWriteData; - logic [SRAMWIDTH-1:0] RD1Sets[SRAMNUMSETS-1:0]; - logic [SRAMNUMSETS-1:0] SRAMBitMaskPre; - logic [SRAMWIDTH-1:0] SRAMBitMask; - logic [$clog2(DEPTH)-1:0] RA1Q; - - - onehotdecoder #($clog2(SRAMNUMSETS)) oh1(wa2[$clog2(SRAMNUMSETS)-1:0], SRAMBitMaskPre); - genvar index; - for (index = 0; index < SRAMNUMSETS; index++) begin:readdatalinesetsmux - assign RD1Sets[index] = SRAMReadData[(index*WIDTH)+WIDTH-1 : (index*WIDTH)]; 
- assign SRAMWriteData[index*2+1:index*2] = wd2; - assign SRAMBitMask[index*2+1:index*2] = {2{SRAMBitMaskPre[index]}}; - end - flopen #($clog2(DEPTH)) mem_reg1 (clk, ce1, ra1, RA1Q); - assign rd1 = RD1Sets[RA1Q[$clog2(SRAMWIDTH)-1:0]]; - ram2p1r1wbe_64x32 memory2(.CLKA(clk), .CLKB(clk), - .CEBA(~ce1), .CEBB(~ce2), - .WEBA('0), .WEBB(~we2), - .AA(ra1[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), - .AB(wa2[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), - .DA('0), - .DB(SRAMWriteData), - .BWEBA('0), .BWEBB(SRAMBitMask), - .QA(SRAMReadData), - .QB()); + logic [SRAMWIDTH-1:0] SRAMReadData; + logic [SRAMWIDTH-1:0] SRAMWriteData; + logic [SRAMWIDTH-1:0] RD1Sets[SRAMNUMSETS-1:0]; + logic [SRAMNUMSETS-1:0] SRAMBitMaskPre; + logic [SRAMWIDTH-1:0] SRAMBitMask; + logic [$clog2(DEPTH)-1:0] RA1Q; + + onehotdecoder #($clog2(SRAMNUMSETS)) oh1(wa2[$clog2(SRAMNUMSETS)-1:0], SRAMBitMaskPre); + genvar index; + for (index = 0; index < SRAMNUMSETS; index++) begin:readdatalinesetsmux + assign RD1Sets[index] = SRAMReadData[(index*WIDTH)+WIDTH-1 : (index*WIDTH)]; + assign SRAMWriteData[index*2+1:index*2] = wd2; + assign SRAMBitMask[index*2+1:index*2] = {2{SRAMBitMaskPre[index]}}; + end + flopen #($clog2(DEPTH)) mem_reg1 (clk, ce1, ra1, RA1Q); + assign rd1 = RD1Sets[RA1Q[$clog2(SRAMWIDTH)-1:0]]; + ram2p1r1wbe_64x32 memory2(.CLKA(clk), .CLKB(clk), + .CEBA(~ce1), .CEBB(~ce2), + .WEBA('0), .WEBB(~we2), + .AA(ra1[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), + .AB(wa2[$clog2(DEPTH)-1:$clog2(SRAMNUMSETS)]), + .DA('0), + .DB(SRAMWriteData), + .BWEBA('0), .BWEBB(SRAMBitMask), + .QA(SRAMReadData), + .QB()); - end else begin - - // *************************************************************************** - // READ first SRAM model - // *************************************************************************** - integer i; - + end else begin + + // *************************************************************************** + // READ first SRAM model + // 
*************************************************************************** + integer i; + // Read logic [$clog2(DEPTH)-1:0] ra1d; flopen #($clog2(DEPTH)) adrreg(clk, ce1, ra1, ra1d); assign rd1 = mem[ra1d]; -/* // Read - always_ff @(posedge clk) - if(ce1) rd1 <= #1 mem[ra1]; */ - - // Write divided into part for bytes and part for extra msbs - if(WIDTH >= 8) - always @(posedge clk) - if (ce2 & we2) - for(i = 0; i < WIDTH/8; i++) - if(bwe2[i]) mem[wa2][i*8 +: 8] <= #1 wd2[i*8 +: 8]; - - if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8 - always @(posedge clk) - if (ce2 & we2 & bwe2[WIDTH/8]) - mem[wa2][WIDTH-1:WIDTH-WIDTH%8] <= #1 wd2[WIDTH-1:WIDTH-WIDTH%8]; - + /* // Read + always_ff @(posedge clk) + if(ce1) rd1 <= #1 mem[ra1]; */ + + // Write divided into part for bytes and part for extra msbs + if(WIDTH >= 8) + always @(posedge clk) + if (ce2 & we2) + for(i = 0; i < WIDTH/8; i++) + if(bwe2[i]) mem[wa2][i*8 +: 8] <= #1 wd2[i*8 +: 8]; + + if (WIDTH%8 != 0) // handle msbs if width not a multiple of 8 + always @(posedge clk) + if (ce2 & we2 & bwe2[WIDTH/8]) + mem[wa2][WIDTH-1:WIDTH-WIDTH%8] <= #1 wd2[WIDTH-1:WIDTH-WIDTH%8]; end - + endmodule diff --git a/src/generic/mem/ram2p1r1wbe_1024x36.sv b/src/generic/mem/ram2p1r1wbe_1024x36.sv index b6b501f27..302277ed0 100755 --- a/src/generic/mem/ram2p1r1wbe_1024x36.sv +++ b/src/generic/mem/ram2p1r1wbe_1024x36.sv @@ -27,8 +27,8 @@ module ram2p1r1wbe_1024x36( input logic CLKA, input logic CLKB, - input logic CEBA, - input logic CEBB, + input logic CEBA, + input logic CEBB, input logic WEBA, input logic WEBB, input logic [9:0] AA, @@ -43,12 +43,12 @@ module ram2p1r1wbe_1024x36( // replace "generic1024x36RAM" with "TSDN..1024X36.." 
module from your memory vendor //generic1024x36RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, - // .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + // .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); // use part of a larger RAM to avoid generating more flavors of RAM logic [67:0] QAfull, QBfull; TSDN28HPCPA1024X68M4MW sramIP(.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, - .AA, .AB, .DA({32'b0, DA[35:0]}), .DB({32'b0, DB[35:0]}), - .BWEBA({32'b0, BWEBA[35:0]}), .BWEBB({32'b0, BWEBB[35:0]}), .QA(QAfull), .QB(QBfull)); + .AA, .AB, .DA({32'b0, DA[35:0]}), .DB({32'b0, DB[35:0]}), + .BWEBA({32'b0, BWEBA[35:0]}), .BWEBB({32'b0, BWEBB[35:0]}), .QA(QAfull), .QB(QBfull)); assign QA = QAfull[35:0]; assign QB = QBfull[35:0]; diff --git a/src/generic/mem/ram2p1r1wbe_1024x68.sv b/src/generic/mem/ram2p1r1wbe_1024x68.sv index 108530bee..6da7e06d6 100755 --- a/src/generic/mem/ram2p1r1wbe_1024x68.sv +++ b/src/generic/mem/ram2p1r1wbe_1024x68.sv @@ -27,8 +27,8 @@ module ram2p1r1wbe_1024x68( input logic CLKA, input logic CLKB, - input logic CEBA, - input logic CEBB, + input logic CEBA, + input logic CEBB, input logic WEBA, input logic WEBB, input logic [9:0] AA, @@ -43,8 +43,8 @@ module ram2p1r1wbe_1024x68( // replace "generic1024x68RAM" with "TSDN..1024X68.." 
module from your memory vendor //generic1024x68RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, - // .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + // .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); TSDN28HPCPA1024X68M4MW sramIP(.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, - .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); endmodule diff --git a/src/generic/mem/ram2p1r1wbe_128x64.sv b/src/generic/mem/ram2p1r1wbe_128x64.sv index 49d2631ee..e181fdd07 100644 --- a/src/generic/mem/ram2p1r1wbe_128x64.sv +++ b/src/generic/mem/ram2p1r1wbe_128x64.sv @@ -27,8 +27,8 @@ module ram2p1r1wbe_128x64( input logic CLKA, input logic CLKB, - input logic CEBA, - input logic CEBB, + input logic CEBA, + input logic CEBB, input logic WEBA, input logic WEBB, input logic [6:0] AA, @@ -43,8 +43,8 @@ module ram2p1r1wbe_128x64( // replace "generic128x64RAM" with "TSDN..128X64.." module from your memory vendor TSDN28HPCPA128X64M4FW sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, - .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); // generic128x64RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, -// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); +// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); endmodule diff --git a/src/generic/mem/ram2p1r1wbe_512x64.sv b/src/generic/mem/ram2p1r1wbe_512x64.sv index 14fbea75c..442eff90d 100644 --- a/src/generic/mem/ram2p1r1wbe_512x64.sv +++ b/src/generic/mem/ram2p1r1wbe_512x64.sv @@ -27,8 +27,8 @@ module ram2p1r1wbe_2048x64( input logic CLKA, input logic CLKB, - input logic CEBA, - input logic CEBB, + input logic CEBA, + input logic CEBB, input logic WEBA, input logic WEBB, input logic [8:0] AA, @@ -43,8 +43,8 @@ module ram2p1r1wbe_2048x64( // replace "generic2048x64RAM" with "TSDN..2048X64.." 
module from your memory vendor TSDN28HPCPA2048X64MMFW sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, - .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); // generic2048x64RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, -// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); +// .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); endmodule diff --git a/src/generic/mem/ram2p1r1wbe_64x32.sv b/src/generic/mem/ram2p1r1wbe_64x32.sv index e551099f4..4236bb3f6 100755 --- a/src/generic/mem/ram2p1r1wbe_64x32.sv +++ b/src/generic/mem/ram2p1r1wbe_64x32.sv @@ -27,8 +27,8 @@ module ram2p1r1wbe_64x32( input logic CLKA, input logic CLKB, - input logic CEBA, - input logic CEBB, + input logic CEBA, + input logic CEBB, input logic WEBA, input logic WEBB, input logic [5:0] AA, @@ -43,7 +43,7 @@ module ram2p1r1wbe_64x32( // replace "generic64x32RAM" with "TSDN..64X32.." module from your memory vendor //generic64x32RAM sramIP (.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, - // .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + // .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); TSDN28HPCPA64X32M4MW sramIP(.CLKA, .CLKB, .CEBA, .CEBB, .WEBA, .WEBB, - .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); + .AA, .AB, .DA, .DB, .BWEBA, .BWEBB, .QA, .QB); endmodule diff --git a/src/generic/mem/rom1p1r.sv b/src/generic/mem/rom1p1r.sv index 6930bc0db..ef9c6da37 100644 --- a/src/generic/mem/rom1p1r.sv +++ b/src/generic/mem/rom1p1r.sv @@ -28,8 +28,8 @@ `include "wally-config.vh" module rom1p1r #(parameter ADDR_WIDTH = 8, - parameter DATA_WIDTH = 32, - parameter PRELOAD_ENABLED = 0) + parameter DATA_WIDTH = 32, + parameter PRELOAD_ENABLED = 0) (input logic clk, input logic ce, input logic [ADDR_WIDTH-1:0] addr, @@ -37,7 +37,7 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, ); // Core Memory - logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0]; + logic [DATA_WIDTH-1:0] ROM [(2**ADDR_WIDTH)-1:0]; /* if ((`USE_SRAM == 1) & (ADDR_WDITH == 7) & (DATA_WIDTH == 64)) begin 
rom1p1r_128x64 rom1 (.CLK(clk), .CEB(~ce), .A(addr[6:0]), .Q(dout)); @@ -46,55 +46,55 @@ module rom1p1r #(parameter ADDR_WIDTH = 8, end else begin */ always @ (posedge clk) begin - if(ce) dout <= ROM[addr]; + if(ce) dout <= ROM[addr]; end // for FPGA, initialize with zero-stage bootloader if(PRELOAD_ENABLED) initial begin - ROM[0] = 64'h9581819300002197; - ROM[1] = 64'h4281420141014081; - ROM[2] = 64'h4481440143814301; - ROM[3] = 64'h4681460145814501; - ROM[4] = 64'h4881480147814701; - ROM[5] = 64'h4a814a0149814901; - ROM[6] = 64'h4c814c014b814b01; - ROM[7] = 64'h4e814e014d814d01; - ROM[8] = 64'h0110011b4f814f01; - ROM[9] = 64'h059b45011161016e; - ROM[10] = 64'h0004063705fe0010; - ROM[11] = 64'h05a000ef8006061b; - ROM[12] = 64'h0ff003930000100f; - ROM[13] = 64'h4e952e3110060e37; - ROM[14] = 64'hc602829b0053f2b7; - ROM[15] = 64'h2023fe02dfe312fd; - ROM[16] = 64'h829b0053f2b7007e; - ROM[17] = 64'hfe02dfe312fdc602; - ROM[18] = 64'h4de31efd000e2023; - ROM[19] = 64'h059bf1402573fdd0; - ROM[20] = 64'h0000061705e20870; - ROM[21] = 64'h0010029b01260613; - ROM[22] = 64'h11010002806702fe; - ROM[23] = 64'h84b2842ae426e822; - ROM[24] = 64'h892ee04aec064511; - ROM[25] = 64'h06e000ef07e000ef; - ROM[26] = 64'h979334fd02905563; - ROM[27] = 64'h07930177d4930204; - ROM[28] = 64'h4089093394be2004; - ROM[29] = 64'h04138522008905b3; - ROM[30] = 64'h19e3014000ef2004; - ROM[31] = 64'h64a2644260e2fe94; - ROM[32] = 64'h6749808261056902; - ROM[33] = 64'hdfed8b8510472783; - ROM[34] = 64'h2423479110a73823; - ROM[35] = 64'h10472783674910f7; - ROM[36] = 64'h20058693ffed8b89; - ROM[37] = 64'h05a1118737836749; - ROM[38] = 64'hfed59be3fef5bc23; - ROM[39] = 64'h1047278367498082; - ROM[40] = 64'h47858082dfed8b85; - ROM[41] = 64'h40a7853b4015551b; - ROM[42] = 64'h808210a7a02367c9; + ROM[0] = 64'h9581819300002197; + ROM[1] = 64'h4281420141014081; + ROM[2] = 64'h4481440143814301; + ROM[3] = 64'h4681460145814501; + ROM[4] = 64'h4881480147814701; + ROM[5] = 64'h4a814a0149814901; + ROM[6] = 
64'h4c814c014b814b01; + ROM[7] = 64'h4e814e014d814d01; + ROM[8] = 64'h0110011b4f814f01; + ROM[9] = 64'h059b45011161016e; + ROM[10] = 64'h0004063705fe0010; + ROM[11] = 64'h05a000ef8006061b; + ROM[12] = 64'h0ff003930000100f; + ROM[13] = 64'h4e952e3110060e37; + ROM[14] = 64'hc602829b0053f2b7; + ROM[15] = 64'h2023fe02dfe312fd; + ROM[16] = 64'h829b0053f2b7007e; + ROM[17] = 64'hfe02dfe312fdc602; + ROM[18] = 64'h4de31efd000e2023; + ROM[19] = 64'h059bf1402573fdd0; + ROM[20] = 64'h0000061705e20870; + ROM[21] = 64'h0010029b01260613; + ROM[22] = 64'h11010002806702fe; + ROM[23] = 64'h84b2842ae426e822; + ROM[24] = 64'h892ee04aec064511; + ROM[25] = 64'h06e000ef07e000ef; + ROM[26] = 64'h979334fd02905563; + ROM[27] = 64'h07930177d4930204; + ROM[28] = 64'h4089093394be2004; + ROM[29] = 64'h04138522008905b3; + ROM[30] = 64'h19e3014000ef2004; + ROM[31] = 64'h64a2644260e2fe94; + ROM[32] = 64'h6749808261056902; + ROM[33] = 64'hdfed8b8510472783; + ROM[34] = 64'h2423479110a73823; + ROM[35] = 64'h10472783674910f7; + ROM[36] = 64'h20058693ffed8b89; + ROM[37] = 64'h05a1118737836749; + ROM[38] = 64'hfed59be3fef5bc23; + ROM[39] = 64'h1047278367498082; + ROM[40] = 64'h47858082dfed8b85; + ROM[41] = 64'h40a7853b4015551b; + ROM[42] = 64'h808210a7a02367c9; end endmodule diff --git a/src/generic/mem/rom1p1r_128x32.sv b/src/generic/mem/rom1p1r_128x32.sv index bc16a76dc..ea5b92054 100755 --- a/src/generic/mem/rom1p1r_128x32.sv +++ b/src/generic/mem/rom1p1r_128x32.sv @@ -26,7 +26,7 @@ module rom1p1r_128x32( input logic CLK, - input logic CEB, + input logic CEB, input logic [6:0] A, output logic [31:0] Q ); diff --git a/src/generic/mem/rom1p1r_128x64.sv b/src/generic/mem/rom1p1r_128x64.sv index 3c7ea4842..6712d10fa 100755 --- a/src/generic/mem/rom1p1r_128x64.sv +++ b/src/generic/mem/rom1p1r_128x64.sv @@ -25,14 +25,14 @@ //////////////////////////////////////////////////////////////////////////////////////////////// module rom1p1r_128x64( - input logic CLK, - input logic CEB, + input logic CLK, + input 
logic CEB, input logic [6:0] A, output logic [63:0] Q ); // replace "generic64x128RAM" with "TS3N..64X128.." module from your memory vendor -ts3n28hpcpa128x64m8m romIP (.CLK, .CEB, .A, .Q); + ts3n28hpcpa128x64m8m romIP (.CLK, .CEB, .A, .Q); // generic64x128ROM romIP (.CLK, .CEB, .A, .Q); endmodule diff --git a/src/ieu/bmu/bmuctrl.sv b/src/ieu/bmu/bmuctrl.sv index b436fda46..90d031a14 100644 --- a/src/ieu/bmu/bmuctrl.sv +++ b/src/ieu/bmu/bmuctrl.sv @@ -30,7 +30,7 @@ `include "wally-config.vh" module bmuctrl( - input logic clk, reset, + input logic clk, reset, // Decode stage control signals input logic StallD, FlushD, // Stall, flush Decode stage input logic [31:0] InstrD, // Instruction in Decode stage @@ -43,7 +43,7 @@ module bmuctrl( output logic BSubArithD, // TRUE if ext, clr, andn, orn, xnor instruction in Decode Stage output logic IllegalBitmanipInstrD, // Indicates if it is unrecognized B instruction in Decode Stage // Execute stage control signals - input logic StallE, FlushE, // Stall, flush Execute stage + input logic StallE, FlushE, // Stall, flush Execute stage output logic [2:0] ALUSelectD, // ALU select output logic [1:0] BSelectE, // Indicates if ZBA_ZBB_ZBC_ZBS instruction in one-hot encoding output logic [2:0] ZBBSelectE, // ZBB mux select signal @@ -66,7 +66,7 @@ module bmuctrl( `define BMUCTRLW 17 - logic [`BMUCTRLW-1:0] BMUControlsD; // Main B Instructions Decoder control signals + logic [`BMUCTRLW-1:0] BMUControlsD; // Main B Instructions Decoder control signals // Extract fields assign OpD = InstrD[6:0]; @@ -180,4 +180,4 @@ module bmuctrl( // BMU Execute stage pipieline control register flopenrc#(10) controlregBMU(clk, reset, FlushE, ~StallE, {BSelectD, ZBBSelectD, BRegWriteD, BComparatorSignedD, BALUControlD}, {BSelectE, ZBBSelectE, BRegWriteE, BComparatorSignedE, BALUControlE}); -endmodule \ No newline at end of file +endmodule diff --git a/src/ieu/controller.sv b/src/ieu/controller.sv index 21811b49a..da99a48f6 100644 --- 
a/src/ieu/controller.sv +++ b/src/ieu/controller.sv @@ -31,7 +31,7 @@ module controller( - input logic clk, reset, + input logic clk, reset, // Decode stage control signals input logic StallD, FlushD, // Stall, flush Decode stage input logic [31:0] InstrD, // Instruction in Decode stage @@ -41,11 +41,11 @@ module controller( output logic JumpD, // Jump instruction output logic BranchD, // Branch instruction // Execute stage control signals - input logic StallE, FlushE, // Stall, flush Execute stage + input logic StallE, FlushE, // Stall, flush Execute stage input logic [1:0] FlagsE, // Comparison flags ({eq, lt}) input logic FWriteIntE, // Write integer register, coming from FPU controller output logic PCSrcE, // Select signal to choose next PC (for datapath and Hazard unit) - output logic ALUSrcAE, ALUSrcBE, // ALU operands + output logic ALUSrcAE, ALUSrcBE, // ALU operands output logic ALUResultSrcE, // Selects result to pass on to Memory stage output logic [2:0] ALUSelectE, // ALU mux select signal output logic MemReadE, CSRReadE, // Instruction reads memory, reads a CSR (needed for Hazard unit) @@ -74,7 +74,7 @@ module controller( output logic FWriteIntM, // FPU controller writes integer register file // Writeback stage control signals input logic StallW, FlushW, // Stall, flush Writeback stage - output logic RegWriteW, IntDivW, // Instruction writes a register, is an integer divide + output logic RegWriteW, IntDivW, // Instruction writes a register, is an integer divide output logic [2:0] ResultSrcW, // Select source of result to write back to register file // Stall during CSRs output logic CSRWriteFenceM, // CSR write or fence instruction; needs to flush the following instructions @@ -89,16 +89,16 @@ module controller( `define CTRLW 23 // pipelined control signals - logic RegWriteD, RegWriteE; // RegWrite (register will be written) + logic RegWriteD, RegWriteE; // RegWrite (register will be written) logic [2:0] ResultSrcD, ResultSrcE, ResultSrcM; // Select 
which result to write back to register file logic [1:0] MemRWD, MemRWE; // Store (write to memory) - logic ALUOpD; // 0 for address generation, 1 for all other operations (must use Funct3) - logic BaseW64D; // W64 for Base instructions specifically - logic BaseRegWriteD; // Indicates if Base instruction register write instruction - logic BaseSubArithD; // Indicates if Base instruction subtracts, sra, slt, sltu + logic ALUOpD; // 0 for address generation, 1 for all other operations (must use Funct3) + logic BaseW64D; // W64 for Base instructions specifically + logic BaseRegWriteD; // Indicates if Base instruction register write instruction + logic BaseSubArithD; // Indicates if Base instruction subtracts, sra, slt, sltu logic BaseALUSrcBD; // Base instruction ALU B source select signal logic [2:0] ALUControlD; // Determines ALU operation - logic ALUSrcAD, ALUSrcBD; // ALU inputs + logic ALUSrcAD, ALUSrcBD; // ALU inputs logic ALUResultSrcD, W64D, MDUD; // ALU result, is RV64 W-type, is multiply/divide instruction logic CSRZeroSrcD; // Ignore setting and clearing zeros to CSR logic CSRReadD; // CSR read instruction @@ -115,7 +115,7 @@ module controller( logic BranchTakenE; // Branch is taken logic eqE, ltE; // Comparator outputs logic unused; - logic BranchFlagE; // Branch flag to use (chosen between eq or lt) + logic BranchFlagE; // Branch flag to use (chosen between eq or lt) logic IEURegWriteE; // Register write logic BRegWriteE; // Register write from BMU controller in Execute Stage logic IllegalERegAdrD; // RV32E attempts to write upper 16 registers @@ -146,29 +146,29 @@ module controller( logic Funct7ZeroD, Funct7b5D, IShiftD, INoShiftD; logic Funct7ShiftZeroD, Funct7Shiftb5D; - assign Funct7ZeroD = (Funct7D == 7'b0000000); // most R-type instructions - assign Funct7b5D = (Funct7D == 7'b0100000); // srai, sub + assign Funct7ZeroD = (Funct7D == 7'b0000000); // most R-type instructions + assign Funct7b5D = (Funct7D == 7'b0100000); // srai, sub assign 
Funct7ShiftZeroD = (`XLEN==64) ? (Funct7D[6:1] == 6'b000000) : Funct7ZeroD; assign Funct7Shiftb5D = (`XLEN==64) ? (Funct7D[6:1] == 6'b010000) : Funct7b5D; - assign IShiftD = (Funct3D == 3'b001 & Funct7ShiftZeroD) | (Funct3D == 3'b101 & (Funct7ShiftZeroD | Funct7Shiftb5D)); // slli, srli, srai, or w forms - assign INoShiftD = ((Funct3D != 3'b001) & (Funct3D != 3'b101)); - assign IFunctD = IShiftD | INoShiftD; - assign RFunctD = ((Funct3D == 3'b000 | Funct3D == 3'b101) & Funct7b5D) | Funct7ZeroD; - assign MFunctD = (Funct7D == 7'b0000001) & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv - assign LFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | Funct3D == 3'b100 | Funct3D == 3'b101 | - ((`XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110)); - assign SFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | - ((`XLEN == 64) & (Funct3D == 3'b011)); - assign BFunctD = (Funct3D[2:1] != 2'b01); // legal branches - assign JFunctD = (Funct3D == 3'b000); + assign IShiftD = (Funct3D == 3'b001 & Funct7ShiftZeroD) | (Funct3D == 3'b101 & (Funct7ShiftZeroD | Funct7Shiftb5D)); // slli, srli, srai, or w forms + assign INoShiftD = ((Funct3D != 3'b001) & (Funct3D != 3'b101)); + assign IFunctD = IShiftD | INoShiftD; + assign RFunctD = ((Funct3D == 3'b000 | Funct3D == 3'b101) & Funct7b5D) | Funct7ZeroD; + assign MFunctD = (Funct7D == 7'b0000001) & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv + assign LFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | Funct3D == 3'b100 | Funct3D == 3'b101 | + ((`XLEN == 64) & (Funct3D == 3'b011 | Funct3D == 3'b110)); + assign SFunctD = Funct3D == 3'b000 | Funct3D == 3'b001 | Funct3D == 3'b010 | + ((`XLEN == 64) & (Funct3D == 3'b011)); + assign BFunctD = (Funct3D[2:1] != 2'b01); // legal branches + assign JFunctD = (Funct3D == 3'b000); end else begin:legalcheck2 - assign IFunctD = 1; // Don't bother to separate out shift decoding - assign RFunctD = ~Funct7D[0]; // Not a 
multiply - assign MFunctD = Funct7D[0] & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv - assign LFunctD = 1; // don't bother to check Funct3 for loads - assign SFunctD = 1; // don't bother to check Funct3 for stores - assign BFunctD = 1; // don't bother to check Funct3 for branches - assign JFunctD = 1; // don't bother to check Funct3 for jumps + assign IFunctD = 1; // Don't bother to separate out shift decoding + assign RFunctD = ~Funct7D[0]; // Not a multiply + assign MFunctD = Funct7D[0] & (`M_SUPPORTED | (`ZMMUL_SUPPORTED & ~Funct3D[2])); // muldiv + assign LFunctD = 1; // don't bother to check Funct3 for loads + assign SFunctD = 1; // don't bother to check Funct3 for stores + assign BFunctD = 1; // don't bother to check Funct3 for branches + assign JFunctD = 1; // don't bother to check Funct3 for jumps end // Main Instruction Decoder @@ -182,7 +182,7 @@ module controller( 7'b0000111: ControlsD = `CTRLW'b0_000_01_10_001_0_0_0_0_0_0_0_0_0_00_1; // flw - only legal if FP supported 7'b0001111: if (`ZIFENCEI_SUPPORTED) ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_1_0_00_0; // fence - else + else ControlsD = `CTRLW'b0_000_00_00_000_0_0_0_0_0_0_0_0_0_00_0; // fence treated as nop 7'b0010011: if (IFunctD) ControlsD = `CTRLW'b1_000_01_00_000_0_1_0_0_0_0_0_0_0_00_0; // I-type ALU @@ -337,4 +337,4 @@ module controller( // the synchronous DTIM cannot read immediately after write // a cache cannot read or write immediately after a write assign StoreStallD = MemRWE[0] & ((MemRWD[1] | (MemRWD[0] & `DCACHE_SUPPORTED)) | (|AtomicD)); -endmodule \ No newline at end of file +endmodule diff --git a/src/ieu/datapath.sv b/src/ieu/datapath.sv index a6fb1fdcd..a48b39402 100644 --- a/src/ieu/datapath.sv +++ b/src/ieu/datapath.sv @@ -108,7 +108,7 @@ module datapath ( flopenrc #(5) Rs1EReg(clk, reset, FlushE, ~StallE, Rs1D, Rs1E); flopenrc #(5) Rs2EReg(clk, reset, FlushE, ~StallE, Rs2D, Rs2E); flopenrc #(5) RdEReg(clk, reset, FlushE, ~StallE, RdD, RdE); - + mux3 
#(`XLEN) faemux(R1E, ResultW, IFResultM, ForwardAE, ForwardedSrcAE); mux3 #(`XLEN) fbemux(R2E, ResultW, IFResultM, ForwardBE, ForwardedSrcBE); comparator #(`XLEN) comp(ForwardedSrcAE, ForwardedSrcBE, BranchSignedE, FlagsE); @@ -121,7 +121,7 @@ module datapath ( // Memory stage pipeline register flopenrc #(`XLEN) SrcAMReg(clk, reset, FlushM, ~StallM, SrcAE, SrcAM); flopenrc #(`XLEN) IEUResultMReg(clk, reset, FlushM, ~StallM, IEUResultE, IEUResultM); - flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM); + flopenrc #(5) RdMReg(clk, reset, FlushM, ~StallM, RdE, RdM); flopenrc #(`XLEN) WriteDataMReg(clk, reset, FlushM, ~StallM, ForwardedSrcBE, WriteDataM); // Writeback stage pipeline register and logic diff --git a/src/ieu/forward.sv b/src/ieu/forward.sv index 8bfaa51c6..62cc5ea4f 100644 --- a/src/ieu/forward.sv +++ b/src/ieu/forward.sv @@ -34,7 +34,7 @@ module forward( input logic [4:0] Rs1D, Rs2D, Rs1E, Rs2E, RdE, RdM, RdW, // Source and destination registers input logic MemReadE, MDUE, CSRReadE, // Execute stage instruction is a load (MemReadE), divide (MDUE), or CSR read (CSRReadE) input logic RegWriteM, RegWriteW, // Instruction in Memory or Writeback stage writes register file - input logic FCvtIntE, // FPU convert float to int + input logic FCvtIntE, // FPU convert float to int input logic SCE, // Store Conditional instruction // Forwarding controls output logic [1:0] ForwardAE, ForwardBE, // Select signals for forwarding multiplexers diff --git a/src/ieu/ieu.sv b/src/ieu/ieu.sv index 0fdba9e8a..d5b3e8f40 100644 --- a/src/ieu/ieu.sv +++ b/src/ieu/ieu.sv @@ -29,27 +29,27 @@ `include "wally-config.vh" module ieu ( - input logic clk, reset, + input logic clk, reset, // Decode stage signals - input logic [31:0] InstrD, // Instruction - input logic IllegalIEUFPUInstrD, // Illegal instruction - output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers + input logic [31:0] InstrD, // Instruction + input logic 
IllegalIEUFPUInstrD, // Illegal instruction + output logic IllegalBaseInstrD, // Illegal I-type instruction, or illegal RV32 access to upper 16 registers // Execute stage signals input logic [`XLEN-1:0] PCE, // PC input logic [`XLEN-1:0] PCLinkE, // PC + 4 - output logic PCSrcE, // Select next PC (between PC+4 and IEUAdrE) - input logic FWriteIntE, FCvtIntE, // FPU writes to integer register file, FPU converts float to int + output logic PCSrcE, // Select next PC (between PC+4 and IEUAdrE) + input logic FWriteIntE, FCvtIntE, // FPU writes to integer register file, FPU converts float to int output logic [`XLEN-1:0] IEUAdrE, // Memory address - output logic IntDivE, W64E, // Integer divide, RV64 W-type instruction - output logic [2:0] Funct3E, // Funct3 instruction field + output logic IntDivE, W64E, // Integer divide, RV64 W-type instruction + output logic [2:0] Funct3E, // Funct3 instruction field output logic [`XLEN-1:0] ForwardedSrcAE, ForwardedSrcBE, // ALU src inputs before the mux choosing between them and PCE to put in srcA/B output logic [4:0] RdE, // Destination register // Memory stage signals - input logic SquashSCW, // Squash store conditional, from LSU - output logic [1:0] MemRWM, // Read/write control goes to LSU - output logic [1:0] AtomicM, // Atomic control goes to LSU + input logic SquashSCW, // Squash store conditional, from LSU + output logic [1:0] MemRWM, // Read/write control goes to LSU + output logic [1:0] AtomicM, // Atomic control goes to LSU output logic [`XLEN-1:0] WriteDataM, // Write data to LSU - output logic [2:0] Funct3M, // Funct3 (size and signedness) to LSU + output logic [2:0] Funct3M, // Funct3 (size and signedness) to LSU output logic [`XLEN-1:0] SrcAM, // ALU SrcA to Privileged unit and FPU output logic [4:0] RdM, // Destination register input logic [`XLEN-1:0] FIntResM, // Integer result from FPU (fmv, fclass, fcmp) @@ -66,12 +66,12 @@ module ieu ( output logic [4:0] RdW, // Destination register input logic [`XLEN-1:0] 
ReadDataW, // LSU's read data // Hazard unit signals - input logic StallD, StallE, StallM, StallW, // Stall signals from hazard unit - input logic FlushD, FlushE, FlushM, FlushW, // Flush signals - output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit + input logic StallD, StallE, StallM, StallW, // Stall signals from hazard unit + input logic FlushD, FlushE, FlushM, FlushW, // Flush signals + output logic FCvtIntStallD, LoadStallD, // Stall causes from IEU to hazard unit output logic MDUStallD, CSRRdStallD, StoreStallD, - output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction - output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions + output logic CSRReadM, CSRWriteM, PrivilegedM,// CSR read, CSR write, is privileged instruction + output logic CSRWriteFenceM // CSR write or fence instruction needs to flush subsequent instructions ); logic [2:0] ImmSrcD; // Select type of immediate extension diff --git a/src/ieu/regfile.sv b/src/ieu/regfile.sv index cc0439c08..a4ee1cc3e 100644 --- a/src/ieu/regfile.sv +++ b/src/ieu/regfile.sv @@ -52,7 +52,7 @@ module regfile ( always_ff @(negedge clk) if (reset) for(i=1; i