// Copyright 2020 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51

/// Integer Processing Unit
/// Based on Snitch Shared Multiplier/Divider
/// Author: Sergio Mazzola, <smazzola@student.ethz.ch>

/// Decodes the instruction presented on the accelerator request channel and
/// forwards the request handshake to one execution unit: the serial divider,
/// or -- depending on the elaboration-time constant `snitch_pkg::XPULPIMG` --
/// either the DSP unit (`dspu`) or the plain `multiplier`. The results of the
/// two instantiated units are merged onto the response channel by a two-input
/// `stream_arbiter`.
module snitch_ipu #(
  // Width of the transaction id carried from request to response.
  parameter int unsigned IdWidth = 5
) (
  input  logic                     clk_i,
  input  logic                     rst_i,            // inverted before being fed to `rst_ni` ports
  // Accelerator Interface - Slave
  input  logic [31:0]              acc_qaddr_i,      // unused
  input  logic [IdWidth-1:0]       acc_qid_i,
  input  logic [31:0]              acc_qdata_op_i,   // RISC-V instruction
  input  logic [31:0]              acc_qdata_arga_i,
  input  logic [31:0]              acc_qdata_argb_i,
  input  logic [31:0]              acc_qdata_argc_i, // only connected to the DSP unit
  input  logic                     acc_qvalid_i,
  output logic                     acc_qready_o,
  output logic [31:0]              acc_pdata_o,
  output logic [IdWidth-1:0]       acc_pid_o,
  output logic                     acc_perror_o,     // constantly driven to 0 by the decoder below
  output logic                     acc_pvalid_o,
  input  logic                     acc_pready_i
);
  `include "common_cells/registers.svh"

  // Payload carried through the output arbiter: result word plus transaction id.
  typedef struct packed {
    logic [31:0]        result;
    logic [IdWidth-1:0] id;
  } result_t;
  // input handshake
  logic div_valid_op, div_ready_op;
  // Only one of the mul/dsp units is instantiated, so the ready signal of the
  // other one has no driver; the lint waiver below covers that.
  /* verilator lint_off UNDRIVEN */
  logic mul_valid_op, mul_ready_op;
  logic dsp_valid_op, dsp_ready_op;
  /* verilator lint_on UNDRIVEN */
  // output handshake
  logic mul_valid, mul_ready;
  logic div_valid, div_ready;
  logic dsp_valid, dsp_ready;
  // Per-unit results and the arbitrated result driving the response channel.
  result_t div, mul, dsp, oup;
  // Set by the decoder for unsupported instructions.
  // NOTE(review): this signal is not read anywhere in this module and
  // `acc_perror_o` stays 0 -- confirm whether an illegal instruction is meant
  // to be reported on the response channel.
  logic illegal_instruction;

  // Request decoder. By default no unit sees a valid request and the request
  // channel is not ready; the matching case arm connects `acc_qvalid_i` /
  // `acc_qready_o` to the handshake of the selected unit. For an instruction
  // that matches no supported pattern, `acc_qready_o` keeps its default of 0.
  always_comb begin
    mul_valid_op = 1'b0;
    div_valid_op = 1'b0;
    dsp_valid_op = 1'b0;
    acc_qready_o = 1'b0;
    acc_perror_o = 1'b0;
    illegal_instruction = 1'b0;
    unique casez (acc_qdata_op_i)
      // Multiplications: handled by the DSP unit when XPULPIMG is set,
      // otherwise by the stand-alone multiplier.
      riscv_instr::MUL,
      riscv_instr::MULH,
      riscv_instr::MULHSU,
      riscv_instr::MULHU: begin
        if (snitch_pkg::XPULPIMG) begin
          dsp_valid_op = acc_qvalid_i;
          acc_qready_o = dsp_ready_op;
        end else begin
          mul_valid_op = acc_qvalid_i;
          acc_qready_o = mul_ready_op;
        end
      end
      // Divisions and remainders: always handled by the serial divider.
      riscv_instr::DIV,
      riscv_instr::DIVU,
      riscv_instr::REM,
      riscv_instr::REMU: begin
        div_valid_op = acc_qvalid_i;
        acc_qready_o = div_ready_op;
      end
      // Xpulpimg instructions: accepted by the DSP unit only when XPULPIMG is
      // set, flagged as illegal otherwise (see the arm body below the list).
      riscv_instr::P_ABS,                 // Xpulpimg: p.abs
      riscv_instr::P_SLET,                // Xpulpimg: p.slet
      riscv_instr::P_SLETU,               // Xpulpimg: p.sletu
      riscv_instr::P_MIN,                 // Xpulpimg: p.min
      riscv_instr::P_MINU,                // Xpulpimg: p.minu
      riscv_instr::P_MAX,                 // Xpulpimg: p.max
      riscv_instr::P_MAXU,                // Xpulpimg: p.maxu
      riscv_instr::P_EXTHS,               // Xpulpimg: p.exths
      riscv_instr::P_EXTHZ,               // Xpulpimg: p.exthz
      riscv_instr::P_EXTBS,               // Xpulpimg: p.extbs
      riscv_instr::P_EXTBZ,               // Xpulpimg: p.extbz
      riscv_instr::P_CLIP,                // Xpulpimg: p.clip
      riscv_instr::P_CLIPU,               // Xpulpimg: p.clipu
      riscv_instr::P_CLIPR,               // Xpulpimg: p.clipr
      riscv_instr::P_CLIPUR,              // Xpulpimg: p.clipur
      riscv_instr::P_MAC,                 // Xpulpimg: p.mac
      riscv_instr::P_MSU,                 // Xpulpimg: p.msu
      riscv_instr::PV_ADD_H,              // Xpulpimg: pv.add.h
      riscv_instr::PV_ADD_SC_H,           // Xpulpimg: pv.add.sc.h
      riscv_instr::PV_ADD_SCI_H,          // Xpulpimg: pv.add.sci.h
      riscv_instr::PV_ADD_B,              // Xpulpimg: pv.add.b
      riscv_instr::PV_ADD_SC_B,           // Xpulpimg: pv.add.sc.b
      riscv_instr::PV_ADD_SCI_B,          // Xpulpimg: pv.add.sci.b
      riscv_instr::PV_SUB_H,              // Xpulpimg: pv.sub.h
      riscv_instr::PV_SUB_SC_H,           // Xpulpimg: pv.sub.sc.h
      riscv_instr::PV_SUB_SCI_H,          // Xpulpimg: pv.sub.sci.h
      riscv_instr::PV_SUB_B,              // Xpulpimg: pv.sub.b
      riscv_instr::PV_SUB_SC_B,           // Xpulpimg: pv.sub.sc.b
      riscv_instr::PV_SUB_SCI_B,          // Xpulpimg: pv.sub.sci.b
      riscv_instr::PV_AVG_H,              // Xpulpimg: pv.avg.h
      riscv_instr::PV_AVG_SC_H,           // Xpulpimg: pv.avg.sc.h
      riscv_instr::PV_AVG_SCI_H,          // Xpulpimg: pv.avg.sci.h
      riscv_instr::PV_AVG_B,              // Xpulpimg: pv.avg.b
      riscv_instr::PV_AVG_SC_B,           // Xpulpimg: pv.avg.sc.b
      riscv_instr::PV_AVG_SCI_B,          // Xpulpimg: pv.avg.sci.b
      riscv_instr::PV_AVGU_H,             // Xpulpimg: pv.avgu.h
      riscv_instr::PV_AVGU_SC_H,          // Xpulpimg: pv.avgu.sc.h
      riscv_instr::PV_AVGU_SCI_H,         // Xpulpimg: pv.avgu.sci.h
      riscv_instr::PV_AVGU_B,             // Xpulpimg: pv.avgu.b
      riscv_instr::PV_AVGU_SC_B,          // Xpulpimg: pv.avgu.sc.b
      riscv_instr::PV_AVGU_SCI_B,         // Xpulpimg: pv.avgu.sci.b
      riscv_instr::PV_MIN_H,              // Xpulpimg: pv.min.h
      riscv_instr::PV_MIN_SC_H,           // Xpulpimg: pv.min.sc.h
      riscv_instr::PV_MIN_SCI_H,          // Xpulpimg: pv.min.sci.h
      riscv_instr::PV_MIN_B,              // Xpulpimg: pv.min.b
      riscv_instr::PV_MIN_SC_B,           // Xpulpimg: pv.min.sc.b
      riscv_instr::PV_MIN_SCI_B,          // Xpulpimg: pv.min.sci.b
      riscv_instr::PV_MINU_H,             // Xpulpimg: pv.minu.h
      riscv_instr::PV_MINU_SC_H,          // Xpulpimg: pv.minu.sc.h
      riscv_instr::PV_MINU_SCI_H,         // Xpulpimg: pv.minu.sci.h
      riscv_instr::PV_MINU_B,             // Xpulpimg: pv.minu.b
      riscv_instr::PV_MINU_SC_B,          // Xpulpimg: pv.minu.sc.b
      riscv_instr::PV_MINU_SCI_B,         // Xpulpimg: pv.minu.sci.b
      riscv_instr::PV_MAX_H,              // Xpulpimg: pv.max.h
      riscv_instr::PV_MAX_SC_H,           // Xpulpimg: pv.max.sc.h
      riscv_instr::PV_MAX_SCI_H,          // Xpulpimg: pv.max.sci.h
      riscv_instr::PV_MAX_B,              // Xpulpimg: pv.max.b
      riscv_instr::PV_MAX_SC_B,           // Xpulpimg: pv.max.sc.b
      riscv_instr::PV_MAX_SCI_B,          // Xpulpimg: pv.max.sci.b
      riscv_instr::PV_MAXU_H,             // Xpulpimg: pv.maxu.h
      riscv_instr::PV_MAXU_SC_H,          // Xpulpimg: pv.maxu.sc.h
      riscv_instr::PV_MAXU_SCI_H,         // Xpulpimg: pv.maxu.sci.h
      riscv_instr::PV_MAXU_B,             // Xpulpimg: pv.maxu.b
      riscv_instr::PV_MAXU_SC_B,          // Xpulpimg: pv.maxu.sc.b
      riscv_instr::PV_MAXU_SCI_B,         // Xpulpimg: pv.maxu.sci.b
      riscv_instr::PV_SRL_H,              // Xpulpimg: pv.srl.h
      riscv_instr::PV_SRL_SC_H,           // Xpulpimg: pv.srl.sc.h
      riscv_instr::PV_SRL_SCI_H,          // Xpulpimg: pv.srl.sci.h
      riscv_instr::PV_SRL_B,              // Xpulpimg: pv.srl.b
      riscv_instr::PV_SRL_SC_B,           // Xpulpimg: pv.srl.sc.b
      riscv_instr::PV_SRL_SCI_B,          // Xpulpimg: pv.srl.sci.b
      riscv_instr::PV_SRA_H,              // Xpulpimg: pv.sra.h
      riscv_instr::PV_SRA_SC_H,           // Xpulpimg: pv.sra.sc.h
      riscv_instr::PV_SRA_SCI_H,          // Xpulpimg: pv.sra.sci.h
      riscv_instr::PV_SRA_B,              // Xpulpimg: pv.sra.b
      riscv_instr::PV_SRA_SC_B,           // Xpulpimg: pv.sra.sc.b
      riscv_instr::PV_SRA_SCI_B,          // Xpulpimg: pv.sra.sci.b
      riscv_instr::PV_SLL_H,              // Xpulpimg: pv.sll.h
      riscv_instr::PV_SLL_SC_H,           // Xpulpimg: pv.sll.sc.h
      riscv_instr::PV_SLL_SCI_H,          // Xpulpimg: pv.sll.sci.h
      riscv_instr::PV_SLL_B,              // Xpulpimg: pv.sll.b
      riscv_instr::PV_SLL_SC_B,           // Xpulpimg: pv.sll.sc.b
      riscv_instr::PV_SLL_SCI_B,          // Xpulpimg: pv.sll.sci.b
      riscv_instr::PV_OR_H,               // Xpulpimg: pv.or.h
      riscv_instr::PV_OR_SC_H,            // Xpulpimg: pv.or.sc.h
      riscv_instr::PV_OR_SCI_H,           // Xpulpimg: pv.or.sci.h
      riscv_instr::PV_OR_B,               // Xpulpimg: pv.or.b
      riscv_instr::PV_OR_SC_B,            // Xpulpimg: pv.or.sc.b
      riscv_instr::PV_OR_SCI_B,           // Xpulpimg: pv.or.sci.b
      riscv_instr::PV_XOR_H,              // Xpulpimg: pv.xor.h
      riscv_instr::PV_XOR_SC_H,           // Xpulpimg: pv.xor.sc.h
      riscv_instr::PV_XOR_SCI_H,          // Xpulpimg: pv.xor.sci.h
      riscv_instr::PV_XOR_B,              // Xpulpimg: pv.xor.b
      riscv_instr::PV_XOR_SC_B,           // Xpulpimg: pv.xor.sc.b
      riscv_instr::PV_XOR_SCI_B,          // Xpulpimg: pv.xor.sci.b
      riscv_instr::PV_AND_H,              // Xpulpimg: pv.and.h
      riscv_instr::PV_AND_SC_H,           // Xpulpimg: pv.and.sc.h
      riscv_instr::PV_AND_SCI_H,          // Xpulpimg: pv.and.sci.h
      riscv_instr::PV_AND_B,              // Xpulpimg: pv.and.b
      riscv_instr::PV_AND_SC_B,           // Xpulpimg: pv.and.sc.b
      riscv_instr::PV_AND_SCI_B,          // Xpulpimg: pv.and.sci.b
      riscv_instr::PV_ABS_H,              // Xpulpimg: pv.abs.h
      riscv_instr::PV_ABS_B,              // Xpulpimg: pv.abs.b
      riscv_instr::PV_EXTRACT_H,          // Xpulpimg: pv.extract.h
      riscv_instr::PV_EXTRACT_B,          // Xpulpimg: pv.extract.b
      riscv_instr::PV_EXTRACTU_H,         // Xpulpimg: pv.extractu.h
      riscv_instr::PV_EXTRACTU_B,         // Xpulpimg: pv.extractu.b
      riscv_instr::PV_INSERT_H,           // Xpulpimg: pv.insert.h
      riscv_instr::PV_INSERT_B,           // Xpulpimg: pv.insert.b
      riscv_instr::PV_DOTUP_H,            // Xpulpimg: pv.dotup.h
      riscv_instr::PV_DOTUP_SC_H,         // Xpulpimg: pv.dotup.sc.h
      riscv_instr::PV_DOTUP_SCI_H,        // Xpulpimg: pv.dotup.sci.h
      riscv_instr::PV_DOTUP_B,            // Xpulpimg: pv.dotup.b
      riscv_instr::PV_DOTUP_SC_B,         // Xpulpimg: pv.dotup.sc.b
      riscv_instr::PV_DOTUP_SCI_B,        // Xpulpimg: pv.dotup.sci.b
      riscv_instr::PV_DOTUSP_H,           // Xpulpimg: pv.dotusp.h
      riscv_instr::PV_DOTUSP_SC_H,        // Xpulpimg: pv.dotusp.sc.h
      riscv_instr::PV_DOTUSP_SCI_H,       // Xpulpimg: pv.dotusp.sci.h
      riscv_instr::PV_DOTUSP_B,           // Xpulpimg: pv.dotusp.b
      riscv_instr::PV_DOTUSP_SC_B,        // Xpulpimg: pv.dotusp.sc.b
      riscv_instr::PV_DOTUSP_SCI_B,       // Xpulpimg: pv.dotusp.sci.b
      riscv_instr::PV_DOTSP_H,            // Xpulpimg: pv.dotsp.h
      riscv_instr::PV_DOTSP_SC_H,         // Xpulpimg: pv.dotsp.sc.h
      riscv_instr::PV_DOTSP_SCI_H,        // Xpulpimg: pv.dotsp.sci.h
      riscv_instr::PV_DOTSP_B,            // Xpulpimg: pv.dotsp.b
      riscv_instr::PV_DOTSP_SC_B,         // Xpulpimg: pv.dotsp.sc.b
      riscv_instr::PV_DOTSP_SCI_B,        // Xpulpimg: pv.dotsp.sci.b
      riscv_instr::PV_SDOTUP_H,           // Xpulpimg: pv.sdotup.h
      riscv_instr::PV_SDOTUP_SC_H,        // Xpulpimg: pv.sdotup.sc.h
      riscv_instr::PV_SDOTUP_SCI_H,       // Xpulpimg: pv.sdotup.sci.h
      riscv_instr::PV_SDOTUP_B,           // Xpulpimg: pv.sdotup.b
      riscv_instr::PV_SDOTUP_SC_B,        // Xpulpimg: pv.sdotup.sc.b
      riscv_instr::PV_SDOTUP_SCI_B,       // Xpulpimg: pv.sdotup.sci.b
      riscv_instr::PV_SDOTUSP_H,          // Xpulpimg: pv.sdotusp.h
      riscv_instr::PV_SDOTUSP_SC_H,       // Xpulpimg: pv.sdotusp.sc.h
      riscv_instr::PV_SDOTUSP_SCI_H,      // Xpulpimg: pv.sdotusp.sci.h
      riscv_instr::PV_SDOTUSP_B,          // Xpulpimg: pv.sdotusp.b
      riscv_instr::PV_SDOTUSP_SC_B,       // Xpulpimg: pv.sdotusp.sc.b
      riscv_instr::PV_SDOTUSP_SCI_B,      // Xpulpimg: pv.sdotusp.sci.b
      riscv_instr::PV_SDOTSP_H,           // Xpulpimg: pv.sdotsp.h
      riscv_instr::PV_SDOTSP_SC_H,        // Xpulpimg: pv.sdotsp.sc.h
      riscv_instr::PV_SDOTSP_SCI_H,       // Xpulpimg: pv.sdotsp.sci.h
      riscv_instr::PV_SDOTSP_B,           // Xpulpimg: pv.sdotsp.b
      riscv_instr::PV_SDOTSP_SC_B,        // Xpulpimg: pv.sdotsp.sc.b
      riscv_instr::PV_SDOTSP_SCI_B,       // Xpulpimg: pv.sdotsp.sci.b
      riscv_instr::PV_SHUFFLE2_H,         // Xpulpimg: pv.shuffle2.h
      riscv_instr::PV_SHUFFLE2_B: begin   // Xpulpimg: pv.shuffle2.b
        if (snitch_pkg::XPULPIMG) begin
          dsp_valid_op = acc_qvalid_i;
          acc_qready_o = dsp_ready_op;
        end else begin
          // Extension not enabled: no unit is selected for these instructions.
          illegal_instruction = 1'b1;
        end
      end
      // Anything else is not handled by this unit.
      default: illegal_instruction = 1'b1;
    endcase
  end

  // Serial Divider
  // Instantiated regardless of XPULPIMG. It has an active-low reset port, so
  // `rst_i` is inverted here.
  serdiv #(
      .WIDTH       ( 32      ),
      .IdWidth     ( IdWidth )
  ) i_div (
      .clk_i       ( clk_i                ),
      .rst_ni      ( ~rst_i               ),
      .id_i        ( acc_qid_i              ),
      .operator_i  ( acc_qdata_op_i         ),
      .op_a_i      ( acc_qdata_arga_i       ),
      .op_b_i      ( acc_qdata_argb_i       ),
      .in_vld_i    ( div_valid_op           ),
      .in_rdy_o    ( div_ready_op           ),
      .out_vld_o   ( div_valid              ),
      .out_rdy_i   ( div_ready              ),
      .id_o        ( div.id                 ),
      .res_o       ( div.result             )
  );

  // Elaboration-time selection of the second execution unit and of the
  // matching output arbiter.
  if (snitch_pkg::XPULPIMG) begin : gen_xpulpimg
    // DSP Unit
    // Receives all three operands; it is the only consumer of `acc_qdata_argc_i`.
    dspu #(
        .Width    ( 32      ),
        .IdWidth  ( IdWidth )
    ) i_dspu (
        .clk_i       ( clk_i                ),
        .rst_i       ( rst_i                ),
        .id_i        ( acc_qid_i            ),
        .operator_i  ( acc_qdata_op_i       ),
        .op_a_i      ( acc_qdata_arga_i     ),
        .op_b_i      ( acc_qdata_argb_i     ),
        .op_c_i      ( acc_qdata_argc_i     ),
        .in_valid_i  ( dsp_valid_op         ),
        .in_ready_o  ( dsp_ready_op         ),
        .out_valid_o ( dsp_valid            ),
        .out_ready_i ( dsp_ready            ),
        .id_o        ( dsp.id               ),
        .result_o    ( dsp.result           )
    );
    // Output Arbitration
    // Data, valid and ready are concatenated in the same {div, dsp} order so
    // each handshake stays paired with its payload.
    stream_arbiter #(
      .DATA_T ( result_t ),
      .N_INP  ( 2        )
    ) i_stream_arbiter (
      .clk_i,
      .rst_ni      ( ~rst_i                 ),
      .inp_data_i  ( {div, dsp}             ),
      .inp_valid_i ( {div_valid, dsp_valid} ),
      .inp_ready_o ( {div_ready, dsp_ready} ),
      .oup_data_o  ( oup                    ),
      .oup_valid_o ( acc_pvalid_o           ),
      .oup_ready_i ( acc_pready_i           )
    );
  end else begin : gen_vanilla
    // Multiplication
    // Two-operand multiplier used when the DSP unit is not instantiated.
    multiplier #(
      .Width    ( 32      ),
      .IdWidth  ( IdWidth )
    ) i_multiplier (
      .clk_i,
      .rst_i,
      .id_i        ( acc_qid_i              ),
      .operator_i  ( acc_qdata_op_i         ),
      .operand_a_i ( acc_qdata_arga_i       ),
      .operand_b_i ( acc_qdata_argb_i       ),
      .valid_i     ( mul_valid_op           ),
      .ready_o     ( mul_ready_op           ),
      .result_o    ( mul.result             ),
      .valid_o     ( mul_valid              ),
      .ready_i     ( mul_ready              ),
      .id_o        ( mul.id                 )
    );
    // Output Arbitration
    // Same arrangement as in the other branch, with the multiplier taking the
    // place of the DSP unit.
    stream_arbiter #(
      .DATA_T ( result_t ),
      .N_INP  ( 2        )
    ) i_stream_arbiter (
      .clk_i,
      .rst_ni      ( ~rst_i                 ),
      .inp_data_i  ( {div, mul}             ),
      .inp_valid_i ( {div_valid, mul_valid} ),
      .inp_ready_o ( {div_ready, mul_ready} ),
      .oup_data_o  ( oup                    ),
      .oup_valid_o ( acc_pvalid_o           ),
      .oup_ready_i ( acc_pready_i           )
    );
  end

  // Unpack the arbitrated result onto the response channel.
  assign acc_pdata_o = oup.result;
  assign acc_pid_o = oup.id;
endmodule


module dspu #(
  parameter int unsigned Width = 32,
  parameter int unsigned IdWidth = 5
) (
    input  logic               clk_i,      // unused
    input  logic               rst_i,      // unused
    input  logic [IdWidth-1:0] id_i,
    input  logic [31:0]        operator_i,
    input  logic [Width-1:0]   op_a_i,
    input  logic [Width-1:0]   op_b_i,
    input  logic [Width-1:0]   op_c_i,
    input  logic               in_valid_i,
    output logic               in_ready_o,
    output logic               out_valid_o,
    input  logic               out_ready_i,
    output logic [IdWidth-1:0] id_o,
    output logic [Width-1:0]   result_o
);

  // Control signals
  assign out_valid_o = in_valid_i;
  assign in_ready_o = out_ready_i;
  assign id_o = id_i;

  // Decoded fields
  logic [4:0] imm5;
  logic [5:0] imm6;
  assign imm5 = operator_i[24:20];
  assign imm6 = {operator_i[24:20], operator_i[25]};

  // Internal control signals
  logic cmp_signed;            // comparator operation is signed
  enum logic [1:0] {
    None, Reg, Zero, ClipBound
  } cmp_op_b_sel;              // selection of shared comparator operands
  logic clip_unsigned;         // clip operation has "0" as lower bound
  logic clip_register;         // if 1 clip operation uses rs2, else imm5
  enum logic [1:0] {
    NoMul, MulLow, MulHigh, MulMac
  } mul_op;                    // type of multiplication operation
  logic mac_msu;               // multiplication operation is MSU
  logic mul_op_a_sign;         // sign of multiplier operand a
  logic mac_op_b_sign;         // sign of multiplier operand b
  enum logic [3:0] {
    Nop, Abs, Sle, Min, Max, Exths, Exthz, Extbs, Extbz, Clip, Mac, Simd
  } res_sel;                   // result selection

  enum logic [4:0] {
    SimdNop, SimdAdd, SimdSub, SimdAvg, SimdMin, SimdMax, SimdSrl, SimdSra, SimdSll, SimdOr,
    SimdXor, SimdAnd, SimdAbs, SimdExt, SimdIns, SimdDotp, SimdShuffle
  } simd_op;                   // SIMD operation
  enum logic {
    HalfWord, Byte
  } simd_size;                 // SIMD granularity
  enum logic [1:0] {
    Vect, Sc, Sci
  } simd_mode;                 // SIMD mode
  logic simd_signed;           // SIMD operation is signed and uses sign-extended imm6
  logic simd_dotp_op_a_signed; // signedness of SIMD dotp operand a
  logic simd_dotp_op_b_signed; // signedness of SIMD dotp operand b
  logic simd_dotp_acc;         // accumulate result of SIMD dotp on destination reg

  // --------------------
  // Decoder
  // --------------------

  always_comb begin
    cmp_signed = 1'b1;
    cmp_op_b_sel = None;
    clip_unsigned = 1'b0;
    clip_register = 1'b0;
    mul_op = NoMul;
    mac_msu = 1'b0;
    mul_op_a_sign = 1'b0;
    mac_op_b_sign = 1'b0;
    res_sel = Nop;
    simd_op = SimdNop;
    simd_size = HalfWord;
    simd_mode = Vect;
    simd_signed = 1;
    simd_dotp_op_a_signed = 1;
    simd_dotp_op_b_signed = 1;
    simd_dotp_acc = 0;
    unique casez (operator_i)
      // Multiplications from M extension
      riscv_instr::MUL: begin
        mul_op = MulLow;
        mul_op_a_sign = 1'b1;
        mac_op_b_sign = 1'b1;
        res_sel = Mac;
      end
      riscv_instr::MULH: begin
        mul_op = MulHigh;
        mul_op_a_sign = 1'b1;
        mac_op_b_sign = 1'b1;
        res_sel = Mac;
      end
      riscv_instr::MULHSU: begin
        mul_op = MulHigh;
        mul_op_a_sign = 1'b1;
        res_sel = Mac;
      end
      riscv_instr::MULHU: begin
        mul_op = MulHigh;
        res_sel = Mac;
      end
      // Instructions from Xpulpimg
      riscv_instr::P_ABS: begin
        cmp_op_b_sel = Zero;
        res_sel = Abs;
      end
      riscv_instr::P_SLET: begin
        cmp_op_b_sel = Reg;
        res_sel = Sle;
      end
      riscv_instr::P_SLETU: begin
        cmp_signed = 1'b0;
        cmp_op_b_sel = Reg;
        res_sel = Sle;
      end
      riscv_instr::P_MIN: begin
        cmp_op_b_sel = Reg;
        res_sel = Min;
      end
      riscv_instr::P_MINU: begin
        cmp_signed = 1'b0;
        cmp_op_b_sel = Reg;
        res_sel = Min;
      end
      riscv_instr::P_MAX: begin
        cmp_op_b_sel = Reg;
        res_sel = Max;
      end
      riscv_instr::P_MAXU: begin
        cmp_signed = 1'b0;
        cmp_op_b_sel = Reg;
        res_sel = Max;
      end
      riscv_instr::P_EXTHS: begin
        cmp_op_b_sel = Reg;
        res_sel = Exths;
      end
      riscv_instr::P_EXTHZ: begin
        cmp_op_b_sel = Reg;
        res_sel = Exthz;
      end
      riscv_instr::P_EXTBS: begin
        cmp_op_b_sel = Reg;
        res_sel = Extbs;
      end
      riscv_instr::P_EXTBZ: begin
        cmp_op_b_sel = Reg;
        res_sel = Extbz;
      end
      riscv_instr::P_CLIP: begin
        cmp_op_b_sel = ClipBound;
        res_sel = Clip;
      end
      riscv_instr::P_CLIPU: begin
        clip_unsigned = 1'b1;
        cmp_op_b_sel = ClipBound;
        res_sel = Clip;
      end
      riscv_instr::P_CLIPR: begin
        clip_register = 1'b1;
        cmp_op_b_sel = ClipBound;
        res_sel = Clip;
      end
      riscv_instr::P_CLIPUR: begin
        clip_unsigned = 1'b1;
        clip_register = 1'b1;
        cmp_op_b_sel = ClipBound;
        res_sel = Clip;
      end
      riscv_instr::P_MAC: begin
        mul_op = MulMac;
        mul_op_a_sign = 1'b1;
        mac_op_b_sign = 1'b1;
        res_sel = Mac;
      end
      riscv_instr::P_MSU: begin
        mul_op = MulMac;
        mac_msu = 1'b1;
        mul_op_a_sign = 1'b1;
        mac_op_b_sign = 1'b1;
        res_sel = Mac;
      end
      riscv_instr::PV_ADD_H: begin
        simd_op = SimdAdd;
        res_sel = Simd;
      end
      riscv_instr::PV_ADD_SC_H: begin
        simd_op = SimdAdd;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_ADD_SCI_H: begin
        simd_op = SimdAdd;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_ADD_B: begin
        simd_op = SimdAdd;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_ADD_SC_B: begin
        simd_op = SimdAdd;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_ADD_SCI_B: begin
        simd_op = SimdAdd;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_SUB_H: begin
        simd_op = SimdSub;
        res_sel = Simd;
      end
      riscv_instr::PV_SUB_SC_H: begin
        simd_op = SimdSub;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_SUB_SCI_H: begin
        simd_op = SimdSub;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_SUB_B: begin
        simd_op = SimdSub;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_SUB_SC_B: begin
        simd_op = SimdSub;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_SUB_SCI_B: begin
        simd_op = SimdSub;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_AVG_H: begin
        simd_op = SimdAvg;
        res_sel = Simd;
      end
      riscv_instr::PV_AVG_SC_H: begin
        simd_op = SimdAvg;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_AVG_SCI_H: begin
        simd_op = SimdAvg;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_AVG_B: begin
        simd_op = SimdAvg;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_AVG_SC_B: begin
        simd_op = SimdAvg;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_AVG_SCI_B: begin
        simd_op = SimdAvg;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_AVGU_H: begin
        simd_op = SimdAvg;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_AVGU_SC_H: begin
        simd_op = SimdAvg;
        simd_mode = Sc;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_AVGU_SCI_H: begin
        simd_op = SimdAvg;
        simd_mode = Sci;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_AVGU_B: begin
        simd_op = SimdAvg;
        simd_size = Byte;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_AVGU_SC_B: begin
        simd_op = SimdAvg;
        simd_size = Byte;
        simd_mode = Sc;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_AVGU_SCI_B: begin
        simd_op = SimdAvg;
        simd_size = Byte;
        simd_mode = Sci;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MIN_H: begin
        simd_op = SimdMin;
        res_sel = Simd;
      end
      riscv_instr::PV_MIN_SC_H: begin
        simd_op = SimdMin;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_MIN_SCI_H: begin
        simd_op = SimdMin;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_MIN_B: begin
        simd_op = SimdMin;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_MIN_SC_B: begin
        simd_op = SimdMin;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_MIN_SCI_B: begin
        simd_op = SimdMin;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_MINU_H: begin
        simd_op = SimdMin;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MINU_SC_H: begin
        simd_op = SimdMin;
        simd_mode = Sc;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MINU_SCI_H: begin
        simd_op = SimdMin;
        simd_mode = Sci;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MINU_B: begin
        simd_op = SimdMin;
        simd_size = Byte;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MINU_SC_B: begin
        simd_op = SimdMin;
        simd_size = Byte;
        simd_mode = Sc;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MINU_SCI_B: begin
        simd_op = SimdMin;
        simd_size = Byte;
        simd_mode = Sci;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MAX_H: begin
        simd_op = SimdMax;
        res_sel = Simd;
      end
      riscv_instr::PV_MAX_SC_H: begin
        simd_op = SimdMax;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_MAX_SCI_H: begin
        simd_op = SimdMax;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_MAX_B: begin
        simd_op = SimdMax;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_MAX_SC_B: begin
        simd_op = SimdMax;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_MAX_SCI_B: begin
        simd_op = SimdMax;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_MAXU_H: begin
        simd_op = SimdMax;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MAXU_SC_H: begin
        simd_op = SimdMax;
        simd_mode = Sc;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MAXU_SCI_H: begin
        simd_op = SimdMax;
        simd_mode = Sci;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MAXU_B: begin
        simd_op = SimdMax;
        simd_size = Byte;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MAXU_SC_B: begin
        simd_op = SimdMax;
        simd_size = Byte;
        simd_mode = Sc;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_MAXU_SCI_B: begin
        simd_op = SimdMax;
        simd_size = Byte;
        simd_mode = Sci;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_SRL_H: begin
        simd_op = SimdSrl;
        res_sel = Simd;
      end
      riscv_instr::PV_SRL_SC_H: begin
        simd_op = SimdSrl;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_SRL_SCI_H: begin
        simd_op = SimdSrl;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_SRL_B: begin
        simd_op = SimdSrl;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_SRL_SC_B: begin
        simd_op = SimdSrl;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_SRL_SCI_B: begin
        simd_op = SimdSrl;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_SRA_H: begin
        simd_op = SimdSra;
        res_sel = Simd;
      end
      riscv_instr::PV_SRA_SC_H: begin
        simd_op = SimdSra;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_SRA_SCI_H: begin
        simd_op = SimdSra;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_SRA_B: begin
        simd_op = SimdSra;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_SRA_SC_B: begin
        simd_op = SimdSra;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_SRA_SCI_B: begin
        simd_op = SimdSra;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_SLL_H: begin
        simd_op = SimdSll;
        res_sel = Simd;
      end
      riscv_instr::PV_SLL_SC_H: begin
        simd_op = SimdSll;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_SLL_SCI_H: begin
        simd_op = SimdSll;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_SLL_B: begin
        simd_op = SimdSll;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_SLL_SC_B: begin
        simd_op = SimdSll;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_SLL_SCI_B: begin
        simd_op = SimdSll;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_OR_H: begin
        simd_op = SimdOr;
        res_sel = Simd;
      end
      riscv_instr::PV_OR_SC_H: begin
        simd_op = SimdOr;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_OR_SCI_H: begin
        simd_op = SimdOr;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_OR_B: begin
        simd_op = SimdOr;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_OR_SC_B: begin
        simd_op = SimdOr;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_OR_SCI_B: begin
        simd_op = SimdOr;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_XOR_H: begin
        simd_op = SimdXor;
        res_sel = Simd;
      end
      riscv_instr::PV_XOR_SC_H: begin
        simd_op = SimdXor;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_XOR_SCI_H: begin
        simd_op = SimdXor;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_XOR_B: begin
        simd_op = SimdXor;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_XOR_SC_B: begin
        simd_op = SimdXor;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_XOR_SCI_B: begin
        simd_op = SimdXor;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_AND_H: begin
        simd_op = SimdAnd;
        res_sel = Simd;
      end
      riscv_instr::PV_AND_SC_H: begin
        simd_op = SimdAnd;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_AND_SCI_H: begin
        simd_op = SimdAnd;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_AND_B: begin
        simd_op = SimdAnd;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_AND_SC_B: begin
        simd_op = SimdAnd;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_AND_SCI_B: begin
        simd_op = SimdAnd;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_ABS_H: begin
        simd_op = SimdAbs;
        res_sel = Simd;
      end
      riscv_instr::PV_ABS_B: begin
        simd_op = SimdAbs;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_EXTRACT_H: begin
        simd_op = SimdExt;
        res_sel = Simd;
      end
      riscv_instr::PV_EXTRACT_B: begin
        simd_op = SimdExt;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_EXTRACTU_H: begin
        simd_op = SimdExt;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_EXTRACTU_B: begin
        simd_op = SimdExt;
        simd_size = Byte;
        simd_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_INSERT_H: begin
        simd_op = SimdIns;
        res_sel = Simd;
      end
      riscv_instr::PV_INSERT_B: begin
        simd_op = SimdIns;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUP_H: begin
        simd_op = SimdDotp;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUP_SC_H: begin
        simd_op = SimdDotp;
        simd_mode = Sc;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUP_SCI_H: begin
        simd_op = SimdDotp;
        simd_mode = Sci;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUP_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUP_SC_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sc;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUP_SCI_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sci;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUSP_H: begin
        simd_op = SimdDotp;
        simd_dotp_op_a_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUSP_SC_H: begin
        simd_op = SimdDotp;
        simd_mode = Sc;
        simd_dotp_op_a_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUSP_SCI_H: begin
        simd_op = SimdDotp;
        simd_mode = Sci;
        simd_dotp_op_a_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUSP_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_dotp_op_a_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUSP_SC_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sc;
        simd_dotp_op_a_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTUSP_SCI_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sci;
        simd_dotp_op_a_signed = 0;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTSP_H: begin
        simd_op = SimdDotp;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTSP_SC_H: begin
        simd_op = SimdDotp;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTSP_SCI_H: begin
        simd_op = SimdDotp;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTSP_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTSP_SC_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sc;
        res_sel = Simd;
      end
      riscv_instr::PV_DOTSP_SCI_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sci;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUP_H: begin
        simd_op = SimdDotp;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUP_SC_H: begin
        simd_op = SimdDotp;
        simd_mode = Sc;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUP_SCI_H: begin
        simd_op = SimdDotp;
        simd_mode = Sci;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUP_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUP_SC_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sc;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUP_SCI_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sci;
        simd_signed = 0;
        simd_dotp_op_a_signed = 0;
        simd_dotp_op_b_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUSP_H: begin
        simd_op = SimdDotp;
        simd_dotp_op_a_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUSP_SC_H: begin
        simd_op = SimdDotp;
        simd_mode = Sc;
        simd_dotp_op_a_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUSP_SCI_H: begin
        simd_op = SimdDotp;
        simd_mode = Sci;
        simd_dotp_op_a_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUSP_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_dotp_op_a_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUSP_SC_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sc;
        simd_dotp_op_a_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTUSP_SCI_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sci;
        simd_dotp_op_a_signed = 0;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTSP_H: begin
        simd_op = SimdDotp;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTSP_SC_H: begin
        simd_op = SimdDotp;
        simd_mode = Sc;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTSP_SCI_H: begin
        simd_op = SimdDotp;
        simd_mode = Sci;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTSP_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTSP_SC_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sc;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SDOTSP_SCI_B: begin
        simd_op = SimdDotp;
        simd_size = Byte;
        simd_mode = Sci;
        simd_dotp_acc = 1;
        res_sel = Simd;
      end
      riscv_instr::PV_SHUFFLE2_H: begin
        simd_op = SimdShuffle;
        res_sel = Simd;
      end
      riscv_instr::PV_SHUFFLE2_B: begin
        simd_op = SimdShuffle;
        simd_size = Byte;
        res_sel = Simd;
      end
      default: ;
    endcase
  end

  //  ___    _  _____  _    ___   _  _____  _  _
  // |   \  /_\|_   _|/_\  | _ \ /_\|_   _|| || |
  // | |) |/ _ \ | | / _ \ |  _// _ \ | |  | __ |
  // |___//_/ \_\|_|/_/ \_\|_| /_/ \_\|_|  |_||_|
  //

  // --------------------
  // Clips
  // --------------------
  // Bounds used by the clip operations:
  //  - immediate forms (clip/clipu): -2^(imm5-1) and 2^(imm5-1)-1
  //  - register forms (clipr/clipur): -rs2-1 (i.e. ~op_b_i) and rs2
  //  - unsigned forms force the lower bound to zero
  logic [Width-1:0] clip_lower;       // -2^(imm5-1); all ones (-1) when imm5 is zero
  logic [Width-1:0] clip_op_b;        // upper clip bound
  logic [Width-1:0] clip_op_b_n;      // lower clip bound
  logic [Width-1:0] clip_comp;        // bound handed to the shared comparator
  logic             clip_use_n_bound; // 1: compare against the lower bound

  always_comb begin
    // Width+1 ones shifted left by imm5 and back right by one leave ones in
    // bits [Width-1:imm5-1] after truncation to Width bits
    clip_lower = ({(Width+1){1'b1}} << $unsigned(imm5)) >> 1;

    // Upper bound first, since its sign steers the comparator operand selection
    clip_op_b = clip_register ? op_b_i : ~clip_lower;
    clip_op_b_n = clip_unsigned ? 'b0 : (clip_register ? ~op_b_i : clip_lower);

    // NOT(rs1 >= 0 AND clip_op_b >= 0): set as soon as one of the two is negative
    clip_use_n_bound = ~(~op_a_i[Width-1] & ~clip_op_b[Width-1]);

    // A clip would need two comparisons (one per bound); a single one is enough
    // when the bound to compare against is picked from the signs of rs1 and of
    // the upper bound (rs2 or 2^(imm5-1)-1)
    clip_comp = clip_use_n_bound ? clip_op_b_n : clip_op_b;
  end

  // --------------------
  // Shared comparator
  // --------------------
  logic [Width-1:0] cmp_op_a;
  logic [Width-1:0] cmp_op_b;
  logic             cmp_result;
  // Comparator inputs widened by one bit: the extra MSB replicates the sign bit
  // when cmp_signed is set and is zero otherwise, so that a single signed
  // comparison serves both signed and unsigned operations
  logic signed [Width:0] cmp_ext_a, cmp_ext_b;

  // Operand A is always rs1
  assign cmp_op_a = op_a_i;

  // Operand B is rs2, zero or the clip bound picked by the clip logic
  always_comb begin
    unique case (cmp_op_b_sel)
      ClipBound: cmp_op_b = clip_comp;
      Zero:      cmp_op_b = '0;
      Reg:       cmp_op_b = op_b_i;
      default:   cmp_op_b = '0;
    endcase
  end

  assign cmp_ext_a = {cmp_op_a[Width-1] & cmp_signed, cmp_op_a};
  assign cmp_ext_b = {cmp_op_b[Width-1] & cmp_signed, cmp_op_b};

  // cmp_result = (A <= B)
  assign cmp_result = cmp_ext_a <= cmp_ext_b;

  // --------------------
  // Multiplier & acc
  // --------------------

  // 32x32 into 32 bits multiplier & accumulator
  logic [Width-1:0]   mul_op_a;   // rs1, negated for multiply-subtract
  logic [2*Width-1:0] mul_result; // full-width product
  logic [Width-1:0]   mac_result; // selected multiplier/accumulator output
  // Multiplier inputs widened by one bit: the extra MSB is the operand sign bit
  // when the operand is to be treated as signed, zero otherwise
  logic signed [Width:0] mul_ext_a, mul_ext_b;

  // With mac_msu set the first operand is negated so the product is -op_a*op_b
  assign mul_op_a = mac_msu ? -op_a_i : op_a_i;

  // NOTE(review): the sign control of operand B is named mac_op_b_sign while the
  // one of operand A is mul_op_a_sign; both are declared outside this section -
  // confirm that the asymmetric naming is intentional
  assign mul_ext_a = {mul_op_a[Width-1] & mul_op_a_sign, mul_op_a};
  assign mul_ext_b = {op_b_i[Width-1] & mac_op_b_sign, op_b_i};

  // Signed product of the extended operands, kept on 2*Width bits
  assign mul_result = mul_ext_a * mul_ext_b;

  always_comb begin
    unique case (mul_op)
      // multiply-accumulate: low half of the product added to op_c_i
      MulMac:  mac_result = mul_result[Width-1:0] + op_c_i;
      // high multiply: upper half of the product
      MulHigh: mac_result = mul_result[2*Width-1:Width];
      // plain multiply: lower half of the product
      MulLow:  mac_result = mul_result[Width-1:0];
      default: mac_result = '0;
    endcase
  end

  // --------------------
  // SIMD operations
  // --------------------

  // SIMD operands and result, viewed as four byte lanes; a half-word lane is a
  // pair of adjacent bytes
  logic [3:0][7:0] simd_op_a;
  logic [3:0][7:0] simd_op_b;
  logic [3:0][7:0] simd_op_c;
  logic [3:0][7:0] simd_result;
  // Immediate operand widened to one half-word; byte operations use its low byte
  logic [1:0][7:0] simd_imm;

  // Sign- or zero-extend imm6 depending on the signedness of the operation.
  // Kept as if/else: merging the two branches into one conditional expression
  // would make the whole expression unsigned and drop the sign extension
  always_comb begin
    if (simd_signed) begin
      simd_imm = $signed(imm6);
    end else begin
      simd_imm = $unsigned(imm6);
    end
  end

  // Second operand replicated on every lane when the mode is not Vect:
  // lowest half-word/byte of op_b_i for Sc, extended immediate otherwise
  logic [15:0] simd_scalar_h;
  logic [7:0]  simd_scalar_b;

  assign simd_scalar_h = (simd_mode == Sc) ? op_b_i[15:0] : simd_imm;
  assign simd_scalar_b = (simd_mode == Sc) ? op_b_i[7:0] : simd_imm[0];

  // Split op_a_i, op_b_i and op_c_i into SIMD lanes at the selected granularity;
  // all operands stay zero for any other simd_size value
  always_comb begin
    simd_op_a = '0;
    simd_op_b = '0;
    simd_op_c = '0;
    unique case (simd_size)
      // one lane per byte
      Byte:
        for (int lane = 0; lane < Width/8; lane++) begin
          simd_op_a[lane] = op_a_i[8*lane +: 8];
          simd_op_c[lane] = op_c_i[8*lane +: 8];
          // own byte of op_b_i in Vect mode, replicated scalar/immediate otherwise
          simd_op_b[lane] = (simd_mode == Vect) ? op_b_i[8*lane +: 8] : simd_scalar_b;
        end
      // one lane per half-word, i.e. two bytes of the lane arrays
      HalfWord:
        for (int lane = 0; lane < Width/16; lane++) begin
          simd_op_a[2*lane +: 2] = op_a_i[16*lane +: 16];
          simd_op_c[2*lane +: 2] = op_c_i[16*lane +: 16];
          // own half-word of op_b_i in Vect mode, replicated scalar/immediate otherwise
          simd_op_b[2*lane +: 2] = (simd_mode == Vect) ? op_b_i[16*lane +: 16] : simd_scalar_h;
        end
      default: ;
    endcase
  end

  // SIMD unit
  // Computes simd_result from simd_op_a/simd_op_b/simd_op_c for the operation
  // selected by simd_op, at the lane granularity selected by simd_size.
  // simd_result stays zero for any simd_size/simd_op combination not handled below.
  always_comb begin
    simd_result = 'b0;
    unique case (simd_size)
      // half-word granularity: lane i is the byte pair [2*i +: 2]
      HalfWord: begin
        unique case (simd_op)
          // lane-wise addition, result truncated to the lane width
          SimdAdd:
            for (int i = 0; i < Width/16; i++)
              simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) + $signed(simd_op_b[2*i +: 2]);
          // lane-wise subtraction, result truncated to the lane width
          SimdSub:
            for (int i = 0; i < Width/16; i++)
              simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) - $signed(simd_op_b[2*i +: 2]);
          // lane-wise average: the sum is first truncated to the lane width, then
          // shifted right by one; the vacated MSB takes the sign bit of the sum
          // when simd_signed is set and zero otherwise
          SimdAvg:
            for (int i = 0; i < Width/16; i++) begin
              simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) + $signed(simd_op_b[2*i +: 2]);
              simd_result[2*i +: 2] = {simd_result[2*i+1][7] & simd_signed, simd_result[2*i +: 2]} >> 1;
            end
          // lane-wise minimum; operands are extended by one bit (sign bit when
          // simd_signed, zero otherwise) so one signed compare covers both cases
          SimdMin:
            for (int i = 0; i < Width/16; i++)
              simd_result[2*i +: 2] = $signed({simd_op_a[2*i+1][7] & simd_signed, simd_op_a[2*i +: 2]}) <=
                                       $signed({simd_op_b[2*i+1][7] & simd_signed, simd_op_b[2*i +: 2]}) ?
                                       simd_op_a[2*i +: 2] : simd_op_b[2*i +: 2];
          // lane-wise maximum, same operand extension as SimdMin
          SimdMax:
            for (int i = 0; i < Width/16; i++)
              simd_result[2*i +: 2] = $signed({simd_op_a[2*i+1][7] & simd_signed, simd_op_a[2*i +: 2]}) >
                                       $signed({simd_op_b[2*i+1][7] & simd_signed, simd_op_b[2*i +: 2]}) ?
                                       simd_op_a[2*i +: 2] : simd_op_b[2*i +: 2];
          // lane-wise shifts: the shift amount is the low 4 bits of the lane's
          // operand B (taken from its low byte)
          SimdSrl:
            for (int i = 0; i < Width/16; i++)
              simd_result[2*i +: 2] = $unsigned(simd_op_a[2*i +: 2]) >> simd_op_b[2*i][3:0];
          SimdSra:
            for (int i = 0; i < Width/16; i++)
              simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) >>> simd_op_b[2*i][3:0];
          SimdSll:
            for (int i = 0; i < Width/16; i++)
              simd_result[2*i +: 2] = $unsigned(simd_op_a[2*i +: 2]) << simd_op_b[2*i][3:0];
          // bitwise operations act on the whole word, lanes are irrelevant
          SimdOr: simd_result = simd_op_a | simd_op_b;
          SimdXor: simd_result = simd_op_a ^ simd_op_b;
          SimdAnd: simd_result = simd_op_a & simd_op_b;
          // lane-wise absolute value: strictly positive lanes pass through, the
          // others are negated
          SimdAbs:
            for (int i = 0; i < Width/16; i++)
              simd_result[2*i +: 2] = $signed(simd_op_a[2*i +: 2]) > 0 ? simd_op_a[2*i +: 2] : -$signed(simd_op_a[2*i +: 2]);
          // extract: the half-word of operand A selected by imm6[0] goes to the
          // lower half of the result
          SimdExt: begin
            simd_result[1:0] = simd_op_a[2*imm6[0] +: 2];
            // sign- or zero-extend
            simd_result[3:2] = {16{simd_op_a[2*imm6[0]+1][7] & simd_signed}};
          end
          // insert: start from op_c_i and overwrite the half-word selected by
          // imm6[0] with the lowest half-word of operand A
          SimdIns: begin
            simd_result = op_c_i;
            simd_result[2*imm6[0] +: 2] = simd_op_a[1:0];
          end
          // dot product: sum of the lane products, each operand extended by one
          // bit according to its own signedness flag
          SimdDotp: begin
            simd_result = op_c_i & {(Width){simd_dotp_acc}}; // accumulate on rd or start from zero
            for (int i = 0; i < Width/16; i++) begin
              simd_result = $signed(simd_result) + $signed({simd_op_a[2*i+1][7] & simd_dotp_op_a_signed, simd_op_a[2*i +: 2]}) *
                                                   $signed({simd_op_b[2*i+1][7] & simd_dotp_op_b_signed, simd_op_b[2*i +: 2]});
            end
          end
          // shuffle: for each lane, bit 1 of operand B's low byte picks the source
          // (1: operand A, 0: operand C) and bit 0 picks the half-word within it
          SimdShuffle:
            for (int i = 0; i < Width/16; i++)
              simd_result[2*i +: 2] = simd_op_b[2*i][1] ? simd_op_a[2*simd_op_b[2*i][0] +: 2] : simd_op_c[2*simd_op_b[2*i][0] +: 2];
          default: ;
        endcase
      end
      // byte granularity: lane i is byte [i]
      Byte: begin
        unique case (simd_op)
          // lane-wise addition, result truncated to the lane width
          SimdAdd:
            for (int i = 0; i < Width/8; i++)
              simd_result[i] = $signed(simd_op_a[i]) + $signed(simd_op_b[i]);
          // lane-wise subtraction, result truncated to the lane width
          SimdSub:
            for (int i = 0; i < Width/8; i++)
              simd_result[i] = $signed(simd_op_a[i]) - $signed(simd_op_b[i]);
          // lane-wise average: the sum is first truncated to the lane width, then
          // shifted right by one; the vacated MSB takes the sign bit of the sum
          // when simd_signed is set and zero otherwise
          SimdAvg:
            for (int i = 0; i < Width/8; i++) begin
              simd_result[i] = $signed(simd_op_a[i]) + $signed(simd_op_b[i]);
              simd_result[i] = {simd_result[i][7] & simd_signed, simd_result[i]} >> 1;
            end
          // lane-wise minimum; operands are extended by one bit (sign bit when
          // simd_signed, zero otherwise) so one signed compare covers both cases
          SimdMin:
            for (int i = 0; i < Width/8; i++)
              simd_result[i] = $signed({simd_op_a[i][7] & simd_signed, simd_op_a[i]}) <=
                               $signed({simd_op_b[i][7] & simd_signed, simd_op_b[i]}) ?
                               simd_op_a[i] : simd_op_b[i];
          // lane-wise maximum, same operand extension as SimdMin
          SimdMax:
            for (int i = 0; i < Width/8; i++)
              simd_result[i] = $signed({simd_op_a[i][7] & simd_signed, simd_op_a[i]}) >
                               $signed({simd_op_b[i][7] & simd_signed, simd_op_b[i]}) ?
                               simd_op_a[i] : simd_op_b[i];
          // lane-wise shifts: the shift amount is the low 3 bits of the lane's
          // operand B
          SimdSrl:
            for (int i = 0; i < Width/8; i++)
              simd_result[i] = $unsigned(simd_op_a[i]) >> simd_op_b[i][2:0];
          SimdSra:
            for (int i = 0; i < Width/8; i++)
              simd_result[i] = $signed(simd_op_a[i]) >>> simd_op_b[i][2:0];
          SimdSll:
            for (int i = 0; i < Width/8; i++)
              simd_result[i] = $unsigned(simd_op_a[i]) << simd_op_b[i][2:0];
          // bitwise operations act on the whole word, lanes are irrelevant
          SimdOr: simd_result = simd_op_a | simd_op_b;
          SimdXor: simd_result = simd_op_a ^ simd_op_b;
          SimdAnd: simd_result = simd_op_a & simd_op_b;
          // lane-wise absolute value: strictly positive lanes pass through, the
          // others are negated
          SimdAbs:
            for (int i = 0; i < Width/8; i++)
              simd_result[i] = $signed(simd_op_a[i]) > 0 ? simd_op_a[i] : -$signed(simd_op_a[i]);
          // extract: the byte of operand A selected by imm6[1:0] goes to the
          // lowest byte of the result
          SimdExt: begin
            simd_result[0] = simd_op_a[imm6[1:0]];
            // sign- or zero-extend
            simd_result[3:1] = {24{simd_op_a[imm6[1:0]][7] & simd_signed}};
          end
          // insert: start from op_c_i and overwrite the byte selected by
          // imm6[1:0] with the lowest byte of operand A
          SimdIns: begin
            simd_result = op_c_i;
            simd_result[imm6[1:0]] = simd_op_a[0];
          end
          // dot product: sum of the lane products, each operand extended by one
          // bit according to its own signedness flag
          SimdDotp: begin
            simd_result = op_c_i & {(Width){simd_dotp_acc}}; // accumulate on rd or start from zero
            for (int i = 0; i < Width/8; i++)
              simd_result = $signed(simd_result) + $signed({simd_op_a[i][7] & simd_dotp_op_a_signed, simd_op_a[i]}) *
                                                   $signed({simd_op_b[i][7] & simd_dotp_op_b_signed, simd_op_b[i]});
          end
          // shuffle: for each lane, bit 2 of operand B picks the source
          // (1: operand A, 0: operand C) and bits 1:0 pick the byte within it
          SimdShuffle:
            for (int i = 0; i < Width/8; i++)
              simd_result[i] = simd_op_b[i][2] ? simd_op_a[simd_op_b[i][1:0]] : simd_op_c[simd_op_b[i][1:0]];
          default: ;
        endcase
      end
      default: ;
    endcase
  end

  // --------------------
  // Result generation
  // --------------------

  // Output multiplexer: res_sel picks which functional unit drives result_o;
  // unlisted selections produce zero
  always_comb begin
    unique case (res_sel)
      // results coming straight from the SIMD unit and the multiplier/accumulator
      Simd: result_o = simd_result;
      Mac: result_o = mac_result;

      // byte and half-word extensions of rs1 (s: sign-extend, z: zero-extend)
      Extbz: result_o = $unsigned(op_a_i[7:0]);
      Extbs: result_o = $signed(op_a_i[7:0]);
      Exthz: result_o = $unsigned(op_a_i[15:0]);
      Exths: result_o = $signed(op_a_i[15:0]);

      // operations built on the shared comparator, cmp_result = (cmp_op_a <= cmp_op_b)
      Sle: result_o = $unsigned(cmp_result);
      Abs: result_o = cmp_result ? -$signed(op_a_i) : op_a_i;
      Min: result_o = cmp_result ? op_a_i : op_b_i;
      Max: result_o = cmp_result ? op_b_i : op_a_i;

      // Clip output, chosen from the comparison result and the operand signs:
      // - rs1 <= clip_comp (cmp_result = 1)
      //   * clip_comp = clip_op_b_n (rs1 < 0 or clip_op_b < 0): rs1 is at or below
      //     the lower bound and, since this check has priority over the others,
      //     result_o is clipped to clip_op_b_n
      //   * clip_comp = clip_op_b (rs1 >= 0 and clip_op_b >= 0): rs1 <= clip_op_b,
      //     hence clip_op_b_n < 0 <= rs1 <= clip_op_b and rs1 is already within
      //     the clip bounds
      // - rs1 > clip_comp (cmp_result = 0)
      //   * rs1 < 0: clip_use_n_bound = 1, so clip_comp = clip_op_b_n; then
      //     clip_op_b_n < rs1 < 0 <= clip_op_b and rs1 is already within the
      //     clip bounds
      //   * rs1 >= 0: clip_comp depends on the sign of clip_op_b
      //     + clip_op_b < 0: clip_comp = clip_op_b_n, so rs1 > clip_op_b_n and
      //       rs1 >= 0, i.e. clip_op_b < 0 <= clip_op_b_n < rs1; rs1 is not
      //       <= clip_op_b_n but it is >= clip_op_b, so result_o is clipped to
      //       clip_op_b
      //     + clip_op_b >= 0: clip_comp = clip_op_b and rs1 > clip_op_b, so
      //       result_o is clipped to the upper bound
      Clip: result_o = cmp_result
                     ? (clip_use_n_bound ? clip_op_b_n : op_a_i)
                     : (op_a_i[Width-1] ? op_a_i : clip_op_b);

      default: result_o = '0;
    endcase
  end

endmodule