--------------------------------------------------------------------------
-- f-cpu/vhdl/eu_rop2/rop2_unit.vhdl - ROP2 Execution Unit for the F-CPU
-- Copyright (C) 2000-2002 Yann GUIDON (whygee@f-cpu.org)
--
-- v0.2: Michael Riepe reorganized the main for-generate loop
-- + corrected the lookup table (wrong op for ORN)
-- v0.3: YG replaced UMAX/8 with MAXSIZE :-)
-- v0.4: 11/17/2000, YG wants to rewrite the unit with MR's gate library ...
--  -> abandoned. We stick to high-level coding.
-- v0.5: 8/12/2001, YG modifies the interface, the names, adds MUX,...
-- Sun Aug 12 01:16:11 2001: still untested but it includes
-- the latest updates to the FC0 core.
-- Tue Aug 21 08:45:16 2001: trying to make something that works reasonably.
-- Mon Sep  3 08:49:45 2001: YG fixed some silly compile bugs :-/
-- vanillaHDL script and testbench added.
-- Sun Oct  7 05:39:23 2001: changed ROP2 function to MUX4
-- Mon Oct  8 01:39:45 2001: merged SELECT with the FANOUT loop.
-- sam nov 24 04:40:35 2001: cleanup
-- jeu nov 29 20:15:19 2001: off-by-one corrected in the interface vector size.
-- lun dec 10 01:51:17 2001: corrected the MUX input order to correctly map ROP2 
-- Fri Jun 28 05:51:44 2002: problem with ncsim : reverting to older with-select strategy.
--                               also removing several fanout stuffs.
-- Tue Jul 23 05:59:24 2002: YG's higher level version
--
--------------------------BEGIN-VHDL-LICENSE-----------------------------
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
---------------------------END-VHDL-LICENSE------------------------------
--
-- It should be easily synthesizable, but synthesis has not been attempted yet.
-- What matters most today is that it compiles and behaves correctly.
-- Warning : this code is and should remain purely combinatorial,
-- there is no latching here, it must be done at another level.
-- Furthermore, the function lookup table is now moved earlier
-- in the pipeline, in parallel with the Xbar cycle : look at the
-- f-cpu/vhdl/eu_rop2/rop2_xbar.vhdl file
-- The big fanout problem (propagation of the opcode from 1 to 64 bits)
-- overlaps the Xbar cycle, so we can make a nice "signal tree".
-- Finally, only byte combines are possible so far. The COMBINE
-- instruction is still not completely specified in the manual.
--
-- Note : rop2.eps explains the trick to relieve the fanout (1->8) problem.
--------------------------------------------------------------------------

LIBRARY ieee;
    USE ieee.std_logic_1164.ALL;
    USE ieee.numeric_std.all;
LIBRARY work;
    USE work.FCPU_config.ALL;

-- EU_ROP2 : port interface of the ROP2 execution unit.
--
-- Per result bit i, the unit selects one of the four function bits
-- according to operands A and B :
--   A(i)='1'  and B(i)='1'   -> ROP2_function_bit0(i/4)
--   A(i)='1'  and B(i)/='1'  -> ROP2_function_bit1(i/4)
--   A(i)/='1' and B(i)='1'   -> ROP2_function_bit2(i/4)
--   otherwise                -> ROP2_function_bit3(i/4)
-- ROP2_mode then chooses what is driven on ROP2_out : this bitwise result,
-- its per-byte AND or OR reduction, or a bitwise MUX of A and B steered
-- by C.
Entity EU_ROP2 is
  port(
    ROP2_in_A,
    ROP2_in_B,
    ROP2_in_C : in F_VECTOR;    -- the 3 operands (C is the per-bit select of the MUX : '1' picks B, anything else picks A)
    ROP2_function_bit0,
    ROP2_function_bit1,   -- pre-buffered boolean function bits
    ROP2_function_bit2,
    ROP2_function_bit3 : in Std_ulogic_vector((MAXSIZE *2)-1 downto 0); -- fanout=4 : entry k serves result bits 4k to 4k+3
    ROP2_mode : in Std_ulogic_vector(1 downto 0);  -- 2 function bits from the instruction : ROP2_DIRECT_MODE, ROP2_AND_MODE, ROP2_OR_MODE; any other value selects the MUX
--   Combine_size : in Std_ulogic_vector(1 downto 0); -- unused ATM. Byte chunks only.
    ROP2_out     : out F_VECTOR     -- the result
  );
end EU_ROP2;

Architecture arch1 of EU_ROP2 is
begin

--------------------------------------------------------------------------
-- ROP2 cycle : combinational part only (no storage, no clock).
--
-- The process below computes, in order :
--   1) per bit    : the A/B multiplexer steered by C, and the ROP2 result
--                   obtained by picking one of the four function bits
--                   according to (A, B);
--   2) per nibble : the AND and OR reduction of four ROP2 result bits;
--   3) per byte   : the AND and OR of the two nibble reductions, stored
--                   twice so that each copy serves four output bits;
--   4) the output selection driven by ROP2_mode.
--------------------------------------------------------------------------

  rop2_unit: process (ROP2_in_A, ROP2_in_B, ROP2_in_C,
    ROP2_function_bit0, ROP2_function_bit1, ROP2_function_bit2,
    ROP2_function_bit3, ROP2_mode)

    -- full-width intermediate results and the value finally driven out
    variable rop_bits, mux_bits, result : F_VECTOR;

    -- reductions over groups of 4 result bits (entry n covers bits 4n to 4n+3)
    variable nibble_and,  nibble_or  : Std_ulogic_vector((MAXSIZE*2)-1 downto 0);
    -- byte-wide reductions, two identical entries per byte (2b and 2b+1)
    variable combine_and, combine_or : Std_ulogic_vector((MAXSIZE*2)-1 downto 0);

    -- decoded operand bits : true only when the bit is exactly '1'
    variable a_is_one, b_is_one : boolean;

  begin

    bit_stage : for k in F_RANGE loop

      -- the MUX : A by default, B where the C bit is '1'
      mux_bits(k) := ROP2_in_A(k);
      if ROP2_in_C(k) = '1' then
        mux_bits(k) := ROP2_in_B(k);
      end if;

      -- the ROP2 mux : (A, B) picks one of the four function bits;
      -- each function-bit entry is shared by 4 consecutive result bits.
      a_is_one := (ROP2_in_A(k) = '1');
      b_is_one := (ROP2_in_B(k) = '1');

      if a_is_one and b_is_one then
        rop_bits(k) := ROP2_function_bit0(k/4);
      elsif a_is_one then
        rop_bits(k) := ROP2_function_bit1(k/4);
      elsif b_is_one then
        rop_bits(k) := ROP2_function_bit2(k/4);
      else
        rop_bits(k) := ROP2_function_bit3(k/4);
      end if;

    end loop;


    -- first reduction level : 4 result bits -> 1 nibble flag
    nibble_stage : for n in 0 to (MAXSIZE*2)-1 loop
      nibble_and(n) := rop_bits(4*n)
                   and rop_bits(4*n+1)
                   and rop_bits(4*n+2)
                   and rop_bits(4*n+3);
      nibble_or(n)  := rop_bits(4*n)
                    or rop_bits(4*n+1)
                    or rop_bits(4*n+2)
                    or rop_bits(4*n+3);
    end loop;

    -- second reduction level : low nibble (2b) and high nibble (2b+1)
    -- of byte b are merged; the byte result is written to both entries
    -- 2b and 2b+1, which the output stage reads through index k/4.
    byte_stage : for b in 0 to MAXSIZE-1 loop
      combine_and(2*b)   := nibble_and(2*b) and nibble_and(2*b+1);
      combine_and(2*b+1) := nibble_and(2*b) and nibble_and(2*b+1);
      combine_or(2*b)    := nibble_or(2*b)  or  nibble_or(2*b+1);
      combine_or(2*b+1)  := nibble_or(2*b)  or  nibble_or(2*b+1);
    end loop;

-- YG> I'm still uncertain about the best way to write a multi-size version
-- YG> because the latency might explode the ROP2 unit's performance.
-- YG> So the multi-size version is dropped until it becomes necessary.
-- YG> Let's stick to plain bytes...


    -- final selection stage : the mode is the same for every bit, so it
    -- is decoded once and the chosen source is copied to the result.
    case ROP2_mode is
      when ROP2_DIRECT_MODE =>
        result := rop_bits;

      when ROP2_AND_MODE =>
        and_fanout : for k in F_RANGE loop
          result(k) := combine_and(k/4);
        end loop;

      when ROP2_OR_MODE =>
        or_fanout : for k in F_RANGE loop
          result(k) := combine_or(k/4);
        end loop;

      when others =>  -- ROP2_MUX_MODE and unknowns
        result := mux_bits;
    end case;

    ROP2_out <= result;
  end process;

end arch1;
