-------------------------------------------------------------------------------
-- register_set.vhdl : the FC0 register set
-- created sun feb 10 11:46:32 GMT 2002 by whygee@f-cpu.org
-- version sun feb 17 06:43:23 GMT 2002 : added synchronous version
--
--------------------------BEGIN-VHDL-LICENCE-----------------------------
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
---------------------------END-VHDL-LICENCE------------------------------
--
-- This file implements the FC0 register set by connecting several
-- building blocks and the appropriate signals.
--
-- Though implementing the sign and LSB flags is trivial, the zero and the NaN
-- flags are not yet implemented.
--
-- Clocking :
-- * the read side is purely "combinational" : remember that the input
-- addresses are latched by the instruction fetcher so there is no clock input.
-- * the write side is controlled by the scheduler queue for the address and
-- mask, the data comes from the Xbar. the "clock" is simply the "enable" bit
-- which latches the register on the falling edge. This is why the register set
-- can only read the same cell during the next cycle (the data must be stable).
--
-- New option :
-- synchronous memory blocks are added, in case asynchronous timing is not
-- possible. It's a failsafe solution but certainly consumes more power (clock)
-- and surface (FF instead of transparent latches).
--
--------------------------------------------------------------------------

LIBRARY ieee;
    USE ieee.std_logic_1164.ALL;
LIBRARY work;
    USE work.FCPU_config.ALL;   
    USE work.sram3r2w;
    
entity register_set is
  port (
    -- Clock input; whether it is actually used depends on the configuration.
    R7_clk             : in    std_ulogic;

    ---------------------------------------------------------------------------
    -- Read side
    ---------------------------------------------------------------------------

    -- 6-bit register numbers for the three read ports.
    R7_read_address_0  : in    std_ulogic_vector(5 downto 0);
    R7_read_address_1  : in    std_ulogic_vector(5 downto 0);
    R7_read_address_2  : in    std_ulogic_vector(5 downto 0);

    -- Read data. Declared inout (rather than out) so that the architecture
    -- can read individual bits back to derive R7_MSB and R7_LSB.
    R7_read_port_0     : inout F_VECTOR;
    R7_read_port_1     : inout F_VECTOR;
    R7_read_port_2     : inout F_VECTOR;

    -- Flag outputs. The NaN flag is implemented only when FP exists.
    R7_MSB             : out   std_ulogic;
    R7_LSB             : out   std_ulogic;
    R7_zero            : out   std_ulogic;
    R7_NaN             : out   std_ulogic;

    -- Sent to the decoder, which checks whether the pointer is aligned
    -- against the size specified in the instruction.
    R7_align           : out   std_ulogic_vector(LOGMAXSIZE-1 downto 0);

    ---------------------------------------------------------------------------
    -- Write side
    ---------------------------------------------------------------------------

    -- 6-bit register numbers for the two write ports.
    R7_write_address_0 : in    std_ulogic_vector(5 downto 0);
    R7_write_address_1 : in    std_ulogic_vector(5 downto 0);

    -- Write masks. The range really is MAXSIZE/2 downto 0 : the extra bit
    -- (the apparent off-by-one) is intentional.
    R7_write_mask_0    : in    std_ulogic_vector(MAXSIZE/2 downto 0);
    R7_write_mask_1    : in    std_ulogic_vector(MAXSIZE/2 downto 0);

    -- Write data.
    R7_write_port_0    : in    F_VECTOR;
    R7_write_port_1    : in    F_VECTOR
  );
end register_set;

architecture simple of register_set is

  -- Selects how R7_zero is derived from the zero-flag memory :
  --   false : the stored flags are used directly (transparent-latch version,
  --           no bypass to worry about);
  --   true  : a bypass net forwards the flags being written when the read
  --           address collides with a write address (flip-flop version).
  -- NOTE(review): this must agree with how sram3r2w / sram1r2w are built;
  -- that cannot be checked from this file -- TODO confirm the default.
  constant SYNCHRONOUS_ZERO_FLAG : boolean := false;

  -- Returns '1' when at least one element of v differs from '0', else '0'.
  -- Replaces comparisons against (others=>'0'), which are not legal because
  -- an "others" aggregate needs a context that fixes its index range.
  function is_nonzero (v : std_ulogic_vector) return std_ulogic is
  begin
    for i in v'range loop
      if v(i) /= '0' then
        return '1';
      end if;
    end loop;
    return '0';
  end function is_nonzero;

  -- One "this slice is non-zero" bit per write-mask bit :
  --   OR_slice1 / OR_slice2 : computed from write ports 0 / 1,
  --   out_slice             : read back from the zero-flag memory,
  --   tmp_slice             : out_slice after the bypass (synchronous version).
  signal OR_slice1, OR_slice2, out_slice, tmp_slice : std_ulogic_vector(MAXSIZE/2 downto 0);

  -- '1' when read address 0 equals write address 0 / 1 (synchronous version).
  signal bypass_0, bypass_1 : std_ulogic;

begin  -- simple

-- instantiates the slices :

  -- first come the 2 low bytes, each with its own write enable :
  byte_0 : entity work.sram3r2w
    generic map (
      width => 8 )
    port map (
      clk        => R7_clk,
      AdrRead1   => R7_read_address_0,
      AdrRead2   => R7_read_address_1,
      AdrRead3   => R7_read_address_2,
      AdrWrite1  => R7_write_address_0,
      AdrWrite2  => R7_write_address_1,
      DataWrite1 => R7_write_port_0(7 downto 0),
      DataWrite2 => R7_write_port_1(7 downto 0),
      WriteEn1   => R7_write_mask_0(0),
      WriteEn2   => R7_write_mask_1(0),
      DataRead1  => R7_read_port_0(7 downto 0),
      DataRead2  => R7_read_port_1(7 downto 0),
      DataRead3  => R7_read_port_2(7 downto 0));

  OR_slice1(0) <= is_nonzero(R7_write_port_0(7 downto 0));
  OR_slice2(0) <= is_nonzero(R7_write_port_1(7 downto 0));

  byte_1 : entity work.sram3r2w
    generic map (
      width => 8 )
    port map (
      clk        => R7_clk,
      AdrRead1   => R7_read_address_0,
      AdrRead2   => R7_read_address_1,
      AdrRead3   => R7_read_address_2,
      AdrWrite1  => R7_write_address_0,
      AdrWrite2  => R7_write_address_1,
      DataWrite1 => R7_write_port_0(15 downto 8),
      DataWrite2 => R7_write_port_1(15 downto 8),
      WriteEn1   => R7_write_mask_0(1),
      WriteEn2   => R7_write_mask_1(1),
      DataRead1  => R7_read_port_0(15 downto 8),
      DataRead2  => R7_read_port_1(15 downto 8),
      DataRead3  => R7_read_port_2(15 downto 8));

  OR_slice1(1) <= is_nonzero(R7_write_port_0(15 downto 8));
  OR_slice2(1) <= is_nonzero(R7_write_port_1(15 downto 8));


  -- then come the remaining 16-bit slices :
  loop_word: for word in 1 to (MAXSIZE/2)-1 generate
    word_N : entity work.sram3r2w
    generic map (
      width => 16 )
    port map (
      clk        => R7_clk,
      AdrRead1   => R7_read_address_0,
      AdrRead2   => R7_read_address_1,
      AdrRead3   => R7_read_address_2,
      AdrWrite1  => R7_write_address_0,
      AdrWrite2  => R7_write_address_1,
      DataWrite1 => R7_write_port_0((word*16)+15 downto (word*16)),
      -- spans from 16-31 to 48-63 (for LOGMAXSIZE = 3)
      DataWrite2 => R7_write_port_1((word*16)+15 downto (word*16)),
      WriteEn1   => R7_write_mask_0(word+1),  -- spans from 2 to MAXSIZE/2
      WriteEn2   => R7_write_mask_1(word+1),
      DataRead1  => R7_read_port_0((word*16)+15 downto (word*16)),
      DataRead2  => R7_read_port_1((word*16)+15 downto (word*16)),
      DataRead3  => R7_read_port_2((word*16)+15 downto (word*16)));

    OR_slice1(word+1) <= is_nonzero(R7_write_port_0((word*16)+15 downto (word*16)));
    OR_slice2(word+1) <= is_nonzero(R7_write_port_1((word*16)+15 downto (word*16)));

  end generate loop_word;

  -- the zero flags : one "slice is non-zero" bit per write-mask bit, written
  -- with the same addresses and masks as the data, read through port 0's
  -- address only.
  -- (sram1r2w is named through the work library because the context clause
  -- of this file only makes sram3r2w directly visible.)
  zero_block : entity work.sram1r2w
    generic map (
      width => (MAXSIZE/2)+1 )
    port map (
      clk        => R7_clk,
      AdrRead    => R7_read_address_0,
      AdrWrite1  => R7_write_address_0,
      AdrWrite2  => R7_write_address_1,
      DataWrite1 => OR_slice1,
      DataWrite2 => OR_slice2,
      WriteEn1   => R7_write_mask_0,
      WriteEn2   => R7_write_mask_1,
      DataRead   => out_slice);

  -- R7_zero is an unresolved std_ulogic, so exactly one of the two versions
  -- below may drive it; the generate conditions are mutually exclusive.
  -- NOTE(review): the stored bits mean "slice is non-zero", so the flag is
  -- raised when none of them is set. The previous code raised it when any bit
  -- was set, which looks inverted -- confirm against the decoder.

 ------------------------------------------------------------------------------
 -- version with transparent latches : no bypass to worry about.
 ------------------------------------------------------------------------------
  zero_async : if not SYNCHRONOUS_ZERO_FLAG generate
    R7_zero <= not is_nonzero(out_slice);
  end generate zero_async;

 ------------------------------------------------------------------------------
 -- version with flip-flops : a bypass net is necessary, and the collision
 -- detection is necessary (takes some time). this ends with a 3->1 mux.
 ------------------------------------------------------------------------------
  zero_sync : if SYNCHRONOUS_ZERO_FLAG generate
    bypass_0 <= '1' when (R7_read_address_0 = R7_write_address_0) else '0';
    bypass_1 <= '1' when (R7_read_address_0 = R7_write_address_1) else '0';

    -- The bypass is applied slice by slice : a stored flag is replaced only
    -- when the colliding write port really writes that slice (mask bit set),
    -- so the flags of slices left untouched by a partial write are kept.
    -- If both write ports hit the same slice, their flags are ORed.
    bypass_mux : for i in tmp_slice'range generate
      tmp_slice(i) <=
           (out_slice(i) and not ((bypass_0 and R7_write_mask_0(i))
                               or (bypass_1 and R7_write_mask_1(i))))
        or (OR_slice1(i) and bypass_0 and R7_write_mask_0(i))
        or (OR_slice2(i) and bypass_1 and R7_write_mask_1(i));
    end generate bypass_mux;

    R7_zero <= not is_nonzero(tmp_slice);
  end generate zero_sync;

  -- and finally, the flags, all taken from read port 0 :

  R7_LSB <= R7_read_port_0(0);
  -- topmost bit of the port, whatever its width (bit 63 for 64-bit registers)
  R7_MSB <= R7_read_port_0(R7_read_port_0'high);
  R7_align <= R7_read_port_0(LOGMAXSIZE-1 downto 0);

  -- NOTE(review): R7_NaN is not driven by this architecture (the file header
  -- says the NaN flag is not implemented yet).

end simple;
