-----------------------------------------------------------------------
-- f-cpu/vhdl/scheduler/scheduler_slot.vhdl
-- Describes the FC0 scheduler, which instantiates the SQ slots
-- Copyright (C) 2001 Yann GUIDON (whygee@f-cpu.org)
-- created lun dec 17 21:03:25 GMT 2001 by whygee@f-cpu.org
--
--------------------------BEGIN-VHDL-LICENCE-----------------------------
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
---------------------------END-VHDL-LICENCE------------------------------
--
-- First revision : let's see if it works :-)
--
-- warning : this unit spans over 2 clock cycles and pipeline stages !
--
--
-----------------------------------------------------------------------


LIBRARY ieee;
    USE ieee.std_logic_1164.ALL;
    USE ieee.numeric_std.all;
LIBRARY work;
    USE work.scheduler_definitions.ALL;
    USE work.scheduler_slot;


-- External interface of the scheduler.  The port list is still incomplete.
entity scheduler is
  port (
    clk         : in  Std_ulogic;
    reset       : in  Std_ulogic;
    instruction : in  Std_ulogic_vector(31 downto 0);  -- 32-bit instruction word
    --
    -- TODO : the Xbar mux control lines are not declared yet.
    --
    -- warning : issue_out can encounter some timing troubles.
    issue_out   : out Std_ulogic
  );
end entity scheduler;

-- First attempt at the scheduler: a queue of SQ_depth scheduler_slot
-- instances, the instruction decoding table, and the hazard check that
-- produces the "issue" decision.  Still incomplete (see the TODO notes).
architecture first_try of scheduler is
  -- the following signals connect the slots to each others :
  type t_SQ_slot_array is array (SQ_depth-1 downto 0) of t_SQ_slot;
  signal slot_in, slot_out : t_SQ_slot_array;

  -- one "busy" bit per queue slot and per register field ...
  signal R1_busy, R2_busy, R3_busy, R4_busy : Std_ulogic_vector(SQ_depth-1 downto 0);
  -- ... and the reduction of those bits over the whole queue :
  signal R1_is_busy, R2_is_busy, R3_is_busy, R4_is_busy, issue : Std_ulogic;

  -- register fields of the last issued instruction :
  signal old_R3, old_R2 : Std_ulogic_vector(5 downto 0);

  -- decoder output for the current instruction, and its registered copy :
  signal table_out,old_table : t_dec_interface;

  -- Constrained all-zero vector used by the busy reductions below.
  -- An "(others => '0')" aggregate cannot be used directly as an operand
  -- of "/=" because the comparison gives it no index constraint.
  constant no_slot_busy : Std_ulogic_vector(SQ_depth-1 downto 0) := (others => '0');

  -- declares the component :
  component scheduler_slot is
  port (
    clk, reset       : in  Std_ulogic;
    data_in          : in  t_SQ_slot;
    R1, R2, R3       : in  Std_ulogic_vector(5 downto 0);
    --
    data_out         : out t_SQ_slot;
    R1_busy, R2_busy,
    R3_busy, R4_busy : out Std_ulogic);
  end component;

  component decoding_table is
  port(
    F_IN : in Std_ulogic_vector(31 downto 0);  -- the instruction that comes from the fetcher
    interface : out t_dec_interface
  );
  end component;

begin  -- first_try

-------------------------------------------------------------------------------
--  These parts belong to both cycles :
-------------------------------------------------------------------------------

  -- instantiates the scheduling queue : every slot sees the same three
  -- register fields of the current instruction and reports its own
  -- busy bits at index i.
  queue: for i in SQ_depth-1 downto 0 generate
      cut : component scheduler_slot
    port map (
      clk => clk,   -- fanout ...
      reset => reset,  -- fanout ...
      data_in => slot_in(i),
      R1 => instruction(17 downto 12),
      R2 => instruction(11 downto 6),   -- fanout, too ...
      R3 => instruction(5 downto 0),
      data_out => slot_out(i),
      R1_busy => R1_busy(i),
      R2_busy => R2_busy(i),
      R3_busy => R3_busy(i),
      R4_busy => R4_busy(i)
    );
    -- we have to implement the MUXES here !
  end generate queue;

  -- placeholder : every slot input is tied to the cleared value for now.
  slot_in <= (others =>slot_cleared);  -- will be modified ... think about the IDU !

  -- pipelines the register values : captured on a rising clock edge,
  -- and only when the current instruction is issued.
  old_register: process (clk)
  begin  -- process old_register
    if rising_edge(clk) then
      -- clock enable kept in its own "if" (conventional register template)
      if issue = '1' then
        old_table <= table_out;    -- not all bits are useful here. i'll sort them later ...
        old_R2 <= instruction(11 downto 6);  -- used when updating a pointer
        old_R3 <= instruction(5 downto 0);   -- R4 is R3 xor "000001"
        -- the PC should also propagate here, too, but it's too early for this now.
      end if;
    end if;
  end process old_register;


-------------------------------------------------------------------------------
--  These parts belong to the first cycle :
-------------------------------------------------------------------------------

  -- instantiates the decoding table :
  table : component decoding_table
  port map (
    F_IN => instruction,
    interface => table_out
  );


  -- combine all the "register busy" bits : a register is busy as soon as
  -- one slot of the queue flags it.
    -- WARNING ! there is certainly an off-by-one
    -- because we don't need to check the last queue stages !!!!!!!!!!!!
  R1_is_busy <= '1' when R1_busy /= no_slot_busy else '0';
  R2_is_busy <= '1' when R2_busy /= no_slot_busy else '0';
  R3_is_busy <= '1' when R3_busy /= no_slot_busy else '0';
  R4_is_busy <= '1' when R4_busy /= no_slot_busy else '0';
-- these bits should be "latched" for use in the next cycle.
-- TODO : that latching is not implemented yet; the hazard check below
-- reads the combinational values directly.


-------------------------------------------------------------------------------
--  These parts belong to the second cycle :
-------------------------------------------------------------------------------

  -- choose a slot


  -- The last, big, beautiful, unbelievable combination :
  -- the instruction may issue only when no required register is busy.
  issue <= not (
    -- 1) check the register hazard :
         (R1_is_busy and (table_out.requires_src1))
      or (R2_is_busy and (table_out.requires_src2))
      or (R3_is_busy and (table_out.requires_src3))
    -- NOTE(review): R4 is gated by requires_src1, exactly as in the
    -- original code; this may be a copy/paste slip -- confirm against
    -- the definition of t_dec_interface.
      or (R4_is_busy and (table_out.requires_src1))

    -- 2) check the write slot hazards :

    -- 3) check pointer hazards :

    -- 4) check conditions :

    -- to be continued
  );

  -- drive the output port, which was left unconnected before.
  issue_out <= issue;

end first_try;
