--------------------------------------------------------------------
-- 
-- F-CPU project : behavioural description of the instruction cache
-- (c) Yann Guidon 30 sept. 2000 whygee@f-cpu.org
-- GPL applies.
-- 
--------------------------------------------------------------------
--
-- Disclaimers :
--
-- This behavioural description lets us debug the LRU algorithms.
-- This is a "quick-and-dirty" first attempt and my first VHDL
-- file ever since.... A more accurate and synthesizable version
-- will be done later. This file should compile without too much
-- trouble, but it has not yet been tested : a testbench will come
-- soon. Warning, i'm not a good VHDL coder.
-- 
--------------------------------------------------------------------
--
-- Here are some requirements/specification for the instruction cache :
-- - line width : 32 bytes (256 bits)
-- - number of lines : undetermined, could be as low as 4 for the
--    tests or 256 for a final version. Size doesn't matter, behaviour
--    is more important.
-- - strategy : "true LRU", 1 set/way, to avoid nasty thrashing.
--    this could change in the future and it's up to everyone's taste
--    and need. 2- or 4-way associative may be implemented instead
--    of fully associative.
-- - 1 read and 1 write data ports for simultaneous access
--    of 2 different items -> 1 read and 1 write address buses
-- - 1 "cache hit/miss" output bit.
-- - latency : 1, 2 or 3 cycles. 1 cycle is necessary for the simple
--     tests, 2 cycles might be necessary to speedup the clock in the future.
-- - It should be possible to invalidate/flush a certain cache line
--    if it corresponds to a specified address range. The address masks
--    for this case are not yet implemented.
-- 
--------------------------------------------------------------------
-- 
-- This very implementation of the Icache is composed of three elements :
--  * The LRU stack
--  * The address tags & comparators
--  * The cache lines themselves.
-- Each of them takes one cycle to go through.
--
-- Algorithm for the three modes :
-- 
-- Read :
-- Cycle 1) Send the address on the read bus. The address is
-- compared with every valid address tag and the result is
-- a bit vector. This vector is sent to the read lines of the cache,
-- and encoded for the LRU stack. The "hit" signal is sent.
-- Cycle 2) Update the LRU and read the selected cache line.
-- 
-- Write : (fits in 1 cycle)
--  * The LRU stack always outputs the number of the LRU line,
--  so it's "predecoded". it is sent as a bit vector to the
--  write signal of each cache line, and allowed by the general WRITE signal.
--  * At the same time, the LRU has a special update cycle.
--  * The data is sent to the data_in bus of the cache block.
--  * The address is sent to the write address bus of the address tag block.
--  
-- Invalidation :
--  * the invalid signal is sent
--  * the invalidated address is sent to the read address bus of the tag block.
-- 
-- 
-- Because the read takes 2 cycles, conflicts may arise.
-- All the conflicts must be tested and delayed before the requested cycle
-- is accepted into the "pipeline". The conflicts are NOT tested yet.
-- Read and invalidation cycles should not collide either : they use some
-- common resources.
-- 
--------------------------------------------------------------------

-- oh, and i also forgot the "line freeze" (or "lock") feature.


library ieee;
use ieee.std_logic_1164.all;
use IEEE.std_logic_unsigned.all;
use ieee.std_logic_arith.all;
use std.textio.all;
use ieee.std_logic_textio.all;


-- Interface of the behavioural instruction cache.
--
-- Generics :
--   ABWIDTH  : width of the line address buses (with the default of 16,
--              this makes a 21-bit physical address space)
--   DBWIDTH  : width of one cache line / of the data buses, in bits
--   NBLINES  : number of cache lines
--   LOGLINES : width of a line number, must be log2(NBLINES)
ENTITY ICache IS
  generic(
    ABWIDTH  : INTEGER := 16;
    DBWIDTH  : INTEGER := 256;
    NBLINES  : INTEGER := 64;
    LOGLINES : INTEGER := 6
  );
  PORT(
    -- active-high reset of the cache state (valid bits, LRU order)
    Reset     : IN std_logic;
    -- clock
    CLK       : IN std_logic;
    -- invalidate the line(s) whose tag matches ReadAddr
    FlushEn   : IN std_logic;
    -- look ReadAddr up in the address tags
    ReadEn    : IN std_logic;
    -- store Din / WriteAddr into the line selected by the LRU stack
    WriteEn   : IN std_logic;
    -- '1' when the lookup found a valid matching tag ;
    -- declared INOUT because the architecture reads it back
    ICacheHit : INOUT std_logic;
    -- address stored in the tag of the written line
    WriteAddr : IN std_logic_vector(ABWIDTH-1 downto 0);
    -- address looked up (read) or invalidated (flush)
    ReadAddr  : IN std_logic_vector(ABWIDTH-1 downto 0);
    -- line data to write
    Din       : IN std_logic_vector(DBWIDTH-1 downto 0);
    -- line data read
    Dout      : OUT std_logic_vector(DBWIDTH-1 downto 0)
  );
END ICache;

ARCHITECTURE ess1 OF ICache IS
-- Behavioural, fully associative, true-LRU instruction cache.
--
-- Every signal has exactly ONE driving process; each process applies the
-- asynchronous reset to the signals it owns :
--   Icache_memory_block  : Icache_block, Dout
--   Icache_address_table : address_tag, address_valid_tag, read_signal,
--                          ICacheHit, FIFO_input
--   Icache_LRU_update    : LRU_tag, write_signal
--
-- Pipeline hazards (read vs. write vs. flush on the same line) are still
-- NOT detected, as stated in the file header.

-- the cache block :
  type cache_block_type is array(0 to NBLINES-1) of std_logic_vector(DBWIDTH-1 downto 0);
  signal Icache_block : cache_block_type;

-- the line command signals (one bit per line) :
  signal write_signal, read_signal : std_logic_vector (NBLINES-1 downto 0);

-- the address tags :
  type address_tag_type is array(0 to NBLINES-1) of std_logic_vector(ABWIDTH-1 downto 0);
  signal address_tag : address_tag_type;
  signal address_valid_tag : std_logic_vector(NBLINES-1 downto 0);

-- the LRU tags : LRU_tag(0) holds the number of the most recently used
-- line, LRU_tag(NBLINES-1) the number of the least recently used one.
  type LRU_tag_type is array(0 to NBLINES-1) of std_logic_vector(LOGLINES-1 downto 0);
  signal LRU_tag : LRU_tag_type;
  -- number of the line that was hit by the last lookup
  signal FIFO_input : std_logic_vector(LOGLINES-1 downto 0);

  -- Initial LRU order : position i holds line number i.
  function init_cst return LRU_tag_type is
    variable t : LRU_tag_type;
  begin
    for i in t'range loop
      t(i):=CONV_STD_LOGIC_VECTOR(i,LOGLINES);
    end loop;
    return t;
  end init_cst;

  constant Cst_LRU_init : LRU_tag_type := init_cst;

  -- Line number -> one-hot line select vector.
  -- Returns all zeros if the number does not designate an existing line.
  -- in real life, this is a 6->64 bits binary decoder.
  function decode_line(tag : std_logic_vector) return std_logic_vector is
    variable sel : std_logic_vector(NBLINES-1 downto 0) := (others=>'0');
  begin
    for i in sel'range loop
      if tag=CONV_STD_LOGIC_VECTOR(i,LOGLINES) then
        sel(i):='1';
      end if;
    end loop;
    return sel;
  end decode_line;

BEGIN

-- a line number must fit in LOGLINES bits
assert 2**LOGLINES >= NBLINES
  report "ICache : LOGLINES is too small to encode NBLINES line numbers"
  severity failure;

------------------------------------------------------------------
-- The cache lines
------------------------------------------------------------------

Icache_memory_block : process(CLK)
begin
  if rising_edge(CLK) then

------------------------------------------------------------------
-- Read one Icache line
------------------------------------------------------------------
    -- default : nothing selected, release the bus.
    -- The assignment inside the loop overrides it when a line is selected.
    Dout <= (others=>'Z');
    for i in 0 to NBLINES-1 loop
      if (read_signal(i)='1') then
        Dout <= Icache_block (i);
        exit;
      end if;
    end loop;

------------------------------------------------------------------
-- Write one Icache line
------------------------------------------------------------------
    for i in 0 to NBLINES-1 loop
      if (WriteEn and write_signal(i))='1' then
        Icache_block (i) <= Din;
        exit;
      end if;
    end loop;

  end if;
end process Icache_memory_block;


------------------------------------------------------------------
-- The address lookup table
------------------------------------------------------------------

Icache_address_table : process (Reset, CLK)
  variable t_read : std_logic_vector (NBLINES-1 downto 0);
  variable hit : boolean;
  variable hit_line : integer range 0 to NBLINES-1;
begin
  if (Reset = '1') then
    -- quick and asynchronous reset : every line becomes invalid
    read_signal <= (others=>'0');
    address_valid_tag <= (others=>'0');
    ICacheHit <= '0';
    FIFO_input <= (others=>'0');

  elsif rising_edge(CLK) then

    -- variables keep their value between activations :
    -- restart from a clean state on every cycle.
    t_read := (others=>'0');
    hit := false;
    hit_line := 0;

------------------------------------------------------------------
-- search for the same address tag.
------------------------------------------------------------------
    for i in 0 to NBLINES-1 loop
      if ((address_valid_tag(i)='1') and (address_tag(i)=ReadAddr))
      then  -- line : found
        if (FlushEn='1') then
          -- invalidation has priority over the read and hits every match
          address_valid_tag(i) <= '0';
        elsif (ReadEn='1') and not hit then
          -- only the first match is reported, so that read_signal stays
          -- one-hot and agrees with the encoded line number
          hit:=true;
          hit_line:=i;
          t_read(i):='1';
        end if;
      end if;

-- manage a write enable : the line designated by the LRU stack gets
-- the new tag and becomes valid (this overrides a simultaneous flush
-- of the same line, since the old contents are replaced anyway).
      if (WriteEn and write_signal(i))='1' then
        address_tag(i) <= WriteAddr;
        address_valid_tag(i) <= '1';
      end if;

    end loop;

    read_signal <= t_read;
    if hit then
      ICacheHit<='1';
    else
      ICacheHit<='0';
    end if;

    -- only meaningful while ICacheHit='1'
    FIFO_input<=CONV_STD_LOGIC_VECTOR(hit_line,LOGLINES);
  -- in real life, this is a binary encoder of 64->6 bits.

  end if;
end process Icache_address_table;


------------------------------------------------------------------
-- The LRU stack
------------------------------------------------------------------

Icache_LRU_update : process (Reset, CLK)
  variable u : LRU_tag_type;
  variable found : boolean;
begin
  if (Reset = '1') then
    -- quick and asynchronous reset
    LRU_tag <= Cst_LRU_init;
    -- the write select must designate the LRU line right after reset,
    -- otherwise the first write would be lost.
    write_signal <= decode_line(Cst_LRU_init(NBLINES-1));

  elsif rising_edge(CLK) then

    -- start from the current order so that untouched positions are kept
    u := LRU_tag;

------------------------------------------------------------------
-- Update LRU during write (beware of the priority, it might change)
------------------------------------------------------------------
    if WriteEn='1' then
      -- full rotate : the line being written becomes the most recent
      u(0):=LRU_tag(NBLINES-1);
      for i in 1 to NBLINES-1 loop
        u(i):=LRU_tag(i-1);
      end loop;

------------------------------------------------------------------
-- Update LRU after read (beware of the collisions, too)
------------------------------------------------------------------
    elsif ICacheHit='1' then
      -- move the hit line to position 0 : every entry that was more
      -- recent than it slides down by one, older entries do not move.
      found:=false;
      for i in NBLINES-1 downto 1 loop
        if LRU_tag(i)=FIFO_input then
          found:=true;
        end if;
        if found then
          u(i):=LRU_tag(i-1);
        end if;
-- this is not a great algorithm but it should work for the sims...
      end loop;
      u(0):=FIFO_input;
    end if;

    LRU_tag<= u;

    -- generate the write signals from the UPDATED LRU queue, so that
    -- write_signal always matches LRU_tag(NBLINES-1) and back-to-back
    -- writes go to different lines.
    write_signal<=decode_line(u(NBLINES-1));

  end if;
end process Icache_LRU_update;


END ess1;

------------------------------------------------------------------
--                                            that's all, folks...
------------------------------------------------------------------
