--------------------------------------------------------------------
-- 
-- F-CPU project
-- (c) Yann Guidon 29 sept. 2000 whygee@f-cpu.org
-- GPL applies.
-- 
--------------------------------------------------------------------
-- 
-- Warning, i'm not a good VHDL coder.
-- 
--------------------------------------------------------------------
--
-- Here are some requirements/specification for the instruction cache :
-- - line width : 32 bytes (256 bits)
-- - number of lines : undetermined, could be as low as 4 for the
--    tests or 256 for a final version. Size doesn't matter, behaviour
--    is more important.
-- - strategy : "true LRU", 1 set/way, to avoid nasty thrashing.
--    this could change in the future and it's up to everyone's taste
--    and need. 2- or 4-way associative may be implemented instead
--    of fully associative.
-- - 1 read and 1 write data ports for simultaneous access
--    of 2 different items -> 1 read and 1 write address buses
-- - 1 "cache hit/miss" output bit.
-- - latency : 1, 2 or 3 cycles. 1 cycle is necessary for the simple
--     tests, 2 cycles might be necessary to speedup the clock in the future.
-- - It should be possible to invalidate/flush a certain cache line
--    if it corresponds to a specified address range. The address masks
--    for this case are not yet implemented.
-- 
--------------------------------------------------------------------
-- 
-- This very implementation of the Icache is composed of three elements :
--  * The LRU stack
--  * The address tags & comparators
--  * The cache lines themselves.
-- Each of them takes one cycle to go through.
--
-- Algorithm for the three modes :
-- 
-- Read :
-- Cycle 1) Send the address on the read bus. The address is
-- compared with every valid address tag and the result is
-- a bit vector. This vector is sent to the read lines of the cache,
-- and encoded for the LRU stack. The "hit" signal is sent.
-- Cycle 2) Update the LRU and read the selected cache line.
-- 
-- Write : (fits in 1 cycle)
--  * The LRU stack always outputs the number of the LRU line,
--  so it's "predecoded". it is sent as a bit vector to the
--  write signal of each cache line, and allowed by the general WRITE signal.
--  * At the same time, the LRU has a special update cycle.
--  * The data is sent to the data_in bus of the cache block.
--  * The address is sent to the write address bus of the address tag block.
--  
-- Invalidation :
--  * the invalid signal is sent
--  * the invalidated address is sent to the read address bus of the tag block.
-- 
-- 
-- Because the read takes 2 cycles, conflicting accesses may occur.
-- All the conflicts must be detected, and the request delayed, before the
-- requested cycle is accepted into the "pipeline". The conflicts are NOT
-- tested yet. Read and invalidation cycles should not collide either:
-- they use some common resources.
-- 
--------------------------------------------------------------------

library ieee;
use ieee.std_logic_1164.all;
use IEEE.std_logic_unsigned.all;

-- Interface of the instruction cache.
-- Generics size the buses and the line storage; ports carry the clock,
-- the three enable strobes, separate read/write addresses and data buses,
-- and the hit flag. Declaration order is kept so positional maps still work.
entity ICache is
  generic (
    -- Address bus width in bits (this makes a 21-bit physical address space).
    ABWIDTH  : integer := 16;
    -- Data bus width in bits.
    DBWIDTH  : integer := 256;
    -- Number of cache lines.
    NBLINES  : integer := 64;
    -- log2(NBLINES); must be kept in sync with NBLINES by the instantiator.
    LOGLINES : integer := 6
  );
  port (
    -- Clock.
    CLK       : in  std_logic;
    -- Flush enable (presumably the invalidation request -- confirm).
    FlushEn   : in  std_logic;
    -- Read enable.
    ReadEn    : in  std_logic;
    -- Write enable.
    WriteEn   : in  std_logic;
    -- Cache hit/miss indication.
    ICacheHit : out std_logic;
    -- Address for the write port.
    WriteAddr : in  std_logic_vector(ABWIDTH-1 downto 0);
    -- Address for the read port.
    ReadAddr  : in  std_logic_vector(ABWIDTH-1 downto 0);
    -- Data written into the cache.
    Din       : in  std_logic_vector(DBWIDTH-1 downto 0);
    -- Data read from the cache.
    Dout      : out std_logic_vector(DBWIDTH-1 downto 0)
  );
end ICache;

-- i'm still working below this line :

-- Work-in-progress architecture of ICache.
-- Only a registered read path exists so far: the low end of the read
-- address space indexes the line storage directly. ICacheHit is not driven
-- here, and Din, WriteEn, WriteAddr and FlushEn are not used, so the
-- storage is never written by this architecture.
ARCHITECTURE ess1 OF ICache IS
  -- Line storage: NBLINES entries, each DBWIDTH bits wide.
  type cache_block_type is array(0 to NBLINES-1) of std_logic_vector(DBWIDTH-1 downto 0);
  signal Icache_block : cache_block_type;
BEGIN

-- Registered read of one cache line.
-- Dout is updated on the rising edge of CLK only. Without the explicit
-- edge test the process would also run on the falling edge in simulation,
-- and synthesis would not infer the intended flip-flops.
cache_lookup : process(CLK) begin
  if rising_edge(CLK) then
    -- The range check keeps the index inside Icache_block; an out-of-range
    -- address leaves Dout holding its previous value.
    if (ReadEn='1') and (ReadAddr<NBLINES) then
      Dout <= Icache_block (CONV_INTEGER(ReadAddr));
    end if;
  end if;
end process cache_lookup;

END ess1;
