-- icache.vhdl - instruction cache for the F-CPU
-- Copyright (C) 2000 Michael Riepe <michael@s...>
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

-- $Id: icache.vhdl,v 1.4 2000/10/01 20:52:02 michael Exp $

library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_arith.all;   -- gotta get rid of that later
use IEEE.numeric_std.all;

-- ICache: instruction cache holding NLINES lines of NBITS bits, each tagged
-- with an NADDRS-bit address.  One lookup address (Addr) is shared by the
-- read and invalidate operations; lines are written through a separate
-- fill port.
entity ICache is
        generic (
                NBITS : integer := 256; -- cache line size in bits (256 bits = 32 bytes)
                NADDRS : integer := 27; -- number of address (tag) bits (32 - 5) => 4 GByte
                NLINES : integer := 64; -- number of cache lines
                NLOG : integer := 6             -- log2(NLINES): width of a line number
        );
        port (
                -- read port
                Addr : in std_logic_vector(NADDRS-1 downto 0);  -- read address (compared against every tag)
                Read : in std_logic;    -- read line(addr)
                -- invalidate (mutually exclusive with read, uses Addr)
                -- Only address bits whose Mask bit is '1' take part in the
                -- comparison; an all-zero Mask therefore selects every line.
                Mask : in std_logic_vector(NADDRS-1 downto 0);  -- mask for invalidate
                Inv : in std_logic;             -- invalidate lines(addr, mask); takes priority over Read and Fill
                -- fill port
                -- Fill carries one request bit per cache line; the selected
                -- line receives Ain as its tag and Din as its data and is
                -- marked valid.  If several bits are set, only the
                -- highest-numbered line is filled.
                Fill : in std_logic_vector(NLINES-1 downto 0);  -- fill cache line
                Ain : in std_logic_vector(NADDRS-1 downto 0);   -- new tag
                Din : in std_logic_vector(NBITS-1 downto 0);    -- new data
                -- clock & reset
                Clk : in std_logic;             -- global clock (rising edge active)
                Rst : in std_logic;             -- asynchronous reset, active high; marks all lines invalid
                -- outputs
                Dout : out std_logic_vector(NBITS-1 downto 0);  -- data of the line that hit, all zeros otherwise
                Hit : out std_logic     -- '1' when a read found a valid line whose tag equals Addr
        );
end ICache;

-- ICache_arch1: fully associative cache with an LRU replacement queue.
--
-- Part 1 keeps the line numbers in a shift queue ordered by age:
-- lru(0) is the least recently used line, lru(NLINES-1) the most recently
-- used one.  Part 2 holds tags, data and valid bits and implements the
-- read, invalidate and fill operations.
--
-- Dout, Hit and the LRU control signals are registers: they change only
-- on a rising clock edge (or on reset) and keep their value for a whole
-- clock period.
architecture ICache_arch1 of ICache is
        -- Data types
        type Line_Array is array(NLINES-1 downto 0) of std_logic_vector(NBITS-1 downto 0);
        type Tag_Array is array(NLINES-1 downto 0) of std_logic_vector(NADDRS-1 downto 0);
        type LRU_Array is array(NLINES-1 downto 0) of std_logic_vector(NLOG-1 downto 0);
        -- Line numbers are converted with numeric_std.  The file makes both
        -- std_logic_arith and numeric_std visible and each declares a type
        -- called "unsigned"; the two declarations hide each other, so the
        -- type has to be named through its package.
        subtype Line_Index is IEEE.numeric_std.unsigned(NLOG-1 downto 0);
        -- All-zero reference patterns for the comparisons below
        constant NO_MATCH : std_logic_vector(NLINES-1 downto 0) := (others => '0');
        constant TAG_EQUAL : std_logic_vector(NADDRS-1 downto 0) := (others => '0');
        -- Cache subunit
        signal data : Line_Array;
        signal tags : Tag_Array;
        signal valid : std_logic_vector(NLINES-1 downto 0);
        signal addr_cmp : Tag_Array;    -- tags(<>) xor Addr
        -- LRU subunit
        signal lru : LRU_Array;
        signal match : std_logic_vector(NLINES-1 downto 0);
        signal enable : std_logic_vector(NLINES-2 downto 0);
        -- LRU control interface (registered by the cache, consumed by the LRU)
        signal lru_mru : std_logic_vector(NLOG-1 downto 0);     -- line to move to the MRU end
        signal lru_update : std_logic;                          -- '1': perform the move at the next clock
begin
        -- Every line number must fit into NLOG bits, otherwise the numbers
        -- stored in the LRU queue would be truncated and collide.
        assert NLINES <= 2**NLOG
                report "ICache: NLOG is too small to number NLINES cache lines"
                severity failure;

        --
        -- Part 1: the LRU subunit
        --

        -- 1a: find the queue position that currently holds line lru_mru
        lru_find : process (lru, lru_mru)
        begin
                match <= (others => '0');
                for i in NLINES-1 downto 0 loop
                        if (lru(i) = lru_mru) then
                                match(i) <= '1';
                        end if;
                end loop;
        end process;

        -- 1b: block the rest of the queue
        -- Position i may shift only when no position above it matched,
        -- i.e. only the entries from the matching position upwards move.
        lru_block : process (match)
        begin
                enable <= (others => '0');
                for i in NLINES-2 downto 0 loop
                        if (match(NLINES-1 downto i+1) = NO_MATCH(NLINES-1 downto i+1)) then
                                enable(i) <= '1';
                        end if;
                end loop;
        end process;

        -- 1c: shift (and reset) circuitry
        lru_shift : process (Clk, Rst)
        begin
                if (Rst = '1') then
                        -- asynchronous reset: queue position i holds line i
                        for i in NLINES-1 downto 0 loop
                                lru(i) <= std_logic_vector(to_unsigned(i, NLOG));
                        end loop;
                elsif rising_edge(Clk) then
                        -- LRU update: close the gap left by lru_mru and
                        -- re-insert it at the most recently used end
                        if (lru_update = '1') then
                                for i in 0 to NLINES-2 loop
                                        if (enable(i) = '1') then
                                                lru(i) <= lru(i + 1);
                                        end if;
                                end loop;
                                lru(NLINES-1) <= lru_mru;
                        end if;
                end if;
        end process;

        --
        -- Part 2: the cache itself
        --

        -- 2a: compare tags and address
        tag_compare : process (Addr, tags)
        begin
                -- don't combine the bits yet... the invalidate logic
                -- needs them individually to apply Mask
                for i in NLINES-1 downto 0 loop
                        addr_cmp(i) <= Addr xor tags(i);
                end loop;
        end process;

        -- 2b: do all the work
        -- Everything this process drives is assigned inside the reset or
        -- clock-edge branch only.  Driving Dout, Hit, lru_mru and
        -- lru_update outside of it would return them to their defaults on
        -- the falling clock edge, so the LRU subunit would never see
        -- lru_update = '1' at a rising edge and the queue would never move.
        cache_ctrl : process (Clk, Rst)
                variable o : std_logic_vector(NBITS-1 downto 0);        -- next Dout
                variable h : std_logic;                                 -- next Hit
                variable sel : std_logic_vector(NLOG-1 downto 0);       -- line touched by this read
                variable update : std_logic;                            -- next lru_update
        begin
                if (Rst = '1') then
                        -- asynchronous reset
                        valid <= (others => '0');
                        lru_mru <= (others => '0');
                        lru_update <= '0';
                        Dout <= (others => '0');
                        Hit <= '0';
                elsif rising_edge(Clk) then
                        -- default values
                        o := (others => '0');   -- why use 'Z' here?
                        h := '0';
                        sel := (others => '0');
                        update := '0';

                        -- Inv has higher priority than Read/Fill
                        if (Inv = '1') then
                                -- invalidate lines(addr, mask)
                                -- to invalidate ALL lines, set Mask to (others => '0')
                                for i in NLINES-1 downto 0 loop
                                        if ((addr_cmp(i) and Mask) = TAG_EQUAL) then
                                                valid(i) <= '0';
                                        end if;
                                end loop;
                                -- TODO: stall/abort colliding writes?
                        else
                                -- read
                                if (Read = '1') then
                                        -- find cache line
                                        -- default is least recently used line (cache miss)
                                        sel := lru(0);
                                        for i in NLINES-1 downto 0 loop
                                                if (valid(i) = '1') and (addr_cmp(i) = TAG_EQUAL) then
                                                        -- cache hit, output data
                                                        o := data(i);
                                                        h := '1';
                                                        -- remember cache line number
                                                        sel := std_logic_vector(to_unsigned(i, NLOG));
                                                        exit;
                                                end if;
                                        end loop;
                                        if (h /= '1') then
                                                -- handle cache miss:
                                                -- invalidate least recently used line
                                                -- (re-validated by fill operation)
                                                valid(to_integer(Line_Index(sel))) <= '0';
                                                -- TODO: tell memory controller to fetch this line
                                                -- TODO: stall IF/ID unit
                                        end if;
                                        -- update LRU at the next clock cycle
                                        update := '1';
                                        --
                                        -- Note: the LRU is updated after EVERY read, even
                                        -- when a cache miss occurs (and NOT updated when the
                                        -- line is finally filled).  If we did otherwise, the
                                        -- next cache miss might use the same cache line again.
                                        --
                                end if;

                                -- line fill
                                -- (placed after the read so that a fill of the
                                -- line just invalidated by a miss wins)
                                for i in NLINES-1 downto 0 loop
                                        if (Fill(i) = '1') then
                                                -- fill line and re-validate it
                                                tags(i) <= Ain;
                                                data(i) <= Din;
                                                valid(i) <= '1';
                                                exit;
                                        end if;
                                end loop;
                        end if;

                        -- LRU control
                        lru_mru <= sel;
                        lru_update <= update;

                        -- outputs
                        Dout <= o;
                        Hit <= h;
                end if;
        end process;
end ICache_arch1;

-- vi: set ts=4 sw=4 : please
