-- imul8.vhdl - F-CPU 8x8-Bit Integer Multiplication Unit
-- Copyright (C) 2000 Michael Riepe <michael@s...>
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

-- $Id: imul8.vhdl,v 1.2 2000/10/11 12:16:16 michael Exp $

library IEEE;
use IEEE.std_logic_1164.all;

--
-- 8xN-bit signed/unsigned multiplier/adder
--
entity Mul8xN is
        generic (
                -- number of bits in operand B; the result Y is BWIDTH+8 bits wide
                BWIDTH : natural := 8
        );
        port (
                -- first operand, always 8 bits
                A       : in  std_ulogic_vector(7 downto 0);
                -- second operand, BWIDTH bits
                B       : in  std_ulogic_vector(BWIDTH-1 downto 0);
                -- summand of full result width; reads as zero when left unconnected
                X       : in  std_ulogic_vector(BWIDTH+7 downto 0) := (others => '0');
                -- when set, A is treated as signed (its top bit is the sign)
                SignedA : in  std_ulogic;
                -- when set, B is treated as signed (its top bit is the sign)
                SignedB : in  std_ulogic;
                -- result, BWIDTH+8 bits
                Y       : out std_ulogic_vector(BWIDTH+7 downto 0)
        );
end Mul8xN;

--
-- Arch_1: clockless (purely combinational) implementation.
--
-- Computes Y = A * B + X modulo 2**Y'length.  A and/or B are taken as
-- two's complement numbers when SignedA / SignedB are '1'.
--
-- The work is split over three processes connected by the signals
-- s3 and s6:
--   stage1 : builds the bit matrix of 11 summands and reduces it to 6
--   stage2 : reduces the 6 summands to 2
--   stage3 : adds the final 2 summands
-- Every reduction level uses 3:2 carry-save cells; a carry leaving the
-- topmost column is dropped, which gives the modulo behaviour.
--
architecture Arch_1 of Mul8xN is
        -- bit matrix indexed by (summand row, bit position)
        type std_ulogic_matrix is array(natural range <>, natural range <>) of std_ulogic;

        -- sum output of a 3:2 carry-save cell
        function csa_sum (p, q, r : std_ulogic) return std_ulogic is
        begin
                return p xor q xor r;
        end csa_sum;

        -- carry output of a 3:2 carry-save cell (majority of the inputs);
        -- the caller stores it one column to the left
        function csa_carry (p, q, r : std_ulogic) return std_ulogic is
        begin
                return (p and q) or (p and r) or (q and r);
        end csa_carry;

        -- 6 summands left after the second reduction level
        signal s3 : std_ulogic_matrix(5 downto 0, Y'length-1 downto 0);
        -- 2 summands left after the fifth reduction level
        signal s6 : std_ulogic_matrix(1 downto 0, Y'length-1 downto 0);
begin
        -- B(B'high) is read below, so B must not be a null vector
        check_bwidth : assert BWIDTH >= 1
                report "Mul8xN: BWIDTH must be at least 1"
                severity failure;

        stage1 : process (A, B, X, SignedA, SignedB)
                -- rows 0..7: partial products, row 8: X,
                -- row 9: correction for A < 0, row 10: correction for B < 0
                variable s1 : std_ulogic_matrix(10 downto 0, Y'length-1 downto 0);
                variable s2 : std_ulogic_matrix(7 downto 0, Y'length-1 downto 0);
                variable Aneg, Bneg : std_ulogic;
        begin
                --
                -- unsigned multiplier bit matrix:
                -- row j holds (A(j) and B) shifted left by j
                --
                s1 := (others => (others => '0'));
                for j in A'length-1 downto 0 loop
                        for i in B'length-1 downto 0 loop
                                s1(j, j + i) := A(A'low + j) and B(B'low + i);
                        end loop;
                end loop;
                --
                -- additional summand
                --
                for i in X'length-1 downto 0 loop
                        s1(8, i) := X(X'low + i);
                end loop;
                --
                -- subtract B << A'length if A < 0
                -- (done by adding the two's complement: inverted bits plus one)
                --
                Aneg := SignedA and A(A'high);
                for i in B'length-1 downto 0 loop
                        s1(9, A'length + i) := Aneg and not B(B'low + i);
                end loop;
                -- The `plus one' belongs at bit A'length.  It is produced by
                -- filling the low A'length bits of row 9 with ones and adding
                -- a further one at bit 0; that one goes into s1(1, 0), which
                -- no partial product uses (row 1 starts at column 1).
                for i in A'length-1 downto 0 loop
                        s1(9, i) := Aneg;
                end loop;
                s1(1, 0) := Aneg;
                --
                -- subtract A << B'length if B < 0
                --
                Bneg := SignedB and B(B'high);
                for i in A'length-1 downto 0 loop
                        s1(10, B'length + i) := Bneg and not A(A'low + i);
                end loop;
                -- The `plus one' at bit B'length goes into row 0, whose
                -- partial products end at column B'length-1.
                s1(0, B'length) := Bneg;

                --
                -- first level of wallace tree: 11 rows -> 8 rows
                --
                s2 := (others => (others => '0'));
                for i in s2'range(2) loop
                        --
                        -- Note that the tree is slightly irregular.
                        -- This reduces the delay for the last three rows
                        -- and makes room for more sophisticated handling
                        -- of the `X' operand.
                        --
                        -- rows 0 and 1 are passed through unchanged
                        s2(0, i) := s1(0, i);
                        s2(1, i) := s1(1, i);
                        s2(2, i) := csa_sum(s1(2, i), s1(3, i), s1(4, i));
                        s2(4, i) := csa_sum(s1(5, i), s1(6, i), s1(7, i));
                        s2(6, i) := csa_sum(s1(8, i), s1(9, i), s1(10, i));
                        -- carries move one column up; none leaves the top column
                        if i < s2'high(2) then
                                s2(3, i+1) := csa_carry(s1(2, i), s1(3, i), s1(4, i));
                                s2(5, i+1) := csa_carry(s1(5, i), s1(6, i), s1(7, i));
                                s2(7, i+1) := csa_carry(s1(8, i), s1(9, i), s1(10, i));
                        end if;
                end loop;

                --
                -- second level of wallace tree: 8 rows -> 6 rows
                --
                -- The default assignment comes first; the per-element
                -- assignments below override it where they apply.
                s3 <= (others => (others => '0'));
                for i in s3'range(2) loop
                        s3(0, i) <= csa_sum(s2(0, i), s2(1, i), s2(2, i));
                        s3(1, i) <= csa_sum(s2(3, i), s2(4, i), s2(5, i));
                        if i < s3'high(2) then
                                s3(2, i+1) <= csa_carry(s2(0, i), s2(1, i), s2(2, i));
                                s3(3, i+1) <= csa_carry(s2(3, i), s2(4, i), s2(5, i));
                        end if;
                        -- rows 6 and 7 are passed through unchanged
                        s3(4, i) <= s2(6, i);
                        s3(5, i) <= s2(7, i);
                end loop;
        end process;

        stage2 : process (s3)
                variable s4 : std_ulogic_matrix(3 downto 0, Y'length-1 downto 0);
                variable s5 : std_ulogic_matrix(2 downto 0, Y'length-1 downto 0);
        begin
                --
                -- third level of wallace tree: 6 rows -> 4 rows
                --
                s4 := (others => (others => '0'));
                for i in s4'range(2) loop
                        s4(0, i) := csa_sum(s3(0, i), s3(1, i), s3(2, i));
                        s4(1, i) := csa_sum(s3(3, i), s3(4, i), s3(5, i));
                        if i < s4'high(2) then
                                s4(2, i+1) := csa_carry(s3(0, i), s3(1, i), s3(2, i));
                                s4(3, i+1) := csa_carry(s3(3, i), s3(4, i), s3(5, i));
                        end if;
                end loop;

                --
                -- fourth level of wallace tree: 4 rows -> 3 rows
                --
                s5 := (others => (others => '0'));
                for i in s5'range(2) loop
                        s5(0, i) := csa_sum(s4(0, i), s4(1, i), s4(2, i));
                        if i < s5'high(2) then
                                s5(1, i+1) := csa_carry(s4(0, i), s4(1, i), s4(2, i));
                        end if;
                        -- row 3 is passed through unchanged
                        s5(2, i) := s4(3, i);
                end loop;

                --
                -- fifth level of wallace tree: 3 rows -> 2 rows
                --
                s6 <= (others => (others => '0'));
                for i in s6'range(2) loop
                        s6(0, i) <= csa_sum(s5(0, i), s5(1, i), s5(2, i));
                        if i < s6'high(2) then
                                s6(1, i+1) <= csa_carry(s5(0, i), s5(1, i), s5(2, i));
                        end if;
                end loop;
        end process;

        stage3 : process (s6)
                -- s7: bitwise sum (propagate) of the two rows, s8: carry into each bit
                variable s7, s8 : std_ulogic_vector(Y'length-1 downto 0);
        begin
                --
                -- final adder for the two remaining rows
                --
                -- The carry chain is written as a plain ripple recurrence
                -- (carry(i+1) = generate(i) or (propagate(i) and carry(i))).
                -- The loop must ascend, because s8(i) has to be known
                -- before s8(i+1) can be computed.
                --
                s8(s8'low) := '0';
                for i in s7'low to s7'high loop
                        s7(i) := s6(0, i) xor s6(1, i);
                        -- the carry out of the top bit is not needed
                        if i < s7'high then
                                s8(i+1) := (s6(0, i) and s6(1, i)) or (s7(i) and s8(i));
                        end if;
                end loop;
                Y <= s7 xor s8;
        end process;
end Arch_1;

-- vi: set ts=4 sw=4 : please
