-- rop2.vhdl - F-CPU Logical Operation Unit
-- Copyright (C) 2003 Michael Riepe <michael@stud.uni-hannover.de>
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

-- @(#) $Id: rop2.vhdl,v 1.2 2003/04/21 15:29:19 michael Exp $

library IEEE;
use IEEE.std_logic_1164.all;

use work.Bit_Manipulation.all;

-- Rop2: interface of the logical operation unit.
-- All data ports share the generic WIDTH, which the assertion below
-- requires to be a positive multiple of 64.
entity Rop2 is
	generic (
		-- operand width in bits (checked by the assertion below)
		WIDTH : natural := 64
	);
	port (
		-- operand inputs
		A : in std_ulogic_vector(WIDTH-1 downto 0);
		B : in std_ulogic_vector(WIDTH-1 downto 0);
		C : in std_ulogic_vector(WIDTH-1 downto 0);
		-- function switch:
		--  000 => a and b
		--  001 => a and not b
		--  010 => a xor b
		--  011 => a or b
		--  100 => not (a or b)
		--  101 => a xnor b
		--  110 => a or not b
		--  111 => not (a and b)
		Func : in std_ulogic_vector(2 downto 0);
		-- mode switch:
		--  00 => direct
		--  01 => combine-and
		--  10 => combine-or
		--  11 => mux
		Mode : in std_ulogic_vector(1 downto 0);
		-- SIMD size vector
		U : in std_ulogic_vector(2 downto 0);
		-- clock/reset/enable inputs (unused)
		Clk : in std_ulogic;
		Rst : in std_ulogic;
		En : in std_ulogic;
		-- normal output
		Y : out std_ulogic_vector(WIDTH-1 downto 0);
		-- bitop output
		Z : out std_ulogic_vector(WIDTH-1 downto 0)
	);
--pragma synthesis_off
begin
	-- Simulation-only sanity check on the generic.  The severity is
	-- spelled out but deliberately left at the language default.
	assert (WIDTH >= 64) and (WIDTH mod 64 = 0)
		report "Rop2: WIDTH must be a positive multiple of 64"
		severity error;
--pragma synthesis_on
end Rop2;

architecture Behave_1 of Rop2 is
begin
	process (A, B, C, U, Func, Mode)
		-- decode: expand a bit index found in each SIMD chunk of A into
		-- a one-hot mask covering that chunk.
		--
		--   A : operand; its length must be a multiple of 64
		--   U : SIMD size vector (3 bits):
		--         000 => 8-bit chunks,  index = bits 2..0
		--         001 => 16-bit chunks, index = bits 3..0
		--         011 => 32-bit chunks, index = bits 4..0
		--         111 => 64-bit chunks, index = bits 5..0
		--       any other value makes the whole result 'X'
		--
		-- Returns a vector of A'length bits.  The result is formed as
		-- (bit-in-byte one-hot) and (byte-in-chunk select).
		-- NOTE(review): vector_duplicate and bit_duplicate come from
		-- work.Bit_Manipulation; presumably vector_duplicate(v, n)
		-- repeats v n times and bit_duplicate(v, n) repeats every bit
		-- of v n times -- confirm against that package.
		function decode (A, U : in std_ulogic_vector) return std_ulogic_vector is
			constant L : natural := A'length;
			variable aa : std_ulogic_vector(L-1 downto 0);
			variable na : std_ulogic_vector(L-1 downto 0);
			-- per-byte one-hot decode of index bits 2..0
			variable bb : std_ulogic_vector(L-1 downto 0);
			-- one enable bit per byte, decoded from the upper index bits
			variable cc : std_ulogic_vector(L/8-1 downto 0);
			variable uu : std_ulogic_vector(2 downto 0);
		begin
--pragma synthesis_off
			assert L mod 64 = 0
				report "Rop2.decode: operand length must be a multiple of 64";
			assert U'length = 3
				report "Rop2.decode: U must be 3 bits wide";
--pragma synthesis_on
			-- d=0
			aa := A;	-- normalise the index range to L-1 downto 0
			uu := to_X01(U);
			-- d=1
			na := not aa;
			-- d=2
			-- 3-to-8 decoder on the three low bits of every byte
			for i in L/8-1 downto 0 loop
				bb(8*i+0) := na(8*i+2) and na(8*i+1) and na(8*i+0);
				bb(8*i+1) := na(8*i+2) and na(8*i+1) and aa(8*i+0);
				bb(8*i+2) := na(8*i+2) and aa(8*i+1) and na(8*i+0);
				bb(8*i+3) := na(8*i+2) and aa(8*i+1) and aa(8*i+0);
				bb(8*i+4) := aa(8*i+2) and na(8*i+1) and na(8*i+0);
				bb(8*i+5) := aa(8*i+2) and na(8*i+1) and aa(8*i+0);
				bb(8*i+6) := aa(8*i+2) and aa(8*i+1) and na(8*i+0);
				bb(8*i+7) := aa(8*i+2) and aa(8*i+1) and aa(8*i+0);
			end loop;
			-- d=3
			-- For chunks wider than a byte: copy the low byte's decode
			-- into every byte of the chunk and decode the remaining
			-- index bits (3 and up) into one enable per byte.
			case uu is
				when "000" =>
					-- 8-bit chunks: every byte keeps its own decode
					cc := (others => '1');
				when "001" =>
					for i in L/16-1 downto 0 loop
						bb(16*i+15 downto 16*i) :=
							vector_duplicate(bb(16*i+7 downto 16*i), 16/8);
						cc(2*i+0) := na(16*i+3);
						cc(2*i+1) := aa(16*i+3);
					end loop;
				when "011" =>
					for i in L/32-1 downto 0 loop
						bb(32*i+31 downto 32*i) :=
							vector_duplicate(bb(32*i+7 downto 32*i), 32/8);
						cc(4*i+0) := na(32*i+4) and na(32*i+3);
						cc(4*i+1) := na(32*i+4) and aa(32*i+3);
						cc(4*i+2) := aa(32*i+4) and na(32*i+3);
						cc(4*i+3) := aa(32*i+4) and aa(32*i+3);
					end loop;
				when "111" =>
					for i in L/64-1 downto 0 loop
						bb(64*i+63 downto 64*i) :=
							vector_duplicate(bb(64*i+7 downto 64*i), 64/8);
						cc(8*i+0) := na(64*i+5) and na(64*i+4) and na(64*i+3);
						cc(8*i+1) := na(64*i+5) and na(64*i+4) and aa(64*i+3);
						cc(8*i+2) := na(64*i+5) and aa(64*i+4) and na(64*i+3);
						cc(8*i+3) := na(64*i+5) and aa(64*i+4) and aa(64*i+3);
						cc(8*i+4) := aa(64*i+5) and na(64*i+4) and na(64*i+3);
						cc(8*i+5) := aa(64*i+5) and na(64*i+4) and aa(64*i+3);
						cc(8*i+6) := aa(64*i+5) and aa(64*i+4) and na(64*i+3);
						cc(8*i+7) := aa(64*i+5) and aa(64*i+4) and aa(64*i+3);
					end loop;
				when others =>
					-- illegal or unknown size code
					bb := (others => 'X');
					cc := (others => 'X');
			end case;
			-- d=4
			-- gate each byte's decode with that byte's enable
			return bb and bit_duplicate(cc, 8);
		end decode;

		-- combine_and: AND-reduce every SIMD chunk of A and spread the
		-- single result bit over the whole chunk.
		--
		--   A : operand; its length must be a multiple of 64
		--   U : SIMD size vector (3 bits):
		--         000 => 8-bit, 001 => 16-bit, 011 => 32-bit,
		--         111 => 64-bit chunks; anything else yields all 'X'
		--
		-- Returns a vector of A'length bits.
		-- All scratch vectors are sized from A'length (not from the
		-- entity generic), so they always match the loop bounds below.
		-- NOTE(review): bit_duplicate comes from work.Bit_Manipulation;
		-- presumably bit_duplicate(v, n) repeats every bit of v n
		-- times -- confirm against that package.
		function combine_and (A, U : in std_ulogic_vector) return std_ulogic_vector is
			constant L : natural := A'length;
			variable aa : std_ulogic_vector(L-1 downto 0);
			-- partial AND results per 4/8/16/32/64-bit group
			variable x04 : std_ulogic_vector(L/4-1 downto 0);
			variable x08 : std_ulogic_vector(L/8-1 downto 0);
			variable x16 : std_ulogic_vector(L/16-1 downto 0);
			variable x32 : std_ulogic_vector(L/32-1 downto 0);
			variable x64 : std_ulogic_vector(L/64-1 downto 0);
			variable yy : std_ulogic_vector(L-1 downto 0);
			variable uu : std_ulogic_vector(2 downto 0);
		begin
--pragma synthesis_off
			assert L mod 64 = 0
				report "Rop2.combine_and: operand length must be a multiple of 64";
			assert U'length = 3
				report "Rop2.combine_and: U must be 3 bits wide";
--pragma synthesis_on
			-- d=0
			aa := A;	-- normalise the index range to L-1 downto 0
			uu := to_X01(U);
			-- d=1 t=1
			-- nibble level
			for i in L/4-1 downto 0 loop
				x04(i) := aa(4*i+0) and aa(4*i+1)
					  and aa(4*i+2) and aa(4*i+3);
			end loop;
			-- d=2 t=2
			-- byte and 16-bit level, both built directly from nibbles
			for i in L/16-1 downto 0 loop
				x08(2*i+0) := x04(4*i+0) and x04(4*i+1);
				x08(2*i+1) := x04(4*i+2) and x04(4*i+3);
				x16(1*i+0) := x04(4*i+0) and x04(4*i+1)
						  and x04(4*i+2) and x04(4*i+3);
			end loop;
			-- d=3 t=3
			-- 32- and 64-bit level, both built from the 16-bit results
			for i in L/64-1 downto 0 loop
				x32(2*i+0) := x16(4*i+0) and x16(4*i+1);
				x32(2*i+1) := x16(4*i+2) and x16(4*i+3);
				x64(1*i+0) := x16(4*i+0) and x16(4*i+1)
						  and x16(4*i+2) and x16(4*i+3);
			end loop;
			-- d=4 t=4
			-- pick the level matching the chunk size and widen it
			case uu is
				when "000" => yy := bit_duplicate(x08, 8);
				when "001" => yy := bit_duplicate(x16, 16);
				when "011" => yy := bit_duplicate(x32, 32);
				when "111" => yy := bit_duplicate(x64, 64);
				when others => yy := (others => 'X');
			end case;
			return yy;
		end combine_and;

		-- combine_or: OR-reduce every SIMD chunk of A and spread the
		-- single result bit over the whole chunk.
		--
		--   A : operand; its length must be a multiple of 64
		--   U : SIMD size vector (3 bits):
		--         000 => 8-bit, 001 => 16-bit, 011 => 32-bit,
		--         111 => 64-bit chunks; anything else yields all 'X'
		--
		-- Returns a vector of A'length bits.
		-- All scratch vectors are sized from A'length (not from the
		-- entity generic), so they always match the loop bounds below.
		-- NOTE(review): bit_duplicate comes from work.Bit_Manipulation;
		-- presumably bit_duplicate(v, n) repeats every bit of v n
		-- times -- confirm against that package.
		function combine_or (A, U : in std_ulogic_vector) return std_ulogic_vector is
			constant L : natural := A'length;
			variable aa : std_ulogic_vector(L-1 downto 0);
			-- partial OR results per 4/8/16/32/64-bit group
			variable x04 : std_ulogic_vector(L/4-1 downto 0);
			variable x08 : std_ulogic_vector(L/8-1 downto 0);
			variable x16 : std_ulogic_vector(L/16-1 downto 0);
			variable x32 : std_ulogic_vector(L/32-1 downto 0);
			variable x64 : std_ulogic_vector(L/64-1 downto 0);
			variable yy : std_ulogic_vector(L-1 downto 0);
			variable uu : std_ulogic_vector(2 downto 0);
		begin
--pragma synthesis_off
			assert L mod 64 = 0
				report "Rop2.combine_or: operand length must be a multiple of 64";
			assert U'length = 3
				report "Rop2.combine_or: U must be 3 bits wide";
--pragma synthesis_on
			-- d=0
			aa := A;	-- normalise the index range to L-1 downto 0
			uu := to_X01(U);
			-- d=1 t=1
			-- nibble level
			for i in L/4-1 downto 0 loop
				x04(i) := aa(4*i+0) or aa(4*i+1)
					   or aa(4*i+2) or aa(4*i+3);
			end loop;
			-- d=2 t=2
			-- byte and 16-bit level, both built directly from nibbles
			for i in L/16-1 downto 0 loop
				x08(2*i+0) := x04(4*i+0) or x04(4*i+1);
				x08(2*i+1) := x04(4*i+2) or x04(4*i+3);
				x16(1*i+0) := x04(4*i+0) or x04(4*i+1)
						   or x04(4*i+2) or x04(4*i+3);
			end loop;
			-- d=3 t=3
			-- 32- and 64-bit level, both built from the 16-bit results
			for i in L/64-1 downto 0 loop
				x32(2*i+0) := x16(4*i+0) or x16(4*i+1);
				x32(2*i+1) := x16(4*i+2) or x16(4*i+3);
				x64(1*i+0) := x16(4*i+0) or x16(4*i+1)
						   or x16(4*i+2) or x16(4*i+3);
			end loop;
			-- d=4 t=4
			-- pick the level matching the chunk size and widen it
			case uu is
				when "000" => yy := bit_duplicate(x08, 8);
				when "001" => yy := bit_duplicate(x16, 16);
				when "011" => yy := bit_duplicate(x32, 32);
				when "111" => yy := bit_duplicate(x64, 64);
				when others => yy := (others => 'X');
			end case;
			return yy;
		end combine_or;

		constant c0 : std_ulogic_vector(WIDTH-1 downto 0) := (others => '0');
		constant c1 : std_ulogic_vector(WIDTH-1 downto 0) := (others => '1');

		-- mux_bits: bitwise 2:1 multiplexer.  For each bit position the
		-- result is D1 where Sel is '1' and D0 where Sel is '0'.  When
		-- Sel is anything else (after to_X01 that means 'X') the result
		-- is only defined if both data bits agree; otherwise it is 'X'.
		-- This keeps an unknown select from silently picking D0.
		function mux_bits (Sel, D1, D0 : in std_ulogic_vector(WIDTH-1 downto 0))
			return std_ulogic_vector is
			variable r : std_ulogic_vector(WIDTH-1 downto 0);
		begin
			for i in WIDTH-1 downto 0 loop
				case Sel(i) is
					when '1' => r(i) := D1(i);
					when '0' => r(i) := D0(i);
					when others =>
						if D1(i) = D0(i) then
							r(i) := D0(i);
						else
							r(i) := 'X';
						end if;
				end case;
			end loop;
			return r;
		end mux_bits;

		variable aa : std_ulogic_vector(WIDTH-1 downto 0);	-- A, X01
		variable bb : std_ulogic_vector(WIDTH-1 downto 0);	-- B, later decode(B)
		variable cc : std_ulogic_vector(WIDTH-1 downto 0);	-- C in mux mode, else 0
		variable na : std_ulogic_vector(WIDTH-1 downto 0);	-- not A
		variable s0 : std_ulogic_vector(WIDTH-1 downto 0);	-- result where select = '0'
		variable s1 : std_ulogic_vector(WIDTH-1 downto 0);	-- result where select = '1'
		variable yy : std_ulogic_vector(WIDTH-1 downto 0);
		variable zz : std_ulogic_vector(WIDTH-1 downto 0);
		variable ff : std_ulogic_vector(2 downto 0);
		variable mm : std_ulogic_vector(1 downto 0);
		variable uu : std_ulogic_vector(2 downto 0);
	begin
		-- d=0 t=0
		-- strip weak/uninitialised levels so the case statements
		-- below only ever see '0', '1' or 'X'
		aa := to_X01(A);
		bb := to_X01(B);
		ff := to_X01(Func);
		mm := to_X01(Mode);
		uu := to_X01(U);

		-- d=1 t=1
		na := not aa;
		-- C only takes part in mux mode; otherwise it reads as zero
		case mm is
			when "11" => cc := to_X01(C);
			when others => cc := c0;
		end case;

		-- d=2 t=2
		-- Every function is expressed as "B ? s1 : s0".  An unknown
		-- function code leaves both candidates at 'X'.
		s0 := (others => 'X');
		s1 := (others => 'X');
		case ff is
			when "000" => s1 := aa; s0 := cc;	-- and
			when "001" => s1 := cc; s0 := aa;	-- andn
			when "010" => s1 := na; s0 := aa;	-- xor
			when "011" => s1 := c1; s0 := aa;	-- or
			when "100" => s1 := c0; s0 := na;	-- nor
			when "101" => s1 := aa; s0 := na;	-- xnor
			when "110" => s1 := aa; s0 := c1;	-- orn
			when "111" => s1 := na; s0 := c1;	-- nand
			when others => null;
		end case;

		-- d=3 t=3
		-- select per bit with B itself
		yy := mux_bits(bb, s1, s0);

		-- Note: 6G rule violated, 10T rule ok.
		-- If this is a problem, we need a second stage.
		-- d=8 t=8
		-- optional per-chunk reduction of the result
		case mm is
			when "01" => yy := combine_and(yy, uu);
			when "10" => yy := combine_or(yy, uu);
			when others => null;
		end case;

		-- regular output
		Y <= yy;

		-- d=4 t=4
		-- replace B by the one-hot mask derived from it
		bb := decode(bb, uu);

		-- d=5 t=5
		-- same selection as above, now driven by the decoded mask
		zz := mux_bits(bb, s1, s0);

		-- bitop output
		Z <= zz;
	end process;
end Behave_1;

-- vi: set ts=4 sw=4 equalprg="fmt -72 -p--": please
