-- iadd.vhdl -- F-CPU 64-bit Add/Subtract Unit
-- Copyright (C) 2000, 2001, 2003 Michael Riepe <michael@stud.uni-hannover.de>
--
-- This program is free software; you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation; either version 2 of the License, or
-- (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

-- @(#) $Id: iadd.vhdl,v 1.58 2003/04/18 15:48:39 michael Exp $

library IEEE;
use IEEE.std_logic_1164.all;

use work.Bit_Manipulation.all;
use work.Generic_Adder.all;

-- Interface of the F-CPU integer add/subtract unit.
--
-- Generics
--   WIDTH      operand width in bits; the simulation-only assertion
--              below rejects anything that is not a positive multiple
--              of 64
--   PIPELINED  0 = first-stage results are forwarded directly;
--              any other value = they are held in registers driven
--              by Clk/Rst/En
entity IAdd is
	generic (
		WIDTH : natural := 64;
		PIPELINED : integer := 0
	);
	port (
		-- the two operands
		A, B : in std_ulogic_vector(WIDTH-1 downto 0);
		-- mode selects, in port order:
		--   Sub  subtract mode enable
		--   Sat  saturate/floor mode enable
		--   Sig  signed saturation mode
		--   Inc  increment (add) or decrement (sub) result by one
		--   Avg  average mode enable
		Sub, Sat, Sig, Inc, Avg : in std_ulogic;
		-- SIMD mode switches
		U : in std_ulogic_vector(2 downto 0);
		-- clock, reset and register-enable inputs
		Clk, Rst, En : in std_ulogic;
		-- Y8l/Y8h are the 8-bit tap outputs, Yl/Yh the regular outputs
		Y8l, Y8h, Yl, Yh : out std_ulogic_vector(WIDTH-1 downto 0)
	);
--pragma synthesis_off
begin
	assert (WIDTH >= 64) and (WIDTH mod 64 = 0)
		report "width of IAdd must be an integer multiple of 64"
		severity failure;
--pragma synthesis_on
end IAdd;

--  The IAdd unit is a multi-level carry-select adder with SIMD
--  capabilities.  Its first level calculates 4-bit slices using carry
--  look-ahead; the second and third level are just carry propagate
--  logic and muxes that select the right chunk.  There is also a
--  `tap' output that provides fast 8-bit results for some operations.
--  Subtraction is implemented as `not ((not A) + B)' rather than
--  the usual `A + (not B) + 1' because that makes the saturation
--  modes easier to implement.
--
-- Known limitations:
--
--  1: Not tested exhaustively.
--
--  2: Some 8-bit operations take two clock cycles (see the table below).
--
--  3: subb mode differs from F-CPU manual:  In the examples section,
--  the `borrow' output is set to all 1's (numeric: -1).  This unit sets
--  it to `0...01' (numeric: +1).  See the rationale in the code below.
--
-- Operating Modes (`?' means don't care):
--
--  Avg Sat Sig Sub Inc | Yl                    | F-CPU insn
--  ==========================================================
--   ?   0   ?   0   0  | A + B                 | add[c]
--   ?   0   ?   0   1  | A + B + 1             | --- (add1)
--   ?   0   ?   1   0  | A - B                 | sub[b]
--   ?   0   ?   1   1  | A - B - 1             | --- (sub1)
--   ?   1   0   0   0  | usat(A + B)       [*] | addus aka adds
--   ?   1   0   0   1  | usat(A + B + 1)   [*] | --- (addus1)
--   ?   1   0   1   0  | usat(A - B)       [*] | subus aka subf
--   ?   1   0   1   1  | usat(A - B - 1)   [*] | --- (subus1)
--   ?   1   1   0   0  | ssat(A + B)       [*] | addss
--   ?   1   1   0   1  | ssat(A + B + 1)   [*] | --- (addss1)
--   ?   1   1   1   0  | ssat(A - B)       [*] | subss
--   ?   1   1   1   1  | ssat(A - B - 1)   [*] | --- (subss1)
--
--  Avg Sat Sig Sub Inc | Yh                    | F-CPU insn
--  ==========================================================
--   0   ?   ?   0   0  | A + B     >= 2**width | addc
--   0   ?   ?   0   1  | A + B + 1 >= 2**width | --- (addc1)
--   0   ?   ?   1   0  | A u< B                | subb
--   0   ?   ?   1   1  | A u< B + 1            | --- (subb1)
--   1   ?   0   0   0  | (A + B) / 2       [*] | --- (avg)
--   1   ?   0   0   1  | (A + B + 1) / 2   [*] | --- (avg1)
--   1   ?   0   1   0  | (A - B) / 2       [*] | --- (diff)
--   1   ?   0   1   1  | (A - B - 1) / 2   [*] | --- (diff1)
--   1   ?   1   0   0  | (A + B) / 2       [*] | --- (avgs)
--   1   ?   1   0   1  | (A + B + 1) / 2   [*] | --- (avgs1)
--   1   ?   1   1   0  | (A - B) / 2       [*] | --- (diffs)
--   1   ?   1   1   1  | (A - B - 1) / 2   [*] | --- (diffs1)
--
-- [*] Avg and Sat operations always take two cycles
-- Note: Avg and Sat may be connected to the same bit
--
-- SIMD Modes:
--
--  U = "000": 8-bit mode
--  U = "001": 16-bit mode
--  U = "011": 32-bit mode
--  U = "111": 64-bit mode
--  (other combinations are invalid)

-- Behave_1: two-process implementation of IAdd.
--
--  * stage_1 works on the raw operands.  It produces the 4-bit slice
--    sums, the second-level select/carry vectors, the per-byte
--    saturate/extension flags and the combinational 8-bit tap outputs
--    (Y8l/Y8h).  Its results are handed to stage_2 through the r_*
--    signals, which are plain wires when PIPELINED = 0 and registers
--    (cleared by Rst, loaded on rising Clk while En = '1') otherwise.
--  * stage_2 is purely combinational over the r_* signals.  It selects
--    the final chunks, applies saturation or averaging and drives
--    Yl/Yh.
--
-- CLA and CSV are called as external procedures; bit_extract,
-- bit_duplicate and rshift as external functions.  Presumably they come
-- from work.Generic_Adder / work.Bit_Manipulation -- their definitions
-- are not part of this file.
architecture Behave_1 of IAdd is
	-- all mode inputs packed into one vector (U in bits 2..0, the
	-- remaining bits as given by the Mode_* constants below)
	signal M : std_ulogic_vector(7 downto 0);

	-- pipeline registers
	signal r_Y1, r_Z1 : std_ulogic_vector(WIDTH-1 downto 0);
	signal r_S2, r_T2 : std_ulogic_vector(WIDTH/4-1 downto 0);
	signal r_G2, r_P2 : std_ulogic_vector(WIDTH/16-1 downto 0);
	signal r_C08 : std_ulogic_vector(WIDTH/8-1 downto 0);
	signal r_SatY : std_ulogic_vector(WIDTH/8-1 downto 0);
	signal r_SatZ : std_ulogic_vector(WIDTH/8-1 downto 0);
	signal r_SatBits : std_ulogic_vector(WIDTH/8-1 downto 0);
	signal r_SatSign : std_ulogic_vector(WIDTH/8-1 downto 0);
	signal r_ExtY : std_ulogic_vector(WIDTH/8-1 downto 0);
	signal r_ExtZ : std_ulogic_vector(WIDTH/8-1 downto 0);
	signal r_M : std_ulogic_vector(7 downto 0);
	-- registered copy of En; written by stage_1 but not read by
	-- stage_2 in this architecture
	signal r_En : std_ulogic;

	-- indices for M and r_M vectors
	constant Mode_Sub : natural := 3;
	constant Mode_Sat : natural := 4;
	constant Mode_Sig : natural := 5;
	constant Mode_Inc : natural := 6;
	constant Mode_Avg : natural := 7;
begin
	-- pack the mode inputs so they travel through the pipeline as one
	-- vector
	M <= (
		0 => U(0),
		1 => U(1),
		2 => U(2),
		Mode_Sub => Sub,
		Mode_Sat => Sat,
		Mode_Sig => Sig,
		Mode_Inc => Inc,
		Mode_Avg => Avg
	);

	-- First stage: combinational logic on A/B/M, followed by the
	-- (optional) pipeline registers.  Y8l and Y8h are driven directly
	-- from here and are never registered.
	stage_1 : process (A, B, M, Clk, Rst, En)
		-- signals used by stage 1 exclusively
		variable G0, P0 : std_ulogic_vector(WIDTH-1 downto 0);
		variable Y1, Z1 : std_ulogic_vector(WIDTH-1 downto 0);
		variable S1, T1 : std_ulogic_vector(WIDTH-1 downto 0);
		variable G1, P1 : std_ulogic_vector(WIDTH/4-1 downto 0);
		variable S2, T2 : std_ulogic_vector(WIDTH/4-1 downto 0);
		variable G2, P2 : std_ulogic_vector(WIDTH/16-1 downto 0);
		variable C08 : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable neg, n_A : std_ulogic_vector(WIDTH-1 downto 0);
		variable yh, yl, zl : std_ulogic_vector(WIDTH-1 downto 0);
		variable sat_sign : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable sat_bits : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable sat_y : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable sat_z : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable sat0, sat1, sat2 : std_ulogic;
		variable ext_y : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable ext_z : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable ext1, ext2 : std_ulogic;
	begin
		-- input stage
		-- (half adders with A input inverted by Sub)
		-- d=2 t=4
		neg := (others => M(Mode_Sub));
		n_A := A xor neg;
		P0 := n_A xor B;
		G0 := n_A and B;

		-- first-level carry look-ahead
		-- d=3-4 t=5-6
		CLA(G0, P0, G1, P1);
		CSV(G0, P0, S1, T1);

		-- 4-bit partial results
		-- d=5 t=8
		Y1 := P0 xor S1;	-- n_A + B
		Z1 := P0 xor T1;	-- n_A + B + 1

		-- 8-bit SIMD add/sub tap
		-- d=6
		-- yl := Y1 xor neg
		-- zl := Z1 xor neg
		-- Note the identities:
		--  P0 xor neg = A xor B
		--  Y1 xor neg = (P0 xor S1) xor neg = (A xor B) xor S1
		--  Z1 xor neg = (P0 xor T1) xor neg = (A xor B) xor T1
		-- d=1 t=2
		zl := A xor B;
		-- d=5 t=8
		yl := zl xor S1;
		zl := zl xor T1;
		-- d=6 t=9
		-- per byte: the low nibble takes the "+1" variant when Inc is
		-- set, the high nibble when the low nibble carries out
		for i in WIDTH/8-1 downto 0 loop
			if to_X01(M(Mode_Inc)) = '1' then
				yl(8*i+3 downto 8*i+0) := zl(8*i+3 downto 8*i+0);
			end if;
			if to_X01(G1(2*i) or (P1(2*i) and M(Mode_Inc))) = '1' then
				yl(8*i+7 downto 8*i+4) := zl(8*i+7 downto 8*i+4);
			end if;
		end loop;
		Y8l <= yl;

		-- 8-bit carry vector
		-- d=6 t=8
		for i in WIDTH/8-1 downto 0 loop
			C08(i) := G1(2*i+1)
				or (P1(2*i+1) and G1(2*i+0))
				or (P1(2*i+1) and P1(2*i+0) and M(Mode_Inc));
		end loop;

		-- 8-bit carry out tap
		-- d=6 t=8
		-- (carry of byte i goes to bit 0 of that byte, rest is zero)
		yh := (others => '0');
		for i in WIDTH/8-1 downto 0 loop
			yh(8*i) := C08(i);
		end loop;
		Y8h <= yh;

		-- 8-bit saturate and extension vectors
		-- d=5 t=7
		for i in WIDTH/8-1 downto 0 loop
			-- saturated result bits
			-- d=2 t=2
			sat_sign(i) := B(8*i+7) or not M(Mode_Sig);
			sat_bits(i) := not (B(8*i+7) and M(Mode_Sig));
			-- d=2 t=3
			sat0 := n_A(8*i+7) or B(8*i+7);
			-- d=3 t=4
			sat1 := M(Mode_Sat) and G0(8*i+7);
			-- d=4 t=6
			sat2 := M(Mode_Sat) and (sat0 xor M(Mode_Sig));
			-- d=4 t=6
			if to_X01(M(Mode_Sig)) = '1' then
				-- d=2 t=3
				ext1 := sat0;
				ext2 := G0(8*i+7);
			else
				-- d=3 t=5
				ext1 := G0(8*i+7) xor M(Mode_Sub);
				ext2 := sat0 xor M(Mode_Sub);
			end if;
			-- d=5 t=7
			-- pick the variant matching the carry into the byte's top
			-- bit: S1 for the plain sum (y), T1 for the "+1" sum (z)
			if to_X01(S1(8*i+7)) = '1' then
				sat_y(i) := sat2;
				ext_y(i) := ext2;
			else
				sat_y(i) := sat1;
				ext_y(i) := ext1;
			end if;
			if to_X01(T1(8*i+7)) = '1' then
				sat_z(i) := sat2;
				ext_z(i) := ext2;
			else
				sat_z(i) := sat1;
				ext_z(i) := ext1;
			end if;
		end loop;

		-- second-level carry look-ahead
		-- d=6 t=8
		-- S2/T2 hold, per 4-bit slice, the select for "carry-in 0" and
		-- "carry-in 1" into the enclosing 16-bit group.  M(0) = '0'
		-- (8-bit mode) blocks propagation across byte boundaries.
		CLA(G1, P1, G2, P2);
		for i in WIDTH/16-1 downto 0 loop
			S2(4*i+0) := '0';
			S2(4*i+1) := G1(4*i+0);
			S2(4*i+2) := (M(0) and G1(4*i+1))
				or (M(0) and P1(4*i+1) and G1(4*i+0));
			S2(4*i+3) := G1(4*i+2)
				or (M(0) and P1(4*i+2) and G1(4*i+1))
				or (M(0) and P1(4*i+2) and P1(4*i+1) and G1(4*i+0));
			T2(4*i+0) := '1';
			T2(4*i+1) := G1(4*i+0) or P1(4*i+0);
			T2(4*i+2) := G1(4*i+1)
				or (P1(4*i+1) and G1(4*i+0))
				or (P1(4*i+1) and P1(4*i+0))
				or (not M(0));
			T2(4*i+3) := (G1(4*i+2) or (P1(4*i+2) and not M(0)))
				or (P1(4*i+2) and G1(4*i+1))
				or (P1(4*i+2) and P1(4*i+1) and G1(4*i+0))
				or (P1(4*i+2) and P1(4*i+1) and P1(4*i+0));
		end loop;

		-- end of first stage
		if PIPELINED = 0 then
			-- unpipelined: the r_* signals are transparent
			r_Y1 <= Y1;
			r_Z1 <= Z1;
			r_S2 <= S2;
			r_T2 <= T2;
			r_G2 <= G2;
			r_P2 <= P2;
			r_C08 <= C08;
			r_SatY <= sat_y;
			r_SatZ <= sat_z;
			r_SatBits <= sat_bits;
			r_SatSign <= sat_sign;
			r_ExtY <= ext_y;
			r_ExtZ <= ext_z;
			r_M <= M;
			r_En <= En;
		elsif to_X01(Rst) = '1' then
			-- reset is tested before the clock edge, so it takes
			-- priority over loading
			r_Y1 <= (others => '0');
			r_Z1 <= (others => '0');
			r_S2 <= (others => '0');
			r_T2 <= (others => '0');
			r_G2 <= (others => '0');
			r_P2 <= (others => '0');
			r_C08 <= (others => '0');
			r_SatY <= (others => '0');
			r_SatZ <= (others => '0');
			r_SatBits <= (others => '0');
			r_SatSign <= (others => '0');
			r_ExtY <= (others => '0');
			r_ExtZ <= (others => '0');
			r_M <= (others => '0');
			r_En <= '0';
		elsif rising_edge(Clk) then
			-- data registers load only while En = '1'; r_En follows En
			-- on every edge
			if to_X01(En) = '1' then
				r_Y1 <= Y1;
				r_Z1 <= Z1;
				r_S2 <= S2;
				r_T2 <= T2;
				r_G2 <= G2;
				r_P2 <= P2;
				r_C08 <= C08;
				r_SatY <= sat_y;
				r_SatZ <= sat_z;
				r_SatBits <= sat_bits;
				r_SatSign <= sat_sign;
				r_ExtY <= ext_y;
				r_ExtZ <= ext_z;
				r_M <= M;
			end if;
			r_En <= En;
		end if;
	end process;

	-- Second stage: purely combinational function of the r_* signals.
	-- The sensitivity list names exactly the signals the process reads;
	-- Clk, Rst and r_En are not read here and are therefore not listed.
	stage_2 : process (r_Y1, r_Z1, r_S2, r_T2, r_G2, r_P2, r_C08,
					   r_SatY, r_SatZ, r_SatBits, r_SatSign,
					   r_ExtY, r_ExtZ, r_M)
		variable Y1, Z1 : std_ulogic_vector(WIDTH-1 downto 0);
		variable Y2, Z2 : std_ulogic_vector(WIDTH-1 downto 0);
		variable S2, T2 : std_ulogic_vector(WIDTH/4-1 downto 0);
		variable G2, P2 : std_ulogic_vector(WIDTH/16-1 downto 0);
		variable S3, I3 : std_ulogic_vector(WIDTH/16-1 downto 0);
		variable G3, P3 : std_ulogic_vector(WIDTH/64-1 downto 0);
		variable cv : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable sv : std_ulogic_vector(WIDTH-1 downto 0);
		variable hv : std_ulogic_vector(WIDTH-1 downto 0);
		variable lv : std_ulogic_vector(WIDTH-1 downto 0);
		variable tv : std_ulogic_vector(WIDTH-1 downto 0);
		variable mm : std_ulogic_vector(1 downto 0);
		variable sat_y : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable sat_z : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable sat_sign : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable sat_bits : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable sat : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable ext_y : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable ext_z : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable ext : std_ulogic_vector(WIDTH/8-1 downto 0);
		variable tmp : std_ulogic;
	begin
		-- local working copies of the stage-1 results
		Y1 := r_Y1;
		Z1 := r_Z1;
		S2 := r_S2;
		T2 := r_T2;
		G2 := r_G2;
		P2 := r_P2;
		sat_y := r_SatY;
		sat_z := r_SatZ;
		sat_sign := r_SatSign;
		sat_bits := r_SatBits;
		ext_y := r_ExtY;
		ext_z := r_ExtZ;

		-- 16-bit partial results
		-- d=1 t=1
		-- Y2 assumes carry-in 0 to the 16-bit group, Z2 carry-in 1
		for i in WIDTH/4-1 downto 0 loop
			if to_X01(S2(i)) = '1' then
				Y2(4*i+3 downto 4*i) := Z1(4*i+3 downto 4*i);
			else
				Y2(4*i+3 downto 4*i) := Y1(4*i+3 downto 4*i);
			end if;
			if to_X01(T2(i)) = '1' then
				Z2(4*i+3 downto 4*i) := Z1(4*i+3 downto 4*i);
			else
				Z2(4*i+3 downto 4*i) := Y1(4*i+3 downto 4*i);
			end if;
		end loop;

		-- 16-bit ext/sat vectors
		-- d=1 t=1
		-- The new y and z values both depend on the OLD y/z pair, so
		-- the new y is parked in tmp until z has been updated.
		for i in WIDTH/8-1 downto 0 loop
			tmp := sat_y(i);
			if to_X01(S2(2*i+1)) = '1' then
				tmp := sat_z(i);
			end if;
			if to_X01(T2(2*i+1)) = '0' then
				sat_z(i) := sat_y(i);
			end if;
			sat_y(i) := tmp;
			tmp := ext_y(i);
			if to_X01(S2(2*i+1)) = '1' then
				tmp := ext_z(i);
			end if;
			if to_X01(T2(2*i+1)) = '0' then
				ext_z(i) := ext_y(i);
			end if;
			ext_y(i) := tmp;
		end loop;

		-- third-level carry look-ahead
		-- d=1-2 t=1-2
		CLA(G2, P2, G3, P3);
		-- d=2 t=2
		-- I3: does the Inc "+1" reach 16-bit group i?
		-- S3: does a generated carry reach 16-bit group i?
		-- Both are cut at the chunk boundaries selected by r_M(2..1).
		for i in WIDTH/64-1 downto 0 loop
			if to_X01(r_M(2)) = '1' then
				I3(4*i+0) := r_M(Mode_Inc);
				I3(4*i+1) := r_M(Mode_Inc) and P2(4*i+0);
				I3(4*i+2) := r_M(Mode_Inc) and P2(4*i+1) and P2(4*i+0);
				I3(4*i+3) := r_M(Mode_Inc) and P2(4*i+2) and P2(4*i+1) and P2(4*i+0);
			elsif to_X01(r_M(1)) = '1' then
				I3(4*i+0) := r_M(Mode_Inc);
				I3(4*i+1) := r_M(Mode_Inc) and P2(4*i+0);
				I3(4*i+2) := r_M(Mode_Inc);
				I3(4*i+3) := r_M(Mode_Inc) and P2(4*i+2);
			else
				I3(4*i+0) := r_M(Mode_Inc);
				I3(4*i+1) := r_M(Mode_Inc);
				I3(4*i+2) := r_M(Mode_Inc);
				I3(4*i+3) := r_M(Mode_Inc);
			end if;
			S3(4*i+0) := '0';
			S3(4*i+1) := (r_M(1) and G2(4*i+0));
			S3(4*i+2) := (r_M(2) and G2(4*i+1))
				or (r_M(2) and P2(4*i+1) and G2(4*i+0));
			S3(4*i+3) := (r_M(1) and G2(4*i+2))
				or (r_M(2) and P2(4*i+2) and G2(4*i+1))
				or (r_M(2) and P2(4*i+2) and P2(4*i+1) and G2(4*i+0));
		end loop;

		-- 64-bit result (after increment, if any)
		-- d=3 t=3
		-- An incoming carry (S3) or an incoming increment (I3) selects
		-- the "+1" variant of the group together with its sat/ext flags.
		for i in WIDTH/16-1 downto 0 loop
			if (to_X01(S3(i)) = '1') or (to_X01(I3(i)) = '1') then
				lv(16*i+15 downto 16*i) := Z2(16*i+15 downto 16*i);
				sat(2*i+1 downto 2*i) := sat_z(2*i+1 downto 2*i);
				ext(2*i+1 downto 2*i) := ext_z(2*i+1 downto 2*i);
			else
				lv(16*i+15 downto 16*i) := Y2(16*i+15 downto 16*i);
				sat(2*i+1 downto 2*i) := sat_y(2*i+1 downto 2*i);
				ext(2*i+1 downto 2*i) := ext_y(2*i+1 downto 2*i);
			end if;
		end loop;

		-- carry and saturate vectors
		-- d=4 t=4
		-- For each chunk size: cv = carry out of every chunk (placed at
		-- the chunk's lowest byte index), sv = saturation pattern built
		-- from the top byte's sat_bits/sat_sign, sat = top byte's
		-- saturate flag copied to every byte of the chunk.
		if to_X01(r_M(2)) = '1' then
			-- 64 bit
			-- d=3 t=3
			cv := (others => '0');
			for i in WIDTH/64-1 downto 0 loop
				cv(8*i) := G3(i) or (P3(i) and r_M(Mode_Inc));
			end loop;
			-- d=0 t=0
			sv := bit_duplicate(bit_extract(sat_bits, 8, 7), 64);
			for i in WIDTH/64-1 downto 0 loop
				sv(64*i+63) := sat_sign(8*i+7);
			end loop;
			-- d=3 t=3
			sat := bit_duplicate(bit_extract(sat, 8, 7), 8);
		elsif to_X01(r_M(1)) = '1' then
			-- 32 bit
			-- d=2 t=2
			cv := (others => '0');
			for i in WIDTH/32-1 downto 0 loop
				cv(4*i) := G2(2*i+1)
					or (P2(2*i+1) and G2(2*i+0))
					or (P2(2*i+1) and P2(2*i+0) and r_M(Mode_Inc));
			end loop;
			-- d=0 t=0
			sv := bit_duplicate(bit_extract(sat_bits, 4, 3), 32);
			for i in WIDTH/32-1 downto 0 loop
				sv(32*i+31) := sat_sign(4*i+3);
			end loop;
			-- d=3 t=3
			sat := bit_duplicate(bit_extract(sat, 4, 3), 4);
		elsif to_X01(r_M(0)) = '1' then
			-- 16 bit
			-- d=2 t=2
			cv := (others => '0');
			for i in WIDTH/16-1 downto 0 loop
				cv(2*i) := G2(i) or (P2(i) and r_M(Mode_Inc));
			end loop;
			-- d=0 t=0
			sv := bit_duplicate(bit_extract(sat_bits, 2, 1), 16);
			for i in WIDTH/16-1 downto 0 loop
				sv(16*i+15) := sat_sign(2*i+1);
			end loop;
			-- d=3 t=3
			sat := bit_duplicate(bit_extract(sat, 2, 1), 2);
		else
			-- 8 bit
			-- d=0 t=0
			cv := r_C08;
			-- d=0 t=0
			sv := bit_duplicate(bit_extract(sat_bits, 1, 0), 8);
			for i in WIDTH/8-1 downto 0 loop
				sv(8*i+7) := sat_sign(1*i+0);
			end loop;
			-- d=3 t=3
			sat := bit_duplicate(bit_extract(sat, 1, 0), 1);
		end if;

		-- helper vector
		-- d=0 t=0
		-- (all ones in subtract mode; xor-ing with it undoes the input
		-- inversion of A)
		tv := (others => r_M(Mode_Sub));

		-- high output vector
		-- d=6 t=7
		if to_X01(r_M(Mode_Avg)) = '1' then
			-- average mode
			-- d=3 t=3
			hv := rshift(lv, 1);
			-- d=4 t=4
			-- the top bit of every chunk comes from the extension flag
			-- instead of the neighbouring chunk's shifted-in bit
			if to_X01(r_M(2)) = '1' then
				for i in WIDTH/64-1 downto 0 loop
					hv(64*i+63) := ext(8*i+7);
				end loop;
			elsif to_X01(r_M(1)) = '1' then
				for i in WIDTH/32-1 downto 0 loop
					hv(32*i+31) := ext(4*i+3);
				end loop;
			elsif to_X01(r_M(0)) = '1' then
				for i in WIDTH/16-1 downto 0 loop
					hv(16*i+15) := ext(2*i+1);
				end loop;
			else
				for i in WIDTH/8-1 downto 0 loop
					hv(8*i+7) := ext(1*i+0);
				end loop;
			end if;
			-- d=5 t=6
			hv := hv xor tv;
		else
			-- carry/borrow mode
			-- d=4 t=4
			hv := (others => '0');
			for i in WIDTH/8-1 downto 0 loop
				hv(8*i) := cv(i);
			end loop;
		end if;

		-- saturate
		-- d=5 t=5
		-- (must follow the average computation above, which needs the
		-- unsaturated lv)
		for i in WIDTH/8-1 downto 0 loop
			if to_X01(sat(i)) = '1' then
				lv(8*i+7 downto 8*i) := sv(8*i+7 downto 8*i);
			end if;
		end loop;

		-- low output vector
		-- d=6 t=7
		lv := lv xor tv;

		-- outputs
		Yh <= hv;
		Yl <= lv;
	end process;
end Behave_1;

-- vi: set ts=4 sw=4 equalprg="fmt -72 -p--": please
