Skip to content

Instantly share code, notes, and snippets.

@MikeRomaa
Created April 21, 2025 18:54
Show Gist options
  • Save MikeRomaa/1d67ff9138831cabc4469564399dc8ed to your computer and use it in GitHub Desktop.
Save MikeRomaa/1d67ff9138831cabc4469564399dc8ed to your computer and use it in GitHub Desktop.
Floating point VHDL

Floating Point VHDL

Running in ModelSim

  1. Compile float_adder.vhdl
  2. Open the float_adder component from the work library
  3. Add /float_adder/x, /float_adder/y, and /float_adder/z to the waveform viewer

Test Cases

For each test case, enter the `force` commands listed under "Inputs" into the ModelSim console, run the simulation, and verify that the value of /float_adder/z matches the bit pattern shown under "Output".

Normal Addition

Inputs

  • x = 0 10000001 111 1100 0000 0000 0000 0000 = 7.875
  • y = 0 01111100 100 0000 0000 0000 0000 0000 = 0.1875
force -freeze sim:/float_adder/x 01000000111111000000000000000000 0
force -freeze sim:/float_adder/y 00111110010000000000000000000000 0

Output

  • z = 0 10000010 000 0001 0000 0000 0000 0000 = 8.0625

NaN

Inputs

  • x = 0 11111111 100 0000 0000 0000 0000 0000 = NaN
  • y = 0 11111111 100 0000 0000 0000 0000 0000 = NaN
force -freeze sim:/float_adder/x 01111111110000000000000000000000 0
force -freeze sim:/float_adder/y 01111111110000000000000000000000 0

Output

  • z = 1 11111111 111 1111 1111 1111 1111 1111 = NaN

+Infinity

Inputs

  • x = 0 11111111 000 0000 0000 0000 0000 0000 = Infinity
  • y = 0 10000000 000 0000 0000 0000 0000 0000 = 2.0
force -freeze sim:/float_adder/x 01111111100000000000000000000000 0
force -freeze sim:/float_adder/y 01000000000000000000000000000000 0

Output

  • z = 0 11111111 000 0000 0000 0000 0000 0000 = Infinity

-Infinity

Inputs

  • x = 1 11111111 000 0000 0000 0000 0000 0000 = -Infinity
  • y = 0 10000000 000 0000 0000 0000 0000 0000 = 2.0
force -freeze sim:/float_adder/x 11111111100000000000000000000000 0
force -freeze sim:/float_adder/y 01000000000000000000000000000000 0

Output

  • z = 1 11111111 000 0000 0000 0000 0000 0000 = -Infinity
library IEEE;
use IEEE.STD_LOGIC_1164.all;
use IEEE.NUMERIC_STD.all;
-- Combinational single-precision floating-point adder: z = x + y.
-- Every port is a 32-bit word laid out as
--   bit 31        : sign
--   bits 30 .. 23 : biased exponent (bias 127)
--   bits 22 .. 0  : mantissa (fraction)
entity FLOAT_ADDER is
    port (
        -- Operands
        x, y : in  STD_LOGIC_VECTOR(31 downto 0);
        -- Sum of the two operands
        z    : out STD_LOGIC_VECTOR(31 downto 0)
    );
end entity FLOAT_ADDER;
-- Combinational implementation of the single-precision adder.
--
-- The operands are split into sign / exponent / mantissa fields by concurrent
-- assignments; one process then computes the result fields, which are
-- re-assembled into `z` at the bottom of the architecture.
--
-- All intermediate values inside the process are VARIABLES, not signals:
-- a signal assigned inside a process does not take its new value until the
-- process suspends, so a multi-step computation (align -> add -> normalize)
-- must use variables to see its own intermediate results.
--
-- Limitation: bits shifted out while aligning the smaller operand are
-- discarded (truncation); no guard/round/sticky bits are kept, so results are
-- not rounded.
architecture DATAFLOW of FLOAT_ADDER is
    constant EXP_SPECIAL : INTEGER := 128;   -- unbiased value of exponent field "11111111" (Inf / NaN)
    constant EXP_MAX     : INTEGER := 127;   -- largest finite unbiased exponent
    constant EXP_MIN     : INTEGER := -126;  -- smallest normalized exponent, also used by denormals
    constant EXP_DENORM  : INTEGER := -127;  -- unbiased value of exponent field "00000000"
    constant EXP_BIAS    : INTEGER := 127;

    -- Decoded input fields
    signal x_sign     : STD_LOGIC := '0';
    signal y_sign     : STD_LOGIC := '0';
    signal x_exponent : INTEGER := 0;
    signal y_exponent : INTEGER := 0;
    signal x_mantissa : UNSIGNED(22 downto 0) := (others => '0');
    signal y_mantissa : UNSIGNED(22 downto 0) := (others => '0');

    -- Result fields
    signal z_sign     : STD_LOGIC := '0';
    signal z_exponent : STD_LOGIC_VECTOR(7 downto 0) := (others => '0');
    signal z_mantissa : STD_LOGIC_VECTOR(22 downto 0) := (others => '0');
begin
    -- Field extraction; exponents are stored unbiased
    x_sign     <= x(31);
    x_exponent <= TO_INTEGER(UNSIGNED(x(30 downto 23))) - EXP_BIAS;
    x_mantissa <= UNSIGNED(x(22 downto 0));

    y_sign     <= y(31);
    y_exponent <= TO_INTEGER(UNSIGNED(y(30 downto 23))) - EXP_BIAS;
    y_mantissa <= UNSIGNED(y(22 downto 0));

    process(x_sign, x_exponent, x_mantissa, y_sign, y_exponent, y_mantissa)
        variable x_inf : BOOLEAN;
        variable x_nan : BOOLEAN;
        variable y_inf : BOOLEAN;
        variable y_nan : BOOLEAN;

        -- Effective exponents (denormals use EXP_MIN, not EXP_DENORM)
        variable x_exp : INTEGER;
        variable y_exp : INTEGER;

        -- Significands: bit 24 = carry, bit 23 = hidden bit, bits 22..0 = fraction
        variable x_sig : UNSIGNED(24 downto 0);
        variable y_sig : UNSIGNED(24 downto 0);
        variable r_sig : UNSIGNED(24 downto 0);

        variable r_exp  : INTEGER;
        variable r_sign : STD_LOGIC;
    begin
        -- Classify the operands. These are recomputed on every activation, so
        -- no state is carried over from a previous evaluation.
        x_inf := x_exponent = EXP_SPECIAL and x_mantissa = 0;
        x_nan := x_exponent = EXP_SPECIAL and not x_inf;
        y_inf := y_exponent = EXP_SPECIAL and y_mantissa = 0;
        y_nan := y_exponent = EXP_SPECIAL and not y_inf;

        if x_nan or y_nan or (x_inf and y_inf and (x_sign /= y_sign)) then
            -- Any NaN operand, or (+Inf) + (-Inf), yields NaN.
            -- There are many NaN encodings; the all-ones pattern is used.
            z_sign     <= '1';
            z_exponent <= (others => '1');
            z_mantissa <= (others => '1');

        elsif x_inf then
            -- Infinity plus anything finite (or a same-signed infinity)
            z_sign     <= x_sign;
            z_exponent <= (others => '1');
            z_mantissa <= (others => '0');

        elsif y_inf then
            z_sign     <= y_sign;
            z_exponent <= (others => '1');
            z_mantissa <= (others => '0');

        else
            -- Build the 25-bit significands. A zero exponent field means the
            -- operand is zero/denormal: no hidden bit, effective exponent EXP_MIN.
            x_sig := (others => '0');
            x_sig(22 downto 0) := x_mantissa;
            if x_exponent = EXP_DENORM then
                x_exp := EXP_MIN;
            else
                x_sig(23) := '1';
                x_exp := x_exponent;
            end if;

            y_sig := (others => '0');
            y_sig(22 downto 0) := y_mantissa;
            if y_exponent = EXP_DENORM then
                y_exp := EXP_MIN;
            else
                y_sig(23) := '1';
                y_exp := y_exponent;
            end if;

            -- Align: shift the operand with the smaller exponent to the right.
            -- SHIFT_RIGHT returns all zeros when the count exceeds the width.
            if x_exp < y_exp then
                r_exp := y_exp;
                x_sig := SHIFT_RIGHT(x_sig, y_exp - x_exp);
            else
                r_exp := x_exp;
                y_sig := SHIFT_RIGHT(y_sig, x_exp - y_exp);
            end if;

            -- Add magnitudes for equal signs, otherwise subtract the smaller
            -- magnitude from the larger and take the sign of the larger.
            if x_sign = y_sign then
                r_sign := x_sign;
                r_sig  := x_sig + y_sig;
            elsif x_sig > y_sig then
                r_sign := x_sign;
                r_sig  := x_sig - y_sig;
            else
                r_sign := y_sign;
                r_sig  := y_sig - x_sig;
            end if;

            -- Normalize
            if r_sig(24) = '1' then
                -- Carry out of the hidden-bit position
                r_sig := SHIFT_RIGHT(r_sig, 1);
                r_exp := r_exp + 1;
            else
                -- Leading zeros after cancellation: shift left until the hidden
                -- bit is set, but never below EXP_MIN (the result then stays
                -- denormal). The zero check guarantees termination.
                while r_sig(23) = '0' and r_sig /= 0 and r_exp > EXP_MIN loop
                    r_sig := SHIFT_LEFT(r_sig, 1);
                    r_exp := r_exp - 1;
                end loop;
            end if;

            -- Pack the result
            if r_exp > EXP_MAX then
                -- Overflow to signed infinity
                z_sign     <= r_sign;
                z_exponent <= (others => '1');
                z_mantissa <= (others => '0');
            elsif r_sig = 0 then
                -- Exact zero: keep the sign only when both operands share it
                -- (so (-0) + (-0) = -0); a cancellation gives +0.
                if x_sign = y_sign then
                    z_sign <= x_sign;
                else
                    z_sign <= '0';
                end if;
                z_exponent <= (others => '0');
                z_mantissa <= (others => '0');
            elsif r_sig(23) = '0' then
                -- Denormal result: exponent field is zero, no hidden bit
                z_sign     <= r_sign;
                z_exponent <= (others => '0');
                z_mantissa <= STD_LOGIC_VECTOR(r_sig(22 downto 0));
            else
                -- Normalized result
                z_sign     <= r_sign;
                z_exponent <= STD_LOGIC_VECTOR(TO_UNSIGNED(r_exp + EXP_BIAS, 8));
                z_mantissa <= STD_LOGIC_VECTOR(r_sig(22 downto 0));
            end if;
        end if;
    end process;

    -- Re-assemble the output word
    z(31)           <= z_sign;
    z(30 downto 23) <= z_exponent;
    z(22 downto 0)  <= z_mantissa;
end DATAFLOW;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment