`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////
// Company: 
// Engineer: 
// 
// Create Date: 01/14/2022 03:14:43 PM
// Design Name: 
// Module Name: polynomial_64_multiplication
// Project Name: 
// Target Devices: 
// Tool Versions: 
// Description: 
// 
// Dependencies: 
// 
// Revision:
// Revision 0.01 - File Created
// Additional Comments:
// 
//////////////////////////////////////////////////////////////////////////////////

// Unoptimized bit-parallel modular multiplier for the modulus q = 2^64.
// Reducing modulo 2^64 amounts to keeping the least significant 64 bits of the
// full 128-bit product, so no dedicated reduction logic is required.
//
// Latency is two clock cycles: the truncated product passes through two
// registers before it reaches mul_out, and mul_start passes through a matching
// two-stage delay line to produce mul_done.
//
// Ports:
//   clk       - clock; every register updates on its rising edge
//   mul_ina   - first 64-bit operand
//   mul_inb   - second 64-bit operand
//   mul_start - start strobe; reappears on mul_done two rising edges later
//   mul_out   - (mul_ina * mul_inb) mod 2^64, two rising edges after the operands
//   mul_done  - mul_start delayed by two rising edges
//
// The module has no reset input; its registers are simply overwritten on every
// rising clock edge.
module modular_multiplier(
    input  wire        clk,
    input  wire [63:0] mul_ina,
    input  wire [63:0] mul_inb,
    input  wire        mul_start,
    output wire [63:0] mul_out,
    output wire        mul_done
);

    // Full-width combinational product (a very unoptimized 64x64 integer multiplier).
    wire [127:0] full_product = mul_ina * mul_inb;

    reg [63:0] stage1, stage2;  // two-deep data pipeline
    reg [1:0]  done_pipe;       // matching delay line for the start strobe

    always @(posedge clk)
    begin
        stage1    <= full_product[63:0];          // reduction modulo 2^64
        stage2    <= stage1;
        done_pipe <= {done_pipe[0], mul_start};   // shift the strobe one stage further
    end

    assign mul_out  = stage2;
    assign mul_done = done_pipe[1];

endmodule
 
 
 
 

// Top-level controller that reads operand words from a BRAM, feeds them to the
// 64-bit modular multiplier and writes the result words back to the BRAM.
//
// NOTE: the datapath/FSM below is the "dummy code example" of the original
// template and is meant to be replaced by a real polynomial multiplier. As
// written it performs a word-wise product: for each address i from 0 up to
// LAST_WORD_ADDR it reads word i of operand A (read_poly_op_sel = 0) and word i
// of operand B (read_poly_op_sel = 1), multiplies them modulo 2^64 and writes
// the product to result address i.
//
// Parameters:
//   LAST_WORD_ADDR   - write address of the final result word; the FSM enters
//                      its terminal state after writing it (default 255, the
//                      value that was previously hard-coded)
//
// Ports:
//   clk              - clock
//   rst              - synchronous reset of the FSM state, active high
//   read_poly_op_sel - selects which of the two polynomial operands is read
//   read_address     - BRAM address for reading a 64-bit word
//   write_address    - BRAM address where the result word will be written
//   wea              - BRAM write enable; data64_out is written when this is 1
//   data64_in        - word read from the BRAM
//   done             - 1 once the FSM has reached its terminal state; stays 1
//                      until rst becomes 1
//   data64_out       - word to be written to the BRAM at write_address
module polynomial_multiplication #(
    parameter [9:0] LAST_WORD_ADDR = 10'd255
) (
    input  wire        clk,
    input  wire        rst,
    output wire        read_poly_op_sel,
    output wire [9:0]  read_address,
    output wire [9:0]  write_address,
    output wire        wea,
    input  wire [63:0] data64_in,
    output wire        done,
    output wire [63:0] data64_out
);

// Start: Dummy code example. DELETE it.
// In this dummy, two words are read from the memory and multiplied.
// The product is written back to the memory in 64-bit chunks.

// FSM state encoding. Three bits allow at most 8 states; widen if more are needed.
localparam [2:0] S_INIT      = 3'd0,  // clear both address counters
                 S_READ_A    = 3'd1,  // initiate read of the operand-A word
                 S_READ_B    = 3'd2,  // capture the A word, initiate read of the B word
                 S_LOAD_B    = 3'd3,  // capture the B word
                 S_MUL_START = 3'd4,  // start the modular multiplication
                 S_MUL_WAIT  = 3'd5,  // wait for mul_done
                 S_WRITE     = 3'd6,  // write the result, advance both addresses
                 S_DONE      = 3'd7;  // terminal state

reg [2:0] state, nextstate;

reg [9:0] read_address_reg, write_address_reg;
reg inc_read_address_reg, inc_write_address_reg;
reg rst_read_address_reg, rst_write_address_reg;
reg wea_reg;
reg read_poly_op_sel_reg;

reg [63:0] reg_a, reg_b;
reg en_reg_a, en_reg_b;

wire [63:0] mul_ina, mul_inb, mul_out;
reg mul_start;
wire last_result_write, mul_done;

modular_multiplier Mult(.clk(clk), .mul_ina(mul_ina), .mul_inb(mul_inb), .mul_start(mul_start), .mul_out(mul_out), .mul_done(mul_done));

assign read_address     = read_address_reg;
assign write_address    = write_address_reg;
assign read_poly_op_sel = read_poly_op_sel_reg;
assign wea              = wea_reg;
assign mul_ina          = reg_a;
assign mul_inb          = reg_b;
assign data64_out       = mul_out;

// ---------------------------------------------------------------------------
// Data path
// ---------------------------------------------------------------------------

// Operand registers: capture the BRAM read bus when enabled by the FSM.
always @(posedge clk)
begin
    if(en_reg_a)
        reg_a <= data64_in;
    if(en_reg_b)
        reg_b <= data64_in;
end

// Read address counter. It has no dedicated reset; the FSM clears it.
always @(posedge clk)
begin
    if(rst_read_address_reg)
        read_address_reg <= 10'd0;
    else if(inc_read_address_reg)
        read_address_reg <= read_address_reg + 10'd1;
end

// Write address counter. It has no dedicated reset; the FSM clears it.
always @(posedge clk)
begin
    if(rst_write_address_reg)
        write_address_reg <= 10'd0;
    else if(inc_write_address_reg)
        write_address_reg <= write_address_reg + 10'd1;
end

// High while the word currently being written is the last result word.
assign last_result_write = (write_address_reg == LAST_WORD_ADDR);

// ---------------------------------------------------------------------------
// Control path
// ---------------------------------------------------------------------------
// Generally, a state machine produces control signals only and does not deal
// with data variables directly ("control-path" / "data-path" separation).
// Control signals are only a few bits wide; they select multiplexer inputs,
// enable registers, etc. Signals assigned inside an always block must be
// declared 'reg' even though the two blocks below are purely combinational.

// State register with synchronous, active-high reset.
always @(posedge clk)
begin
    if(rst)
        state <= S_INIT;
    else
        state <= nextstate;
end

// Output logic (depends on the state only). Combinational, hence blocking
// assignments and an implicit sensitivity list. Every control signal gets an
// inactive default first, so each state lists only what it asserts and no
// latch can be inferred.
always @(*)
begin
    rst_read_address_reg  = 1'b0;
    rst_write_address_reg = 1'b0;
    inc_read_address_reg  = 1'b0;
    inc_write_address_reg = 1'b0;
    read_poly_op_sel_reg  = 1'b0;
    wea_reg               = 1'b0;
    en_reg_a              = 1'b0;
    en_reg_b              = 1'b0;
    mul_start             = 1'b0;

    case(state)
    S_INIT:      begin   // Initial state: clear both address counters.
                     rst_read_address_reg  = 1'b1;
                     rst_write_address_reg = 1'b1;
                 end
    S_READ_A:    begin   // Initiate read of the operand-A word (select = 0, all defaults).
                 end
    S_READ_B:    begin   // Initiate read of the operand-B word; the A word is
                         // expected on the read bus now and is stored in reg_a.
                         // Assumes data follows the address/select by one cycle -- TODO confirm against the BRAM.
                     read_poly_op_sel_reg = 1'b1;
                     en_reg_a             = 1'b1;
                 end
    S_LOAD_B:    begin   // Store the B word (expected on the read bus now) in reg_b.
                     en_reg_b = 1'b1;
                 end
    S_MUL_START: begin   // Start the modular multiplication.
                     mul_start = 1'b1;
                 end
    S_MUL_WAIT:  begin   // Wait for completion; mul_start is kept asserted here.
                     mul_start = 1'b1;
                 end
    S_WRITE:     begin   // Write the result word to BRAM; both addresses are
                         // incremented on the same clock edge that performs the write.
                     inc_read_address_reg  = 1'b1;
                     inc_write_address_reg = 1'b1;
                     wea_reg               = 1'b1;
                 end
    S_DONE:      begin   // Terminal state: hold both address counters at zero.
                     rst_read_address_reg  = 1'b1;
                     rst_write_address_reg = 1'b1;
                 end
    default:     begin   // Unknown state: keep every control signal inactive.
                 end
    endcase
end

// Next-state logic.
always @(*)
begin
    case(state)
    S_INIT:      nextstate = S_READ_A;
    S_READ_A:    nextstate = S_READ_B;
    S_READ_B:    nextstate = S_LOAD_B;
    S_LOAD_B:    nextstate = S_MUL_START;
    S_MUL_START: nextstate = S_MUL_WAIT;
    S_MUL_WAIT:  begin
                     if(mul_done)
                         nextstate = S_WRITE;
                     else
                         nextstate = S_MUL_WAIT;
                 end
    S_WRITE:     begin
                     if(last_result_write)
                         nextstate = S_DONE;     // the final word is being written
                     else
                         nextstate = S_READ_A;   // process the next pair of words
                 end
    S_DONE:      nextstate = S_DONE;             // only rst leaves this state
    default:     nextstate = S_INIT;
    endcase
end

assign done = (state == S_DONE);

// End: Dummy code example. DELETE it.

endmodule


