/*
f-cpu/qdcpoc2/qdcpoc2.h
"Quick and dirty coded proof of concept" for the F-CP0 pipeline, 2nd version !
created Wed Jul 11 01:36:14 2001
version Sun Aug  5 04:34:50 2001 : switch to QDCPOC-][

(C) whygee@f-cpu.org all rights reserved (you're warned, it's shit !)

this file is included by f-cpu/qdcpoc2/main.c

*/

/* the usual libraries */
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>  /* for fstat() */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* F-CPU compatibility interface : */
#include <f-cpu_config.h>
/* "../" must be in the include path ! */

/****************************************************
      FC0 state variables and pipeline latches :
 ****************************************************/

/********************
  The Register set
  (or R7 for short)
 ********************/

/* interface for reading in the R7 : */
/* Register numbers presented to the three read ports (address in). */
unsigned int R7_read_address_0;
unsigned int R7_read_address_1;
unsigned int R7_read_address_2;

/* Values returned on the three read ports (data out). */
UMAX R7_read_port_0;
UMAX R7_read_port_1;
UMAX R7_read_port_2;

/* interface for writing in the R7 : (in) */
/* Values to be stored through the two write ports (data in). */
UMAX R7_write_port_0;
UMAX R7_write_port_1;

/* Write masks. Original note: "each 5 bits enables a sub-bank in the
   selected register".
   NOTE(review): presumably a 5-bit mask with one bit per sub-bank --
   confirm against the code that drives these signals. */
unsigned int R7_write_mask_0;
unsigned int R7_write_mask_1;

/* Destination register numbers (address in); annotated range : 1-63. */
unsigned int R7_write_address_0;
unsigned int R7_write_address_1;

/* R7 flags (inout) : */
/* The combination of all the partial 0 flags. */
u64 R7_ZERO;

/* For the LSB and MSB flags no partial results are needed:
   all 63 bits are exploited. */
u64 R7_LSB;
u64 R7_MSB;


/********************
   The opcode LUT :
 ********************/

/* This structure describes the properties and
   the information needed for each opcode. It is
   only a first preliminary version; a lot of
   things are missing. */
/* Per-opcode property record: a set of one-bit flags. */
typedef struct {

  /* ---- latency : indicates which queue entry will be filled ---- */
  unsigned latency_zero     : 1;  /* nop (could be removed ?) */
  unsigned latency_direct   : 1;  /* move, loadcons */
  unsigned latency_cycle_1  : 1;  /* rop2, inc */
  unsigned latency_cycle_2  : 1;  /* ASU */
  unsigned latency_multiply : 1;  /* imul (more ports ?) */
  unsigned latency_idiv     : 1;  /* idiv */

  /* ---- opcode format : indicates which fields are necessary
          before we issue the instruction ---- */
  unsigned need_src0 : 1;
  unsigned need_src1 : 1;
  unsigned need_src2 : 1;
  unsigned need_rw2  : 1;
  unsigned need_cond : 1;

  /* ---- queue reservation ---- */

  /* We want 1 write slot
     (in this case, src2 is written to either slot0 or slot1). */
  unsigned need_w1 : 1;

  /* We want 2 simultaneous write slots
     (in this case, src2 and rw2 are written to the 2 slots). */
  unsigned need_w2 : 1;

  /* ---- use of the constants : it directly drives the Xbar ---- */
  unsigned op_imm8  : 1;
  unsigned op_imm16 : 1;

  /* More will be added as special cases are discovered. */

} opcode_lookup_type;

/* Table of 256 property records. */
opcode_lookup_type opcode_LUT[256];

/* A single stand-alone property record. */
opcode_lookup_type opcode_type;


/********************
   Fetcher :
 ********************/

/* Physical addressing Instruction Pointer. */
UMAX FETCH_PIP;

/* FETCH_NIP (Next Instruction Pointer, CIP+4, physical address) is
   currently disabled and therefore not declared. */

/* Current virtual addressing page for the CIP
   (copy of the page base in the TLB). */
UMAX FETCH_current_page_base;

/* Mask for selecting the bits. */
UMAX FETCH_current_page_size;

/* Virtual address of the CIP, compounded from FETCH_PIP and the
   FETCH_current_page_* values. */
UMAX FETCH_VIP;

/* != 0 when a new instruction must be fetched. */
bool fetch_next_instruction;

/* The currently fetched instruction. */
u32 instruction_buffer;

/* Size of the fetcher buffer.
   NOTE(review): the original note reads "number of instructions in the
   buffer (size in bytes)" -- confirm which unit is actually stored. */
off_t fetcher_buffer_size;

/* Fetcher buffer; starts out as NULL. */
char *fetcher_buffer = NULL;


/********************
   The "Xbar" :
 ********************/

/* inputs : */
/* Control signals : select the source of each read port. */
int Xbar_read_from_0;
int Xbar_read_from_1;
int Xbar_read_from_2;

/* Control signals : select the destination of each write port. */
int Xbar_write_to_0;
int Xbar_write_to_1;

/* Data coming from the register set. */
UMAX Xbar_R7_read_port_0;
UMAX Xbar_R7_read_port_1;
UMAX Xbar_R7_read_port_2;

/* Data coming from the decoder. */
UMAX Xbar_read_immediate;

/* Data coming from the ROP2 EU. */
UMAX Xbar_EU_ROP2;

/* Data coming from the decoder (which delays the fetcher). */
UMAX Xbar_read_CIP;

/* Outputs ("buffer" mode in VHDL, so that some internal bypass
   can be done). */

/* These go to the EUs. */
UMAX Xbar_read_port_0;
UMAX Xbar_read_port_1;
UMAX Xbar_read_port_2;

/* These go to R7. */
UMAX Xbar_write_port_0;
UMAX Xbar_write_port_1;


/********************
    the ROP2 EU :
 ********************/

/* inputs : */
/* Operands : copies of the Xbar Read outputs. */
UMAX ROP2_input_0;
UMAX ROP2_input_1;
UMAX ROP2_input_2;

/* Function selection : pre-buffered booleans. */
UMAX ROP2_function_bit0;
UMAX ROP2_function_bit1;
UMAX ROP2_function_bit2;
UMAX ROP2_function_bit3;

/* Triggers the operation. */
bool ROP2_exec;

/* Mode, from the instruction decoder. */
int ROP2_mode;

/* outputs : */

/* Data output. */
UMAX ROP2_output_port;


/* much more to come here !!! */


#ifdef otherssssssss
/* Everything down to the matching #endif is compiled only when the macro
   "otherssssssss" is defined; this file itself never defines it.
   NOTE(review): several names below (Xbar_EU_ROP2, ROP2_mode,
   instruction_buffer) are also declared in the unconditional part of this
   file. Xbar_EU_ROP2 is declared there as UMAX but as int here, so enabling
   this section as-is would presumably produce a conflicting-type error
   unless UMAX happens to be int -- confirm before enabling. */

int Xbar_EU_ROP2,       /* delays the signal */
    Xbar_ROP2_mode;


/* Output of the other EUs : */
UMAX Xbar_ASU_port0,
     Xbar_ASU_port1;


/********************
  the decoder :
 ********************/

/* outputs to the Xbar/EUs : */
UMAX DEC_imm_port,  /* immediate data */
     DEC_NIP,  /* Next Instruction Pointer (CIP+4) */
     DEC_CIP;  /* Current Instruction Pointer */

/* Decoder control and status signals, all plain ints. */
int EU_ROP2,       /* enable signal (triggers the computation) */
    ROP2_mode,     /* 0: normal, 1: AND, 2: OR, 3: select  */
    ROP2_function, /* 0-7, + translated & buffered during Xbar read cycle */
    emit_instruction,  /* set when the instruction can be issued. */
    valid_opcode,
    pointer_ready,     /* currently unused */
    condition_true,    /* warning : doubtful polarity. */
    register0_ready,   /* set to 1 if the corresponding register */
    register1_ready,   /* is ready (not used in the FIFO) */
    register2_ready,
    RW2_ready,
    register0_on_Xbar0,   /* bypass condition : */
    register1_on_Xbar0,   /*  */
    register2_on_Xbar0,
    RW2_on_Xbar0,
    register0_on_Xbar1,
    register1_on_Xbar1,
    register2_on_Xbar1,
    RW2_on_Xbar1;

/* internal : */
u8 current_opcode,
   RW2,
   imm8;
u16 imm16, loadcons_shift;
int condition_code; /* field from the opcode. Used during
 issue to determine if the condition is a valid/legal combination. */

/* Communication with the Fetcher : */
u32 instruction_buffer; /* the currently decoded instruction */
bool dec_next_instruction; /* signal that asks the fetcher to do its job... */


/********************
    Scheduler :
 ********************/

/* One entry of the scheduler queue: two write slots (suffix 0 and 1),
   each made of a register number, a source unit, a write mask and a
   busy bit. */
typedef struct s_queue_entry {
  unsigned int write0 : 6;   /* register number */
  unsigned int unit0 :  3;   /* from which unit to get the data */
  unsigned int mask0 :  5;   /* write mask */
  unsigned int busy0 :  1;   /* this is set when the slot is not empty */
  unsigned int write1 : 6;   /* same four fields, for the second slot */
  unsigned int unit1 :  3;
  unsigned int mask1  : 5;
  unsigned int busy1 :  1;
} t_queue_entry;

/* depth of the scheduler's FIFO */
#define DEPTH_QUEUE 5

/* The scheduler queue itself: DEPTH_QUEUE entries. */
t_queue_entry sched_queue[DEPTH_QUEUE];

/********************
   IDIV :
 ********************/

/* IDIV state. The u8 names below mirror the fields of t_queue_entry
   (destination, mask, unit, busy).
   NOTE(review): presumably they hold a pending write-back for the
   divider -- confirm against the code that uses them. */
int IDIV_complete, IDIV_counter;
u8 IDIV_write_destination, IDIV_write_destination_bis,
   IDIV_write_mask0, IDIV_write_mask1,
   IDIV_unit0, IDIV_unit1,
   IDIV_busy0, IDIV_busy1;

#endif
