/*

    F-CPU PROJECT

 WHYGEE'S ASSEMBLER PARSER ( whygee@mime.univ-paris8.fr )
 w/ labels and recursive file includes, defines and ifdefs

 still missing : "OUTPUTFILE" and "STRING"


 KNOWN PROBLEMS:
 - too few overflow checks; I should write byte-based
   multiprecision integer routines one day.
 - The #INCLUDE and #DEFINE routines do not recognise comments!

 Sept 15, 1999 : adapted for the F-CPU :
 CPU-specific stuffs begin at line 148

 */

 /* No yywrap(): switching between input files is handled "manually" in YY_INPUT. */
%option noyywrap

 /* Keywords are case insensitive, but not the variables and the labels. */
%option caseless

 /* yytext is a character array instead of a pointer into the input buffer:
    several actions below call input() after a match and keep using yytext,
    or append text to it (see the #INCLUDE and #DEFINE rules). */
%array

 /* 'include' state: entered after #INCLUDE, expects the quoted file name */
%x incl

 /* 'define' state: entered after #DEFINE, expects the identifier to define */
%x def1

 /* 'ifdef' states: ifdef/ifndef expect the tested identifier,
    skip discards the input of a conditional branch that is not assembled */
%x ifdef
%x ifndef
%x skip

%{

/*
 *
 *  COPIED TO THE OUTPUT
 *
 */

/* include BISON's #defines */
#include "syntaxe.h"

/* assembler definitions and functions */
#include "assemble.c"

/* F-CPU opcode definitions */
#include "yg_fcpu.h"

%}

 /*
  *    LEXICAL RULES
  */

 /* Identifier: a letter or underscore followed by at most 31 letters, digits
    or underscores. A single match is therefore never longer than 32
    characters; the remainder of a longer name is matched separately. */
ID [_a-zA-Z][_a-zA-Z0-9]{0,31}

%%

 /*
  * COMMENTS
  */

"//".*       {  /* line comment: discarded up to, but not including, the end of the line */ }

"/*"         {  /* eat up multiple-line comments */
   /*
    * Discards the input up to the closing star-slash. While
    * count_nested_comments is below enable_nested_comments, an opening
    * slash-star found inside the comment adds one nesting level that needs
    * its own closing marker.
    * Running out of input inside the comment is reported as an error
    * instead of looping forever.
    */
   register int _c;
   int _done=0;

   while (!_done)
   {
      _c=input();
      switch(_c)
      {
         case '/':  if (count_nested_comments<enable_nested_comments
                         && (_c=input())=='*')
                      count_nested_comments++;
                    break;
         case '*':  do _c=input(); while (_c=='*');  /* a run of stars counts as one */
                    if (_c=='/')
                    {
                      if (count_nested_comments==0)
                        _done=1;             /* found the end */
                      else
                        count_nested_comments--;
                    }
                    break;
      }
      /* _c holds the last character read. input() signals the end of the
         input with EOF or with 0, depending on the flex version; ordinary
         characters are returned as positive values. */
      if (_c<=0)
      {
         count_nested_comments=0;
         error("unterminated comment",NULL);
         _done=1;
      }
   }
}

"{"         {  /* eat up multiple-line, PASCAL style comments */
   /*
    * Discards the input up to the closing brace. While
    * count_nested_comments is below enable_nested_comments, an opening
    * brace found inside the comment adds one nesting level that needs its
    * own closing brace.
    * Running out of input inside the comment is reported as an error
    * instead of looping forever.
    */
   register int _c;
   int _done=0;

   while (!_done)
   {
      _c=input();
      if (_c=='{')
      {
         if (count_nested_comments<enable_nested_comments)
            count_nested_comments++;
      }
      else if (_c=='}')
      {
         if (count_nested_comments==0)
            _done=1;                     /* found the end */
         else
            count_nested_comments--;
      }
      else if (_c<=0)
      {
         /* input() signals the end of the input with EOF or with 0,
            depending on the flex version */
         count_nested_comments=0;
         error("unterminated comment",NULL);
         _done=1;
      }
   }
}

 /* The following is for the internal memory "space" management. Leave it... */
 /* PROPERTIES, optional blanks, then an access-mode suffix. All seven rules
    return the PROPERTIES token; yylval identifies the suffix:
    X=0, R=1, W=2, RW=3, RX=4, WX=5, RWX=6. */
 
PROPERTIES[ \t]*X   { yylval=0; return PROPERTIES; }
PROPERTIES[ \t]*R   { yylval=1; return PROPERTIES; }
PROPERTIES[ \t]*W   { yylval=2; return PROPERTIES; }
PROPERTIES[ \t]*RW  { yylval=3; return PROPERTIES; }
PROPERTIES[ \t]*RX  { yylval=4; return PROPERTIES; }
PROPERTIES[ \t]*WX  { yylval=5; return PROPERTIES; }
PROPERTIES[ \t]*RWX { yylval=6; return PROPERTIES; }
 
 /* and these are useful symbols: each one returns the token of the same name */
ALIGN { return ALIGN; }
BYTE  { return BYTE;  }
DWORD { return DWORD;  }
INDEX { return INDEX; }
NOP   { return NOP;   }
 /* disabled rule, kept for reference: */
 /* PC    { return PC;    } */
SPACE { return SPACE; }
WORD  { return WORD;  }
IF    { return IF; }

SIZE[0-3] {
  /* the token value is the digit that follows "SIZE" (fifth character of the match) */
  const int size_code = yytext[4] - '0';

  yylval = size_code;
  if (!(yylval < 4))
    error("wrong size",NULL);
  return SIZE;
}


 /*
  *
  *  KEYWORDS: add here your own CPU-specific opcodes, words, register names, etc
  *
  */
 
"=="   { return EQUAL; }
"!="   { return UNEQUAL; }
 /* dotted suffixes: [.] matches a literal dot; no value is passed in yylval */
[.]LSB { return DOTLSB; }
[.]S   { return SIMD; }
[.]B   { return DOT_B; }
[.]W   { return DOT_W; }
[.]D   { return DOT_D; }
[.]Q   { return DOT_Q; }


 /* The general purpose registers : */
R[0-9]{1,2} {
  /* decodes the one- or two-digit decimal register number that follows the 'R';
     numbers above 63 are reported as an error */
  int digit_pos;
  int regno = 0;

  for (digit_pos = 1; digit_pos < 3 && yytext[digit_pos]; digit_pos++)
    regno = (regno * 10) + (yytext[digit_pos] - '0');

  yylval = regno;
  if (yylval > 63)
    error ("wrong register name",NULL);
  return R;
}


LOOP      { return LOOP; }
LOOPENTRY { return LOOPENTRY; }
MOVE      { return MOVE; }
MOVS      { return MOVS; }
MOVZ      { return MOVZ; }
MOVI      { return MOVI; }
MOVIS     { return MOVIS; }
BR        { return BR; }
 /* shifts and rotates, long names */
SHIFTL    { return SHIFTL; }
SHIFTR    { return SHIFTR; }
SHIFTSR   { return SHIFTSR; }
ROTL      { return ROTL; }
ROTR      { return ROTR; }
SHIFTLI   { return SHIFTLI; }
SHIFTRI   { return SHIFTRI; }
SHIFTSRI  { return SHIFTSRI; }
ROTLI     { return ROTLI; }
ROTRI     { return ROTRI; }
 /* short aliases: they return the same tokens as the long names above */
SHL       { return SHIFTL; }
SHR       { return SHIFTR; }
SHSR      { return SHIFTSR; }
ROL       { return ROTL; }
ROR       { return ROTR; }
SHLI      { return SHIFTLI; }
SHRI      { return SHIFTRI; }
SHSRI     { return SHIFTSRI; }
ROLI      { return ROTLI; }
RORI      { return ROTRI; }
LOGIC[0-1]{4} {
   /* the four binary digits after "LOGIC" become a 4-bit value in yylval,
      the first digit being the most significant bit */
   int pos;
   int bits = 0;

   for (pos = 5; pos <= 8; pos++)
      bits = (bits << 1) | (yytext[pos] & 1);

   yylval = bits;
   return LOGIC;
}
LOGICI[0-1]{4} {
   /* the four binary digits after "LOGICI" become a 4-bit value in yylval,
      the first digit being the most significant bit */
   int pos;
   int bits = 0;

   for (pos = 6; pos <= 9; pos++)
      bits = (bits << 1) | (yytext[pos] & 1);

   yylval = bits;
   return LOGICI;
}
ADD       { return ADD; }
SUB       { return SUB; }
ADDI      { return ADDI; }
SUBI      { return SUBI; }


 /*
  *  End of CPU-specific keywords.
  *  ADD, SUB, ADDI and SUBI above, like the other plain mnemonics, leave
  *  yylval untouched: the token code alone identifies the instruction.
  */ 



  /* ARCHITECTURE FILE AND OTHER ASSEMBLER KEYWORDS */
  /* They are here to prevent someone from using them */
  /* as variable names, or to compile other people's files. */

 /* PARTICULAR, "NON STANDARD" KEYWORDS: */
 /*    they are specific to the GRAZER   */
EOF { return 0; } /* end the file: token 0 is the end-of-input marker for the parser */
ORG { return ORG; }
EQU { return EQU; }
IP  { return IP; } /* beware !!! IP != PC */
NESTED_COMMENTS { enable_nested_comments = 1000; /* the comment rules accept up to 1000 nested levels */ }
NO_NESTED_COMMENTS { enable_nested_comments = 0; /* the comment rules ignore inner comment openers */ }

 /***********
  *         *
  * INCLUDE *
  *         *
  ***********/

#INCLUDE[ \t]*  { BEGIN(incl); }

<incl>\"[^"\n]*\"  {
  /*
   * #INCLUDE "file": opens "file", pushes a node for it in front of the
   * include list (include_prec) and restarts the line count at 1.
   * The name ends at the first closing quote, so quotes in the rest of the
   * line (e.g. in a trailing comment) do not become part of the file name.
   * The rest of the line is discarded.
   */
  register struct include_node *_p;
  register int _c;

/* fixes the file name, removing the closing " */
  yytext[--yyleng]=0;  /* ASCIIZ */

/* we check here if the file has already been included so that we can avoid "loops": */
  _p=include_prec;   /* tail of the linked list */

  /* this is a while(){} because it allows to include a file named "stdin" without error */
  /* (the head node, i.e. the main file, is never compared here) */
  /* otherwise, recursive inclusions of the main file will be caught at the second pass */
    /* anyway, i renamed it after to "<stdin>" and it's a naughty hack... but it works. */
  while (_p->next != _p) /* head ? */
  {
/* string comparison */
    if (strcmp(_p->filename,yytext+1)==0)
      error("can't recurse inclusions",NULL);
    _p = _p->next;
  }

/* eats the rest of the line; also stops when the input ends before a newline */
/* (input() then returns EOF or 0, depending on the flex version) */
  do
    _c=input();
  while (_c!='\n' && _c>0);

/* backing up */
/* yyleng still counts the opening quote, which leaves room for the final NUL */
  _p=mem_increase(sizeof(struct include_node)+yyleng);
  include_prec->node_line_number = Line_Number;

/* opens the file */
  if ((_p->input_file=fopen(yytext+1,"rb"))==NULL)
  {
/* builds the message `name' can't be opened in place, inside yytext */
    yytext[0]='`';
    strcpy(yytext+yyleng,"' can't be opened");
    error(yytext,NULL);
  }

  memcpy(_p->filename,yytext+1,yyleng);
  _p->next = include_prec;
  include_prec=_p;

  Line_Number = 1;
  BEGIN(INITIAL);
}

<incl>[^"] {  error("wrong parameter for #INCLUDE",NULL);  }

 /************
  *          *
  *  DEFINE  *
  *          *
  ************/

#DEFINE[ \t]*  { BEGIN(def1); }

<def1>{ID} {
  /*
   * #DEFINE name text: collects the rest of the line (including the final
   * newline) in a malloc'ed buffer and registers it with create_define().
   * - A backslash directly followed by a newline continues the definition
   *   on the next line; the backslash is dropped, the newline is kept.
   * - Any other backslash is stored unchanged.
   * - If the input ends before a newline, the definition is closed as if a
   *   newline had been read.
   * The buffer is handed to create_define() and not freed here
   * (presumably create_define() keeps it - confirm in assemble.c).
   */
  int c;
  int continued;
  char *d;
  int buffer_size=0,
      limit=16;

  if (define_exists(yytext,yyleng))
  {
    warning (strcat(yytext," already defined"),NULL);
    yytext[yyleng]=0;  /* removes the appended message: yytext is the bare name again */
  }

  if ((d=malloc(limit))==NULL)
    error("memory exhausted",NULL);

/* reads the definition */
  do
  {
/* at most two characters are stored per iteration; buffer_size stays below */
/* limit, so d[buffer_size] remains a valid slot for the read-ahead character */
/* that the identifier rule stores there when the define is expanded */
    if (buffer_size+2>=limit)
      if ((d=realloc(d,1+(limit+=256)))==NULL)
        error("memory exhausted",NULL);

    continued=0;
    c=input();
/* tries to find an escaped new line */
    if (c=='\\')
    {
      c=input();
      if (c=='\n')
        continued=1;
      else
        d[buffer_size++]='\\';
    }
/* input() returns EOF or 0 (depending on the flex version) when the input ends */
    if (c<=0)
      c='\n';
    d[buffer_size++]=(char)c;
  }
  while (c!='\n' || continued);

  create_define(yytext,yyleng,d,buffer_size);

  BEGIN(INITIAL);
}

<def1>[^_a-zA-Z]  { error ("only identifiers can be defined",NULL); }

 /*************
  *           *  This crap seems to work in most common cases
  *   IFDEF   *  but is not absolutely 100% reliable with very
  *           *  complex structures (no check is performed on
  *************  the skipped parts). Well, at l(e)ast it works. */

#IFDEF[ \t]*  { BEGIN(ifdef); }
<ifdef>{ID} {
  /* one more open conditional; the 1 pushed on else_count allows one #ELSE */
  if (++ifdef_count>=MAX_IFDEF)
    error("too many #IF(N)DEF",NULL);
  else_count=(else_count << 1)|1;

  if (!define_exists(yytext,yyleng))
    BEGIN(skip); /* not defined: skips until #ELSE or #ENDIF */
  else
    BEGIN(INITIAL); /* defined: execute */
}

#IFNDEF[ \t]*  { BEGIN(ifndef); } /* same as #IFDEF with the test inverted */
<ifndef>{ID} {
  /* one more open conditional; the 1 pushed on else_count allows one #ELSE */
  if (++ifdef_count>=MAX_IFDEF)
    error("too many #IF(N)DEF",NULL);
  else_count=(else_count << 1)|1;

  if (define_exists(yytext,yyleng))
    BEGIN(skip); /* defined: skips until #ELSE or #ENDIF */
  else
    BEGIN(INITIAL); /* not defined: execute */
}

<ifndef,ifdef>[^_a-zA-Z]  { error ("only identifiers can be defined",NULL); }

<skip>#IFDEF { /* nested conditional inside a skipped part: only counted */
  skip_count++;
}

<skip>#IFNDEF { /* nested conditional inside a skipped part: only counted */
  skip_count++;
}

<skip>#ENDIF {
  if (skip_count>0) /* closes a conditional that was opened while skipping */
    skip_count--;
  else
  {    /* closes the conditional that started the skipping */
    ifdef_count--;
    else_count>>=1; /* "pops" the else flag off the stack */
    BEGIN(INITIAL); /* execute */
  }
}

<skip>#ELSE {
  if (skip_count==0) /* the #ELSE belongs to the conditional that started the skipping */
  {
    if ((else_count & 1)!=1) /* if #ELSE not allowed */
      error("unexpected #ELSE",NULL);
    else_count=else_count & ~1; /* masks LSB out: no more #ELSE allowed */
    BEGIN(INITIAL); /* execute */
  }
  /* else: the #ELSE belongs to a nested, skipped conditional; nothing to do */
}
<skip>.      { /* any other character is discarded */ }
<skip>\n     { /* and so are the line ends */ }

#ELSE {
  /* reached at the end of an executed branch: the rest is skipped up to #ENDIF */
  if (!(else_count & 1))
    error("unexpected #ELSE",NULL);
  else_count &= ~1; /* clears the LSB: no further #ELSE for this conditional */
  BEGIN(skip);
}

#ENDIF {
  /* closes the innermost conditional and goes on executing */
  if (ifdef_count<=0)
    error("unexpected #ENDIF",NULL);
  --ifdef_count;
  else_count>>=1; /* drops the flag of the closed conditional */
}

 /*
  *  NUMBERS (Beware of the sizeof(int)!!!)
  */

B#[01]{1,48}  { /* evaluates the binary value */
  register int pos;

  /* the digits start after the two-character "B#" prefix */
  yy_number=0;
  for (pos=2; pos<yyleng; pos++)
    yy_number=(yy_number<<1)|(yytext[pos] & 1); /* you better use 64 b ints ! */
  return NUMBER;
}

0[0-7]{0,16}  { /* evaluates the octal value */
  register int pos;

  /* the leading 0 is folded in like any other digit: it contributes nothing */
  yy_number=0;
  for (pos=0; pos<yyleng; pos++)
    yy_number=(yy_number<<3)|(yytext[pos] & 7);
  return NUMBER;
}

[1-9][0-9]{0,14}  { /* evaluates the decimal value */
  register int pos;

  /* accumulates the digits from left to right */
  yy_number=0;
  for (pos=0; pos<yyleng; pos++)
    yy_number=(yy_number*10)+(yytext[pos]-'0');
  return NUMBER;
}

(0x|H#)[0-9a-fA-F]{1,12}  { /* evaluates the hexadecimal value */
  register int pos;

  /* both prefixes ("0x" and "H#") are two characters long */
  yy_number=0;
  for (pos=2; pos<yyleng; pos++)
    yy_number=(yy_number<<4)|hex2int(yytext[pos]);
  return NUMBER;
}

 /* insert your FP conversion here (and then send it to me :-D) */

 /*
  *  LABELS AND IDENTIFIERS
  */

^{ID}:  { /* label declaration: an identifier at the start of a line, followed by ':' */
   /* yyleng-1 leaves the trailing ':' out of the label name */
   referenced_label=create_label(yytext, yyleng-1, LABEL_DECLARED);
   return LABEL;
}

{ID}    { /* any other identifier: known label, #DEFINE'd name, or new label */
  register struct label_struct *_p;
  register struct define_node *_q;

/* test if it's already defined as a label */
  if ((_p = label_exists(yytext,yyleng))!=NULL)
  {
    referenced_label=_p;
    if (_p->undefined_list !=NULL)
      return NEW_LABEL; /* second or later occurrence of an undefined label */
    else
      return LABEL_REFERENCE;
  }

/* test if it's already "#define"d */
  if ((_q = define_exists(yytext,yyleng))!=NULL)
  {
/* prevents "loops": char_counter is set to 0 below when the expansion starts; */
/* presumably it is put back to -1 elsewhere once the text has been read */
    if (_q->char_counter != -1)
      error ("can't recurse #DEFINEs",NULL);
/* switch to defined buffer; if another define is already being scanned, it is */
/* remembered in last_scan (presumably so that it can be resumed afterwards) */
    if (scan_from_define)
      _q->last_scan=current_define;
    else
      _q->last_scan=NULL;
    scan_from_define=1;
    _q->char_counter=0;
    current_define=_q;
/* prevents a buffer clash by saving the read character: one character is */
/* read here and stored right after the definition text */
    _q->buffer[_q->buffer_size]=input();
/* no token is returned in this case: the scanner simply goes on */
  }
  else
  {
/* no -> return an uninitialised label */
    referenced_label=create_label(yytext,yyleng,0);
    return NEW_LABEL;
  }
}

[ \n\t\f\x0B\x0C\x0D]   { /* eat up blanks, line ends and other control characters (VT, FF, CR) */ }

. { return yytext[0]; /* any other single character is its own token */ }

%%

/* include BISON's "yyparse" */
#include "syntaxe"

/*
 * Program entry point.
 *
 * With a file name as first argument the source is read from that file and
 * the listing is written to "out.lst"; without argument the source is read
 * from stdin. After parsing, the instruction buffers of all used spaces are
 * dumped in hexadecimal, followed by the label table.
 *
 * Returns 0 at the end of the run, 1 when the buffers or the files could
 * not be set up (should error() return in these cases).
 */
int main(int argc, char **argv)
{
  register struct label_struct *_p;
  int i,j;

/* one zero-filled buffer of MAX_INSTR*4 bytes per space */
  for (i=0; i<MAXSPACE; i++){
    if ((instr_tab[i]=calloc(MAX_INSTR,4)) == NULL)
    {
      error ("can't allocate any memory",NULL);
      return 1;
    }
  }
  memset (instr_tab_offset,0,sizeof(instr_tab_offset));
  memset (original_index,0,sizeof(original_index));
  memset (property,EXECUTABLEREADWRITE,sizeof(property));
  create_define("GRAZER",6,"0.003",5);
  

/* filename: */
  ++argv, --argc;
  if (argc > 0)
  {  /* file mode */
    /* +1: room for the terminating NUL written by strcpy */
    include_prec=(struct include_node *)mem_increase(sizeof(*include_prec)+strlen(argv[0])+1);
    strcpy(include_prec->filename,argv[0]);
    if ((include_prec->input_file = fopen( argv[0], "rb" ))== NULL)
    {
      error("can't open source file",NULL);
      return 1;
    }
    if ((list_output = fopen("out.lst", "wb" ))== NULL)
    {
      error("can't create out.lst",NULL);
      return 1;
    }
  }
  else
  {  /* interactive mode (input and output through the console) */
    /* sizeof("<stdin>") counts the terminating NUL */
    include_prec=(struct include_node *)mem_increase(sizeof(*include_prec)+sizeof("<stdin>"));
    strcpy(include_prec->filename,"<stdin>");
    include_prec->input_file=stdin;
    /* list_output is not assigned anywhere else in this function: fall back
       to the console if nothing has initialised it */
    if (list_output==NULL)
      list_output=stdout;
  }

/* closes the linked list: the head node points to itself */
  include_prec->next = include_prec;

/* *********** run YACC, euh, BISON. *********** */

  yyparse();

/* *********** cleanup *********** */

/* flush the instruction buffer */
  fprintf(list_output,"\nHexadecimal dump:\n");
  for (j=0; j<MAXSPACE; j++) {
    if (instr_tab_offset[j]) {  /* if the space is used, flush it to the file */
      fprintf(list_output,"\n$Space %d\n$Properties %s\n$Index %d\n",
        j,properties[property[j]],original_index[j]);
      fprintf(list_output,"$Size %d\n",instr_tab_offset[j]);
      /* one line of four bytes per instruction word */
      for (i=0; i<instr_tab_offset[j]; i+=4)
        fprintf(list_output,"%02X%02X%02X%02X\n",
         instr_tab[j][i],  instr_tab[j][i+1],   /* you can fix the endianness here */
         instr_tab[j][i+2],instr_tab[j][i+3]);
      fprintf(list_output,"\n");
    }
  }

  fclose(include_prec->input_file);
  if (ifdef_count!=0)
    error("Endless #IF(N)DEF",NULL);

/* print a little report: */
  if (label_prec==NULL) {
    fprintf(list_output,"\nNo Label table.\n");
  }
  else
  {
    fprintf(list_output,"\nLabel table:\n");

    /* label check (not complete) */
    for (_p=label_prec; _p!=NULL; _p=_p->next)
    {
      fprintf(list_output,"line %03d, \"%s\"=0X%04X (%d)\n",_p->line,_p->string+1,_p->value,_p->space);
      if ((_p->properties & LABEL_DECLARED) == 0)
      {
        /* the error is reported at the line recorded in the label */
        Line_Number = _p->line;
        error ("symbol declared but not defined",_p);
      }
    }
  }

  fprintf(list_output,"\nEOF\n");
  fclose(list_output);
  return 0;
}
