vortex/src/obj.cpp

/*******************************************************************************
 HARPtools by Chad D. Kersey, Summer 2011
*******************************************************************************/
#include "include/types.h"
#include "include/obj.h"
#include "include/util.h"
#include "include/asm-tokens.h"
#include "include/debug.h"

#include <iostream>
#include <stdlib.h>
#include <FlexLexer.h>

#include <cctype>
#include <cstdio>

#include <map>

using namespace std;
using namespace Harp;
using namespace HarpTools;

// Value attached to the most recent token. AsmReader::read() reads it after
// each yylex() call: `s` for textual tokens (symbols, labels, strings,
// instruction names, paren expressions) and `u` for numeric ones.
// Defined outside this file -- presumably by the lexer; TODO confirm.
extern struct rval_t { std::string s; uint64_t u; } yylval;
// Line number reported in assembly reader error messages. Defined outside
// this file.
extern unsigned yyline;

// Reports a fatal assembly reader error for source line `line` on standard
// output, then terminates the process with exit status 1. Never returns.
static void asmReaderError(unsigned line, const std::string &message) {
  std::cout << "Assembly reader error, line " << line << ": ";
  std::cout << message << '\n';
  exit(1);
}

// Given the index `i` of an opening parenthesis in `s`, returns the index of
// the matching closing parenthesis, taking nested parentheses into account.
//
// If the parentheses are unbalanced the scan stops at the end of the string
// and s.length() is returned, so a caller's `i < end` loop simply terminates
// instead of the scan running off the end of the buffer.
static int skip_parens(const std::string &s, int i) {
  const int len = static_cast<int>(s.length());
  int paren_level = 1;

  while (paren_level > 0) {
    if (++i >= len) return len;  // Unbalanced: ran out of input.
    if (s[i] == '(') paren_level++;
    else if (s[i] == ')') paren_level--;
  }

  return i;
}

// Probably the worst recursive descent parser ever written, but it's an easy
// way to make our assembly language pretty.
static uint64_t readParenExpression(const string &s, const map<string, Word> &d,
                                    int start=0, int end=-1)
{
  uint64_t (* const rPE)(const string&, const map<string, Word>&, int, int)
    = readParenExpression;
  if (end == start) return 0;

  if (end==-1) end = s.length();

  while (isspace(s[start])) start++;
  while (isspace(s[end-1])) end--;

  for (int i = start; i < end; i++) {
    if (s[i] == '(') { i = skip_parens(s, i); continue; }

    if (s[i] == '<') return rPE(s, d, start, i) << rPE(s, d, i+2, end);
    if (s[i] == '>') return rPE(s, d, start, i) >> rPE(s, d, i+2, end);
  }

  for (int i = start; i < end; i++) {
    if (s[i] == '(') { i = skip_parens(s, i); continue; }
    if (s[i] == '+') return rPE(s, d, start, i) + rPE(s, d, i+1, end);
    if (s[i] == '-') return rPE(s, d, start, i) - rPE(s, d, i+1, end);
    if (s[i] == '|') return rPE(s, d, start, i) | rPE(s, d, i+1, end);
    if (s[i] == '^') return rPE(s, d, start, i) ^ rPE(s, d, i+1, end);
  }

  for (int i = start; i < end; i++) {
    if (s[i] == '(') { i = skip_parens(s, i); continue; }
    if (s[i] == '*') return rPE(s, d, start, i) * rPE(s, d, i+1, end);
    if (s[i] == '/') return rPE(s, d, start, i) / rPE(s, d, i+1, end);
    if (s[i] == '%') return rPE(s, d, start, i) % rPE(s, d, i+1, end);
    if (s[i] == '&') return rPE(s, d, start, i) & rPE(s, d, i+1, end);
  }

  // Unary -
  if (s[start] == '-') return -rPE(s, d, start+1, end);

  if (isdigit(s[start])) {
    unsigned long long u;
    sscanf(s.substr(start, end-start).c_str(), "%lli", &u);
    return u;
  }

  if (s[start] == '(') return rPE(s, d, start+1, end-1);

  map<string, Word>::const_iterator it = d.find(s.substr(start, end-start));
  if (it != d.end()) return it->second;

  cout << "Error on " << yyline << ": ";
  exit(1);
}

int lexerBits;
Obj *AsmReader::read(std::istream &input) {
  lexerBits = wordSize;
  FlexLexer *f = new yyFlexLexer(&input);
  Obj *o = new Obj();
  std::vector<Chunk>::reverse_iterator cur;
  bool permR(true), permW(false), permX(false), entry(false), nextPred(false),
       global(false);

  map <string, Word> defs;

  /* Pre-defined defs. */
  defs["__WORD"] = wordSize;

  map <string, Instruction::Opcode> opMap;

  // Build opMap
  for (size_t i = 0; Instruction::instTable[i].opString; i++)
    opMap[std::string(Instruction::instTable[i].opString)]
                                                       = Instruction::Opcode(i);

  enum {
    ST_INIT, ST_DEF1, ST_DEF2, ST_PERM, ST_WORD1, ST_SPACE, ST_STRING1,
    ST_STRING2, ST_BYTE1, ST_BYTE2, ST_ALIGN, ST_INST1, ST_INST2
  } state(ST_INIT);

  enum { OS_NOCHUNK, OS_TEXTCHUNK, OS_DATACHUNK } outstate(OS_NOCHUNK);
  TextChunk *tc;
  DataChunk *dc;
  Instruction *curInst;
  string string_arg, next_chunk_name;
  Size next_chunk_align(0);
  uint64_t num_arg;
  RegNum nextPredNum;

  AsmTokens t;
  while ((t = (AsmTokens)f->yylex()) != 0) {
    switch (t) {
      case ASM_T_DIR_DEF:
        if (state == ST_INIT) state = ST_DEF1;
        else { asmReaderError(yyline, "Unexpected .def"); }
        break;
      case ASM_T_DIR_PERM:
        if (state == ST_INIT) {
          state = ST_PERM;
          permR = permW = permX = false;
          if (outstate != OS_NOCHUNK) {
            outstate = OS_NOCHUNK;
            entry = false;
            global = false;
          }
        } else { asmReaderError(yyline, "Unexpected .perm"); }
        break;
      case ASM_T_DIR_BYTE:
        if (state == ST_INIT) {
          state = ST_BYTE1;
        } else { asmReaderError(yyline, "Unexpected .byte"); }
        break;
      case ASM_T_DIR_WORD:
        if (state == ST_INIT) {
          state = ST_WORD1;
        } else { asmReaderError(yyline, "Unexpected .word"); }
        break;
      case ASM_T_DIR_SPACE:
        if (state == ST_INIT) state = ST_SPACE;
        else                  asmReaderError(yyline, "Unexpected .space");
        break;
      case ASM_T_DIR_STRING:
        if (state == ST_INIT) {
          state = ST_STRING1;
        } else { asmReaderError(yyline, "Unexpected .string"); }
        break;
      case ASM_T_DIR_ALIGN:
        if (state == ST_INIT) {
          state = ST_ALIGN;
        } else { asmReaderError(yyline, "Unexpected .align"); }
        break;
      case ASM_T_DIR_ENTRY:
        outstate = OS_NOCHUNK;
        entry = true;
        break;
      case ASM_T_DIR_GLOBAL:
        outstate = OS_NOCHUNK;
        global = true;
        break;
      case ASM_T_DIR_ARG_NUM:
        switch (state) {
          case ST_DEF2: defs[string_arg] = yylval.u; state = ST_INIT; break;
          case ST_WORD1: {
            if (outstate != OS_DATACHUNK) {
              outstate = OS_DATACHUNK;
              dc = new DataChunk(next_chunk_name, next_chunk_align?
                                                  next_chunk_align:wordSize,
                                 flagsToWord(permR, permW, permX));
              next_chunk_align = 0;
              o->chunks.push_back(dc);
              if (entry) o->entry = o->chunks.size() - 1;
              if (global) dc->setGlobal();
            }
            dc->size += wordSize;
            dc->contents.resize(dc->size);
            wordToBytes(&*(dc->contents.end()-wordSize), yylval.u, wordSize);
          } break;
          case ST_SPACE: {
            // TODO: the following statement is basically copied from above. Fix
            // this.
            if (outstate != OS_DATACHUNK) {
              outstate = OS_DATACHUNK;
              dc = new DataChunk(next_chunk_name, next_chunk_align?
                                                  next_chunk_align:wordSize,
                                 flagsToWord(permR, permW, permX));
              next_chunk_align = 0;
              o->chunks.push_back(dc);
              if (entry) o->entry = o->chunks.size() - 1;
              if (global) dc->setGlobal();
            }
            size_t oldSize = dc->size;
            dc->size += wordSize * yylval.u;
            dc->contents.resize(dc->size);
            for (size_t i = oldSize; i < dc->size; ++i) dc->contents[i] = 0;
          } break;
          case ST_BYTE1:
            if (outstate != OS_DATACHUNK) {
              // TODO: more of this pasted code
              outstate = OS_DATACHUNK;
              dc = new DataChunk(next_chunk_name, next_chunk_align?
                                                  next_chunk_align:wordSize,
                                 flagsToWord(permR, permW, permX));
              next_chunk_align = 0;
              o->chunks.push_back(dc);
              if (entry) o->entry = o->chunks.size() - 1;
              if (global) dc->setGlobal();
            }
            dc->size++;
            dc->contents.resize(dc->size);
            *(dc->contents.end() - 1) = yylval.u;
            state = ST_INIT;
            break;
          case ST_ALIGN:
            next_chunk_align = yylval.u;
            if (outstate != OS_NOCHUNK) {
              outstate = OS_NOCHUNK;
              entry = false;
              global = false;
            }
            state = ST_INIT;
            break;
          default: asmReaderError(yyline, "Unexpected literal argument");
        }
        break;
      case ASM_T_DIR_ARG_STRING:
        if (state == ST_STRING1) {
          if (outstate != OS_DATACHUNK) {
            // TODO: pasted code (see above)
            outstate = OS_DATACHUNK;
            dc = new DataChunk(next_chunk_name,
                               next_chunk_align?next_chunk_align:wordSize,
                               flagsToWord(permR, permW, permX));
            next_chunk_align = 0;
            o->chunks.push_back(dc);
            if (entry) o->entry = o->chunks.size() - 1;
            if (global) dc->setGlobal();
          }
          const char *s = yylval.s.c_str();
          do {
            if (*s == '\\') {
              switch (*(++s)) {
                case 'n': dc->contents.push_back('\n');   break;
                case '"': dc->contents.push_back(*s); break;
                default:  dc->contents.push_back(*s); break;
              }
            } else {
              dc->contents.push_back(*s);
            }
            dc->size++;
          } while(*(s++));
        } else {
          asmReaderError(yyline, "Unexpected string literal.");
        }
        state = ST_INIT;
        break;
      case ASM_T_DIR_ARG_SYM:
        switch (state) {
          case ST_DEF1: string_arg = yylval.s; state = ST_DEF2; break;
          default: asmReaderError(yyline, "");
        };
        break;
      case ASM_T_DIR_ARG_R:
        permR = true;
        break;
      case ASM_T_DIR_ARG_W:
        permW = true;
        break;
      case ASM_T_DIR_ARG_X:
        permX = true;
        break;
      case ASM_T_DIR_END:
        if (state == ST_INST1 || state == ST_INST2) {
          if (outstate == OS_TEXTCHUNK) {
            tc->instructions.push_back(curInst);
          } else {
            asmReaderError(yyline, "Inst not in text chunk(internal error)");
          }
        }
        state = ST_INIT;
        break;
      case ASM_T_LABEL:
        if (outstate != OS_NOCHUNK) {
          entry = false;
          global = false;
          outstate = OS_NOCHUNK;
        }
        next_chunk_name = yylval.s;
        break;
      case ASM_T_PRED:
        nextPred = true;
        nextPredNum = yylval.u;
        break;
      case ASM_T_INST:
        if (state == ST_INIT) {
          Instruction::Opcode opc = opMap[yylval.s];
          if (outstate != OS_TEXTCHUNK) {
            tc = new TextChunk(next_chunk_name, next_chunk_align,
                               flagsToWord(permR, permW, permX));
            next_chunk_align = 0;
            o->chunks.push_back(tc);
            if (entry) o->entry = o->chunks.size() - 1;
            if (global) tc->setGlobal();
            outstate = OS_TEXTCHUNK;
          }
          curInst = new Instruction();
          curInst->setOpcode(opc);
          if (nextPred) {
            nextPred = false;
            curInst->setPred(nextPredNum);
          }
          state = Instruction::instTable[opc].allSrcArgs?ST_INST2:ST_INST1;
        } else { asmReaderError(yyline, "Unexpected token"); }
        break;
      case ASM_T_PREG:
        switch (state) {
          case ST_INST1: curInst->setDestPReg(yylval.u);
                         state = ST_INST2;
                         break;
          case ST_INST2: curInst->setSrcPReg(yylval.u);
                         break;
          default: asmReaderError(yyline, "Unexpected predicate register");
        }
        break;
      case ASM_T_REG:
        switch (state) {
          case ST_INST1: curInst->setDestReg(yylval.u);
                         state = ST_INST2;
                         break;
          case ST_INST2: curInst->setSrcReg(yylval.u);
                         break;
          default: asmReaderError(yyline, "Unexpected register");
        }
        break;
      case ASM_T_PEXP:
        // Decode the paren expression.
        yylval.u = readParenExpression(yylval.s, defs);
      case ASM_T_LIT:
        switch (state) {
          case ST_INST1: asmReaderError(yyline, "Unexpected literal");
          case ST_INST2: curInst->setSrcImm(yylval.u);
                         break;
          default: asmReaderError(yyline, "Unexpected literal");
        }
        break;
      case ASM_T_SYM:
        switch (state) {
          case ST_INST1: asmReaderError(yyline, "Unexpected symbol");
          case ST_INST2: if (defs.find(yylval.s) != defs.end()) {
                           curInst->setSrcImm(defs[yylval.s]);
                         } else {
                           Ref *r = new
                             SimpleRef(yylval.s, *curInst->setSrcImm(),
                               curInst->hasRelImm());
                           tc->refs.push_back(r);
                           curInst->setImmRef(*r);
                         }
                         break;
          default: asmReaderError(yyline, "Unexpected symbol");
        }
        break;
      default: asmReaderError(yyline, "Invalid state(internal error)");
    };
  }

  return o;
}

void AsmWriter::write(std::ostream &output, const Obj &obj) {
  Word prevFlags(0);

  for (size_t j = 0; j < obj.chunks.size(); j++) {
    Chunk * const &c = obj.chunks[j];

    /* Write out the flags. */
    if (c->flags != prevFlags) {
      bool r, w, x;
      wordToFlags(r, w, x, c->flags);
      output << ".perm ";
      if (r) output << 'r';
      if (w) output << 'w';
      if (x) output << 'x';
      output << '\n';
      prevFlags = c->flags;
    }

    /* Write align if set. */
    if (c->alignment) output << ".align 0x" << hex << c->alignment << '\n';

    TextChunk * const tc = dynamic_cast<TextChunk* const>(c);
    DataChunk * const dc = dynamic_cast<DataChunk* const>(c);

    if (tc) {
      if (j == obj.entry) output << "\n.entry\n";
      if (c->isGlobal()) output << "\n.global\n";
      if (tc->name != "") output << tc->name << ':';

      for (size_t i = 0; i < tc->instructions.size(); i++) {
        output << "\t" << *(tc->instructions[i]) << '\n';
      }
    } else if (dc) {
      Size i;
      for (i = 0; i < dc->contents.size();) {
        Size tmpWordSize = (dc->contents.size() - i < wordSize) ?
                             dc->contents.size() - i : wordSize;

        i += tmpWordSize;
        Word w = 0;
        for (size_t j = 0; j < tmpWordSize; j++) {
          w <<= 8;
          w |= dc->contents[i - j - 1];
        }

        if (i == tmpWordSize && c->name != "") {
          output << c->name << ':' << endl;
          output << "  .word " << " 0x" << hex << w << endl;
        } else {
          output << "  .word " << " 0x" << hex << w << endl;
        }
      }

      if (i % wordSize) i += (wordSize - (i%wordSize));

      if (dc->size > i) {
        Size fillSize = (dc->size - i)/wordSize;
        output << ".word 0x" << hex << fillSize << '\n';
      }
    } else {
      cout << "Unrecognized chunk type in AsmWriter.\n";
      exit(1);
    }
  }
}

// Bits of the per-chunk HOF flags word: getHofFlags() sets HOF_GLOBAL for
// global chunks and HOFReader::read() tests it to restore the global marker.
enum HOFFlag { HOF_GLOBAL = 1 };

// Builds the HOF flags word for chunk `c`: HOF_GLOBAL when the chunk is
// global, 0 otherwise.
Word getHofFlags(Chunk &c) {
  return c.isGlobal() ? Word(HOF_GLOBAL) : Word(0);
}

static void outputWord(std::ostream &out, Word w,
                       vector<Byte> &tmp, Size wordSize)
{
  Size n(0);
  writeWord(tmp, n, wordSize, w);
  out.write((char*)&tmp[0], wordSize);
}

void HOFWriter::write(std::ostream &output, const Obj &obj) {
  string archString(arch);
  Size wordSize(arch.getWordSize()), n, offsetVectorPos;

  vector<Byte> tmp;
  vector<Size> offsets(obj.chunks.size());

  /* Magic number, arch string, and padding. */
  output.write("HARP", 4);
  output.write(archString.c_str(), archString.length()+1);
  Size padBytes = (wordSize-(4+archString.length()+1)%wordSize)%wordSize;
  for (Size i = 0; i < padBytes; i++) output.put(0);

  /* Write out the entry chunk index. */
  outputWord(output, obj.entry, tmp, wordSize);

  /* Write out the number of chunks. */
  outputWord(output, obj.chunks.size(), tmp, wordSize);

  /* Skip the chunk size offset vector. */
  offsetVectorPos = output.tellp();
  output.seekp(output.tellp() + streampos(wordSize * obj.chunks.size()));

  /* Write out the chunks, keeping track of their offsets. */
  for (Size i = 0; i < obj.chunks.size(); i++) {
    offsets[i] = output.tellp();

    // Is it a data chunk?
    DataChunk *dc = dynamic_cast<DataChunk*>(obj.chunks[i]);
    if (!dc) { cout << "HOFWriter::write(): invalid chunk type.\n"; exit(1); }

    D(1, "Writing chunk \"" << dc->name << "\", size=" << dc->contents.size());

    // Chunk name
    output.write(dc->name.c_str(), dc->name.length() + 1);

    /* Padding */
    padBytes = (wordSize - (dc->name.length()+1)%wordSize)%wordSize;
    for (Size i = 0; i < padBytes; i++) output.put(0);

    /* Chunk alignment, flags, address, size (in RAM and disk) */
    outputWord(output, dc->alignment, tmp, wordSize);
    outputWord(output, dc->flags, tmp, wordSize);
    outputWord(output, getHofFlags(*dc), tmp, wordSize);
    outputWord(output, dc->bound?dc->address:0, tmp, wordSize);
    outputWord(output, dc->size, tmp, wordSize);
    outputWord(output, dc->contents.size(), tmp, wordSize);

    /* References */
    outputWord(output, dc->refs.size(), tmp, wordSize);
    for (Size j = 0; j < dc->refs.size(); j++) {
      OffsetRef *r = dynamic_cast<OffsetRef*>(dc->refs[j]);
      if (!r) { cout << "HOFWriter::write(): invalid ref type.\n"; exit(1); }
      /* Reference name */
      output.write(r->name.c_str(), r->name.length() + 1);
      /* Padding */
      padBytes = (wordSize - (r->name.length() + 1)%wordSize)%wordSize;
      for (Size i = 0; i < padBytes; i++) output.put(0);
      /* Compute flags word. */
      Word rFlags(0);
      if (r->relative) rFlags |= 1;
      /* Output flags word. */
      outputWord(output, rFlags, tmp, wordSize);
      /* Offset from which relative branches are computed. */
      outputWord(output, r->ibase, tmp, wordSize);
      /* Reference offset in block. */
      outputWord(output, r->getOffset(), tmp, wordSize);
      /* Reference size in bits. */
      outputWord(output, r->getBits(), tmp, wordSize);
    }

    /* Chunk data. */
    output.write((char*)&(dc->contents[0]), dc->contents.size());

    /* Chunk padding. */
    padBytes = (wordSize - dc->contents.size()%wordSize)%wordSize;
    for (Size i = 0; i < padBytes; i++) output.put(0);
  }

  /* Write out the chunk offset vector. */
  output.seekp(offsetVectorPos);
  for (Size i = 0; i < obj.chunks.size(); i++) {
    outputWord(output, offsets[i], tmp, wordSize);
  }
}

// Reads one `wordSize`-byte word from `input` and returns it decoded by
// readWord(). If the current stream position is not a multiple of `wordSize`
// the stream is first advanced to the next multiple, which skips the padding
// HOFWriter emits after strings. `tmp` is scratch space and is grown to at
// least `wordSize` bytes.
//
// A stream that cannot report its position, or that ends before a whole word
// has been read, is reported and terminates the program rather than yielding
// stale buffer contents.
static Word inputWord(std::istream &input, Size wordSize,
                      std::vector<Byte> &tmp)
{
  if (tmp.size() < wordSize) tmp.resize(wordSize);

  /* Seek to the next word-aligned place. */
  const std::streamoff pos = input.tellg();
  if (pos >= 0) {
    const std::streamoff misalign = pos % std::streamoff(wordSize);
    if (misalign) {
      input.seekg(std::streamoff(wordSize) - misalign, std::ios_base::cur);
    }
  }

  input.read(reinterpret_cast<char*>(&tmp[0]), wordSize);
  if (pos < 0 || !input) {
    std::cout << "Unexpected end of input or read error in HOF file.\n";
    exit(1);
  }

  Size n(0);
  return readWord(tmp, n, wordSize);
}

// Reads characters from `input` up to, and consuming, the next NUL byte and
// returns them without the NUL. Reading also stops at end of input or on a
// stream error, in which case the characters read so far are returned.
static std::string inputString(std::istream &input) {
  typedef std::istream::traits_type Traits;
  std::string s;

  for (;;) {
    // Keep the result as int_type: narrowing it to char first would make the
    // end-of-file marker look like an ordinary character.
    const Traits::int_type c = input.get();
    if (Traits::eq_int_type(c, Traits::eof()) || c == 0) break;
    s += Traits::to_char_type(c);
  }

  return s;
}

// Reads a HOF object from `input` and returns it as a newly allocated Obj that
// the caller owns. `input` must be seekable, because chunks are located
// through the offset vector stored in the header.
//
// Every chunk is materialised as a DataChunk and every reference as an
// OffsetRef into that chunk's contents. A chunk whose stored address is
// non-zero is marked as bound.
//
// A bad magic number, an architecture differing from the reader's, or
// truncated chunk data is reported and terminates the program.
Obj *HOFReader::read(std::istream &input) {
  const Size wordSize(arch.getWordSize());
  Obj *o = new Obj();

  std::vector<Byte> tmp(4);

  input.read(reinterpret_cast<char*>(&tmp[0]), 4);
  if (tmp[0] != 'H' || tmp[1] != 'A' || tmp[2] != 'R' || tmp[3] != 'P') {
    std::cout << "Bad magic number in HOFReader::read().\n";
    exit(1);
  }

  std::string archString(inputString(input));
  ArchDef fileArch(archString);
  if (fileArch != arch) {
    std::cout << "File arch does not match reader arch in HOFReader::read().\n";
    exit(1);
  }

  o->entry = inputWord(input, wordSize, tmp);

  const Size nChunks(inputWord(input, wordSize, tmp));

  /* Read in the chunk offsets. */
  std::vector<Size> chunkOffsets(nChunks);
  for (Size i = 0; i < nChunks; i++) {
    chunkOffsets[i] = inputWord(input, wordSize, tmp);
  }

  /* Read in the chunks. */
  o->chunks.resize(nChunks);
  for (Size i = 0; i < nChunks; i++) {
    input.seekg(chunkOffsets[i]);

    /* Chunk header, in the order HOFWriter emits it. */
    std::string name(inputString(input));
    const Word alignment(inputWord(input, wordSize, tmp));
    const Word flags(inputWord(input, wordSize, tmp));
    const Word hofFlags(inputWord(input, wordSize, tmp));
    const Word address(inputWord(input, wordSize, tmp));
    const Word size(inputWord(input, wordSize, tmp));
    const Word dSize(inputWord(input, wordSize, tmp));
    const Word nRefs(inputWord(input, wordSize, tmp));

    DataChunk *dc = new DataChunk(name, alignment, flags);
    if (hofFlags & HOF_GLOBAL) dc->setGlobal();
    dc->address = address;
    dc->bound = (address != 0);
    // Sized before the refs are built, since each ref is handed the contents.
    dc->contents.resize(dSize);

    /* Get the refs. */
    for (Size j = 0; j < nRefs; j++) {
      std::string rName(inputString(input));
      const Word rFlags(inputWord(input, wordSize, tmp));
      const Word ibase(inputWord(input, wordSize, tmp));
      const Word offset(inputWord(input, wordSize, tmp));
      const Word bits(inputWord(input, wordSize, tmp));
      OffsetRef *r =
        new OffsetRef(rName, dc->contents, offset, bits, wordSize, rFlags&1,
                      ibase);
      dc->refs.push_back(r);
    }

    D(1, "Reading chunk \"" << name << "\", size " << dSize);

    /* Get the contents (an empty chunk has no element 0 to read into). */
    if (dSize > 0) {
      input.read(reinterpret_cast<char*>(&dc->contents[0]), dSize);
      if (!input) {
        std::cout << "Truncated chunk data in HOFReader::read().\n";
        exit(1);
      }
    }
    dc->size = size;

    o->chunks[i] = dc;
  }

  return o;
}