Hatena::Groupgeneration1986

hogelogの日記

 | 

2008-03-25

変態言語whitespaceのパーサ

21:32

尻馬のっかってなんか構文解析とか、さいきん妙にしてる気がする。

yuyarinがwhitespaceおもしれーなどとおっしゃってたので、whitespaceを書いてみた。パーサ部分だけ。ちなみに一回も実行してない。mainとか書いてねえし。

(たぶん)LR(0)っぽく書いたら構文解析だけで300行越えた。コードがめちゃくちゃワンパターンであり、このくらいのパーサならかなり簡単にパーサジェネレータで書けそう。あとwsはオリジナルのソースコードがかなり読みやすかった。あそこからHaskell入門するのもかなり良いと思う。

#include <stdio.h>
#include <stdlib.h>

#define A ' '
#define B '\t'
#define C '\n'

#define PROGRAM_FULL(p) \
  ((p)->size == (p)->capacity)
#define PROGRAM_PUSH_OP(p,op) \
  ((p)->data[(p)->size++].operation = (op))
#define PROGRAM_PUSH_NUM(p,num) \
  ((p)->data[(p)->size++].number = (num))
#define RETURN_ERROR(ch,stream) do { \
  ungetc((ch), (stream)); \
  return ERROR; \
} while(0)

#define INIT_PROGRAM_SIZE 10000

typedef enum Operation {
  PUSH, DUP, SWAP, DISCARD,
  PLUS, MINUS, TIMES, DIVIDE, MODULO,
  STORE, RETRIEVE,
  L_STRING, CALL_L, JUMP_L, IZ_L, IN_L, RETURN, END,
  OUTPUTC, OUTPUTN, READC, READN,
  ERROR,
} Operation;
typedef union Instruction {
    Operation operation;
    int number;
} Instruction;
typedef struct Program {
  int size, capacity;
  Instruction *data;
} Program;

Operation read_A_C(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return DUP;
    case B:
      return SWAP;
    case C:
      return DISCARD;
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_A_C(input);
  }
}
Operation read_A(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return PUSH;
    case B:
      RETURN_ERROR(B, input);
    case EOF:
      RETURN_ERROR(EOF, input);
    case C:
      return read_A_C(input);
    default:
      return read_A(input);
  }
}
Operation read_BA_A(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return PLUS;
    case B:
      return MINUS;
    case C:
      return TIMES;
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_BA_A(input);
  }
}
Operation read_BA_B(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return DIVIDE;
    case B:
      return MODULO;
    case C:
      RETURN_ERROR(C, input);
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_BA_B(input);
  }
}
Operation read_BA(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return read_BA_A(input);
    case B:
      return read_BA_B(input);
    case C:
      RETURN_ERROR(C, input);
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_BA(input);
  }
}
Operation read_BB(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return STORE;
    case B:
      return RETRIEVE;
    case C:
      RETURN_ERROR(C, input);
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_BB(input);
  }
}
Operation read_C_A(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return L_STRING;
    case B:
      return CALL_L;
    case C:
      return JUMP_L;
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_C_A(input);
  }
}
Operation read_C_B(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return IZ_L;
    case B:
      return IN_L;
    case C:
      return RETURN;
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_C_B(input);
  }
}
Operation read_C_C(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case C:
      return END;
    case A:
      RETURN_ERROR(A, input);
    case B:
      RETURN_ERROR(B, input);
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_C_C(input);
  }
}
Operation read_C(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return read_C_A(input);
    case B:
      return read_C_B(input);
    case C:
      return read_C_C(input);
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_C(input);
  }
}
Operation read_BC_A(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return OUTPUTC;
    case B:
      return OUTPUTN;
    case C:
      RETURN_ERROR(C, input);
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_BC_A(input);
  }
}
Operation read_BC_B(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return READC;
    case B:
      return READN;
    case C:
      RETURN_ERROR(C, input);
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_BC_B(input);
  }
}
Operation read_BC(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return read_BC_A(input);
    case B:
      return read_BC_B(input);
    case C:
      RETURN_ERROR(C, input);
    case EOF:
      RETURN_ERROR(EOF, input);
    default:
      return read_BC(input);
  }
}
Operation read_B(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return read_BA(input);
    case B:
      return read_BB(input);
    case C:
      return read_BC(input);
    case EOF:
      RETURN_ERROR(EOF, input);
  }
  return read_B(input);
}
int read_Number_AB(FILE *input, int sum) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return read_Number_AB(input, sum*2);
    case B:
      return read_Number_AB(input, (sum+1)*2);
    case C:
    case EOF:
      return sum;
    default:
      return read_Number_AB(input, sum);
  }
}
int read_Number(FILE *input) {
  int ch = getc(input);
  switch(ch) {
    case A:
      return read_Number_AB(input, 0);
    case B:
      return -1*read_Number_AB(input, 0);
    case C:
      return 0;
    default:
      return read_Number(input);
  }
}
Program* init_Program() {
  Program *program = (Program*)malloc(sizeof(Program));
  program->size = 0;
  program->capacity = INIT_PROGRAM_SIZE;
  program->data = (Instruction*)malloc(sizeof(Instruction)*INIT_PROGRAM_SIZE);
  return program;
}
Program* read_Program(Program *program, FILE *input) {
  int ch, chCount = 0, opCount = 0;
  Operation op;
  while(PROGRAM_FULL(program) || (ch=getc(input))!=EOF) {
    ++chCount;
    switch(ch) {
      case A:
        op = read_A(input);
        break;
      case B:
        op = read_B(input);
        break;
      case C:
        op = read_C(input);
        break;
    }
    if(op == ERROR) {
      fprintf(stderr, "error at %d byte\n", chCount);
      exit(1);
    }
    else {
      PROGRAM_PUSH_OP(program, op);
      if(op == PUSH) {
        PROGRAM_PUSH_NUM(program, read_Number(input));
      }
    }
  }
  return program;
}
 | 
最近のコメント