next up previous contents
Next: 7 Activation Records Up: 6 Semantic Analysis Previous: 6.2 Types and Type

6.3 Case Study: The Calculator Interpreter

In Section 4.2 we described the AST data structures for the calculator example. We are now ready to go back to the calculator parser calc.y to see how ASTs are constructed. Each nonterminal must have a type. This is accomplished using %type declarations in Bison. The type names must be declared as union choices in the %union part of the Bison file:

%union {
   char*       Tstring;
   ast*        Tast;
   ast_list*   Tast_list;
%type <Tstring> id
%type <Tast> exp
%type <Tast_list> expl
This indicates that each production for the nonterminal exp constructs a value of type ast* when is reduced. For example, the following line in the calculator parser:
    | exp PLUS exp      { $$ = mk_node(plus_exp,cons($1,cons($3,null))); }
constructs a new ast* for exp when the rule is reduced. This ast*, which is assigned to the variable $$, is a tree node with label plus_exp and two children, $1 and $3, which correspond to the ast* of the plus operands.

Recall that our calculator is an interpreter, rather than a compiler. The symbol table of an interpreter must bind each program variable to both its type and its value. For example, when the interpret encounters the variable x in a program, it must assert the type of x (eg, an integer) so it can tell if x is used correctly in the program. It also needs the value of x because it needs to evaluate x. A compiler would only need the type of x. Fortunately, our calculator has only one type: double floating points. So there is no need for type information since everything is expected to be (or be able to be converted to) a floating point. The calculator symbol table is a hash table with items of type symbol:

typedef struct symbol {
   char*          name;
   ast*           binding; 
   struct symbol* next;
} symbol;
#define symbol_table_size 97
symbol* symbol_table[symbol_table_size];
The binding is the actual value of the symbol, not its type. If we had multiple types, we should have had another attribute type of type ast* in symbol. The binding plays two roles: if the name is a variable name, then the binding is the actual value (a double floating point number) represented as a real_ast AST node. If the name is a function definition, then the binding is the AST of the actual definition.

To implement variable scoping, a scope stack is used as it is described in the previous section. The special marker -1 in the scope stack indicates a new scope. All the other entries are locations in the symbol table (ie, in the range 0..96):

#define scope_stack_length 1000
int scope_stack_top = 0;
int scope_stack[scope_stack_length];

/* a fast hashing function for strings */
int hash_key ( const char* s ) {
  long res = 0;
  int i=strlen(s)-1;
  for(; i>=0; i--)
     res = (res<<3) | (s[i]);
  return (res % symbol_table_size);

/* insert a new item in the symbol table */
void insert ( const char* key, ast* binding ) {
  int loc = hash_key(key);
  symbol* s = (symbol*) malloc(sizeof(symbol));
  s->name = (char*) malloc(strlen(key));
  s->binding = binding;
  s->next = symbol_table[loc];
  symbol_table[loc] = s;
  if (scope_stack_top >= scope_stack_length)
  {  error("stack overflow",mk_var(key));
  else scope_stack[scope_stack_top++] = loc;

/* lookup for an item in the symbol table */
symbol* lookup ( const char* key ) {
  int loc = hash_key(key);
  symbol* s = symbol_table[loc];
  for(; s != NULL; s=s->next)
     if (strcmp(s->name,key)==0)
        return s;
  return NULL;       // if not found

/* start a new environment */
void begin_scope () {
  if (scope_stack_top >= scope_stack_length)
  {  error("stack overflow",mk_int(0));
  else scope_stack[scope_stack_top++] = -1;

/* pop the last environment */
void end_scope () {
  int i = scope_stack_top-1;
  for(; scope_stack[i]>=0 && i>0; i--) {
     int loc = scope_stack[i];
     symbol_table[loc] = symbol_table[loc]->next;
  scope_stack_top = i;

Now we are ready to describe the interpreter. The interpreter, eval, evaluates any AST created by the parser and returns a double floating point number. When a variable is encountered, its value is retrieved from the symbol table:

double eval ( ast* e ) {
  switch (e->tag) {
  case int_ast:
       return (double) e->info.integer;
  case real_ast:
       return e->info.real;
  case var_ast: {
       symbol* s = lookup(e->info.variable);
       if (s == NULL)
          return error("Undefined variable",e);
       if (s->binding->tag != real_ast)
          return error("Name is not a variable",e);
       else return s->binding->info.real;
A function call can be either a primitive operation, such as an integer addition, or a call to a defined function. In either case, the function arguments must be evaluated before the operation/call is performed:
  case node_ast:
    if (e->info.node.tag == call_exp) {
       /* user-defined function */
       ast_list* arguments = null;
       ast_list* params;
       ast_list* r;
       ast* body;
       double res;
       symbol* s;
       s = lookup(e->info.node.arguments->elem->info.variable);
       if (s == NULL)
          return error("Undefined function",e->info.node.arguments->elem);
       if (s->binding->tag != node_ast || s->binding->info.node.tag != fnc_def)
          return error("Name is not a function",e->info.node.arguments->elem);
       body = s->binding->info.node.arguments->elem;
       params = s->binding->info.node.arguments->next;
       /* evaluate the function arguments */
       for(r = e->info.node.arguments->next; r != 0; r=r->next)
          arguments = cons(mk_real(eval(r->elem)),arguments);
       arguments = reverse(arguments);
       if (length(params) != length(arguments))
          return error("Wrong number of arguments",e);
       /* extend the environment by binding parameters (names) to arguments (values) */
       for(r = params; r != 0; r=r->next, arguments=arguments->next)
       /* evaluate the body under the new environment */
       res = eval(body);
       /* remove the functions' parameters from the current environment */
       return res;
    /* if-then-else expression must be dealt seperately since it is lazy evaluation */
    if (e->info.node.tag == if_exp && length(e->info.node.arguments) == 3)
       if (eval(e->info.node.arguments->elem) > 0)
          return eval(e->info.node.arguments->next->elem);
       else return eval(e->info.node.arguments->next->next->elem);
    /* function is the unary operation, not */
    if (e->info.node.tag==not_exp)
       if (eval(e->info.node.arguments->elem) > 0)
          return 0;
       else return 1;
    /* function is a binary arithmetic or boolean operation */
    if (length(e->info.node.arguments) == 2) {
       double left = eval(e->info.node.arguments->elem);
       double right = eval(e->info.node.arguments->next->elem);
       switch (e->info.node.tag) {
          case plus_exp: return left + right;
          case minus_exp: return left - right;
          case times_exp: return left * right;
          case div_exp: return left / right;
          case and_exp: return left && right;
          case or_exp: return left || right;
          case eq_exp: return left == right;
          case ne_exp: return left != right;
          case gt_exp: return left > right;
          case lt_exp: return left < right;
          case ge_exp: return left >= right;
          case le_exp: return left <= right;

next up previous contents
Next: 7 Activation Records Up: 6 Semantic Analysis Previous: 6.2 Types and Type
Leonidas Fegaras