2016年2月22日月曜日

開発環境

  • OS X El Capitan - Apple (OS)
  • Emacs(Text Editor)
  • Java (実行環境)
  • Python 3.5(プログラミング言語)

コンピュータシステムの理論と実装 (Noam Nisan (著)、Shimon Schocken (著)、斎藤 康毅(翻訳)、オライリージャパン)の11章(コンパイラ#2:コード生成)、11.5(プロジェクト)に取り組んでみる。

11.5(プロジェクト)

コード(Emacs)

JackCompiler.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-


import os
import glob
import sys
import re


class JackTokenizer:
    """Split a Jack source stream into tokens with one token of lookahead.

    The tokenizer reads ``file`` one character at a time.  ``advance()``
    makes the look-ahead token current and scans the next one.  Token types
    are the strings ``'KEYWORD'``, ``'SYMBOL'``, ``'IDENTIFIER'``,
    ``'INT_CONST'`` and ``'STRING_CONST'``; the look-ahead type is ``''``
    once the input is exhausted.
    """

    KEYWORDS = frozenset([
        'class', 'constructor', 'function', 'method', 'field', 'static',
        'var', 'int', 'char', 'boolean', 'void', 'true', 'false', 'null',
        'this', 'let', 'do', 'if', 'else', 'while', 'return'])
    SYMBOLS = frozenset('-{}()[].,;+*/&|<>=~')

    _SPACE = re.compile(r'\s')
    _DIGIT = re.compile(r'\d')
    _IDENT_CHAR = re.compile(r'[a-zA-Z0-9_]')

    def __init__(self, file):
        """Start tokenizing ``file`` (an object with read(1) and readline()).

        The first token is scanned immediately so that has_more_tokens()
        can be answered before the first advance().
        """
        self.file = file
        # Single character read past the end of the previous token.
        self.next_ch = ''
        self.cur_token_type = ''
        self.cur_token = ''
        self.next_token_type = ''
        self.next_token = self.get_next_token()

    def _read_significant_char(self):
        """Return the next non-whitespace character, or '' at end of input."""
        if self.next_ch != '':
            c = self.next_ch
            self.next_ch = ''
        else:
            c = self.file.read(1)
        while self._SPACE.match(c):
            c = self.file.read(1)
        return c

    def _skip_block_comment(self):
        """Consume input up to and including the closing ``*/``.

        The previous character is remembered so that a terminator preceded
        by extra stars (``**/``) is still recognised.

        Raises:
            Exception: if the input ends before the comment is closed.
        """
        prev = ''
        while True:
            c = self.file.read(1)
            if c == '':
                raise Exception('unterminated block comment')
            if prev == '*' and c == '/':
                return
            prev = c

    def get_next_token(self):
        """Scan and return the next token, recording its type.

        Sets ``self.next_token_type`` as a side effect.  Returns ``''`` with
        a type of ``''`` at end of input.  A string constant is returned
        without its surrounding quotes.

        Raises:
            Exception: on an unterminated block comment or string constant.
        """
        # Skip whitespace and comments iteratively; '/' needs one character
        # of lookahead to tell a comment from the division symbol.
        while True:
            c = self._read_significant_char()
            if c == '':
                self.next_token_type = ''
                return ''
            if c != '/':
                break
            c = self.file.read(1)
            if c == '/':
                self.file.readline()
            elif c == '*':
                self._skip_block_comment()
            else:
                self.next_ch = c
                self.next_token_type = 'SYMBOL'
                return '/'

        if c in self.SYMBOLS:
            self.next_token_type = 'SYMBOL'
            return c

        if self._DIGIT.match(c):
            token = c
            while True:
                c = self.file.read(1)
                if not self._DIGIT.match(c):
                    self.next_ch = c
                    break
                token += c
            self.next_token_type = 'INT_CONST'
            return token

        if c == '"':
            chars = []
            while True:
                c = self.file.read(1)
                if c == '"':
                    break
                if c == '':
                    raise Exception('unterminated string constant')
                chars.append(c)
            self.next_token_type = 'STRING_CONST'
            return ''.join(chars)

        # Anything else starts a keyword or identifier.
        token = c
        while True:
            c = self.file.read(1)
            if not self._IDENT_CHAR.match(c):
                self.next_ch = c
                break
            token += c
        if token in self.KEYWORDS:
            self.next_token_type = 'KEYWORD'
        else:
            self.next_token_type = 'IDENTIFIER'
        return token

    def has_more_tokens(self):
        """Return True while a look-ahead token is available.

        The token type is tested rather than the token text because an
        empty string constant has the text ``''``.
        """
        return self.next_token_type != ''

    def advance(self):
        """Make the look-ahead token current and scan the following one."""
        self.cur_token = self.next_token
        self.cur_token_type = self.next_token_type
        self.next_token = self.get_next_token()

    def token_type(self):
        """Return the type of the current token."""
        return self.cur_token_type

    def keyword(self):
        """Return the current token text (meaningful for a KEYWORD)."""
        return self.cur_token

    def symbol(self):
        """Return the current token text (meaningful for a SYMBOL)."""
        return self.cur_token

    def identifier(self):
        """Return the current token text (meaningful for an IDENTIFIER)."""
        return self.cur_token

    def int_val(self):
        """Return the current token converted to int (for an INT_CONST)."""
        return int(self.cur_token)

    def string_val(self):
        """Return the current string constant without its quotes."""
        return self.cur_token


class CompilationEngine:
    """Recursive-descent compiler that turns one Jack class into VM code.

    Tokens come from a JackTokenizer over the input stream, identifiers are
    resolved through a SymbolTable, and VM commands are emitted through a
    VMWriter.  Unless a method says otherwise, each ``compile_*`` method is
    entered with the tokenizer's current token on the first token of the
    construct and returns with it on the first token after the construct.
    """

    # Symbol-table kind -> VM memory segment holding variables of that kind.
    KIND = dict(STATIC='static', FIELD='this', ARG='argument', VAR='local')
    # Binary operators that map to a single VM arithmetic command.
    BINARY_COMMANDS = {'+': 'ADD', '-': 'SUB', '&': 'AND', '|': 'OR',
                       '<': 'LT', '>': 'GT', '=': 'EQ'}
    # Binary operators implemented as two-argument OS calls.
    BINARY_CALLS = {'*': 'Math.multiply', '/': 'Math.divide'}

    def __init__(self, inf, outf):
        """Prepare to compile the Jack source ``inf`` into VM code on ``outf``."""
        self.tokenizer = JackTokenizer(inf)
        self.vm_writer = VMWriter(outf)
        self.symtab = SymbolTable()
        self.label_no = 0
        self.class_name = ''

    def make_label(self):
        """Return a fresh label name (``label0``, ``label1``, ...)."""
        label = 'label{0}'.format(self.label_no)
        self.label_no += 1
        return label

    # -- small private helpers ------------------------------------------

    def _is_symbol(self, sym):
        """Return True if the current token is the symbol ``sym``."""
        return self.tokenizer.token_type() == 'SYMBOL' and \
            self.tokenizer.symbol() == sym

    def _read_type(self, context):
        """Return the type name under the cursor (keyword or class name).

        Raises:
            Exception: if the current token is neither a keyword nor an
                identifier; ``context`` prefixes the message.
        """
        t = self.tokenizer.token_type()
        if t == 'KEYWORD':
            return self.tokenizer.keyword()
        if t == 'IDENTIFIER':
            return self.tokenizer.identifier()
        raise Exception('{0}: {1}'.format(context, t))

    def _define_var_names(self, id_type, id_kind):
        """Define ``varName (',' varName)*`` following the current token.

        Entered with the cursor on the type token; returns with the cursor
        on the first token after the last name that is not a comma.
        """
        while True:
            self.tokenizer.advance()  # varName
            self.symtab.define(self.tokenizer.identifier(), id_type, id_kind)
            self.tokenizer.advance()  # ',' or terminator
            if not self._is_symbol(','):
                return

    def _push_variable(self, name):
        """Emit a push of the variable ``name`` from its memory segment.

        Raises:
            Exception: (from SymbolTable.index_of) if ``name`` is undefined.
        """
        kind = self.symtab.kind_of(name)
        index = self.symtab.index_of(name)
        self.vm_writer.write_push(CompilationEngine.KIND[kind], index)

    def _compile_subroutine_call(self, name):
        """Compile the remainder of a subroutine call that began with ``name``.

        Entered with the cursor on the ``.`` or ``(`` that follows ``name``;
        returns with the cursor on the token after the closing ``)``.

        * ``name.sub(...)`` where ``name`` is a known variable: the variable
          is pushed as the first argument and the call targets its type.
        * ``name.sub(...)`` otherwise: ``name`` is used as a class name.
        * ``name(...)``: a call on the current object; ``pointer 0`` is
          pushed as the first argument.
        """
        n_args = 0
        sym = self.tokenizer.symbol()
        if sym == '.':
            self.tokenizer.advance()  # subroutineName
            sub_name = self.tokenizer.identifier()
            if self.symtab.kind_of(name) is None:
                target = name + '.' + sub_name
            else:
                self._push_variable(name)
                target = self.symtab.type_of(name) + '.' + sub_name
                n_args += 1
            self.tokenizer.advance()  # '('
        elif sym == '(':
            target = self.class_name + '.' + name
            self.vm_writer.write_push('pointer', 0)
            n_args += 1
        else:
            raise Exception('compile subroutine call: {0}'.format(sym))
        # Step past '(' exactly once, whichever form the call took.
        self.tokenizer.advance()
        n_args += self.compile_expression_list()
        self.vm_writer.write_call(target, n_args)
        self.tokenizer.advance()  # skip ')'

    # -- program structure ----------------------------------------------

    def compile_class(self):
        """Compile a whole class; this is the entry point for one file.

        Unlike the other compile methods it starts before the first token.
        It stops on the class's closing ``}`` without consuming it.
        """
        self.tokenizer.advance()  # 'class'
        self.tokenizer.advance()  # className
        self.class_name = self.tokenizer.identifier()
        self.tokenizer.advance()  # '{'
        self.tokenizer.advance()
        # classVarDec*
        while self.tokenizer.token_type() == 'KEYWORD' and \
                self.tokenizer.keyword() in ['static', 'field']:
            self.compile_class_var_dec()
        # subroutineDec*
        while self.tokenizer.token_type() == 'KEYWORD' and \
                self.tokenizer.keyword() in ['constructor', 'function',
                                             'method']:
            self.compile_subroutine()

    def compile_class_var_dec(self):
        """Compile one ``static``/``field`` declaration into the symbol table.

        Raises:
            Exception: if the type token is invalid or the declaration does
                not end with ``;``.
        """
        id_kind = self.tokenizer.keyword().upper()  # STATIC or FIELD
        self.tokenizer.advance()  # type
        id_type = self._read_type('compile class')
        self._define_var_names(id_type, id_kind)
        sym = self.tokenizer.symbol()
        if sym != ';':
            raise Exception('compile error: {0}'.format(sym))
        self.tokenizer.advance()

    def compile_subroutine(self):
        """Compile one constructor, function or method.

        The ``function`` command is emitted only after the local variable
        declarations, because it needs the number of locals.
        """
        self.symtab.start_subroutine()
        sub_kind = self.tokenizer.keyword()
        if sub_kind == 'method':
            # Reserve argument 0 for the receiver under a name that cannot
            # clash with a real variable (identifiers never start with a
            # digit).
            self.symtab.define('0', self.class_name, 'ARG')
        self.tokenizer.advance()  # 'void' | type
        self.tokenizer.advance()  # subroutineName
        sub_name = self.tokenizer.identifier()
        self.tokenizer.advance()  # '('
        self.tokenizer.advance()
        self.compile_parameter_list()  # leaves the cursor on ')'
        self.tokenizer.advance()  # '{'
        self.tokenizer.advance()
        # varDec*
        while self.tokenizer.token_type() == 'KEYWORD' and \
                self.tokenizer.keyword() == 'var':
            self.compile_var_dec()
        n_locals = self.symtab.var_count('VAR')
        self.vm_writer.write_function(
            '{0}.{1}'.format(self.class_name, sub_name), n_locals)
        if sub_kind == 'method':
            # Point 'this' at the receiver passed as argument 0.
            self.vm_writer.write_push('argument', 0)
            self.vm_writer.write_pop('pointer', 0)
        elif sub_kind == 'constructor':
            # Allocate one word per field and point 'this' at the block.
            n_fields = self.symtab.var_count('FIELD')
            self.vm_writer.write_push('constant', n_fields)
            self.vm_writer.write_call('Memory.alloc', 1)
            self.vm_writer.write_pop('pointer', 0)
        self.compile_statements()
        self.tokenizer.advance()  # skip '}'

    def compile_parameter_list(self):
        """Define the (possibly empty) parameter list as ARG symbols.

        Returns with the cursor on the closing ``)``.

        Raises:
            Exception: if a parameter type is invalid or the list is not
                closed by ``)``.
        """
        if self._is_symbol(')'):
            return
        while True:
            id_type = self._read_type('compile parameter list')
            self.tokenizer.advance()  # varName
            self.symtab.define(self.tokenizer.identifier(), id_type, 'ARG')
            self.tokenizer.advance()  # ',' or ')'
            if not self._is_symbol(','):
                break
            self.tokenizer.advance()  # next type
        if not self._is_symbol(')'):
            raise Exception('compile parameter list: {0}'.format(
                self.tokenizer.symbol()))

    def compile_var_dec(self):
        """Compile one ``var`` declaration into VAR symbols.

        Raises:
            Exception: if the type token is invalid or the declaration does
                not end with ``;``.
        """
        self.tokenizer.advance()  # type
        id_type = self._read_type('compile var dec')
        self._define_var_names(id_type, 'VAR')
        if not self._is_symbol(';'):
            raise Exception('compile var dec: {0}'.format(
                self.tokenizer.symbol()))
        self.tokenizer.advance()

    # -- statements -----------------------------------------------------

    def compile_statements(self):
        """Compile statements until a token that does not start one."""
        handlers = {
            'let': self.compile_let,
            'if': self.compile_if,
            'while': self.compile_while,
            'do': self.compile_do,
            'return': self.compile_return,
        }
        while self.tokenizer.token_type() == 'KEYWORD':
            handler = handlers.get(self.tokenizer.keyword())
            if handler is None:
                break
            handler()

    def compile_do(self):
        """Compile ``do subroutineCall ;`` and discard the returned value."""
        self.tokenizer.advance()  # subroutine, class or variable name
        name = self.tokenizer.identifier()
        self.tokenizer.advance()  # '.' or '('
        self._compile_subroutine_call(name)
        # Every call leaves a value on the stack; a do statement drops it.
        self.vm_writer.write_pop('temp', 0)
        self.tokenizer.advance()  # skip ';'

    def compile_let(self):
        """Compile ``let varName = expr;`` or ``let varName[expr] = expr;``."""
        self.tokenizer.advance()  # varName
        name = self.tokenizer.identifier()
        kind = self.symtab.kind_of(name)
        index = self.symtab.index_of(name)
        segment = CompilationEngine.KIND[kind]
        self.tokenizer.advance()  # '[' or '='
        if self.tokenizer.symbol() == '[':
            # Target address = base + index, left on the stack.
            self.vm_writer.write_push(segment, index)
            self.tokenizer.advance()
            self.compile_expression()  # leaves the cursor on ']'
            self.vm_writer.write_arithmetic('ADD')
            self.tokenizer.advance()  # '='
            self.tokenizer.advance()
            self.compile_expression()
            # Park the value in temp 0 so pointer 1 is only set after the
            # right-hand side (which may itself use 'that') is evaluated.
            self.vm_writer.write_pop('temp', 0)
            self.vm_writer.write_pop('pointer', 1)
            self.vm_writer.write_push('temp', 0)
            self.vm_writer.write_pop('that', 0)
        else:
            self.tokenizer.advance()  # skip '='
            self.compile_expression()
            self.vm_writer.write_pop(segment, index)
        self.tokenizer.advance()  # skip ';'

    def compile_while(self):
        """Compile ``while (expr) { statements }``."""
        loop_label = self.make_label()
        exit_label = self.make_label()
        self.vm_writer.write_label(loop_label)
        self.tokenizer.advance()  # '('
        self.tokenizer.advance()
        self.compile_expression()
        # Leave the loop when the condition equals 0 (false).
        self.vm_writer.write_push('constant', 0)
        self.vm_writer.write_arithmetic('EQ')
        self.vm_writer.write_if(exit_label)
        self.tokenizer.advance()  # '{'
        self.tokenizer.advance()
        self.compile_statements()
        self.vm_writer.write_goto(loop_label)
        self.vm_writer.write_label(exit_label)
        self.tokenizer.advance()  # skip '}'

    def compile_return(self):
        """Compile ``return expr? ;``.

        A bare ``return;`` pushes constant 0 so that a value is always
        returned.  The expression is compiled whenever the token after
        ``return`` is anything other than the ``;`` symbol, so expressions
        that start with a symbol (``-x``, ``~f``, ``(a + b)``) are handled.
        """
        self.tokenizer.advance()
        if self._is_symbol(';'):
            self.vm_writer.write_push('constant', 0)
        else:
            self.compile_expression()
        self.vm_writer.write_return()
        self.tokenizer.advance()  # skip ';'

    def compile_if(self):
        """Compile ``if (expr) { statements }`` with an optional ``else``."""
        else_label = self.make_label()
        end_label = self.make_label()
        self.tokenizer.advance()  # '('
        self.tokenizer.advance()
        self.compile_expression()
        # Jump to the else part when the condition equals 0 (false).
        self.vm_writer.write_push('constant', 0)
        self.vm_writer.write_arithmetic('EQ')
        self.vm_writer.write_if(else_label)
        self.tokenizer.advance()  # '{'
        self.tokenizer.advance()
        self.compile_statements()
        self.vm_writer.write_goto(end_label)
        self.tokenizer.advance()  # skip '}'
        self.vm_writer.write_label(else_label)
        if self.tokenizer.token_type() == 'KEYWORD' and \
           self.tokenizer.keyword() == 'else':
            self.tokenizer.advance()  # '{'
            self.tokenizer.advance()
            self.compile_statements()
            self.tokenizer.advance()  # skip '}'
        self.vm_writer.write_label(end_label)

    # -- expressions ----------------------------------------------------

    def compile_expression(self):
        """Compile ``term (op term)*``, applying operators left to right."""
        self.compile_term()
        while self.tokenizer.token_type() == 'SYMBOL':
            op = self.tokenizer.symbol()
            if op in CompilationEngine.BINARY_COMMANDS:
                self.tokenizer.advance()
                self.compile_term()
                self.vm_writer.write_arithmetic(
                    CompilationEngine.BINARY_COMMANDS[op])
            elif op in CompilationEngine.BINARY_CALLS:
                self.tokenizer.advance()
                self.compile_term()
                self.vm_writer.write_call(
                    CompilationEngine.BINARY_CALLS[op], 2)
            else:
                break

    def compile_term(self):
        """Compile a single term and leave its value on the stack.

        Raises:
            Exception: on a keyword, symbol or token type that cannot start
                a term.
        """
        t = self.tokenizer.token_type()
        if t == 'INT_CONST':
            self.vm_writer.write_push('constant', self.tokenizer.int_val())
            self.tokenizer.advance()
        elif t == 'STRING_CONST':
            # Build the string at run time, one appendChar call per char.
            s = self.tokenizer.string_val()
            self.vm_writer.write_push('constant', len(s))
            self.vm_writer.write_call('String.new', 1)
            for ch in s:
                self.vm_writer.write_push('constant', ord(ch))
                self.vm_writer.write_call('String.appendChar', 2)
            self.tokenizer.advance()
        elif t == 'KEYWORD':
            k = self.tokenizer.keyword()
            if k == 'true':
                # true is emitted as the negation of constant 1.
                self.vm_writer.write_push('constant', 1)
                self.vm_writer.write_arithmetic('NEG')
            elif k == 'false' or k == 'null':
                self.vm_writer.write_push('constant', 0)
            elif k == 'this':
                self.vm_writer.write_push('pointer', 0)
            else:
                raise Exception('compile term: {0}'.format(k))
            self.tokenizer.advance()
        elif t == 'IDENTIFIER':
            # varName | varName '[' expression ']' | subroutineCall
            name = self.tokenizer.identifier()
            self.tokenizer.advance()
            if self._is_symbol('['):
                self.tokenizer.advance()
                self._push_variable(name)
                self.compile_expression()  # leaves the cursor on ']'
                self.vm_writer.write_arithmetic('ADD')
                self.vm_writer.write_pop('pointer', 1)
                self.vm_writer.write_push('that', 0)
                self.tokenizer.advance()  # skip ']'
            elif self._is_symbol('(') or self._is_symbol('.'):
                self._compile_subroutine_call(name)
            else:
                self._push_variable(name)
        elif t == 'SYMBOL':
            sym = self.tokenizer.symbol()
            self.tokenizer.advance()
            if sym == '(':
                self.compile_expression()
                self.tokenizer.advance()  # skip ')'
            elif sym in ['-', '~']:
                self.compile_term()
                self.vm_writer.write_arithmetic(
                    'NEG' if sym == '-' else 'NOT')
            else:
                raise Exception('compile term: {0}'.format(sym))
        else:
            raise Exception('compile term: {0}'.format(t))

    def compile_expression_list(self):
        """Compile a comma-separated argument list.

        Returns:
            The number of expressions compiled.  The cursor is left on the
            token that ended the list (normally the closing ``)``).
        """
        n = 0
        if self._is_symbol(')'):
            return n
        self.compile_expression()
        n += 1
        while self._is_symbol(','):
            self.tokenizer.advance()
            self.compile_expression()
            n += 1
        return n


class SymbolTable:
    """Symbol table for one Jack class.

    Three scopes are kept: static and field variables (class level) and the
    arguments and locals of the subroutine currently being compiled.  Every
    scope belongs to the instance, so a table created for one class never
    sees the static variables, or continues the static numbering, of
    another class compiled earlier in the same run.

    Each entry is a dict with the keys ``'type'``, ``'kind'`` and
    ``'index'``; indices count from 0 separately for each kind.
    """

    def __init__(self):
        """Create an empty table; every running index starts at -1."""
        self.static_scope = {}
        self.static_index = -1
        self.field_scope = {}
        self.field_index = -1
        self.sub_scope = {}
        self.arg_index = -1
        self.var_index = -1

    def start_subroutine(self):
        """Forget all ARG/VAR symbols and restart their numbering."""
        self.sub_scope = {}
        self.arg_index = -1
        self.var_index = -1

    def define(self, name, type, kind):
        """Add ``name`` with the next free index of ``kind``.

        Args:
            name: identifier being declared.
            type: its declared type (a keyword or a class name).
            kind: one of 'STATIC', 'FIELD', 'ARG' or 'VAR'.

        Raises:
            Exception: if ``kind`` is not one of the four kinds.
        """
        if kind == 'STATIC':
            scope = self.static_scope
            self.static_index += 1
            index = self.static_index
        elif kind == 'FIELD':
            scope = self.field_scope
            self.field_index += 1
            index = self.field_index
        elif kind == 'ARG':
            scope = self.sub_scope
            self.arg_index += 1
            index = self.arg_index
        elif kind == 'VAR':
            scope = self.sub_scope
            self.var_index += 1
            index = self.var_index
        else:
            raise Exception()
        scope[name] = {'type': type, 'kind': kind, 'index': index}

    def var_count(self, kind):
        """Return how many symbols of ``kind`` have been defined.

        Raises:
            Exception: if ``kind`` is not one of the four kinds.
        """
        if kind == 'STATIC':
            return self.static_index + 1
        if kind == 'FIELD':
            return self.field_index + 1
        if kind == 'ARG':
            return self.arg_index + 1
        if kind == 'VAR':
            return self.var_index + 1
        raise Exception()

    def _lookup(self, name):
        """Return the entry for ``name``, innermost scope first, or None."""
        for scope in (self.sub_scope, self.field_scope, self.static_scope):
            if name in scope:
                return scope[name]
        return None

    def kind_of(self, name):
        """Return the kind of ``name``, or None if it is not defined."""
        entry = self._lookup(name)
        if entry is None:
            return None
        return entry['kind']

    def type_of(self, name):
        """Return the declared type of ``name``.

        Raises:
            Exception: if ``name`` is not defined.
        """
        entry = self._lookup(name)
        if entry is None:
            raise Exception()
        return entry['type']

    def index_of(self, name):
        """Return the index of ``name`` within its kind.

        Raises:
            Exception: if ``name`` is not defined.
        """
        entry = self._lookup(name)
        if entry is None:
            raise Exception('index_of: {0}'.format(name))
        return entry['index']


class VMWriter:
    """Emit VM commands, one per line, to an output stream."""

    def __init__(self, outf):
        """Remember ``outf``, the writable text stream that receives output."""
        self.outf = outf

    def _emit(self, *fields):
        """Write ``fields`` separated by single spaces, followed by a newline."""
        print(*fields, file=self.outf)

    def write_push(self, segment, index):
        """Emit ``push <segment> <index>``."""
        self._emit('push', segment, index)

    def write_pop(self, segment, index):
        """Emit ``pop <segment> <index>``."""
        self._emit('pop', segment, index)

    def write_arithmetic(self, command):
        """Emit the arithmetic/logical ``command`` in lower case."""
        self._emit(command.lower())

    def write_label(self, label):
        """Emit ``label <label>``."""
        self._emit('label', label)

    def write_goto(self, label):
        """Emit ``goto <label>``."""
        self._emit('goto', label)

    def write_if(self, label):
        """Emit ``if-goto <label>``."""
        self._emit('if-goto', label)

    def write_call(self, name, n_args):
        """Emit ``call <name> <n_args>``."""
        self._emit('call', name, n_args)

    def write_function(self, name, n_locals):
        """Emit ``function <name> <n_locals>``."""
        self._emit('function', name, n_locals)

    def write_return(self):
        """Emit ``return``."""
        self._emit('return')

    def close(self):
        """Close the underlying output stream."""
        self.outf.close()

if __name__ == '__main__':
    # Usage: JackCompiler.py <file.jack | directory>
    # Compiles a single Jack file, or every *.jack file directly inside a
    # directory, writing a .vm file next to each source file.
    if len(sys.argv) < 2:
        sys.exit('usage: {0} <file.jack | directory>'.format(sys.argv[0]))
    source = sys.argv[1]
    if os.path.isfile(source):
        filenames = [source]
    elif os.path.isdir(source):
        filenames = sorted(glob.glob(os.path.join(source, '*.jack')))
    else:
        sys.exit('{0}: no such file or directory'.format(source))
    for filename in filenames:
        # Swap only the extension; str.replace would also rewrite '.jack'
        # appearing elsewhere in the path and would leave a non-.jack name
        # unchanged, so the output would overwrite the source.
        vm_filename = os.path.splitext(filename)[0] + '.vm'
        if vm_filename == filename:
            sys.exit('{0}: refusing to overwrite the input file'.format(
                filename))
        with open(filename) as inf, open(vm_filename, 'w') as outf:
            compilation_engine = CompilationEngine(inf, outf)
            compilation_engine.compile_class()

入出力結果(Terminal, IPython)

$ make
./JackCompiler.py Seven
./JackCompiler.py ConvertToBin
./JackCompiler.py Square
./JackCompiler.py Pong
./JackCompiler.py ComplexArrays
$

0 コメント:

コメントを投稿