コンピュータシステムの理論と実装10章

コンピュータシステムの理論と実装10章のパーサ完成しました。
10章と11章では、Jack言語というこの本のオリジナル言語のコンパイラ
作ります。10章では、パーサの部分を作成します。

jack-analyzerにファイルかディレクトリのパスを渡せば、パースした結果をXMLとして
出力します(10章ではテスト用に一旦XMLとして出力します。)

(jack-analyzer "test/xxx.jack") ;test/xxxT.xmlに出力される。

lex-advansにファイルポートを渡すとトークンを一つ返します

(lex-advans port);ファイルの内容が"x=10;"なら、最初は(identifier "x")を返す。

Jack言語はオブジェクトベースの言語で、クラスをパースするcomp-classからパースを始めます。

(comp-class p o) ;pは読み込みポートで、oは出力ポート

この本全部読み終わったら、Jack言語のかわりにLisp用のコンパイラを作ろうと思っています。

(use file.util)
(use sxml.serializer)

(define (read-number1 p lis)
  (let1 c (peek-char p)
	(cond ((eof-object? c)
	       (reverse lis))
	      ((char-numeric? c) 
	       (read-char p)
	       (read-number1  p (cons c lis)))
	      ((char-alphabetic? c)
	       (error "read-number1"))
	      (else
		(reverse lis)))))
	         
(define (read-number p)
  (list 'integerConstant
	(string->number
	  (list->string (read-number1 p '())))))

(define (read-number p)
  (list 'integerConstant
	  (list->string (read-number1 p '()))))

(define (read-string1 p lis)
  (let1 c (peek-char p)
	(cond ((eof-object? c) 
	       (error "read-string1"))
	      ((char=? c #\newline)
	       (error "read-string1 newline"))
	      ((char=? c #\")
	       (read-char p)
	       (reverse lis))
	      (else
		(read-char p)
		(read-string1 p (cons c lis))))))

(define (read-string p)
  (read-char p) ;"を読み飛ばす
  (list 'stringConstant
    (list->string (read-string1 p '()))))


(define (comp-symbol? c)
  (member c (string->list "{}()[].,\;+-*/&|<>=~")))


(define (read-symbol p)
  (let1 c (read-char p)
	;コメントの除去をここでやる
	(cond ((and (char=? c #\/) (char=? (peek-char p) #\/)) ;peek-charでpがeofならエラー
	       (read-char p)
	       (skip-comment p))
	      ((and (char=? c #\/) (char=? (peek-char p) #\*)) ;peek-charでpがeofならエラー
	       (read-char p)
	       (skip-comment*/ p))
	      (else
	       (list 'symbol (x->string c))))));sxmlとして扱うため、文字cをx->stringで文字列に変換

(define (comp-id? c)
  (or (char-alphabetic? c) (char=? c #\_)))

(define (read-id1 p lis)
  (let1 c (peek-char p)
	(cond ((eof-object? c) 
	       (reverse lis))
	      ((or (char-alphabetic? c) (char-numeric? c) (char=? c #\_))
	       (read-char p)
	       (read-id1 p (cons c lis)))
	      (else (reverse lis)))))

(define (read-id p)
  (let1 id (list->string (read-id1 p '()))
	(if (member id '("class" "constructor" "function" "method"
			 "field" "static" "var" "int" "char" 
			 "boolean" "void" "true" "false" "null"
			 "this" "let" "do" "if" "else" "while" "return"))
	    (list 'keyword id)
	    (list 'identifier id))))

(define (skip-space p)
  (let1 c (peek-char p)
	(cond ((eof-object? c) '())
	      ((char-whitespace? c)
	       (read-char p)
	       (skip-space p)))))
(define (skip-comment p)
  (let1 c (peek-char p)
	(cond ((eof-object? c) '(comment))
	      ((not (char=? c #\newline))
	       (read-char p)
	       (skip-comment p))
	      (else '(comment))
	      )))

(define (skip-comment*/ p)
  (let1 c (peek-char p)
	(cond ((eof-object? c) '(comment))
	      ((char=? c #\*)
	       (read-char p)
	       (if (char=? (peek-char p) #\/)
		 (begin (read-char p) 
			'(comment))
		 (skip-comment*/ p)))
	      (else
		(read-char p)
		(skip-comment*/ p)))))

(define (lex-advans1 p)
  (let1 c (peek-char p)
	(cond ((eof-object? c) '())
	      ((comp-symbol? c) (read-symbol p))
	      ((char=? c #\") (read-string p))
	      ((char-numeric? c) (read-number p))
	      ((comp-id? c) (read-id p))
	      ((char-whitespace? c) 
	       (skip-space p)
	       (lex-advans p))
	      (else (error "lex-advans1"))
	      )))
(define (lex-advans p)
  (let1 token (lex-advans1 p)
	(if (equal? token '(comment))
	  (lex-advans p)
	  token)))

(define current-token '())
(define next-token '())

(define (next-token-set! p)
  (set! current-token (lex-advans p)))

(define (next-token-set! p)
  (set! current-token next-token)
  (set! next-token (lex-advans p)))

(define (token-init p)
  (set! current-token (lex-advans p))
  (set! next-token (lex-advans p)))

(define (token-keyword? tok)
  (eq? (car tok) 'keyword))

(define (token-symbol? tok)
  (eq? (car tok) 'symbol))

(define (token-integerConstant? tok)
  (eq? (car tok) 'integerConstant))

(define (token-stringConstant? tok)
  (eq? (car tok) 'stringConstant))

(define (token-identifier? tok)
  (eq? (car tok) 'identifier))



(define (token->xml-output tok o)
  (display (string-append "<" (symbol->string (car tok)) ">"
			  (cond ((string=? (cadr tok) "<") "&lt;")
				((string=? (cadr tok) ">") "&gt;")
				((string=? (cadr tok) "&") "&amp;")
				(else (cadr tok)))
			  "</" (symbol->string (car tok)) ">\n") o))

(define (comp-term p o)
  (cond	((token-integerConstant? current-token)
	 (display "<term>\n" o) (token->xml-output current-token o) (display "</term>\n" o)
	 (next-token-set! p))
	((token-stringConstant? current-token)
	 (display "<term>\n" o) (token->xml-output current-token o) (display "</term>\n" o)
	 (next-token-set! p))
	((string=? (cadr current-token) "(")
	 (display "<term>\n" o)
	 (token-output p o "(")
	 (comp-expression p o)
	 (token-output p o ")")
	 (display "</term>\n" o))
	((member (cadr current-token) '("true" "false" "null" "this"))
	 (display "<term>\n" o)
	 (token-word?-output p o 'keyword)
	 (display "</term>\n" o))
	((member (cadr current-token) '("-" "~"))
	 (display "<term>\n" o)
	 (token-word?-output p o 'symbol)
	 (comp-term p o)
	 (display "</term>\n" o))
	((token-identifier? current-token)
	 (display "<term>\n" o)
	 (cond ((string=? (cadr next-token) "[")
		(token-word?-output p o 'identifier)
		(token-output p o "[")
		(comp-expression p o)
		(token-output p o "]"))
	       ((member (cadr next-token) '("." "("))
		(comp-subroutineCall p o))
	       (else (token-word?-output p o 'identifier)))
	 (display "</term>\n" o))
	(else 
	  (error "comp-term")) 
	))


(define (comp-expression p o)
  (display "<expression>\n" o)
  (comp-term p o)
  (comp-expression1 p o)
  (display "</expression>\n" o))

(define (comp-expression1 p o)
  (if (member (cadr current-token) '("+" "-" "*" "/" "&" "|"
				     "<" ">" "="))
    (begin (token-word?-output p o 'symbol)
	   (comp-term p o)
	   (comp-expression1 p o))))

(define (comp-expressionList p o)
  (display "<expressionList>\n" o)
  (if (not (string=? (cadr current-token) ")"))
    (begin (comp-expression p o)
	   (comp-expressionList1 p o)))
  (display "</expressionList>\n" o))

(define (comp-expressionList1 p o)
  (cond ((string=? (cadr current-token) ",")
         (token->xml-output current-token o)
	 (next-token-set! p)
	 (comp-expression p o)
	 (comp-expressionList1 p o))))

(define (comp-returnStatement p o)
  (display "<returnStatement>\n" o)
  (token->xml-output current-token o)
  (next-token-set! p)
  (if (not (string=? (cadr current-token)  ";"))
    (comp-expression p o))
  (if (string=? (cadr current-token) ";")
    (token->xml-output current-token o)
    (error "comp-returnStatement"))
  (next-token-set! p)
  (display "</returnStatement>\n" o))

(define (comp-subroutineCall p o)
  (cond ((token-identifier? current-token)
	 (token->xml-output current-token o)
	 (next-token-set! p)
	 (cond ((string=? (cadr current-token) "(")
		(token->xml-output current-token o)
		(next-token-set! p)
		(comp-expressionList p o))
	       ((string=? (cadr current-token) ".")
		(token->xml-output current-token o) ;"<symbol> . </symbol>"を出力
		(next-token-set! p)
		(token->xml-output current-token o) ;"<identifire> メソッド名 </identifire>"を出力
		(next-token-set! p)
		(token->xml-output current-token o) ;"<symbol> ( </symbol>"を出力
		(next-token-set! p)
		(comp-expressionList p o))
	       (else (error "subroutineCall")))
	 (if (string=? (cadr current-token) ")")
	   (token->xml-output current-token o)
           (error "subroutineCall"))
	 (next-token-set! p))
	(else
	  (error "subroutineCall"))))

(define (comp-doStatement p o)
  (display "<doStatement>\n" o)
  (token->xml-output current-token o)
  (next-token-set! p)
  (comp-subroutineCall p o)
  (token->xml-output current-token o)
  (display "</doStatement>\n" o)
  (next-token-set! p))

(define (token-output p o str)
  (if (string=? (cadr current-token) str)
    (begin (token->xml-output current-token o)
	   (next-token-set! p))
    (error (string-append "not " str))))


(define (token-word?-output p o sym)
  (if (eq? (car current-token) sym)
    (begin (token->xml-output current-token o)
	   (next-token-set! p))
    (error current-token))) 


(define (comp-whileStatement p o)
  (display "<whileStatement>\n" o)
  (token-output p o "while")
  (token-output p o "(")
  (comp-expression p o)
  (token-output p o ")")
  (token-output p o "{")
  (comp-statements p o)
  (token-output p o "}")
  (display "</whileStatement>\n" o))

  
(define (comp-letStatement p o)
  (display "<letStatement>\n" o)
  (token-output p o "let")
  (token-word?-output p o 'identifier)
  (if (string=? (cadr current-token) "[")
    (begin (token->xml-output current-token o)
	   (next-token-set! p)
	   (comp-expression p o)
	   (token-output p o "]")))
  (token-output p o "=")
  (comp-expression p o)
  (token-output p o ";")
  (display"</letStatement>\n" o))

(define (comp-ifStatement p o)
  (display "<ifStatement>\n" o)
  (token-output p o "if")
  (token-output p o "(")
  (comp-expression p o)
  (token-output p o ")")
  (token-output p o "{")
  (comp-statements p o)
  (token-output p o "}")
  (if (string=? (cadr current-token) "else")
    (begin (token-output p o "else")
           (token-output p o "{")
	   (comp-statements p o)
	   (token-output p o "}")))
  (display "</ifStatement>\n" o))


(define (comp-statements1 p o)
  (cond ((string=? (cadr current-token) "return")
	 (comp-returnStatement p o)
	 (comp-statements1 p o))
	((string=? (cadr current-token) "do")
	 (comp-doStatement p o)
	 (comp-statements1 p o))
	((string=? (cadr current-token) "let")
	 (comp-letStatement p o)
	 (comp-statements1 p o))
	((string=? (cadr current-token) "if")
	 (comp-ifStatement p o)
	 (comp-statements1 p o))
	((string=? (cadr current-token) "while")
	 (comp-whileStatement p o)
	 (comp-statements1 p o))


	)

  )

(define (comp-statements p o)
  (display "<statements>\n" o)
  (comp-statements1 p o)
  (display "</statements>\n" o))

(define (comp-type? token)
  (or (member (cadr token) '("int" "char" "boolean"))
      (token-identifier? token)))

(define (comp-varDec1 p o)
  (if (string=? (cadr current-token) ",")
    (begin (token-output p o ",")
	   (token-word?-output p o 'identifier)
	   (comp-varDec1 p o))))

(define (comp-varDec p o)
  (display "<varDec>\n" o)
  (token-output p o "var")
  (if (comp-type? current-token)
    (token->xml-output current-token o)
    (error "comp-varDec"))
  (next-token-set! p)
  (token-word?-output p o 'identifier)
  (comp-varDec1 p o)
  (token-output p o ";")
  (display "</varDec>\n" o))

(define (comp-varDec* p o)
  (if (string=? (cadr current-token) "var")
    (begin (comp-varDec p o)
	   (comp-varDec* p o))))

(define (comp-classVarDec p o)
  (display "<classVarDec>\n" o)
  (if (member (cadr current-token) '("static" "field"))
    (token->xml-output current-token o)
    (error "comp-classVarDec"))
  (next-token-set! p)

  (if (comp-type? current-token)
   (token->xml-output current-token o)
   (error "comp-varDec"))
  (next-token-set! p)

  (token-word?-output p o 'identifier)
  (comp-varDec1 p o)
  (token-output p o ";")
  (display "</classVarDec>\n" o))

(define (comp-classVarDec* p o)
  (if (member (cadr current-token) '("static" "field"))
    (begin (comp-classVarDec p o)
	   (comp-classVarDec* p o))))


(define (comp-subroutineBody p o)
  (display "<subroutineBody>\n" o)
  (token-output p o "{")
  (comp-varDec* p o)
  (comp-statements p o)
  (token-output p o "}")
  (display "</subroutineBody>\n" o))

(define (comp-parameterList p o)
  (display "<parameterList>\n" o)
  (cond ((string=? (cadr current-token) ")") '())
	((comp-type? current-token)
	 (token->xml-output current-token o)
	 (next-token-set! p)
	 (token-word?-output p o 'identifier)
	 (comp-parameterList1 p o)) 
	(else
	  (error "comp-parameterList")))
  (display "</parameterList>\n" o))

(define (comp-parameterList1 p o)
  (if (string=? (cadr current-token) ",")
    (begin (token-output p o ",")
	   (if (comp-type? current-token)
	     (token->xml-output current-token o)
	     (error "comp-parameterList1 type"))
	   (next-token-set! p)
	   (token-word?-output p o 'identifier)
	   (comp-parameterList1 p o))))

(define (comp-subroutineDec p o)
  (cond ((member (cadr current-token) '("constructor" "function" "method"))
	 (display "<subroutineDec>\n" o)
	 (token->xml-output current-token o)
	 (next-token-set! p)
	 (if (or (string=? (cadr current-token) "void")
		 (comp-type? current-token))
	   (begin (token->xml-output current-token o)
		  (next-token-set! p)
		  (token-word?-output p o 'identifier)
		  (token-output p o "(")
		  (comp-parameterList p o)
		  (token-output p o ")")
		  (comp-subroutineBody p o)
		  (display "</subroutineDec>\n" o))
	   (error "comp-subroutineDec not type, not void")))
	(else 
	  (error "comp-subroutineDec not constructor, not function, not method"))))

(define (comp-subroutineDec* p o)
  (if (member (cadr current-token) '("constructor" "function" "method"))
    (begin (comp-subroutineDec p o)
	   (comp-subroutineDec* p o))))

  
(define (comp-class p o)
  (display "<class>\n" o)
  (token-output p o "class")
  (token-word?-output p o 'identifier)
  (token-output p o "{")
  (comp-classVarDec* p o)
  (comp-subroutineDec* p o)
  (token-output p o "}")
  (display "</class>\n" o))

(define (comp-test str f)
  (call-with-input-string str
    (lambda (p)
      (token-init p)
      (f p (current-output-port)))))


(define (lex-test21 p lis)
  (let1 c (peek-char p)
	(if (eof-object? c)
	  (srl:sxml->xml (cons 'tokens (remove null? (reverse lis))))
	  (lex-test21 p (cons (lex-advans p) lis)))))
(define (lex-test2 p)
  (lex-test21 p '()))

;アウトプット用のパスを得る
;例:"text/xxx.jack" から "test/xxxT.xml" 
(define (jack->xml-path-name1 path)
  (receive (p name k)
	   (decompose-path path)
	   (string-append p "/" name "T.xml")))


(define (jack-analyzer1 path)
  (call-with-input-file path
    (lambda (p)
      (token-init p)
      (call-with-output-file (jack->xml-path-name1 path)
	(lambda (o)
	  (comp-class p o))))))

(define (jack-analyzer path)
  (if (and (path-extension path) (string=? (path-extension path) "jack"))
    (jack-analyzer1 path)
    (let1 file-list (remove (lambda (s) (not (and (path-extension s)
						  (string=? (path-extension s) "jack")))) 
			    (sys-readdir path)) ;ディレクトリの中にある.jackファイルの名前の文字列だけを取り出す
	  (for-each (lambda (s)
		      (jack-analyzer1 (string-append path "/" s))) 
		    file-list))))

(define (lex-test3 path)
  (call-with-input-file path
   (lambda (p)
     (let1 xml-txt (lex-test2 p)
	(call-with-output-file (jack->xml-path-name1 path)
	 (lambda (p2)
	   (display xml-txt p2)))))))

(define test-str "if (x < 153)
		   {let city=\"Paris\";}")

(define lex-test call-with-input-string)