Counter: 2859,
today: 1,
yesterday: 0
Chapter 2 の PROGRAM
PROGRAM
ML-lex を使うので ocamllex とはどうがんばっても異なるものになってしまう。
とりあえずこんな感じになった。
(* Position and token types used by the lexer. *)
type pos = int
type lexresult = Tokens.token

(* Line bookkeeping shared with ErrorMsg. *)
val lineNum = ErrorMsg.lineNum
val linePos = ErrorMsg.linePos

(* Reports an error at the first position of a span; the second one is unused. *)
fun err (startPos, _) = ErrorMsg.error startPos

(* Text accumulated for the string literal currently being scanned. *)
val strTerm = ref ""
(* Scanner mode flag: 0 = ordinary code, 1 = inside a comment, 2 = inside a string. *)
val currState = ref 0
(* Current comment nesting depth. *)
val commentLv = ref 0

(* End-of-file handler: complains when a comment or string is still open
   (currState > 0), then returns the EOF token positioned at the head of linePos. *)
fun eof () =
  let
    val endPos = hd (!linePos)
    val () =
      if !currState > 0
      then ErrorMsg.error endPos ("String or Comment not concluded.")
      else ()
  in
    Tokens.EOF (endPos, endPos)
  end
%%
%s COMMENT STRINGS IGNORES;
%%
<INITIAL>type => ((* Reserved words: every token carries its start and end offsets. *)
Tokens.TYPE(yypos,yypos+4));
<INITIAL>var => (Tokens.VAR(yypos,yypos+3));
<INITIAL>function => (Tokens.FUNCTION(yypos,yypos+8));
<INITIAL>break => (Tokens.BREAK(yypos,yypos+5));
<INITIAL>of => (Tokens.OF(yypos,yypos+2));
<INITIAL>end => (Tokens.END(yypos,yypos+3));
<INITIAL>in => (Tokens.IN(yypos,yypos+2));
<INITIAL>nil => (Tokens.NIL(yypos,yypos+3));
<INITIAL>let => (Tokens.LET(yypos,yypos+3));
<INITIAL>do => (Tokens.DO(yypos,yypos+2));
<INITIAL>to => (Tokens.TO(yypos,yypos+2));
<INITIAL>for => (Tokens.FOR(yypos,yypos+3));
<INITIAL>while => (Tokens.WHILE(yypos,yypos+5));
<INITIAL>if => (Tokens.IF(yypos,yypos+2));
<INITIAL>then => (Tokens.THEN(yypos,yypos+4));
<INITIAL>else => (Tokens.ELSE(yypos,yypos+4));
<INITIAL>array => (Tokens.ARRAY(yypos,yypos+5));
<INITIAL>":=" => ((* Operators and punctuation. *)
Tokens.ASSIGN(yypos,yypos+2));
<INITIAL>"|" => (Tokens.OR(yypos,yypos+1));
<INITIAL>"&" => (Tokens.AND(yypos,yypos+1));
<INITIAL>">=" => (Tokens.GE(yypos,yypos+2));
<INITIAL>">" => (Tokens.GT(yypos,yypos+1));
<INITIAL>"<=" => (Tokens.LE(yypos,yypos+2));
<INITIAL>"<" => (Tokens.LT(yypos,yypos+1));
<INITIAL>"<>" => (Tokens.NEQ(yypos,yypos+2));
<INITIAL>"=" => (Tokens.EQ(yypos,yypos+1));
<INITIAL>"/" => (Tokens.DIVIDE(yypos,yypos+1));
<INITIAL>"*" => (Tokens.TIMES(yypos,yypos+1));
<INITIAL>"-" => (Tokens.MINUS(yypos,yypos+1));
<INITIAL>"+" => (Tokens.PLUS(yypos,yypos+1));
<INITIAL>"." => (Tokens.DOT(yypos,yypos+1));
<INITIAL>"}" => (Tokens.RBRACE(yypos,yypos+1));
<INITIAL>"{" => (Tokens.LBRACE(yypos,yypos+1));
<INITIAL>"]" => (Tokens.RBRACK(yypos,yypos+1));
<INITIAL>"[" => (Tokens.LBRACK(yypos,yypos+1));
<INITIAL>")" => (Tokens.RPAREN(yypos,yypos+1));
<INITIAL>"(" => (Tokens.LPAREN(yypos,yypos+1));
<INITIAL>";" => (Tokens.SEMICOLON(yypos,yypos+1));
<INITIAL>":" => (Tokens.COLON(yypos,yypos+1));
<INITIAL>"," => (Tokens.COMMA(yypos,yypos+1));
<INITIAL>"/*" => ((* Comment opener: depth becomes 1 and currState 1 marks an open comment. *)
commentLv := 1;currState := 1; YYBEGIN COMMENT; continue());
<INITIAL>\" => ((* String opener: currState 2 marks an open string; the buffer is cleared. *)
currState := 2; strTerm := ""; YYBEGIN STRINGS; continue());
<INITIAL>[0-9]+ => (Tokens.INT((valOf(Int.fromString yytext)
handle Overflow => ((* literal does not fit in int: report it and substitute 0 *)
ErrorMsg.error yypos ("integer literal too large " ^ yytext); 0)),
yypos,yypos+size yytext));
<INITIAL>[A-Za-z][A-Za-z0-9_]* => ((* Identifiers are ID tokens; STRING is reserved for string literals. *)
Tokens.ID(yytext,yypos,yypos+size yytext));
<INITIAL>[" "\t] => (continue());
<INITIAL>\n => ((* Record the newline so that offsets can be mapped back to lines. *)
lineNum := !lineNum+1; linePos := yypos :: !linePos; continue());
<INITIAL>. => (ErrorMsg.error yypos ("illegal character " ^ yytext); continue());
<COMMENT>"/*" => ((* Nested opener: one level deeper. *)
commentLv := !commentLv + 1; continue());
<COMMENT>"*/" => ((* Closer: one level up. Only when the outermost comment ends do we
   clear currState and return to INITIAL, so that eof still reports an
   outer comment that is left open after an inner one was closed. *)
commentLv := !commentLv - 1;
(if !commentLv = 0 then (currState := 0; YYBEGIN INITIAL) else ()); continue() );
<COMMENT>\n => ((* Newlines inside comments still count for line tracking. *)
lineNum := !lineNum+1; linePos := yypos :: !linePos; continue());
<COMMENT>. => (continue());
<STRINGS>\" => ((* Closing quote: leave string mode and emit the accumulated text.
   The token is positioned at the closing quote; the offset of the opening
   quote is not recorded anywhere, so the true start is unavailable here. *)
currState := 0; YYBEGIN INITIAL; Tokens.STRING(!strTerm,yypos,yypos+1));
<STRINGS>\\\" => (strTerm := !strTerm ^ "\"" ; continue());
<STRINGS>\\n => (strTerm := !strTerm ^ "\n" ; continue());
<STRINGS>\\t => (strTerm := !strTerm ^ "\t" ; continue());
<STRINGS>\\\^[@A-Z\[\\\]^_]
=> ((* Control escape, upper-case form: code of the third character minus 64. *)
strTerm := !strTerm ^ (str(chr(ord (String.sub (yytext,2)) - 64)));
continue()) ;
<STRINGS>\\\^[a-z]
=> ((* Control escape, lower-case form: code of the third character minus 96. *)
strTerm := !strTerm ^ (str(chr(ord (String.sub (yytext,2)) - 96)));
continue()) ;
<STRINGS>\\[0-9][0-9][0-9]
=> ((* Decimal escape: codes above Char.maxOrd would make chr raise Chr,
   so they are reported as errors instead of crashing the lexer. *)
let val code = valOf(Int.fromString(substring (yytext,1,3)))
in if code <= Char.maxOrd
then strTerm := !strTerm ^ str(chr code)
else ErrorMsg.error yypos ("illegal character " ^ yytext)
end;
continue());
<STRINGS>\\\\ => ((* Two backslashes in the source denote one backslash in the string. *)
strTerm := !strTerm ^ "\\" ; continue());
<STRINGS>\\[\ \t\012] => ((* Backslash followed by space, tab or form feed opens an ignored gap.
   It must match both characters, otherwise the longer illegal-escape
   rule below would win. *)
YYBEGIN IGNORES; continue());
<STRINGS>\\\n => ((* Backslash followed by newline also opens a gap; the newline sits at yypos+1. *)
lineNum := !lineNum+1; linePos := yypos+1 :: !linePos;
YYBEGIN IGNORES; continue());
<STRINGS>\\. => (ErrorMsg.error yypos ("illegal character " ^ yytext); continue());
<STRINGS>\n => ((* A raw newline is kept in the string and counted for line tracking. *)
lineNum := !lineNum+1; linePos := yypos :: !linePos;
strTerm := !strTerm ^ yytext; continue());
<STRINGS>. => (strTerm := !strTerm ^ yytext; continue());
<IGNORES>\\ => ((* The closing backslash of a gap resumes normal string scanning. *)
YYBEGIN STRINGS; continue() );
<IGNORES>\n => (lineNum := !lineNum+1; linePos := yypos :: !linePos; continue());
<IGNORES>. => ( continue() );
- コメントまたは文字列が閉じられないまま EOF を迎えたときのエラー検出
- 多重コメント
- 文字列エスケープ
という仕様に従うとこんな感じかなぁ、と。かなり泥臭いことになっている。\^c の文字 c については仕様にあいまいな書き方しかされていないので、てきとうに解釈した(大文字・記号に加えて小文字も受け付けるようにした)。SML の文法の場合は小文字を許していないようだけど。
とりあえずサンプルプログラムのパースには成功するけど、
これが処理として正しいかどうかは後にならないとわからないんだろうなぁ。
(追記)おおぅ、if-then-else がすっぽり抜けてるのを修正