/** Immutable cursor into a program text.
  *
  * @param prog the complete source text being scanned
  * @param offs zero-based offset of the current character in `prog`
  * @param line one-based line number of the current character
  * @param col  one-based column number of the current character
  */
class Pos private(val prog: String, val offs: Int, val line: Int, val col: Int) {
  /** Creates a cursor at the very beginning of `prog` (line 1, column 1). */
  def this(prog: String) = this(prog, 0, 1, 1)

  /** The current character as an `Int`, or `-1` once the end of the text is reached. */
  def ch: Int = if (offs >= prog.length) -1 else prog(offs)

  /** The cursor advanced by one character.
    *
    * A newline moves to column 1 of the next line; at the end of the text
    * the cursor does not move and `this` is returned.
    */
  def inc: Pos =
    if (offs >= prog.length) this
    else if (prog(offs) == '\n') new Pos(prog, offs + 1, line + 1, 1)
    else new Pos(prog, offs + 1, line, col + 1)

  /** Renders the cursor as `(line, col)`. */
  override def toString: String = "(" + line + ", " + col + ")"
}

/** Token categories produced by the scanners. */
object DomainTags extends Enumeration {
  type Tag = Value
  val WHITESPACE, IDENT, NUMBER, STRING_LIT, ERROR, END_OF_PROGRAM = Value
}
import DomainTags._

/** Base of the stackable scanner chain.
  *
  * Every mixed-in trait tries to recognise its own token kind and otherwise
  * delegates to `super.scan`; this class is the end of that chain.
  */
class Scanner {
  /** Fallback used when no mixed-in scanner recognised the input.
    *
    * Instead of aborting, it records a syntax error and skips one character
    * so that scanning can continue.
    *
    * @param start position of the first character of the token
    * @param list  error messages collected so far, most recent first
    * @return the token tag, the position following the token, and the
    *         (possibly extended) error list
    */
  def scan(start: Pos, list: List[String]): (Tag, Pos, List[String]) =
    (ERROR, start.inc, (start.toString + "syntax error") :: list)
}

/** One token of the input; the following token is produced on demand by `next`.
  *
  * @param start   position of the first character of this token
  * @param scanner scanner chain used to recognise this token and its successors
  * @param list    error messages collected before this token was scanned
  */
class Token(val start: Pos, scanner: Scanner, list: List[String]) {
  /** `tag` is the token kind, `follow` the position just after the token and
    * `newlist` the error list including any error reported for this token.
    */
  val (tag, follow, newlist): (Tag, Pos, List[String]) =
    if (start.ch == -1) (END_OF_PROGRAM, start, list)
    else scanner.scan(start, list)

  /** The source text covered by this token. */
  def image: String = start.prog.substring(start.offs, follow.offs)

  /** The token that starts where this one ends. */
  def next: Token = new Token(follow, scanner, newlist)

  /** Errors collected before this token was scanned, most recent first. */
  def errList: List[String] = list
}

/** Recognises a run of spaces, tabs and newlines as a single WHITESPACE token. */
trait Whitespaces extends Scanner {
  /** Skips spaces, tabs and newlines and returns the first position past them. */
  @scala.annotation.tailrec
  private def missWhitespace(pos: Pos): Pos = {
    val c = pos.ch
    if (c == ' ' || c == '\t' || c == '\n') missWhitespace(pos.inc) else pos
  }

  override def scan(start: Pos, list: List[String]): (Tag, Pos, List[String]) = {
    val follow = missWhitespace(start)
    // Progress is detected by offset rather than by object identity.
    if (follow.offs != start.offs) (WHITESPACE, follow, list)
    else super.scan(start, list)
  }
}

/** Recognises identifiers: a letter or underscore followed by letters, digits
  * and underscores (ASCII only).
  */
trait Ident extends Scanner {
  private def isIdentStart(c: Int): Boolean =
    c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')

  private def isIdentPart(c: Int): Boolean =
    isIdentStart(c) || (c >= '0' && c <= '9')

  /** Returns the first position that is not part of the identifier. */
  @scala.annotation.tailrec
  private def findIdent(pos: Pos): Pos =
    if (isIdentPart(pos.ch)) findIdent(pos.inc) else pos

  override def scan(start: Pos, list: List[String]): (Tag, Pos, List[String]) =
    // A valid start character guarantees at least one consumed character,
    // so no separate "empty identifier" error case is needed.
    if (isIdentStart(start.ch)) (IDENT, findIdent(start), list)
    else super.scan(start, list)
}

/** Recognises double-quoted string literals whose body consists of ASCII
  * letters and the escape sequences `\n`, `\t` and `\"`.
  */
trait StringLit extends Scanner {
  private def isLetter(c: Int): Boolean =
    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')

  /** Skips the literal's body and returns the first position that is neither
    * a letter nor the start of a supported escape sequence.
    */
  @scala.annotation.tailrec
  private def findStringLit(pos: Pos): Pos = {
    val c = pos.ch
    if (isLetter(c)) findStringLit(pos.inc)
    else if (c == '\\') {
      val escaped = pos.inc.ch
      // Skip the backslash together with the escaped character.
      if (escaped == 'n' || escaped == 't' || escaped == '\"') findStringLit(pos.inc.inc)
      else pos
    } else pos
  }

  override def scan(start: Pos, list: List[String]): (Tag, Pos, List[String]) =
    if (start.ch == '\"') {
      val follow = findStringLit(start.inc)
      if (follow.ch == '\"') (STRING_LIT, follow.inc, list)
      // The body stopped at something other than the closing quote.
      else (ERROR, follow, (follow.toString + "symbol \" expected") :: list)
    } else {
      super.scan(start, list)
    }
}

/** Recognises numbers written with `.` as a thousands separator: one to three
  * leading digits followed by any number of `.ddd` groups, e.g. `7`, `100`,
  * `1.000`, `12.345.678`.
  */
trait Number extends Scanner {
  private def isDigit(c: Int): Boolean = c >= '0' && c <= '9'

  /** Error recovery: skips digits and single dots (a dot directly followed by
    * another dot is not skipped) and returns the first position past them.
    */
  @scala.annotation.tailrec
  private def step(pos: Pos): Pos =
    if (isDigit(pos.ch) || (pos.ch == '.' && pos.inc.ch != '.')) step(pos.inc)
    else pos

  /** Skips a run of digits; returns the position after it and the run length added to `count`. */
  @scala.annotation.tailrec
  private def digitRun(pos: Pos, count: Int): (Pos, Int) =
    if (isDigit(pos.ch)) digitRun(pos.inc, count + 1) else (pos, count)

  /** Consumes zero or more `.ddd` groups starting at `pos`.
    *
    * @return `Right(end)` with the position after the last group, or
    *         `Left(resync)` with the position to resume scanning from when a
    *         dot is not followed by exactly three digits
    */
  @scala.annotation.tailrec
  private def groups(pos: Pos): Either[Pos, Pos] =
    if (pos.ch != '.') Right(pos)
    else {
      val (after, len) = digitRun(pos.inc, 0)
      if (len == 3) groups(after) else Left(step(pos))
    }

  /** Scans a number starting at the digit at `start`.
    *
    * @return `Right(end)` for a well-formed number, `Left(resync)` for a
    *         malformed one (more than three leading digits, or a bad group)
    */
  private def findNumber(start: Pos): Either[Pos, Pos] = {
    val (afterLead, leadLen) = digitRun(start, 0)
    if (leadLen > 3) Left(step(start)) else groups(afterLead)
  }

  override def scan(start: Pos, list: List[String]): (Tag, Pos, List[String]) =
    if (isDigit(start.ch)) {
      findNumber(start) match {
        case Right(end)   => (NUMBER, end, list)
        case Left(resync) => (ERROR, resync, (start.toString + "syntax error") :: list)
      }
    } else {
      super.scan(start, list)
    }
}

// Demo driver: tokenises a sample text, prints every non-error token and
// finally the collected error messages in the order they occurred.
// `t` stays a top-level `var` so that any later script code can still refer to it.
var t = new Token(
  new Pos("qwerty_qwerty \"qwerty\\nqwerty\\t\\\"qwerty\" \n 1 10 100 1.000 10000 1.00 10000.0 " +
    "1q \"jbj\\jfbfjb\" "),
  new Scanner with Whitespaces with Ident with StringLit with Number,
  Nil
)
while (t.tag != END_OF_PROGRAM) {
  if (t.tag != ERROR) {
    println(s"${t.tag} ${t.start}-${t.follow}: ${t.image}")
  }
  t = t.next
}
println(t.errList.reverse)