/** An immutable cursor into a program text that tracks offset, line and column.
  *
  * Instances are created through the public single-argument constructor, which
  * points at the first character (offset 0, line 1, column 1), and advanced
  * with [[inc]].
  *
  * @param prog the full program text
  * @param offs zero-based offset of the current character in `prog`
  * @param line one-based line number of the current character
  * @param col  one-based column number of the current character
  */
class Pos private(val prog: String, val offs: Int, val line: Int, val col: Int) {

  /** Creates a position at the very beginning of `prog`. */
  def this(prog: String) = this(prog, 0, 1, 1)

  /** Code of the current character, or -1 when the offset equals the text length. */
  def ch: Int =
    if (offs != prog.length) prog.charAt(offs).toInt else -1

  /** Position of the next character.
    *
    * At end of input the same instance is returned; after a newline the line
    * counter is bumped and the column is reset to 1.
    */
  def inc: Pos = {
    val current = ch
    if (current == -1) this
    else if (current == '\n') new Pos(prog, offs + 1, line + 1, 1)
    else new Pos(prog, offs + 1, line, col + 1)
  }

  /** Renders the position as `(line, col)`. */
  override def toString: String = s"($line, $col)"
}
/** Token categories produced by the scanners.
  *
  * Values are declared in a fixed order, so their ids run from 0
  * (`WHITESPACE`) to 5 (`END_OF_PROGRAM`); each value prints as its own name.
  */
object DomainTags extends Enumeration {
  /** Convenience alias so client code can refer to a tag as `Tag`. */
  type Tag = Value

  val WHITESPACE: Value = Value("WHITESPACE")
  val IDENT: Value = Value("IDENT")
  val NUMBER: Value = Value("NUMBER")
  val STRING_LIT: Value = Value("STRING_LIT")
  val ERROR: Value = Value("ERROR")
  val END_OF_PROGRAM: Value = Value("END_OF_PROGRAM")
}
import DomainTags._
/** Base of the scanner stack: the fallback used when no mixed-in trait
  * recognises the character at `start`.
  */
class Scanner {

  /** Reports a syntax error for the character at `start` and skips it.
    *
    * @param start position of the first character of the token to scan
    * @param list  error messages collected so far, most recent first
    * @return a triple of the token tag (always `ERROR` here), the position
    *         following the skipped character, and `list` with a new message
    *         prepended
    */
  def scan(start: Pos, list: List[String]): (Tag, Pos, List[String]) =
    // Interpolation replaces the deprecated implicit `any2stringadd` that
    // `start + "..."` relied on; the resulting text is unchanged.
    (ERROR, start.inc, s"${start}syntax error" :: list)
}
/** A single token, scanned eagerly on construction.
  *
  * @param start   position of the token's first character
  * @param scanner scanner used to recognise the token
  * @param list    error messages collected before this token was scanned
  */
class Token(val start: Pos, scanner: Scanner, list: List[String]) {

  /** `tag` is the token category, `follow` the position right after the token,
    * and `newlist` the error list after scanning this token. At end of input
    * the scanner is not consulted and the token is an empty `END_OF_PROGRAM`.
    */
  val (tag, follow, newlist): (Tag, Pos, List[String]) =
    if (start.ch == -1) (END_OF_PROGRAM, start, list)
    else scanner.scan(start, list)

  /** The source text covered by this token. */
  def image: String = start.prog.substring(start.offs, follow.offs)

  /** Scans and returns the token that starts where this one ends. */
  def next: Token = new Token(follow, scanner, newlist)

  /** The errors passed in at construction, i.e. those recorded before this
    * token was scanned; an error produced by this token itself is only in
    * `newlist`.
    */
  def errList: List[String] = list
}
/** Scanner layer that recognises runs of spaces, tabs and newlines. */
trait Whitespaces extends Scanner {

  /** Returns the first position at or after `pos` whose character is not a
    * space, tab or newline; `pos` itself when it is not on such a character.
    */
  private def missWhitespace(pos: Pos): Pos = pos.ch match {
    case ' ' | '\t' | '\n' => missWhitespace(pos.inc)
    case _                 => pos
  }

  /** Emits a `WHITESPACE` token when at least one blank character was
    * skipped; otherwise delegates to the next scanner in the stack.
    */
  override def scan(start: Pos, list: List[String]): (Tag, Pos, List[String]) = {
    val afterBlanks = missWhitespace(start)
    // Nothing consumed means the skipper handed back the starting position.
    if (afterBlanks.offs == start.offs) super.scan(start, list)
    else (WHITESPACE, afterBlanks, list)
  }
}
/** Scanner layer that recognises identifiers: an ASCII letter or underscore
  * followed by any number of ASCII letters, digits or underscores.
  */
trait Ident extends Scanner {

  /** True when `c` may start an identifier. */
  private def isIdentStart(c: Int): Boolean =
    c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')

  /** True when `c` may appear after the first character of an identifier. */
  private def isIdentPart(c: Int): Boolean =
    isIdentStart(c) || (c >= '0' && c <= '9')

  /** Returns the first position at or after `pos` that is not an identifier character. */
  @scala.annotation.tailrec
  private def findIdent(pos: Pos): Pos =
    if (isIdentPart(pos.ch)) findIdent(pos.inc) else pos

  /** Emits an `IDENT` token when `start` is on an identifier start character;
    * otherwise delegates to the next scanner in the stack.
    *
    * The former "nothing consumed" error branch was removed: a valid start
    * character is always consumed, so that branch could never run.
    */
  override def scan(start: Pos, list: List[String]): (Tag, Pos, List[String]) =
    if (isIdentStart(start.ch)) (IDENT, findIdent(start), list)
    else super.scan(start, list)
}
/** Scanner layer that recognises double-quoted string literals whose body
  * consists of ASCII letters and the escapes `\n`, `\t` and `\"`.
  */
trait StringLit extends Scanner {

  /** True when `c` is an ASCII letter. */
  private def isLetter(c: Int): Boolean =
    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')

  /** Returns the first position at or after `pos` that cannot belong to a
    * string body: anything other than a letter or a supported escape.
    */
  @scala.annotation.tailrec
  private def findStringLit(pos: Pos): Pos =
    if (isLetter(pos.ch)) findStringLit(pos.inc)
    else if (pos.ch != '\\') pos
    else {
      val escaped = pos.inc.ch
      // Skip the backslash together with the character it escapes.
      if (escaped == 'n' || escaped == 't' || escaped == '\"') findStringLit(pos.inc.inc)
      else pos
    }

  /** Emits a `STRING_LIT` token for a properly closed literal, an `ERROR`
    * token (with a message located at the offending position) when the body
    * is not followed by a closing quote, and delegates to the next scanner
    * when `start` is not on an opening quote.
    *
    * The body scan starts after the opening quote, so it always ends past
    * `start`; the former "nothing consumed" error branch was unreachable and
    * has been removed.
    */
  override def scan(start: Pos, list: List[String]): (Tag, Pos, List[String]) =
    if (start.ch != '\"') super.scan(start, list)
    else {
      val follow = findStringLit(start.inc)
      if (follow.ch == '\"') (STRING_LIT, follow.inc, list)
      // Explicit toString replaces the deprecated implicit `any2stringadd`.
      else (ERROR, follow, (follow.toString + "symbol \" expected") :: list)
    }
}
/** Scanner layer that recognises numbers written as one to three digits
  * followed by any number of groups made of a dot and exactly three digits
  * (for example `1`, `100`, `1.000`).
  */
trait Number extends Scanner {

  /** True when `c` is an ASCII decimal digit. */
  private def isDigit(c: Int): Boolean = c >= '0' && c <= '9'

  /** Error recovery: skips digits and dots, stopping before a dot that is
    * immediately followed by another dot, and returns the first position
    * that was not skipped.
    */
  private def step(pos: Pos): Pos = {
    var cur = pos
    while (isDigit(cur.ch) || (cur.ch == '.' && cur.inc.ch != '.')) cur = cur.inc
    cur
  }

  /** Walks over a number starting at `pos` (a digit or a group-separating dot).
    *
    * For a well-formed number the result is the position of its last digit;
    * for a malformed one it is whatever [[step]] returns, whose character is
    * never a digit.
    */
  private def findNumber(pos: Pos): Pos = {
    val p1 = pos.inc
    val p2 = p1.inc
    val p3 = p2.inc
    if (isDigit(pos.ch)) {
      if (!isDigit(p1.ch)) {
        // One digit: continue with a following group or stop here.
        if (p1.ch == '.') findNumber(p1) else pos
      } else if (!isDigit(p2.ch)) {
        // Two digits.
        if (p2.ch == '.') findNumber(p2) else p1
      } else if (p3.ch == '.') {
        // Three digits followed by a group separator.
        findNumber(p3)
      } else if (isDigit(p3.ch)) {
        // Four digits in a row are not allowed: skip the malformed run.
        step(pos)
      } else p2
    } else if (pos.ch == '.') {
      // A group must be exactly three digits long.
      val exactlyThree =
        isDigit(p1.ch) && isDigit(p2.ch) && isDigit(p3.ch) && !isDigit(p3.inc.ch)
      if (exactlyThree) findNumber(p3) else step(pos)
    } else pos
  }

  /** Emits a `NUMBER` token for a well-formed number, an `ERROR` token (with a
    * message located at `start`) for a malformed one, and delegates to the
    * next scanner when `start` is not on a digit.
    */
  override def scan(start: Pos, list: List[String]): (Tag, Pos, List[String]) =
    if (!isDigit(start.ch)) super.scan(start, list)
    else {
      val last = findNumber(start)
      // A digit under `last` means it is the final digit of a valid number.
      if (isDigit(last.ch)) (NUMBER, last.inc, list)
      else (ERROR, last, s"${start}syntax error" :: list)
    }
}
// Demo driver: tokenises a sample program with the full scanner stack, prints
// every non-error token as "TAG (line, col)-(line, col): image", and finally
// prints the collected error messages in the order they were found.
var t = new Token(
  start = new Pos("qwerty_qwerty \"qwerty\\nqwerty\\t\\\"qwerty\" \n 1 10 100 1.000 10000 1.00 10000.0 " +
    "1q \"jbj\\jfbfjb\" "),
  scanner = new Scanner with Whitespaces with Ident with StringLit with Number,
  list = Nil
)
while (t.tag != END_OF_PROGRAM) {
  t.tag match {
    case ERROR => () // error tokens are only reported through the final summary
    case tag   => println(s"$tag ${t.start}-${t.follow}: ${t.image}")
  }
  t = t.next
}
// Errors are accumulated by prepending, so reverse to get source order.
println(t.errList.reverse)