lexer scala

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/** Immutable cursor into a program text.
  *
  * @param prog the complete program text
  * @param offs zero-based offset of the current character within `prog`
  * @param line one-based line number of the current character
  * @param col  one-based column number of the current character
  */
class Pos private (val prog: String, val offs: Int, val line: Int, val col: Int) {

  /** Creates a cursor at the start of `prog`: offset 0, line 1, column 1. */
  def this(prog: String) = this(prog, 0, 1, 1)

  /** True when the cursor sits just past the last character of `prog`. */
  private def atEnd: Boolean = offs == prog.length

  /** Code of the current character, or -1 when the end of the text is reached. */
  def ch: Int = if (atEnd) -1 else prog.charAt(offs).toInt

  /** Cursor advanced by one character.
    *
    * A newline moves to column 1 of the next line; any other character moves
    * one column to the right. At the end of the text the same instance is
    * returned.
    */
  def inc: Pos =
    if (atEnd) this
    else if (prog.charAt(offs) == '\n') new Pos(prog, offs + 1, line + 1, 1)
    else new Pos(prog, offs + 1, line, col + 1)

  /** Renders the cursor as `(line, col)`. */
  override def toString: String = s"($line, $col)"
}
/** Kinds of lexemes the scanners in this file can report. */
object DomainTags extends Enumeration {
  type Tag = Value

  /** Run of spaces, tabs and newlines. */
  val WHITESPACE = Value

  /** Run of lowercase letters and digits. */
  val IDENT = Value

  /** Number written between `<` and `>`. */
  val NUMBER = Value

  /** One of the operators `<=`, `=` or `==`. */
  val OPER = Value

  /** Character that no scanner recognised. */
  val ERROR = Value

  /** End of the program text. */
  val END_OF_PROGRAM = Value
}
import DomainTags._
/** Base scanner: the last resort reached through `super.scan` by the
  * recogniser traits mixed on top of it.
  */
class Scanner {

  /** Reports the character at `start` as unexpected.
    *
    * @param start position of the character to scan
    * @param errs  error messages collected so far
    * @return the `ERROR` tag, the position one character past `start`, and
    *         `errs` with an "unexpected symbol" message prepended
    */
  def scan(start: Pos, errs: List[String]): (Tag, Pos, List[String]) = {
    val message = s"unexpected symbol $start"
    (ERROR, start.inc, message :: errs)
  }
}
/** A single lexeme, scanned as soon as the token is constructed.
  *
  * @param start   position of the first character of the lexeme
  * @param scanner scanner used to recognise the lexeme beginning at `start`
  * @param errs    error messages collected before this token was scanned
  */
class Token(val start: Pos, scanner: Scanner, errs: List[String]) {

  // tag    - kind of the lexeme
  // follow - position right after the lexeme
  // errsl  - error list after scanning this lexeme
  // At the end of the text no scanner is consulted.
  val (tag, follow, errsl) =
    if (start.ch == -1) (END_OF_PROGRAM, start, errs)
    else scanner.scan(start, errs)

  /** Text of the lexeme: the characters from `start` up to, not including, `follow`. */
  def image = {
    val source = start.prog
    source.substring(start.offs, follow.offs)
  }

  /** Token beginning where this one ends, carrying the updated error list. */
  def next = new Token(follow, scanner, errsl)

  /** Error list this token was constructed with, i.e. without any message
    * produced while scanning this token itself (those are in `errsl`).
    */
  def listacc = errs
}
/** Recogniser for the operators `<=`, `==` and `=`. */
trait Oper extends Scanner {

  /** Position just past the operator that begins at `pos`, if there is one.
    *
    * A `<` that is not immediately followed by `=` is not an operator.
    */
  private def operatorEnd(pos: Pos): Option[Pos] = {
    val first = pos.ch
    val second = pos.inc
    if (first == '=') Some(if (second.ch == '=') second.inc else second) // "==" or "="
    else if (first == '<' && second.ch == '=') Some(second.inc)          // "<="
    else None
  }

  /** Yields an `OPER` token when an operator begins at `start`; otherwise
    * delegates to the next scanner in the chain.
    */
  override def scan(start: Pos, errs: List[String]): (Tag, Pos, List[String]) =
    operatorEnd(start) match {
      case Some(follow) => (OPER, follow, errs)
      case None         => super.scan(start, errs)
    }
}
/** Recogniser for numbers written as `<digits>`.
  *
  * A lexeme must begin with `<`. Its body may contain digits, lowercase
  * letters and spaces and must be closed by `>`. The body is well formed only
  * when it holds at least one digit and no letters or spaces; a closed but
  * malformed body is still reported as `NUMBER`, with an error message added.
  * Any other character in the body (a further `<` included) or the end of the
  * text means this is not a number, and the next scanner in the chain is tried.
  */
trait Number extends Scanner {

  private def isDigit(c: Int): Boolean = c >= '0' && c <= '9'

  private def isLetterOrSpace(c: Int): Boolean = (c >= 'a' && c <= 'z') || c == ' '

  /** Scans the body that follows the opening `<`.
    *
    * @param pos      current position inside the body
    * @param sawDigit whether a digit has been consumed
    * @param sawOther whether a letter or a space has been consumed
    * @return the position just past the closing `>` paired with a flag telling
    *         whether the body was well formed, or `None` when no closing `>`
    *         is reached
    */
  @scala.annotation.tailrec
  private def scanBody(pos: Pos, sawDigit: Boolean, sawOther: Boolean): Option[(Pos, Boolean)] =
    pos.ch match {
      case c if isDigit(c)         => scanBody(pos.inc, true, sawOther)
      case c if isLetterOrSpace(c) => scanBody(pos.inc, sawDigit, true)
      case c if c == '>'           => Some((pos.inc, sawDigit && !sawOther))
      case _                       => None
    }

  /** Yields a `NUMBER` token when a bracketed lexeme begins at `start`;
    * otherwise delegates to the next scanner in the chain.
    */
  override def scan(start: Pos, errs: List[String]): (Tag, Pos, List[String]) =
    // The opening '<' is accepted only as the very first character, so a later
    // '<' can neither be swallowed into the lexeme nor clear a pending error.
    if (start.ch != '<') super.scan(start, errs)
    else scanBody(start.inc, false, false) match {
      case Some((follow, true))  => (NUMBER, follow, errs)
      case Some((follow, false)) => (NUMBER, follow, ("error in digit part" + follow) :: errs)
      case None                  => super.scan(start, errs)
    }
}
/** Recogniser for identifiers: runs of lowercase letters and digits.
  *
  * A run whose last character is a digit is a well-formed identifier. A run
  * ending in a letter is still reported as `IDENT`, with a "digit expected"
  * message added.
  */
trait Ident extends Scanner {

  private def isLetter(c: Int): Boolean = c >= 'a' && c <= 'z'

  private def isDigit(c: Int): Boolean = c >= '0' && c <= '9'

  /** Consumes letters and digits starting at `pos`.
    *
    * @return the position after the run, and whether the last consumed
    *         character was a digit (`endsWithDigit` is passed through
    *         unchanged when nothing is consumed)
    */
  private def runEnd(pos: Pos, endsWithDigit: Boolean): (Pos, Boolean) = {
    val c = pos.ch
    if (isLetter(c)) runEnd(pos.inc, false)
    else if (isDigit(c)) runEnd(pos.inc, true)
    else (pos, endsWithDigit)
  }

  /** Yields an `IDENT` token when a letter or digit is found at `start`;
    * otherwise delegates to the next scanner in the chain.
    */
  override def scan(start: Pos, errs: List[String]): (Tag, Pos, List[String]) = {
    val (follow, endsWithDigit) = runEnd(start, false)
    if (start == follow) super.scan(start, errs)
    else if (endsWithDigit) (IDENT, follow, errs)
    else (IDENT, follow, ("digit expected" + follow) :: errs)
  }
}
/** Recogniser for runs of spaces, tabs and newlines. */
trait Whitespaces extends Scanner {

  /** First position at or after `pos` that does not hold a space, tab or newline. */
  private def skipBlanks(pos: Pos): Pos = pos.ch match {
    case ' ' | '\t' | '\n' => skipBlanks(pos.inc)
    case _                 => pos
  }

  /** Yields a `WHITESPACE` token when at least one blank character was
    * skipped; otherwise delegates to the next scanner in the chain.
    */
  override def scan(start: Pos, errs: List[String]): (Tag, Pos, List[String]) = {
    val follow = skipBlanks(start)
    if (start == follow) super.scan(start, errs)
    else (WHITESPACE, follow, errs)
  }
}
// Sample program fed to the lexer.
val str = "<1 2>= q1q 1q1 \n w1w1 <2><==== <2q> 11 &"
println(str)

// First token. The mixed-in scanners are tried from the rightmost trait to the
// leftmost: Whitespaces, Ident, Number, Oper, and finally the base Scanner.
var t = new Token(
  new Pos(str),
  new Scanner with Oper with Number with Ident with Whitespaces,
  Nil
)

// Walk the token chain up to the end of the program, printing every token
// except whitespace and unrecognised characters.
while (t.tag != END_OF_PROGRAM) {
  t.tag match {
    case ERROR | WHITESPACE => ()
    case _                  => println(s"${t.tag} ${t.start}-${t.follow}: ${t.image}")
  }
  t = t.next
}
/** Prints every message of `errs` on its own line, prefixed with `err:`,
  * in list order.
  *
  * @param errs messages to print
  * @return always `Nil`
  */
def printerrs(errs: List[String]): List[String] = {
  errs.foreach(message => println("err:" + message))
  Nil
}
// Print the error messages held by the last token reached by the loop above.
// The scanners prepend each new message to the list, so the most recent
// error is printed first.
printerrs(t.listacc)