/**
 * Grammar lexer/parser. Created by natalia, 19.04.14 (original draft 05.04.14).
 */
//package CompilerGenerator.lex;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.*;
import java.util.ArrayList;
/**
 * Reads a whole UTF-8 text file into memory, line by line.
 * Open failures are reported to stdout and {@link #Read()} then returns an
 * empty list, matching the print-and-continue error style of this file.
 */
class Reader {
    private File openedFile_;
    private BufferedReader reader_;
    private ArrayList<String> readedText_;

    /**
     * Opens {@code path} for reading as UTF-8.
     * @param path file-system path of the file to read
     */
    public Reader(String path) {
        openedFile_ = new File(path);
        try {
            reader_ = new BufferedReader(new InputStreamReader(new FileInputStream(openedFile_), "UTF-8"));
        } catch (UnsupportedEncodingException exc) {
            System.out.println(" UTF-8 exception. " + exc.getMessage());
        } catch (FileNotFoundException exc) {
            System.out.println("File Not Found. " + exc.getMessage());
        }
        readedText_ = new ArrayList<String>();
    }

    /**
     * Reads every remaining line of the file and returns a copy of the list.
     * Fixes two defects of the original: a NullPointerException when the
     * constructor failed to open the file (only IOException was caught), and
     * a leaked file handle (the stream was never closed).
     * @return a fresh list of the lines read so far
     */
    public ArrayList<String> Read() {
        if (reader_ != null) {
            String currentString;
            try {
                while ((currentString = reader_.readLine()) != null) {
                    readedText_.add(currentString);
                }
            } catch (IOException exc) {
                System.out.println("IOException while reading file. " + exc.getMessage());
            } finally {
                try {
                    reader_.close();
                } catch (IOException ignored) {
                    // best-effort close; nothing sensible to do here
                }
                reader_ = null; // the stream is drained; prevent reuse/double close
            }
        }
        return new ArrayList<String>(readedText_);
    }
}
/** Lexical categories produced by the lexer. */
enum tokenType {
    N,     // $NTERM section keyword
    T,     // $TERM section keyword
    R,     // $RULE section keyword
    LP,    // "("
    RP,    // ")"
    LFP,   // "{"
    RFP,   // "}"
    OR,    // "|"
    E,     // "="
    TERM,  // quoted terminal symbol, e.g. "a"
    NTERM  // single-capital nonterminal, e.g. S
}
/** A lexeme: its category, its text, and (optionally) its source span. */
class Token {
    tokenType type;  // lexical category assigned by the lexer
    String value;    // matched text, or the canonical keyword spelling
    String coords;   // "(line,col)-(line,col)" span; null when not supplied

    /** Builds a token without source coordinates. */
    public Token(tokenType kind, String text) {
        type = kind;
        value = text;
    }

    /** Builds a token carrying its source coordinates. */
    public Token(tokenType kind, String text, String position) {
        type = kind;
        value = text;
        coords = position;
    }

    /** @return the lexical category of this token */
    public tokenType getType() {
        return type;
    }

    /** Renders as "CATEGORY - text", e.g. "NTERM - S". */
    @Override
    public String toString() {
        return type + " - " + value;
    }
}
/** Kinds of syntax-tree nodes built while parsing a rule body. */
enum nodeType {
    TERM,        // leaf: terminal symbol
    NTERM,       // leaf: nonterminal symbol
    CLOSURE,     // "( ... )" group
    FIGCLOSURE,  // "{ ... }" group
    ALTERNATIVE  // list of "|"-separated branches
}
/** Base class for nodes of the rule-body syntax tree. */
class Node {
    nodeType type; // which concrete kind of node this is

    public Node(nodeType kind) {
        type = kind;
    }

    /** @return this node's kind tag */
    public nodeType getType() {
        return type;
    }

    /** Base nodes render as nothing; subclasses override. */
    @Override
    public String toString() {
        return "";
    }
}
class Atom extends Node {
String value;
public Atom(nodeType t, String v) {
super(t);
value = v;
}
public String getValue() { return value; }
public String toString() {
return value;
}
}
class Closure extends Node {
Node alt;
public Closure(nodeType t, Node n) {
super(t);
alt = n;
}
//public LinkedList<Node> getValue() { return tokens; }
public Node getValue() { return alt;}
public String toString() {
//System.out.println("CLOSURE!!!");
String sk = "", cl;
if(this.getType() == nodeType.CLOSURE) {
sk = "( "; cl = " )";
} else {
sk = "{ "; cl = "}";
}
String s = sk;
s += alt.toString();
return s + cl;
}
}
class Alternative extends Node {
LinkedList<LinkedList<Node>> alts;
public Alternative(nodeType t, LinkedList<LinkedList<Node>> a) {
super(t);
alts = a;
}
public LinkedList<LinkedList<Node>> getValue() { return alts; }
public String toString() {
String s = "";
for(LinkedList<Node> l: alts) {
if (l != null)
for (Node n : l) {
if (n != null)
s += n.toString();
}
s += " | ";
}
return s.substring(0, s.length()-2);
}
}
/**
 * Recursive-descent parser for the token stream produced by the lexer, plus
 * a fixed-point computation of FIRST sets for the parsed rules.
 *
 * Expected stream shape: $NTERM nonterminals... $TERM terminals...
 * $RULE NTERM = body ... The sentinel string "$EPS" marks the empty string
 * inside FIRST sets.
 */
class Parse {
    List<Token> tokens = new LinkedList<Token>();
    LinkedList<String> nterms = new LinkedList<String>();      // declared nonterminals
    LinkedList<String> terms = new LinkedList<String>();       // declared terminals
    HashMap<String, Node> rules = new HashMap<String, Node>(); // nonterminal -> rule body

    public Parse(List<Token> t) {
        tokens = t;
    }

    /** Reports a syntax error and terminates the whole program. */
    public void error(String coords, String message) {
        System.out.print("Syntax error at " + coords + ": " + message);
        System.exit(1);
    }

    // FIRST sets keyed by nonterminal; "$EPS" present means the symbol is nullable.
    HashMap<String, LinkedList<String>> first = new HashMap<String, LinkedList<String>>();

    /**
     * FIRST of a concatenation: FIRST of the head plus, when the head is
     * nullable, FIRST of the remainder.
     * Fix: the original threw IndexOutOfBoundsException when every element of
     * the concatenation was nullable; in that case the whole concatenation is
     * nullable, so "$EPS" is kept in the result.
     */
    public LinkedList<String> countFirstForConcat(LinkedList<Node> n) {
        LinkedList<String> result = F(n.get(0));
        if (result.contains("$EPS")) {
            result.remove("$EPS");
            LinkedList<Node> rest = new LinkedList<Node>(n);
            rest.remove(0);
            if (rest.isEmpty()) {
                result.add("$EPS"); // all elements nullable => concatenation is nullable
            } else {
                result = Union(result, countFirstForConcat(rest));
            }
        }
        return result;
    }

    /**
     * FIRST of a single syntax-tree node.
     * Fix: for NTERM the original returned the live list stored in the
     * {@code first} map; callers then mutated it (removing/adding "$EPS"),
     * silently corrupting the FIRST table. A defensive copy is returned now,
     * and an unknown nonterminal yields an empty set instead of null.
     */
    public LinkedList<String> F(Node n) {
        LinkedList<String> result = new LinkedList<String>();
        switch (n.getType()) {
            case TERM:
                result.add(((Atom) n).getValue());
                break;
            case NTERM:
                LinkedList<String> known = first.get(((Atom) n).getValue());
                if (known != null)
                    result = new LinkedList<String>(known); // defensive copy
                break;
            case ALTERNATIVE:
                for (LinkedList<Node> alt : ((Alternative) n).getValue())
                    result = Union(result, countFirstForConcat(alt));
                break;
            case CLOSURE:
                result = F(((Closure) n).getValue());
                break;
            case FIGCLOSURE:
                result = F(((Closure) n).getValue());
                result.add("$EPS"); // "{X}" may match zero repetitions
                break;
            default:
                return null;
        }
        return result;
    }

    /**
     * Computes FIRST for every nonterminal by iterating to a fixed point,
     * then prints the sets.
     * Fixes: the original wrapped the initialization in a redundant outer loop
     * over rules.keySet(); it replaced a set with the newly computed one
     * instead of accumulating (which could drop elements); and it crashed on a
     * nonterminal declared without a rule.
     */
    public void countFirst() {
        for (String nt : nterms)
            first.put(nt, new LinkedList<String>());
        boolean isChanging = true;
        while (isChanging) {
            isChanging = false;
            for (String lR : nterms) {
                if (rules.get(lR) == null)
                    continue; // declared but never given a rule
                LinkedList<String> newFirst = F(rules.get(lR));
                if (ifContainsNewElements(first.get(lR), newFirst)) {
                    isChanging = true;
                    first.put(lR, Union(first.get(lR), newFirst));
                }
            }
        }
        for (String nt : nterms) {
            System.out.print("\nFirst(" + nt + ") = ");
            for (String s : first.get(nt))
                System.out.print(s + " ");
        }
    }

    /** @return true if {@code after} holds an element missing from {@code before} */
    private boolean ifContainsNewElements(List<String> before, List<String> after) {
        for (String a : after)
            if (!before.contains(a))
                return true;
        return false;
    }

    /** Order-preserving union of two (possibly null) lists, without duplicates from b. */
    private LinkedList<String> Union(List<String> a, List<String> b) {
        LinkedList<String> union = new LinkedList<String>();
        if (a != null)
            union.addAll(a);
        if (b != null)
            for (String l : b)
                if (!union.contains(l))
                    union.add(l);
        return union;
    }

    // Index of the current token while parsing.
    int tokenNum = 0;

    /** Parses the body of a "( ... )" group; the "(" is already consumed. */
    public Node parseLP() {
        Node n = parseAlternative();
        if (tokens.get(tokenNum).getType() != tokenType.RP) {
            return null; // NOTE(review): a missing ")" yields a null child, not an error
        }
        tokenNum++;
        return new Closure(nodeType.CLOSURE, n);
    }

    /** Parses the body of a "{ ... }" group; the "{" is already consumed. */
    public Node parseLFP() {
        Node n = parseAlternative();
        if (tokens.get(tokenNum).getType() != tokenType.RFP) {
            return null; // NOTE(review): a missing "}" yields a null child, not an error
        }
        tokenNum++;
        return new Closure(nodeType.FIGCLOSURE, n);
    }

    /** Parses one branch of an alternative: a sequence of symbols and groups. */
    public LinkedList<Node> parseNAlternative() {
        LinkedList<Node> l = new LinkedList<Node>();
        Token cur = tokens.get(tokenNum);
        do {
            switch (cur.getType()) {
                case LP:
                    tokenNum++;
                    l.add(parseLP());
                    break;
                case LFP:
                    tokenNum++;
                    l.add(parseLFP());
                    break;
                case TERM:
                    l.add(new Atom(nodeType.TERM, tokens.get(tokenNum).value));
                    tokenNum++;
                    break;
                case NTERM:
                    l.add(new Atom(nodeType.NTERM, tokens.get(tokenNum).value));
                    tokenNum++;
                    break;
                case R:   // next rule starts
                case OR:  // next branch starts
                case RP:  // end of "( ... )"
                case RFP: // end of "{ ... }"
                    return l;
                default:
                    error(tokens.get(tokenNum).coords, "impossible nterm");
            }
            if (tokenNum == tokens.size())
                return l;
            cur = tokens.get(tokenNum);
        } while (cur.getType() != tokenType.OR && cur.getType() != tokenType.RP && cur.getType() != tokenType.RFP);
        return l;
    }

    /** Parses a "|"-separated list of branches into an Alternative node. */
    public Node parseAlternative() {
        LinkedList<LinkedList<Node>> l = new LinkedList<LinkedList<Node>>();
        do {
            if (tokens.get(tokenNum).getType() == tokenType.OR)
                tokenNum++;
            l.add(parseNAlternative());
            if (tokenNum >= tokens.size())
                break;
        } while (tokenNum + 1 < tokens.size() && tokens.get(tokenNum).getType() == tokenType.OR &&
                tokens.get(tokenNum).getType() != tokenType.R);
        return new Alternative(nodeType.ALTERNATIVE, l);
    }

    /** Parses the $RULE section: a sequence of "NTERM = body" rules. */
    public void parseR() {
        if (tokenNum >= tokens.size())
            return;
        while (tokens.get(tokenNum).getType() == tokenType.R) {
            tokenNum++;
            Token cur = tokens.get(tokenNum);
            if (cur.getType() == tokenType.NTERM) {
                tokenNum++;
            } else {
                error(tokens.get(tokenNum).coords, "left part of a rule expected, " + tokens.get(tokenNum) + " found");
                return;
            }
            tokenNum++; // NOTE(review): skips the "=" without checking it really is "="
            rules.put(cur.value, parseAlternative());
            System.out.println(cur.value + " = " + rules.get(cur.value).toString() + "\n");
            if (tokenNum >= tokens.size())
                return;
        }
        if (tokenNum == tokens.size()) {
            return;
        }
        // Fix: "grammer" -> "grammar" in the user-facing error message.
        error(tokens.get(tokenNum - 1).coords, "one more rule or end of grammar expected, \"" + tokens.get(tokenNum - 1).value + "\" found");
    }

    /** Parses the $TERM section: terminal declarations until the rules start. */
    public void parseT() {
        while (tokens.get(tokenNum).getType() == tokenType.TERM) {
            terms.add(tokens.get(tokenNum++).value);
        }
        switch (tokens.get(tokenNum++).getType()) {
            case T: // another $TERM keyword: keep collecting terminals
                parseT();
                break;
            case R:
                tokenNum--; // parseR expects to see the $RULE token itself
                parseR();
                break;
            default:
                error(tokens.get(tokenNum - 1).coords, "rules expected");
                break;
        }
    }

    /** Parses the $NTERM section: nonterminal declarations. */
    public void parseN() {
        if (tokenNum >= tokens.size()) {
            error(tokens.get(tokenNum - 1).coords, "unexpected end");
        }
        while (tokens.get(tokenNum).getType() == tokenType.NTERM) {
            nterms.add(tokens.get(tokenNum++).value);
        }
        switch (tokens.get(tokenNum++).getType()) {
            case N: // another $NTERM keyword: keep collecting nonterminals
                parseN();
                break;
            case T:
                parseT();
                break;
            default:
                System.out.println("error at " + tokens.get(tokenNum - 1).coords + ": terms declaration expected;");
                break;
        }
    }

    /** Entry point: a grammar must start with the $NTERM keyword. */
    public void startParse() {
        tokenNum = 0;
        switch (tokens.get(tokenNum++).getType()) {
            case N:
                parseN();
                break;
            default:
                error(tokens.get(tokenNum - 1).coords, "nterms declaration expected");
                return;
        }
    }
}
/**
 * Entry point: tokenizes a grammar description file, parses it, and prints
 * the FIRST set of every nonterminal.
 */
public class lexer {
    static private int strNum = 0; // line counter used in printed coordinates
    static private Parse parse;
    static List<Token> tokens = new LinkedList<Token>();

    /**
     * Scans the text chunk {@code s}, printing each recognized lexeme and
     * appending a Token to {@code tokens}. Group numbers refer to the pattern
     * built in main(): 2 = any keyword, 3..11 = the individual keywords and
     * symbols, 12 = nonterminal, 13 = quoted terminal.
     */
    private static void findLex(String s, Pattern p) {
        int pos = 0;
        Matcher m;
        while (s.length() != 0) {
            m = p.matcher(s);
            if (m.find()) {
                // NOTE(review): strNum is only incremented once per chunk and pos
                // never resets at '\n', so for a multi-line chunk all coordinates
                // stay on one "line" — confirm whether per-line calls were intended.
                String coords = "(" + strNum + "," + pos + ")-(" + strNum + "," +
                        (pos + m.end()) + ")";
                if (m.group(2) != null) {
                    System.out.println("KEYWORD (" + strNum + "," + pos + ")-(" + strNum + "," +
                            (pos + m.end()) + "): " + m.group(2));
                    if (m.group(3) != null) tokens.add(new Token(tokenType.T, "$TERM", coords));
                    else if (m.group(4) != null) tokens.add(new Token(tokenType.N, "$NTERM", coords));
                    else if (m.group(5) != null) tokens.add(new Token(tokenType.R, "$RULE", coords));
                    else if (m.group(6) != null) tokens.add(new Token(tokenType.LP, m.group(6), coords));
                    else if (m.group(7) != null) tokens.add(new Token(tokenType.RP, m.group(7), coords));
                    else if (m.group(8) != null) tokens.add(new Token(tokenType.LFP, m.group(8), coords));
                    else if (m.group(9) != null) tokens.add(new Token(tokenType.RFP, m.group(9), coords));
                    else if (m.group(10) != null) tokens.add(new Token(tokenType.E, m.group(10), coords));
                    else if (m.group(11) != null) tokens.add(new Token(tokenType.OR, m.group(11), coords));
                } else if (m.group(13) != null) {
                    System.out.println("TERM (" + strNum + "," + pos + ")-(" + strNum + "," +
                            (pos + m.end()) + "): " + m.group(13));
                    tokens.add(new Token(tokenType.TERM, m.group(13), coords));
                } else if (m.group(12) != null) {
                    System.out.println("NTERM (" + strNum + "," + pos + ")-(" + strNum + "," +
                            (pos + m.end()) + "): " + m.group(12));
                    tokens.add(new Token(tokenType.NTERM, m.group(12), coords));
                }
                pos += m.end();
                s = s.substring(m.end());
            } else {
                // No token matches at the current position: skip one character,
                // complaining unless it is a space.
                pos++;
                if (s.charAt(0) != ' ') {
                    System.out.println("ERROR at (" + strNum + "," + pos + ")");
                }
                s = s.substring(1);
            }
        }
        strNum++;
    }

    /**
     * Generalization: the grammar file may now be given as the first
     * command-line argument; with no argument the original hard-coded
     * "test2.txt" is used, so existing behavior is unchanged.
     */
    public static void main(String args[]) {
        // Regular expressions for the token classes.
        String nterm = "[A-Z]";
        String term = "\".\"";
        String keyword = "(\\$TERM)|(\\$NTERM)|(\\$RULE)|(\\()|(\\))|(\\{)|(\\})|(=)|(\\|)";
        String pattern = "^((" + keyword + ")|(" + nterm + ")|(" + term + ")| |\\n)";
        // Compile the combined pattern once.
        Pattern p = Pattern.compile(pattern);
        String path = (args.length > 0) ? args[0] : "test2.txt";
        Reader reader = new Reader(path);
        ArrayList<String> mas = reader.Read();
        // Rebuild the program text; StringBuilder avoids the original's
        // accidental O(n^2) string concatenation.
        StringBuilder program = new StringBuilder();
        for (String s : mas)
            program.append(s).append('\n');
        findLex(program.toString(), p);
        parse = new Parse(tokens);
        parse.startParse();
        parse.countFirst();
    }
}