/** Created by natalia on 19.04.14. */
//package CompilerGenerator.lex;
/** Created by natalia on 05.04.14. */

import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.*;
import java.util.ArrayList;

/** Reads a whole text file into memory as a list of lines, decoded as UTF-8. */
class Reader {
    private File openedFile_;
    private BufferedReader reader_;
    private ArrayList<String> readedText_; // lines accumulated by Read()

    /**
     * Opens {@code path} for reading. Open failures are reported to stdout and
     * leave the reader unusable (Read() will then return an empty list).
     */
    public Reader(String path) {
        openedFile_ = new File(path);
        try {
            reader_ = new BufferedReader(new InputStreamReader(new FileInputStream(openedFile_), "UTF-8"));
        } catch (UnsupportedEncodingException exc) {
            System.out.println(" UTF-8 exception. " + exc.getMessage());
        } catch (FileNotFoundException exc) {
            System.out.println("File Not Found. " + exc.getMessage());
        }
        readedText_ = new ArrayList<String>();
    }

    /**
     * Reads every remaining line of the opened file.
     * @return a copy of the accumulated line list (empty if the file could not be opened)
     */
    public ArrayList<String> Read() {
        // Guard: the constructor swallows open failures, leaving reader_ null;
        // without this check the original code threw NullPointerException here.
        if (reader_ == null) {
            return new ArrayList<String>(readedText_);
        }
        String currentString;
        try {
            while ((currentString = reader_.readLine()) != null) {
                readedText_.add(currentString);
            }
        } catch (IOException exc) {
            System.out.println("IOException while reading file. " + exc.getMessage());
        }
        return new ArrayList<String>(readedText_);
    }
}

/**
 * Lexical token categories of the grammar description language:
 * N/T/R = the $NTERM/$TERM/$RULE section keywords, LP/RP = "(" ")",
 * LFP/RFP = "{" "}", OR = "|", E = "=", TERM/NTERM = terminal/nonterminal symbols.
 */
enum tokenType { N, T, R, LP, RP, LFP, RFP, OR, E, TERM, NTERM }

/** A lexical token: its category, its text, and its source coordinates. */
class Token {
    tokenType type;
    String value;
    String coords; // "(line,col)-(line,col)" as produced by the lexer

    public Token(tokenType t, String v) {
        type = t;
        value = v;
    }

    public Token(tokenType t, String v, String c) {
        type = t;
        value = v;
        coords = c;
    }

    public tokenType getType() {
        return type;
    }

    public String toString() {
        return type + " - " + value;
    }
}

/** AST node categories: atoms, "(...)" groups, "{...}" optional groups, alternations. */
enum nodeType { TERM, NTERM, CLOSURE, FIGCLOSURE, ALTERNATIVE }

/** Base class for AST nodes of a grammar rule's right-hand side. */
class Node {
    nodeType type;

    public Node(nodeType t) {
        type = t;
    }

    public nodeType getType() {
        return type;
    }

    public String toString() {
        return "";
    }
}

/** A leaf node: a single terminal or nonterminal symbol. */
class Atom extends Node {
    String value;

    public Atom(nodeType t, String v) {
        super(t);
        value = v;
    }

    public String getValue() {
        return value;
    }

    public String toString() {
        return value;
    }
}

/** A bracketed sub-expression: CLOSURE = "( ... )", FIGCLOSURE = "{ ... }". */
class Closure extends Node {
    Node alt; // the wrapped sub-expression (an Alternative in practice)

    public Closure(nodeType t, Node n) {
        super(t);
        alt = n;
    }

    public Node getValue() {
        return alt;
    }

    public String toString() {
        String sk, cl;
        if (this.getType() == nodeType.CLOSURE) {
            sk = "( ";
            cl = " )";
        } else {
            sk = "{ ";
            cl = "}";
        }
        return sk + alt.toString() + cl;
    }
}

/** An alternation node: a list of alternatives, each a sequence of Nodes. */
class Alternative extends Node {
    LinkedList<LinkedList<Node>> alts;

    public Alternative(nodeType t, LinkedList<LinkedList<Node>> a) {
        super(t);
        alts = a;
    }

    public LinkedList<LinkedList<Node>> getValue() {
        return alts;
    }

    public String toString() {
        String s = "";
        for (LinkedList<Node> l : alts) {
            if (l != null)
                for (Node n : l) {
                    if (n != null) s += n.toString();
                }
            s += " | ";
        }
        // Trims the trailing " |" (the final space is kept, matching historic output).
        return s.substring(0, s.length() - 2);
    }
}

/**
 * Recursive-descent parser for the token stream produced by the lexer,
 * plus the FIRST-set fixed-point computation over the parsed rules.
 */
class Parse {
    List<Token> tokens = new LinkedList<Token>();
    LinkedList<String> nterms = new LinkedList<String>(); // declared nonterminals
    LinkedList<String> terms = new LinkedList<String>();  // declared terminals
    HashMap<String, Node> rules = new HashMap<String, Node>(); // nonterminal -> RHS tree

    public Parse(List<Token> t) {
        tokens = t;
    }

    /** Reports a syntax error and terminates the program. */
    public void error(String coords, String message) {
        System.out.print("Syntax error at " + coords + ": " + message);
        System.exit(1);
    }

    // FIRST sets: nonterminal name -> list of terminals (plus "$EPS" marker)
    HashMap<String, LinkedList<String>> first = new HashMap<String, LinkedList<String>>();

    /**
     * FIRST of a concatenation: FIRST of the head, plus (if the head can derive
     * epsilon) FIRST of the remainder of the sequence.
     */
    public LinkedList<String> countFirstForConcat(LinkedList<Node> n) {
        // Defensive copy: F() may return the list stored in `first` itself
        // (NTERM case), and removing "$EPS" from that alias would corrupt it.
        LinkedList<String> result = new LinkedList<String>(F(n.get(0)));
        if (result.contains("$EPS")) {
            result.remove("$EPS");
            LinkedList<Node> rest = new LinkedList<Node>(n);
            rest.remove(0);
            result = Union(result, countFirstForConcat(rest));
        }
        return result;
    }

    /**
     * FIRST of a single node. FIGCLOSURE ("{...}") always contributes "$EPS"
     * because the group is optional/repeatable.
     */
    public LinkedList<String> F(Node n) {
        LinkedList<String> result = new LinkedList<String>();
        switch (n.getType()) {
            case TERM:
                Atom a = (Atom) n;
                result.add(a.getValue());
                break;
            case NTERM:
                Atom ab = (Atom) n;
                result = first.get(ab.getValue());
                break;
            case ALTERNATIVE:
                Alternative al = (Alternative) n;
                for (LinkedList<Node> alt : al.getValue())
                    result = Union(result, countFirstForConcat(alt));
                break;
            case CLOSURE:
                Closure cl = (Closure) n;
                result = F(cl.getValue());
                break;
            case FIGCLOSURE:
                Closure figcl = (Closure) n;
                result = F(figcl.getValue());
                result.add("$EPS");
                break;
            default:
                return null;
        }
        return result;
    }

    /** Iterates F over all rules until the FIRST sets reach a fixed point, then prints them. */
    public void countFirst() {
        // Single initialization pass (the original repeated this once per rule key
        // with identical effect).
        for (String nt : nterms)
            first.put(nt, new LinkedList<String>());
        boolean isChanging = true;
        while (isChanging) {
            isChanging = false;
            for (String lR : nterms) {
                LinkedList<String> newFirst = F(rules.get(lR));
                if (ifContainsNewElements(first.get(lR), newFirst)) {
                    isChanging = true;
                    first.put(lR, newFirst);
                }
            }
        }
        for (String nt : nterms) {
            System.out.print("\nFirst(" + nt + ") = ");
            for (String s : first.get(nt))
                System.out.print(s + " ");
        }
    }

    /** True if `after` contains any element missing from `before`. */
    private boolean ifContainsNewElements(List<String> before, List<String> after) {
        for (String a : after)
            if (!before.contains(a)) return true;
        return false;
    }

    /** Set-union of two lists (null-tolerant), preserving first-seen order. */
    private LinkedList<String> Union(List<String> a, List<String> b) {
        LinkedList<String> union = new LinkedList<String>();
        if (a != null)
            for (String l : a) {
                union.add(l);
            }
        if (b != null)
            for (String l : b) {
                if (!union.contains(l)) union.add(l);
            }
        return union;
    }

    int tokenNum = 0; // cursor into `tokens`

    /** Parses a "( ... )" group; the opening "(" has already been consumed. */
    public Node parseLP() {
        Node n = parseAlternative();
        if (tokens.get(tokenNum).getType() != tokenType.RP) {
            return null;
        }
        tokenNum++;
        return new Closure(nodeType.CLOSURE, n);
    }

    /** Parses a "{ ... }" group; the opening "{" has already been consumed. */
    public Node parseLFP() {
        Node n = parseAlternative();
        if (tokens.get(tokenNum).getType() != tokenType.RFP) {
            return null;
        }
        tokenNum++;
        return new Closure(nodeType.FIGCLOSURE, n);
    }

    /** Parses one branch of an alternation: a sequence of atoms and groups. */
    public LinkedList<Node> parseNAlternative() {
        LinkedList<Node> l = new LinkedList<Node>();
        Token cur = tokens.get(tokenNum);
        do {
            switch (cur.getType()) {
                case LP:
                    tokenNum++;
                    l.add(parseLP());
                    break;
                case LFP:
                    tokenNum++;
                    l.add(parseLFP());
                    break;
                case TERM:
                    l.add(new Atom(nodeType.TERM, tokens.get(tokenNum).value));
                    tokenNum++;
                    break;
                case NTERM:
                    l.add(new Atom(nodeType.NTERM, tokens.get(tokenNum).value));
                    tokenNum++;
                    break;
                case R:
                case OR:
                case RP:
                case RFP:
                    return l; // sequence ends at a rule boundary or group delimiter
                default:
                    error(tokens.get(tokenNum).coords, "impossible nterm");
            }
            if (tokenNum == tokens.size()) return l;
            cur = tokens.get(tokenNum);
        } while (cur.getType() != tokenType.OR
                && cur.getType() != tokenType.RP
                && cur.getType() != tokenType.RFP);
        return l;
    }

    /** Parses "seq | seq | ..." and wraps the branches into an Alternative node. */
    public Node parseAlternative() {
        LinkedList<LinkedList<Node>> l = new LinkedList<LinkedList<Node>>();
        do {
            if (tokens.get(tokenNum).getType() == tokenType.OR) tokenNum++;
            l.add(parseNAlternative());
            if (tokenNum >= tokens.size()) break;
        } while (tokenNum + 1 < tokens.size()
                && tokens.get(tokenNum).getType() == tokenType.OR
                && tokens.get(tokenNum).getType() != tokenType.R);
        return new Alternative(nodeType.ALTERNATIVE, l);
    }

    /** Parses the $RULE section: each rule is "$RULE NTERM = alternative". */
    public void parseR() {
        if (tokenNum >= tokens.size()) return;
        while (tokens.get(tokenNum).getType() == tokenType.R) {
            tokenNum++;
            Token cur = tokens.get(tokenNum);
            if (cur.getType() == tokenType.NTERM) {
                tokenNum++;
            } else {
                error(tokens.get(tokenNum).coords,
                        "left part of a rule expected, " + tokens.get(tokenNum) + " found");
                return;
            }
            tokenNum++; // skip the "=" token
            rules.put(cur.value, parseAlternative());
            System.out.println(cur.value + " = " + rules.get(cur.value).toString() + "\n");
            if (tokenNum >= tokens.size()) return;
        }
        if (tokenNum == tokens.size()) {
            return;
        }
        error(tokens.get(tokenNum - 1).coords,
                "one more rule or end of grammar expected, \"" + tokens.get(tokenNum - 1).value + "\" found");
        return;
    }

    /** Parses the $TERM section: collects terminal declarations, then hands off to rules. */
    public void parseT() {
        while (tokens.get(tokenNum).getType() == tokenType.TERM) {
            terms.add(tokens.get(tokenNum++).value);
        }
        switch (tokens.get(tokenNum++).getType()) {
            case T:
                parseT();
                break;
            case R:
                tokenNum--; // parseR expects to see the R token itself
                parseR();
                break;
            default:
                error(tokens.get(tokenNum - 1).coords, "rules expected");
                break;
        }
    }

    /** Parses the $NTERM section: collects nonterminal declarations. */
    public void parseN() {
        if (tokenNum >= tokens.size()) {
            error(tokens.get(tokenNum - 1).coords, "unexpected end");
        }
        while (tokens.get(tokenNum).getType() == tokenType.NTERM) {
            nterms.add(tokens.get(tokenNum++).value);
        }
        switch (tokens.get(tokenNum++).getType()) {
            case N:
                parseN();
                break;
            case T:
                parseT();
                break;
            default:
                System.out.println("error at " + tokens.get(tokenNum - 1).coords
                        + ": terms declaration expected;");
                break;
        }
    }

    /** Entry point: a grammar must begin with the $NTERM keyword. */
    public void startParse() {
        tokenNum = 0;
        switch (tokens.get(tokenNum++).getType()) {
            case N:
                parseN();
                break;
            default:
                error(tokens.get(tokenNum - 1).coords, "nterms declaration expected");
                return;
        }
    }
}

/** Driver: tokenizes a grammar file, parses it, and prints FIRST sets. */
public class lexer {
    static private int strNum = 0; // NOTE(review): never incremented per line inside findLex,
                                   // so all coordinates report line 0 — preserved as-is; verify intent.
    static private Parse parse;
    static List<Token> tokens = new LinkedList<Token>();

    /**
     * Scans `s` from the start, repeatedly matching the anchored pattern `p`
     * and appending recognized tokens; unmatched non-space characters are
     * reported as errors and skipped.
     * Group layout of `p`: 2 = any keyword, 3..11 = the individual keywords
     * ($TERM $NTERM $RULE ( ) { } = |), 12 = nonterminal, 13 = terminal.
     */
    private static void findLex(String s, Pattern p) {
        int pos = 0;
        Matcher m;
        while (s.length() != 0) {
            m = p.matcher(s);
            if (m.find()) {
                String coords = "(" + strNum + "," + pos + ")-(" + strNum + "," + (pos + m.end()) + ")";
                if (m.group(2) != null) {
                    System.out.println("KEYWORD (" + strNum + "," + pos + ")-(" + strNum + ","
                            + (pos + m.end()) + "): " + m.group(2));
                    if (m.group(3) != null)
                        tokens.add(new Token(tokenType.T, "$TERM", coords));
                    else if (m.group(4) != null)
                        tokens.add(new Token(tokenType.N, "$NTERM", coords));
                    else if (m.group(5) != null)
                        tokens.add(new Token(tokenType.R, "$RULE", coords));
                    else if (m.group(6) != null)
                        tokens.add(new Token(tokenType.LP, m.group(6), coords));
                    else if (m.group(7) != null)
                        tokens.add(new Token(tokenType.RP, m.group(7), coords));
                    else if (m.group(8) != null)
                        tokens.add(new Token(tokenType.LFP, m.group(8), coords));
                    else if (m.group(9) != null)
                        tokens.add(new Token(tokenType.RFP, m.group(9), coords));
                    else if (m.group(10) != null)
                        tokens.add(new Token(tokenType.E, m.group(10), coords));
                    else if (m.group(11) != null)
                        tokens.add(new Token(tokenType.OR, m.group(11), coords));
                } else if (m.group(13) != null) {
                    System.out.println("TERM (" + strNum + "," + pos + ")-(" + strNum + ","
                            + (pos + m.end()) + "): " + m.group(13));
                    tokens.add(new Token(tokenType.TERM, m.group(13), coords));
                } else if (m.group(12) != null) {
                    System.out.println("NTERM (" + strNum + "," + pos + ")-(" + strNum + ","
                            + (pos + m.end()) + "): " + m.group(12));
                    tokens.add(new Token(tokenType.NTERM, m.group(12), coords));
                }
                pos += m.end();
                s = s.substring(m.end());
            } else {
                pos++;
                if (s.charAt(0) != ' ') {
                    System.out.println("ERROR at (" + strNum + "," + pos + ")");
                }
                s = s.substring(1);
            }
        }
        strNum++;
    }

    public static void main(String[] args) {
        // Regular expressions for the grammar's lexemes
        String nterm = "[A-Z]";
        String term = "\".\"";
        String keyword = "(\\$TERM)|(\\$NTERM)|(\\$RULE)|(\\()|(\\))|(\\{)|(\\})|(=)|(\\|)";
        String pattern = "^((" + keyword + ")|(" + nterm + ")|(" + term + ")| |\\n)";
        // Compile the combined pattern once
        Pattern p = Pattern.compile(pattern);
        Reader reader = new Reader("test2.txt");
        ArrayList<String> mas = reader.Read();
        // Join the lines back into one string and run the lexer over it
        StringBuilder program = new StringBuilder();
        for (String s : mas)
            program.append(s).append('\n');
        findLex(program.toString(), p);
        parse = new Parse(tokens);
        parse.startParse();
        parse.countFirst();
    }
}