/**
 * Created by natalia on 05.04.14.
 */

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Reads a text file, decoded as UTF-8, into a list of lines.
 *
 * <p>The file is opened in the constructor. If it cannot be opened, a message
 * is printed to standard output and {@link #Read()} returns an empty list.
 */
class Reader {
    private final File openedFile_;
    /** Open stream, or {@code null} if opening failed or the file was already consumed. */
    private BufferedReader reader_;
    /** Lines collected so far; kept so repeated {@link #Read()} calls return the same content. */
    private final ArrayList<String> readedText_ = new ArrayList<String>();

    /**
     * Opens the file at {@code path} for reading.
     *
     * @param path path of the file to read
     */
    public Reader(String path) {
        openedFile_ = new File(path);
        try {
            reader_ = new BufferedReader(
                    new InputStreamReader(new FileInputStream(openedFile_), StandardCharsets.UTF_8));
        } catch (FileNotFoundException exc) {
            System.out.println("File Not Found. " + exc.getMessage());
        }
    }

    /**
     * Reads all remaining lines of the file and closes it.
     *
     * <p>An {@link IOException} while reading is reported on standard output;
     * the lines read before the failure are still returned.
     *
     * @return a fresh copy of every line read so far (without line terminators);
     *         empty if the file could not be opened
     */
    public ArrayList<String> Read() {
        if (reader_ != null) {
            // try-with-resources closes the stream even when readLine() fails.
            try (BufferedReader in = reader_) {
                String line;
                while ((line = in.readLine()) != null) {
                    readedText_.add(line);
                }
            } catch (IOException exc) {
                System.out.println("IOException while reading file. " + exc.getMessage());
            } finally {
                // The stream is closed now; later calls must only return the cached lines.
                reader_ = null;
            }
        }
        return new ArrayList<String>(readedText_);
    }
}

/**
 * Regex-based scanner that reports the {@code $AXIOM}, {@code $NTERM},
 * {@code $TERM} and {@code $RULE} sections of a grammar description on
 * standard output.
 */
public class lex {
    private static final String AXIOM_REGEX = "\\$AXIOM ([A-Z]\\'{0,1})\\n";
    private static final String NTERM_REGEX = "\\$NTERM( [A-Z]\\'{0,1})+\\n";
    private static final String TERM_REGEX =
            "\\$TERM( \"[a-z]\"| \"\\*\"| \"\\+\"| \"\\(\"| \"\\)\")+\\n";
    private static final String RULE_REGEX =
            "\\$RULE ([A-Z]\\'{0,1} =([( [A-Z]\\'{0,1})( \"[a-z]\"| \"\\*\"| \"\\+\"| \"\\(\"| \"\\)\")]+\\n| \\$EPS\\n)+)";

    /** Matches a single {@code $RULE} section; group 1 is the text after {@code "$RULE "}. */
    private static final Pattern RULE_PATTERN = Pattern.compile(RULE_REGEX);

    /** Alternation of all section kinds; see the {@code *_GROUP} constants for the group layout. */
    private static final Pattern GRAMMAR_PATTERN = Pattern.compile(
            "(" + AXIOM_REGEX + ")|(" + NTERM_REGEX + ")|(" + TERM_REGEX + ")|((" + RULE_REGEX + ")+)");

    // Capturing-group numbers of the four top-level alternatives in GRAMMAR_PATTERN.
    // Each alternative also contains inner groups, hence the gaps.
    private static final int AXIOM_GROUP = 1;
    private static final int NTERM_GROUP = 3;
    private static final int TERM_GROUP = 5;
    private static final int RULES_GROUP = 7;

    /**
     * First coordinate of every reported position. It is incremented once at
     * the end of each {@link #findLex} and {@link #findLex2} call.
     * NOTE(review): presumably meant as a line number from when input was
     * scanned line by line - confirm before relying on it.
     */
    static private int strNum = 0;

    /** Prints one token report: label, start and end (exclusive) coordinates, and the token text. */
    private static void printToken(String label, int start, int end, String text) {
        System.out.println(label + " (" + strNum + "," + start + ")-(" + strNum + "," + end + "): " + text);
    }

    /**
     * Handles the character at {@code pos} when no token starts there: a space
     * is skipped silently, any other character is reported as an error at its
     * own offset (same 0-based coordinates as token starts).
     *
     * @return the offset of the next character to scan
     */
    private static int skipUnmatched(String s, int pos) {
        if (s.charAt(pos) != ' ') {
            System.out.println("ERROR at (" + strNum + "," + pos + ")");
        }
        return pos + 1;
    }

    /**
     * Scans a run of {@code $RULE} sections and prints a {@code RULE II} line for each.
     *
     * @param s text to scan; offsets in the output are relative to its start
     * @param p pattern whose group 1 participates in every match
     *          (the only caller passes {@link #RULE_PATTERN})
     */
    private static void findLex2(String s, Pattern p) {
        Matcher m = p.matcher(s);
        int pos = 0;
        while (pos < s.length()) {
            m.region(pos, s.length());
            // lookingAt() anchors the match at pos, so text in front of a token is never
            // skipped silently; the end() check guards against an empty match looping forever.
            if (m.lookingAt() && m.end() > pos) {
                printToken("RULE II", pos, m.end(), m.group(1));
                pos = m.end();
            } else {
                pos = skipUnmatched(s, pos);
            }
        }
        strNum++;
    }

    /**
     * Scans the whole grammar text and prints one line per recognised section:
     * {@code AXIOM}, {@code TERM}, {@code NTERM}, or {@code ???} for a run of
     * rules, which is then re-scanned rule by rule with {@link #findLex2}.
     *
     * @param s text to scan
     * @param p pattern with the group layout of {@link #GRAMMAR_PATTERN}
     */
    private static void findLex(String s, Pattern p) {
        Matcher m = p.matcher(s);
        int pos = 0;
        while (pos < s.length()) {
            m.region(pos, s.length());
            if (m.lookingAt() && m.end() > pos) {
                int end = m.end();
                if (m.group(AXIOM_GROUP) != null) {
                    printToken("AXIOM", pos, end, m.group(AXIOM_GROUP));
                } else if (m.group(TERM_GROUP) != null) {
                    printToken("TERM", pos, end, m.group(TERM_GROUP));
                } else if (m.group(NTERM_GROUP) != null) {
                    printToken("NTERM", pos, end, m.group(NTERM_GROUP));
                } else {
                    String rules = m.group(RULES_GROUP);
                    printToken("???", pos, end, rules);
                    findLex2(rules, RULE_PATTERN);
                }
                pos = end;
            } else {
                pos = skipUnmatched(s, pos);
            }
        }
        strNum++;
    }

    /**
     * Placeholder; currently does nothing.
     *
     * @param rules unused
     */
    public static void splitLexems(String rules) {
    }

    /**
     * Reads the grammar file, joins its lines with {@code '\n'} and scans the result.
     *
     * @param args optional; {@code args[0]} is the input path, defaulting to {@code "test.txt"}
     */
    public static void main(String args[]) {
        String path = (args != null && args.length > 0) ? args[0] : "test.txt";
        ArrayList<String> lines = new Reader(path).Read();

        // Sections end with '\n' in the patterns, so every line gets its terminator back.
        StringBuilder program = new StringBuilder();
        for (String line : lines) {
            program.append(line).append('\n');
        }
        findLex(program.toString(), GRAMMAR_PATTERN);
    }
}