import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.*;
import java.util.ArrayList;
/**
 * Loads the lines of a UTF-8 encoded text file.
 *
 * <p>The file is opened in the constructor and consumed by the first call to
 * {@link #Read()}. Problems (missing file, I/O failure) are reported on
 * standard output rather than thrown; in that case the lines collected so far
 * (possibly none) are returned.
 */
class Reader {
    /** The file this instance was asked to read. */
    private File openedFile_;

    /**
     * Open stream over {@link #openedFile_}; {@code null} when the file could
     * not be opened or once it has been read to the end and closed.
     */
    private BufferedReader reader_;

    /** Lines read so far, without their line terminators. */
    private ArrayList<String> readedText_;

    /**
     * Opens {@code path} for reading as UTF-8 text.
     *
     * <p>If the file cannot be opened a message is printed and the instance
     * stays usable: {@link #Read()} will simply return an empty list.
     *
     * @param path path of the file to read
     */
    public Reader(String path) {
        openedFile_ = new File(path);
        readedText_ = new ArrayList<String>();

        FileInputStream rawStream = null;
        try {
            rawStream = new FileInputStream(openedFile_);
            reader_ = new BufferedReader(new InputStreamReader(rawStream, "UTF-8"));
        } catch (UnsupportedEncodingException exc) {
            System.out.println(" UTF-8 exception. " + exc.getMessage());
            // The raw stream was already opened; do not leak its file handle.
            closeAndReport(rawStream);
        } catch (FileNotFoundException exc) {
            System.out.println("File Not Found. " + exc.getMessage());
        }
    }

    /**
     * Reads every remaining line of the file and returns all lines read so far.
     *
     * <p>The underlying stream is closed after the first call, so later calls
     * return the same cached content without touching the file again.
     *
     * @return a fresh copy of the lines read; empty when the file could not be
     *         opened
     */
    public ArrayList<String> Read() {
        if (reader_ != null) {
            try {
                String currentString;
                while ((currentString = reader_.readLine()) != null) {
                    readedText_.add(currentString);
                }
            } catch (IOException exc) {
                System.out.println("IOException while reading file. " + exc.getMessage());
            } finally {
                // The stream is exhausted (or broken): release the file handle
                // and remember that there is nothing left to read.
                closeAndReport(reader_);
                reader_ = null;
            }
        }
        return new ArrayList<String>(readedText_);
    }

    /** Closes {@code resource} if it is non-null, printing a message if closing fails. */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException exc) {
            System.out.println("IOException while closing file. " + exc.getMessage());
        }
    }
}
public class LexParse
{
public static void main(String args[])
{
String stringg = "[']([A-Za-z]|[']{2})+[']";
String number = "[0-9]+|([0-1]+[b]{1})";
String identifier = "((\\?|\\*|\\|)+[0-9]*)+"
String space = "^\n|^\r|^\n\r|^\u0085|^\u2028|^\u2029";
String w = "[ \t]+";
String pattern = "(^" + w + ")|(" + space + ")|(^"+identifier+")|(^"+number+")|("+stringg+")";
Pattern p = Pattern.compile(pattern);
Reader program = new Reader ("C:\Users\Denis");
ArrayList<String> lines = program.Read();
boolean err = false;
int l_num = 1;
for(String line : lines) {
int pos = 1;
Matcher m;
while(line.length() != 0){
m = p.matcher(line);
if (m.find()){
err = false;
line = line.substring(m.end());
if (m.group(3) != null){
System.out.println("identifier ("+ l_num + "," + pos + ") " + m.group(3));
} else if (m.group(4) != null){
System.out.println("number ("+ l_num + "," + pos + ") " + m.group(4));
} else if (m.group(5) != null){
System.out.println("stringg ("+ l_num + "," + pos + ") " + m.group(5));
}
pos += m.end();
} else {
if (!err){
System.out.println("error" + "(" +l_num + "," + pos + ")");
err = true;
}
line = line.substring(1);
pos++;
continue;
}
}
l_num++;
}
}
}