How to replace expression by String in JavaParser AST - java

Suppose I have expression "(a == b || a == c) && (d == e)". How can I replace subexpression by custom String e.g. how can I replace a == b by a.equals(b)?

Solution based on @Andreas's suggestion:
/**
 * Demo: parses a sample expression, rewrites every {@code ==} comparison into an
 * {@code equals(...)} call and prints the result.
 */
public static void main(String[] args) {
    String source = "(get(a == b) || b == c ) && ( a == b ? b == c : c == d)";
    // processExpr only replaces nodes that have a parent, so the parsed root is
    // wrapped first and unwrapped again afterwards.
    EnclosedExpr wrapper = addParentExpr(StaticJavaParser.parseExpression(source));
    processExpr(wrapper);
    Expression rewritten = removeParentExpr(wrapper);
    // OUTPUT: (get(a.equals(b)) || b.equals(c)) && (a.equals(b) ? b.equals(c) : c.equals(d))
    System.out.println(rewritten);
}
processing
/**
 * Replaces every {@code ==} binary expression found in the tree rooted at {@code expr}
 * with a call of the form {@code left.equals(right)}.
 * Nodes without a parent are left untouched, because the replacement is done through the parent.
 */
private static void processExpr(Expression expr) {
    expr.stream()
        .filter(node -> node instanceof Expression && ((Expression) node).isBinaryExpr())
        .map(node -> ((Expression) node).asBinaryExpr())
        .filter(comparison -> comparison.getOperator() == Operator.EQUALS)
        .forEach(comparison -> comparison.getParentNode().ifPresent(
            parent -> parent.replace(
                comparison, equalsExpr(comparison.getLeft(), comparison.getRight()))));
}
helper methods
/** Wraps {@code expr} in a fresh {@code EnclosedExpr} and returns the wrapper. */
private static EnclosedExpr addParentExpr(Expression expr) {
    EnclosedExpr wrapper = new EnclosedExpr();
    wrapper.setInner(expr);
    return wrapper;
}
/** Returns the inner expression of {@code parentExpr} after asking the wrapper to remove it. */
private static Expression removeParentExpr(EnclosedExpr parentExpr) {
    Expression inner = parentExpr.getInner();
    parentExpr.remove(inner);
    return inner;
}
/** Builds the method call {@code leftExpr.equals(rightExpr)}. */
private static MethodCallExpr equalsExpr(Expression leftExpr, Expression rightExpr) {
    NodeList<Expression> arguments = new NodeList<>(rightExpr);
    return new MethodCallExpr(leftExpr, "equals", arguments);
}
Since StaticJavaParser is slow, there is also the option of using JavaParser with a ParserConfiguration that disables post-processing and validation (fast parsing).
JavaParser javaParser = new JavaParser(new ParserConfiguration()
.setLanguageLevel(LanguageLevel.RAW));

Related

Parse Boolean Expression in Java

Given an arbitrary number of TextField inputs (t1, t2, t3, ...) and a custom boolean string input from a JtextArea, I need to check if lines in a file match the custom boolean expression. It needs to support nested parentheses.
Example:
User enters "str1" into t1 , "str2" into t2, "str3" into t3, "str4" into t4, "str5" into t5.
User enters the following into the JTextArea for the custom boolean:
"not ((t1 and not t3) or (t4 and t2)) or t5"
Then based on these inputs, I must filter a file and return lines in the file that match the custom boolean based on a "contains" relationship (e.g. "t1 and not t3" means a line must contain the string t1 and not contain the string t3).
For example a file with the following two lines:
str5
str4 str2
The filter would only return str5 because it is the only line that matches the custom boolean.
I am having trouble even getting started. I have tried to think of a recursive solution but couldn't come up with anything. Also I tried non-recursive solutions but can't come up with anything either.
There is also the problem of the end result boolean needing to take in a parameter (each line in the file). I thought of maybe producing a sequence of operations to perform rather than a boolean that somehow takes in a parameter. But I can't figure out how to get this sequence in the first place.
Here is what I have now. It is very bad and I am thinking of scrapping this approach.
/**
 * Experimental parser for a parenthesised boolean expression.
 * It records the index pairs of matching parentheses as strings of the form
 * "begin,end" and then annotates those strings in place with extra comma-separated
 * fields. The passes are meant to be run in the order used by main:
 * getPairs, findAtoms, computeAtoms, replaceAtoms.
 */
public class CustomInputParser {
// One entry per matched parenthesis pair, "begin,end" plus annotations added by later passes.
private ArrayList<String> pairs;
// The expression text the indices in pairs refer to.
private String inp;
// Maps the index of an opening parenthesis to "innerText#endIndex" for innermost groups.
private HashMap<Integer,String> atomMap;
/** Creates a parser with no pairs, an empty input and an empty atom map. */
public CustomInputParser() {
this.pairs = null;
this.inp = "";
this.atomMap = new HashMap<Integer,String>();
}
/**
 * Marks every pair whose enclosed text contains no further "(" by appending ",#" to it.
 * Requires getPairs to have been called successfully first (pairs must not be null).
 */
public void findAtoms() {
int i = 0;
for(String s : this.pairs) {
String[] indices = s.split(",");
int begin = Integer.valueOf(indices[0]);
int end = Integer.valueOf(indices[1]);
// Text strictly between the two parentheses.
if(!inp.substring(begin+1, end).contains("(")) {
this.pairs.set(i, this.pairs.get(i) + ",#");
}
i++;
}
}
/**
 * For every pair marked with ",#", appends the enclosed text as a fourth field and
 * stores "enclosedText#end" in atomMap under the index of the opening parenthesis.
 * Prints the pairs and the atom map afterwards.
 */
public void computeAtoms() {
int i = 0;
for(String s : this.pairs) {
if(s.contains(",#")) {
String[] indices = s.split(",");
int begin = Integer.valueOf(indices[0]);
int end = Integer.valueOf(indices[1]);
//this.pairs.set(i,this.pairs.get(i).replace(",a", ""));
this.pairs.set(i, this.pairs.get(i) + ","+inp.substring(begin+1, end));
// Field 3 of the updated entry is the enclosed text appended on the previous line.
this.atomMap.put(begin,this.pairs.get(i).split(",")[3]+"#"+String.valueOf(end));
}
i++;
}
System.out.println(this.pairs.toString());
System.out.println(this.atomMap.toString());
}
/**
 * For every pair whose string contains none of the letters 'o', 'a' or 'n', scans the
 * enclosed text for "(" characters. If the position is a key of atomMap, a field
 * "position#end>text" is appended to the pair; otherwise the pair is prefixed with "!".
 * Prints the pairs afterwards.
 */
public void replaceAtoms() {
int i = 0;
for(String s : this.pairs) {
if(!(s.contains("o") || s.contains("a") || s.contains("n"))) {
String[] indices = s.split(",");
int begin = Integer.valueOf(indices[0])+1;
int end = Integer.valueOf(indices[1]);
for(int j = begin; j < end; j++) {
if(inp.charAt(j) == '(') {
if(atomMap.containsKey(j)) {
this.pairs.set(i, this.pairs.get(i) + ","+j+"#"+atomMap.get(j).split("#")[1]+">"+atomMap.get(j).split("#")[0]);
}
else {
this.pairs.set(i,"!"+ this.pairs.get(i));
}
}
}
}
i++;
}
System.out.println(this.pairs.toString());
}
/**
 * Stores str as the current input and computes the matching parenthesis pairs.
 *
 * @param str the expression text to scan
 * @return one "openIndex,closeIndex" string per pair, in the order the closing
 *         parentheses occur, or null if the parentheses are unbalanced (in which
 *         case the previously stored pairs are kept)
 */
public ArrayList<String> getPairs(String str){
this.inp = str;
ArrayList<String> res = new ArrayList<String>();
char[] arr = str.toCharArray();
// Indices of opening parentheses that have not been closed yet.
Stack<Integer> s = new Stack<Integer>();
for(int i = 0; i < arr.length; i++) {
if(arr[i] == '(') {
s.push(i);
}
if(arr[i] == ')') {
if(s.empty()) {
// Closing parenthesis without a matching opening one.
return null;
}
else {
Integer start = s.pop();
Integer end = Integer.valueOf(i);
res.add(start.toString() + "," + end.toString());
}
}
}
if(!s.empty()) {
// At least one opening parenthesis was never closed.
return null;
}
this.pairs = res;
return res;
}
/** Runs all passes on a sample expression after abbreviating its keywords to single letters. */
public static void main(String[] args) {
String x = "((not t1 and ((not t2 or t4) or (t3 or t4))) or (t5 and not t6)) and t7";
// "not"/"and"/"or" become n/a/o, the "t" prefix of variables and all spaces are dropped.
x = x.replace("not", "n").replace("and","a").replace("or", "o").replace("t", "").replace(" ", "");
System.out.println(x);
CustomInputParser c = new CustomInputParser();
System.out.println(c.getPairs(x).toString());
c.findAtoms();
c.computeAtoms();
c.replaceAtoms();
}
}
The first step is to tokenize the input. Define
enum Token {VAR, LP, RP, NOT, AND, OR, END}
LP and RP are parentheses. Now define a tokenizer class that looks something like this:
class Tokenizer {
Tokenizer(String input) {...}
void reset() {...}
Token getNext() {...}
String getVarName() {...}
}
Calling getNext() on your example in a loop should return
LP LP NOT VAR AND LP LP NOT VAR OR VAR RP OR LP VAR OR VAR RP RP RP OR LP VAR AND NOT VAR RP RP AND VAR END
Calling getVarName() immediately after a VAR has been returned by getNext() gives you the name of the variable (e.g. "t42").
There are many ways to implement little scanners like this. You should do this first and make sure it's bulletproof by testing. Trying to build a parser on top of a flaky scanner is torture.
As I said in comments, I'd consider recursive descent parsing. If you have a suitable grammar, writing an RD parser is a very short step as the Dragon Book (also mentioned above) shows.
A reasonable grammar (using tokens as above) is
Expr -> Term AND Term
| Term OR Term
| Term END
Term -> NOT Term
| Opnd
Opnd -> VAR
| LP Expr RP
For example, here is how you'd get started. It shows the first rule converted to a function:
class Evaluator {
final Tokenizer tokenizer = ...; // Contains the expression text.
final Map<String, Boolean> env = ... // Environment: variables to values.
Token lookAhead; // Holds the token we're parsing right now.
Evaluator(Tokenizer tokenizer, Map<String, Boolean> env) { ... }
void advance() {
lookAhead = tokenizer.getNext();
}
boolean expr() {
boolean leftHandSide = term(); // Parse the left hand side recursively.
Token op = lookAhead; // Remember the operator.
if (op == Token.END) return leftHandSide; // Oops. That's all.
advance(); // Skip past the operator.
boolean rightHandSide = term(); // Parse the right hand side recursively.
if (op == Token.AND) return leftHandSide && rightHandSide; // Evaluate!
if (op == Token.OR) return leftHandSide || rightHandSide;
dieWithSyntaxError("Expected op, found " + op);
}
boolean term() {...}
boolean opnd() {...}
}
The environment is used when a VAR is parsed. Its boolean value is env.get(tokenizer.getVarName()).
So to process the file, you'll
For each line
For each variable tX in the expression
See if the line contains the string tX is bound to in its text field.
If so, put the mapping tX -> true in the environment
else put tX -> false
Reset the tokenizer
Call Evaluator.evaluate(tokenizer, environment)
If it returns true, print the line, else skip it.
This is the simplest approach I can think of. About 150 lines. Many optimizations are possible.
Added
Well since I can no longer take away the thrill of discovery, here is my version:
import static java.lang.Character.isDigit;
import static java.lang.Character.isWhitespace;
import java.util.HashMap;
import java.util.Map;
import static java.util.stream.Collectors.toMap;
/**
 * Filters lines of text with a boolean expression over search-term variables.
 *
 * <p>Grammar accepted by {@link Evaluator} ({@code and} binds tighter than {@code or}):
 * <pre>
 *   expr        = conjunction { OR conjunction }
 *   conjunction = negated { AND negated }
 *   negated     = NOT negated | operand
 *   operand     = VAR | LP expr RP
 * </pre>
 * Unlike a grammar that allows exactly one binary operator per level, this one also
 * accepts a parenthesised single operand such as {@code (t1)} and operator chains such
 * as {@code t1 and t2 and t3}.
 */
public class TextExpressionSearch {

    /** Token kinds; LP and RP are the parentheses, END marks the end of the input. */
    enum Token { VAR, LP, RP, NOT, AND, OR, END }

    /** Splits the expression text into {@link Token}s on demand. */
    static class Tokenizer {
        final String input;
        int pos = 0;
        /** Name of the variable most recently returned as {@link Token#VAR}, else null. */
        String var;

        Tokenizer(String input) {
            this.input = input;
        }

        /** Rewinds to the start of the input so the expression can be evaluated again. */
        void reset() {
            pos = 0;
            var = null;
        }

        /** Returns the part of the input consumed so far (used in error messages). */
        String getRead() {
            return input.substring(0, pos);
        }

        /**
         * Returns the next token, skipping leading whitespace.
         *
         * @throws AssertionError if the text at the current position is not a valid token
         */
        Token getNext() {
            var = null;
            while (pos < input.length() && isWhitespace(input.charAt(pos))) {
                ++pos;
            }
            if (pos >= input.length()) {
                return Token.END;
            }
            int start = pos++;
            switch (input.charAt(start)) {
                case 't':
                    // A variable is 't' followed by any number of digits.
                    while (pos < input.length() && isDigit(input.charAt(pos))) {
                        ++pos;
                    }
                    var = input.substring(start, pos);
                    return Token.VAR;
                case '(':
                    return Token.LP;
                case ')':
                    return Token.RP;
                case 'n':
                    if (input.startsWith("ot", pos)) {
                        pos += 2;
                        return Token.NOT;
                    }
                    break;
                case 'a':
                    if (input.startsWith("nd", pos)) {
                        pos += 2;
                        return Token.AND;
                    }
                    break;
                case 'o':
                    if (input.startsWith("r", pos)) {
                        pos += 1;
                        return Token.OR;
                    }
                    break;
            }
            throw new AssertionError("Can't tokenize: " + input.substring(start));
        }
    }

    /** Recursive-descent evaluator that computes the value of the expression while parsing it. */
    static class Evaluator {
        final Tokenizer tokenizer;
        /** Environment: variable name to its truth value for the current line. */
        final Map<String, Boolean> env;
        /** The token currently being looked at. */
        Token lookAhead;

        Evaluator(Tokenizer tokenizer, Map<String, Boolean> env) {
            this.tokenizer = tokenizer;
            this.env = env;
            advance();
        }

        /** Always throws; declared to return boolean so it can be used in return statements. */
        boolean die(String msg) {
            throw new AssertionError(msg + "\nRead: " + tokenizer.getRead());
        }

        void advance() {
            lookAhead = tokenizer.getNext();
        }

        /** Consumes the expected token or fails with a syntax error. */
        void match(Token token) {
            if (lookAhead != token) {
                die("Expected " + token + ", found " + lookAhead);
            }
            advance();
        }

        /**
         * Evaluates the whole expression.
         *
         * @return the truth value of the expression under {@link #env}
         * @throws AssertionError on a syntax error or an undefined variable
         */
        boolean evaluate() {
            boolean exprVal = expr();
            match(Token.END);
            return exprVal;
        }

        /** expr = conjunction { OR conjunction }; must be followed by END or RP. */
        boolean expr() {
            boolean value = conjunction();
            while (lookAhead == Token.OR) {
                advance();
                // Parse the right-hand side unconditionally: short-circuiting here
                // would skip tokens that still have to be consumed.
                boolean rhs = conjunction();
                value = value || rhs;
            }
            if (lookAhead != Token.END && lookAhead != Token.RP) {
                return die("Expected expr, found " + lookAhead);
            }
            return value;
        }

        /** conjunction = negated { AND negated }. */
        boolean conjunction() {
            boolean value = negated();
            while (lookAhead == Token.AND) {
                advance();
                boolean rhs = negated(); // always parsed, see expr()
                value = value && rhs;
            }
            return value;
        }

        /** negated = NOT negated | operand. */
        boolean negated() {
            switch (lookAhead) {
                case NOT:
                    advance();
                    return !negated();
                default:
                    return operand();
            }
        }

        /** operand = VAR | LP expr RP. */
        boolean operand() {
            switch (lookAhead) {
                case VAR:
                    if (!env.containsKey(tokenizer.var)) {
                        die("Undefined variable: " + tokenizer.var);
                    }
                    boolean varVal = env.get(tokenizer.var);
                    advance();
                    return varVal;
                case LP:
                    advance();
                    boolean exprVal = expr();
                    match(Token.RP);
                    return exprVal;
            }
            return die("Expected operand, found " + lookAhead);
        }
    }

    /** Demo: prints the sample lines that satisfy the sample expression. */
    public static void main(String [] args) {
        String expr = "((not t1 and ((not t2 or t4) or (t3 or t4))) or (t5 and not t6)) and t7";
        Map<String, String> bindings = new HashMap<>();
        bindings.put("t1", "str1");
        bindings.put("t2", "str2");
        bindings.put("t3", "str3");
        bindings.put("t4", "str4");
        bindings.put("t5", "str5");
        bindings.put("t6", "str6");
        bindings.put("t7", "str7");
        String [] lines = {"str5 str7", "str4 str2"};
        Tokenizer tokenizer = new Tokenizer(expr);
        for (String line : lines) {
            // A variable is true for this line when the line contains its bound text.
            Map<String, Boolean> env =
                bindings.entrySet().stream()
                    .collect(toMap(e -> e.getKey(), e -> line.contains(e.getValue())));
            tokenizer.reset();
            if (new Evaluator(tokenizer, env).evaluate()) {
                System.out.println(line);
            }
        }
    }
}
You can define a parser that returns a Predicate<String> that tests if a given string satisfies a conditional expression.
/**
 * Compiles a boolean search expression into a predicate over lines of text.
 *
 * <p>Syntax: {@code expression = term { "or" term }}, {@code term = factor { "and" factor }},
 * {@code factor = [ "not" ] ( "(" expression ")" | identifier )}. An identifier matches
 * {@code [a-z][a-z0-9]*}; characters that match neither a parenthesis nor an identifier
 * are ignored by the tokenizer.
 *
 * @param s   the expression text
 * @param map identifier name to the text a line must contain for the identifier to be true;
 *            it is read each time the predicate is tested, so later changes are visible
 * @return a predicate that is true for lines satisfying the expression; testing it throws
 *         RuntimeException if an identifier has no entry in {@code map}
 * @throws RuntimeException on a syntax error, including an expression that ends early
 */
static Predicate<String> parse(String s, Map<String, String> map) {
    return new Object() {
        final String[] tokens = Pattern.compile("[()]|[a-z][a-z0-9]*")
            .matcher(s).results()
            .map(MatchResult::group)
            .toArray(String[]::new);
        final int length = tokens.length;
        int index = 0;
        // Current token; null once the input is exhausted.
        String token = get();

        /** Advances to the next token and returns it (null at end of input). */
        String get() {
            return token = index < length ? tokens[index++] : null;
        }

        /** Consumes the current token if it equals {@code expect}. */
        boolean eat(String expect) {
            if (expect.equals(token)) {
                get();
                return true;
            }
            return false;
        }

        Predicate<String> identifier() {
            String id = token;
            // The lookup is deferred to test time so the map can change afterwards.
            return line -> {
                String value = map.get(id);
                if (value == null)
                    throw new RuntimeException(
                        "identifier '" + id + "' undefined");
                return line.contains(value);
            };
        }

        Predicate<String> factor() {
            boolean not = eat("not");
            Predicate<String> p;
            if (token == null) {
                // Without this check switch (token) would throw NullPointerException
                // for inputs such as "", "t1 and" or "not".
                throw new RuntimeException("syntax error: unexpected end of expression");
            }
            switch (token) {
            case "(":
                get();
                p = expression();
                if (!eat(")"))
                    throw new RuntimeException("')' expected");
                break;
            case ")": case "not": case "and": case "or":
                throw new RuntimeException("syntax error at '" + token + "'");
            default:
                p = identifier();
                get();
                break;
            }
            if (not)
                p = p.negate();
            return p;
        }

        Predicate<String> term() {
            Predicate<String> p = factor();
            while (eat("and"))
                p = p.and(factor());
            return p;
        }

        Predicate<String> expression() {
            Predicate<String> p = term();
            while (eat("or"))
                p = p.or(term());
            return p;
        }

        Predicate<String> parse() {
            Predicate<String> p = expression();
            if (token != null)
                throw new RuntimeException("extra tokens string");
            return p;
        }
    }.parse();
}
test case:
#Test
public void testParse() {
String s = "not ((t1 and not t3) or (t4 and t2)) or t5";
Map<String, String> map = new HashMap<>(Map.of(
"t1", "str1",
"t2", "str2",
"t3", "str3",
"t4", "str4",
"t5", "str5"));
Predicate<String> p = parse(s, map);
assertTrue(p.test("str5"));
assertTrue(p.test("str3"));
assertTrue(p.test("str1 str3"));
assertFalse(p.test("str1"));
assertFalse(p.test("str2 str4"));
// you can change value of variables.
assertFalse(p.test("str1 FOO"));
map.put("t5", "FOO");
assertTrue(p.test("str1 FOO"));
}
syntax:
expression = term { "or" term }
term = factor { "and" factor }
factor = [ "not" ] ( "(" expression ")" | identifier )
identifier = letter { letter | digit }
letter = "a" | "b" | ... | "z"
digit = "0" | "1" | ... | "9"
For posterity, here is my shunting yard solution which includes input validation:
/**
 * Evaluates a custom boolean expression ("t1 and not (t2 or t3)") against a line of text
 * using the shunting-yard algorithm: the expression is converted to postfix once and the
 * postfix form is then evaluated for each line.
 *
 * <p>A variable {@code tN} is true for a line when the line contains the text stored under
 * key {@code N} in the terms map. Precedence, highest first: not, and, or.
 */
public class CustomInputParser {
    /** Operator stack used during the infix-to-postfix conversion. */
    private final Stack<Character> ops;
    /**
     * Postfix form. Operands are stored as their non-negative terms-map key, operators as the
     * negated character code, so a key can never be mistaken for an operator.
     */
    private final LinkedList<Integer> postFix;
    /** Operator to rank; a smaller rank binds tighter. '(' has the largest rank so it is never popped by an operator. */
    private final HashMap<Character, Integer> precedence;
    /** Value stack used while evaluating the postfix form. */
    private final Stack<Boolean> eval;
    private final HashMap<Integer, String> termsMap;
    private final String customBool;
    /** True once the postfix form has been built successfully. */
    private boolean converted;

    /**
     * @param tMap     key N to the text that variable tN stands for
     * @param custBool the boolean expression using not/and/or, parentheses and tN variables
     */
    public CustomInputParser(HashMap<Integer, String> tMap, String custBool) {
        this.ops = new Stack<Character>();
        this.eval = new Stack<Boolean>();
        this.postFix = new LinkedList<Integer>();
        this.termsMap = tMap;
        this.customBool = custBool;
        this.precedence = new HashMap<Character, Integer>();
        precedence.put('n', 1);
        precedence.put('a', 2);
        precedence.put('o', 3);
        precedence.put('(', 4);
    }

    /**
     * Converts the expression to postfix.
     *
     * @return 0 on success, -1 if the expression contains an unexpected character or
     *         unbalanced parentheses
     */
    private int inToPost() {
        ops.clear();
        postFix.clear();
        converted = false;
        char[] expr = convertToArr(this.customBool);
        for (int i = 0; i < expr.length; i++) {
            char c = expr[i];
            if (isOp(c)) {
                if (processOp(c) != 0) return -1;
            } else if (Character.isDigit(c)) {
                // Read the whole (possibly multi-digit) variable number as one operand.
                int start = i;
                while (i + 1 < expr.length && Character.isDigit(expr[i + 1])) {
                    i++;
                }
                postFix.offer(Integer.parseInt(new String(expr, start, i - start + 1)));
            } else {
                return -1;
            }
        }
        while (!ops.empty()) {
            char op = ops.pop();
            if (op == '(') return -1; // no matching close paren for the open paren
            postFix.offer(-(int) op);
        }
        converted = true;
        return 0;
    }

    private boolean isOp(char c) {
        return c == '(' || c == ')' || c == 'n' || c == 'a' || c == 'o';
    }

    /**
     * Handles one operator or parenthesis during the conversion.
     *
     * @return 0 on success, -1 if a close paren has no matching open paren
     */
    private int processOp(char c) {
        if (c == '(') {
            ops.push(c);
            return 0;
        }
        if (c == ')') {
            // Move the operators wrapped in the parentheses to the output.
            while (!ops.empty() && ops.peek() != '(') {
                char op = ops.pop();
                postFix.offer(-(int) op);
            }
            if (ops.empty()) return -1; // no matching open paren for the close paren
            ops.pop(); // don't enqueue the open paren, just remove it from the stack
            return 0;
        }
        // Pop every stacked operator that binds tighter than c, not just the top one.
        // Equal rank is not popped, which keeps prefix "not not x" working.
        while (!ops.empty() && precedence.get(c) > precedence.get(ops.peek())) {
            char op = ops.pop();
            postFix.offer(-(int) op);
        }
        ops.push(c);
        return 0;
    }

    /**
     * Evaluates the expression for one line of text. May be called any number of times;
     * the postfix form is built on first use if that has not happened yet.
     *
     * @param s the line to test
     * @return whether the line satisfies the expression
     * @throws IllegalArgumentException if the expression is rejected by the conversion
     * @throws java.util.EmptyStackException if an operator is missing an operand
     */
    public boolean evaluate(String s) {
        if (!converted && inToPost() != 0) {
            throw new IllegalArgumentException("invalid custom boolean");
        }
        eval.clear();
        // Iterate instead of polling so the postfix form survives for the next line.
        for (int token : postFix) {
            if (token >= 0) {
                String term = this.termsMap.get(token);
                eval.push(s.contains(String.valueOf(term)));
                continue;
            }
            boolean op1, op2;
            switch ((char) -token) {
                case 'n':
                    op1 = eval.pop();
                    eval.push(!op1);
                    break;
                case 'a':
                    op1 = eval.pop();
                    op2 = eval.pop();
                    eval.push(op1 && op2);
                    break;
                case 'o':
                    op1 = eval.pop();
                    op2 = eval.pop();
                    eval.push(op1 || op2);
                    break;
                default:
                    throw new IllegalStateException("unknown operator in postfix: " + (char) -token);
            }
        }
        return eval.pop();
    }

    /** Abbreviates not/and/or to n/a/o, drops the 't' prefix of variables and all spaces. */
    private char[] convertToArr(String x) {
        x = x.replace("not", "n").replace("and","a").replace("or", "o").replace("t", "").replace(" ", "");
        return x.toCharArray();
    }

    public static void main(String[] args) {
        String customBool = "(t1 and not (t2 and t3)) or (t4 and not t5)";
        HashMap<Integer,String> termsMap = new HashMap<Integer, String>();
        termsMap.put(1,"str1");
        termsMap.put(2,"str2");
        termsMap.put(3,"str3");
        termsMap.put(4,"str4");
        termsMap.put(5,"str5");
        CustomInputParser c = new CustomInputParser(termsMap, customBool);
        if(c.inToPost() != 0) {
            System.out.println("invalid custom boolean");
        }
        else {
            System.out.println(c.evaluate("str1str5"));
        }
    }
}

Replace custom list search with Java lambda expressions

I am looking to replace the following code with Java lambda expressions. In the case below I need to break out of the loop as soon as a match is found, and also set a boolean to true so that I can assert a condition afterwards.
Long teamId;
boolean matchFound = false;
List<WorkflowSubscriptions> workflowSubscriptionsList = fetchSubscriptions();
for (WorkflowSubscriptions workflowSubscriptions : workflowSubscriptionsList) {
for (WorkflowCompositeInfo workflowCompositeInfo : workflowSubscriptions.getWorkflowCompositeInfo()) {
if (workflowCompositeInfo.getId() > 0 && workflowCompositeId == workflowCompositeId.intValue()) {
teamId = Long.valueOf(workflowSubscriptions.getId());
matchFound = true;
}
}
}
if (!matchFound) {
throw new CustomParameterizedException("Workflow does not have valid subscriptions");
}
Try this:
fetchSubscriptions().stream()
.filter(p -> w.getId() > 0 && workflowCompositeId == w.intValue())
.limit(1)
.forEach(w -> {teamId = Long.valueOf(w); matchFound = true;} );
UPDATE
To avoid errors about modifying a final variable. You can simply create a value class:
/**
 * Mutable holder for the search result, so that a lambda can write to it through an
 * effectively final reference.
 */
public class MyValue {
    /** Id of the matching team; null until a match has been stored. */
    public Long teamId;
    /** Whether a match has been stored; false by default. */
    public boolean matchFound;
}
Then:
final MyValue value = new MyValue();
fetchSubscriptions().stream()
.filter(p -> w.getId() > 0 && workflowCompositeId == w.intValue())
.limit(1)
.forEach(w -> {value.teamId = Long.valueOf(w); value.matchFound = true;} );
// Here, find your data in value.teamId and value.matchFound

Dynamic operator tokens in ANTLR4

I'm trying to make a calculator in ANTLR4 that can use almost every possible symbol as mathematical operator.
Concrete:
- The user defines operations consisting of an operator and a precedence. The operator can be any combination of symbols except for some system symbols (parentheses, commas, ...). Precedence is a positive integer number. Operations are stored in a java HashMap.
- There are three different kinds of operations: left side (unary minus, ...), right side (factorial, ...) and binary (addition, ...)
- The operations should be requested at runtime, so that operations can be (de)activated during the parse. If this is not possible, then the operators should be requested at parser creation.
- For the precedence: full dynamic precedence is preferable(at runtime the precedence of an encountered operation is requested), but if it isn't possible then there should be different precedence presets. (multiplication, addition, ...)
What I've got:
- Working code for operator recognition
- Precedence climbing code which produces a correct parse tree, but gives an error: rule expr failed predicate: (getPrecedence($op) >= $_p)?
UPDATE: fixed operator recognition code, and found code for the precedence climbing mechanism
tokens { PREOP, POSTOP, BINOP, ERROR }
#lexer::members {
private static List<String> binaryOperators;
private static List<String> prefixOperators;
private static List<String> postfixOperators;
{
binaryOperators = new ArrayList<String>();
binaryOperators.add("+");
binaryOperators.add("*");
binaryOperators.add("-");
binaryOperators.add("/");
prefixOperators = new ArrayList<String>();
prefixOperators.add("-");
postfixOperators = new ArrayList<String>();
postfixOperators.add("!");
}
private Deque<Token> deque = new LinkedList<Token>();
private Token previousToken;
private Token nextToken;
#Override
public Token nextToken() {
if (!deque.isEmpty()) {
return previousToken = deque.pollFirst();
}
Token next = super.nextToken();
if (next.getType() != SYMBOL) {
return previousToken = next;
}
StringBuilder builder = new StringBuilder();
while (next.getType() == SYMBOL) {
builder.append(next.getText());
next = super.nextToken();
}
deque.addLast(nextToken = next);
List<Token> tokens = findOperatorCombination(builder.toString(), getOperatorType());
for (int i = tokens.size() - 1; i >= 0; i--) {
deque.addFirst(tokens.get(i));
}
return deque.pollFirst();
}
private static List<Token> findOperatorCombination(String sequence, OperatorType type) {
switch (type) {
case POSTFIX:
return getPostfixCombination(sequence);
case PREFIX:
return getPrefixCombination(sequence);
case BINARY:
return getBinaryCombination(sequence);
default:
break;
}
return null;
}
/**
 * Splits a symbol sequence into prefix-operator tokens.
 *
 * @param sequence the symbols to split
 * @return PREOP tokens covering the whole sequence, or null if no such split exists
 */
private static List<Token> getPrefixCombination(String sequence) {
    if (isPrefixOperator(sequence)) {
        List<Token> seq = new ArrayList<Token>(1);
        seq.add(0, new CommonToken(MathParser.PREOP, sequence));
        return seq;
    }
    if (sequence.length() <= 1) {
        return null;
    }
    for (int i = 1; i < sequence.length(); i++) {
        List<Token> seq1 = getPrefixCombination(sequence.substring(0, i));
        if (seq1 == null) {
            // The head cannot be split, so there is no point in trying the tail.
            continue;
        }
        List<Token> seq2 = getPrefixCombination(sequence.substring(i, sequence.length()));
        if (seq2 != null) {
            seq1.addAll(seq2);
            return seq1;
        }
    }
    return null;
}
private static List<Token> getPostfixCombination(String sequence) {
if (isPostfixOperator(sequence)) {
List<Token> seq = new ArrayList<Token>(1);
seq.add(0, new CommonToken(MathParser.POSTOP, sequence));
return seq;
}
if (sequence.length() <= 1) {
return null;
}
for (int i = 1; i < sequence.length(); i++) {
List<Token> seq1 = getPostfixCombination(sequence.substring(0, i));
List<Token> seq2 = getPostfixCombination(sequence.substring(i, sequence.length()));
if (seq1 != null && seq2 != null) {
seq1.addAll(seq2);
return seq1;
}
}
return null;
}
private static List<Token> getBinaryCombination(String sequence) {
for (int i = 0; i < sequence.length(); i++) { // i is number of postfix spaces
for (int j = 0; j < sequence.length() - i; j++) { // j is number of prefix spaces
String seqPost = sequence.substring(0, i);
List<Token> post = getPostfixCombination(seqPost);
String seqPre = sequence.substring(sequence.length()-j, sequence.length());
List<Token> pre = getPrefixCombination(seqPre);
String seqBin = sequence.substring(i, sequence.length()-j);
if ((post != null || seqPost.isEmpty()) &&
(pre != null || seqPre.isEmpty()) &&
isBinaryOperator(seqBin)) {
List<Token> res = new ArrayList<Token>();
if (post != null)
res.addAll(post);
res.add(new CommonToken(MathParser.BINOP, seqBin));
if (pre != null)
res.addAll(pre);
return res;
}
}
}
return null;
}
/**
 * Returns the expected operator type based on the previous and next token.
 * When there is no previous token yet (the input starts with symbols), the
 * operator is treated as a prefix operator.
 */
private OperatorType getOperatorType() {
    if (previousToken != null && isValueEnd(previousToken.getType())) {
        if (isValueStart(nextToken.getType())) {
            return OperatorType.BINARY;
        }
        return OperatorType.POSTFIX;
    }
    return OperatorType.PREFIX;
}
private enum OperatorType { BINARY, PREFIX, POSTFIX };
/**
* Checks whether the given token is a token found at the start of value elements
* #param tokenType
* #return
*/
private static boolean isValueStart(int tokenType) {
return tokenType == MathParser.INT;
}
/**
* Checks whether the given token is a token found at the end of value elements
* #param tokenType
* #return
*/
private static boolean isValueEnd(int tokenType) {
return tokenType == MathParser.INT;
}
private static boolean isBinaryOperator(String operator) {
return binaryOperators.contains(operator);
}
private static boolean isPrefixOperator(String operator) {
return prefixOperators.contains(operator);
}
private static boolean isPostfixOperator(String operator) {
return postfixOperators.contains(operator);
}
}
Precedence climbing code:
#parser::members {
static Map<String, Integer> precedenceMap = new HashMap<String, Integer>();
static {
precedenceMap.put("*", 2);
precedenceMap.put("+", 1);
precedenceMap.put("^", 4);
precedenceMap.put("-", 3);
precedenceMap.put("!", 5);
}
public static Integer getPrecedence(Token op) {
return precedenceMap.get(op.getText());
}
public static Integer getNextPrecedence(Token op) {
Integer p = getPrecedence(op);
if (op.getType() == PREOP) return p;
else if (op.getText().equals("^")) return p;
else if (op.getType() == BINOP) return p+1;
else if (op.getType() == POSTOP) return p+1;
throw new IllegalArgumentException(op.getText());
}
}
prog
: expr[0]
;
expr [int _p]
: aexpr
( {getPrecedence(_input.LT(1)) >= $_p}? op=BINOP expr[getNextPrecedence($op)]
| {getPrecedence(_input.LT(1)) >= $_p}? POSTOP
)*
;
atom
: INT
| '(' expr[0] ')'
| op=PREOP expr[getNextPrecedence($op)]
;
So now the question is: what can I do about this predicate failure error?
Thanks to the other contributors I have found a complete (and actually reasonably clean) solution for my problem.
Operator matching:
By looking at the tokens before and after the encountered series of symbols, it is possible to detect the fixity of the operator. After that, apply an algorithm which detects a sequence of valid operators in the symbol series. Then inject those tokens in the token stream (in nextToken() ).
Just make sure you define all hardcoded tokens before the SYMBOL definition.
Precedence climbing:
Actually this wasn't that hard, it is exactly the same as ANTLR4's internal strategy.
grammar Math;
tokens { PREOP, POSTOP, BINOP, ERROR }
#header {
import java.util.*;
}
#lexer::members {
private static List<String> binaryOperators;
private static List<String> prefixOperators;
private static List<String> postfixOperators;
{
binaryOperators = new ArrayList<String>();
binaryOperators.add("+");
binaryOperators.add("*");
binaryOperators.add("-");
binaryOperators.add("/");
System.out.println(binaryOperators);
prefixOperators = new ArrayList<String>();
prefixOperators.add("-");
System.out.println(prefixOperators);
postfixOperators = new ArrayList<String>();
postfixOperators.add("!");
System.out.println(postfixOperators);
}
private Deque<Token> deque = new LinkedList<Token>();
private Token previousToken;
private Token nextToken;
#Override
public Token nextToken() {
if (!deque.isEmpty()) {
return previousToken = deque.pollFirst();
}
Token next = super.nextToken();
if (next.getType() != SYMBOL) {
return previousToken = next;
}
StringBuilder builder = new StringBuilder();
while (next.getType() == SYMBOL) {
builder.append(next.getText());
next = super.nextToken();
}
deque.addLast(nextToken = next);
List<Token> tokens = findOperatorCombination(builder.toString(), getOperatorType());
for (int i = tokens.size() - 1; i >= 0; i--) {
deque.addFirst(tokens.get(i));
}
return deque.pollFirst();
}
private static List<Token> findOperatorCombination(String sequence, OperatorType type) {
switch (type) {
case POSTFIX:
return getPostfixCombination(sequence);
case PREFIX:
return getPrefixCombination(sequence);
case BINARY:
return getBinaryCombination(sequence);
default:
break;
}
return null;
}
/**
 * Splits a symbol sequence into prefix-operator tokens.
 *
 * @param sequence the symbols to split
 * @return PREOP tokens covering the whole sequence, or null if no such split exists
 */
private static List<Token> getPrefixCombination(String sequence) {
    if (isPrefixOperator(sequence)) {
        List<Token> seq = new ArrayList<Token>(1);
        seq.add(0, new CommonToken(MathParser.PREOP, sequence));
        return seq;
    }
    if (sequence.length() <= 1) {
        return null;
    }
    for (int i = 1; i < sequence.length(); i++) {
        List<Token> seq1 = getPrefixCombination(sequence.substring(0, i));
        if (seq1 == null) {
            // The head cannot be split, so there is no point in trying the tail.
            continue;
        }
        List<Token> seq2 = getPrefixCombination(sequence.substring(i, sequence.length()));
        if (seq2 != null) {
            seq1.addAll(seq2);
            return seq1;
        }
    }
    return null;
}
private static List<Token> getPostfixCombination(String sequence) {
if (isPostfixOperator(sequence)) {
List<Token> seq = new ArrayList<Token>(1);
seq.add(0, new CommonToken(MathParser.POSTOP, sequence));
return seq;
}
if (sequence.length() <= 1) {
return null;
}
for (int i = 1; i < sequence.length(); i++) {
List<Token> seq1 = getPostfixCombination(sequence.substring(0, i));
List<Token> seq2 = getPostfixCombination(sequence.substring(i, sequence.length()));
if (seq1 != null && seq2 != null) {
seq1.addAll(seq2);
return seq1;
}
}
return null;
}
private static List<Token> getBinaryCombination(String sequence) {
for (int i = 0; i < sequence.length(); i++) { // i is number of postfix spaces
for (int j = 0; j < sequence.length() - i; j++) { // j is number of prefix spaces
String seqPost = sequence.substring(0, i);
List<Token> post = getPostfixCombination(seqPost);
String seqPre = sequence.substring(sequence.length()-j, sequence.length());
List<Token> pre = getPrefixCombination(seqPre);
String seqBin = sequence.substring(i, sequence.length()-j);
if ((post != null || seqPost.isEmpty()) &&
(pre != null || seqPre.isEmpty()) &&
isBinaryOperator(seqBin)) {
List<Token> res = new ArrayList<Token>();
if (post != null)
res.addAll(post);
res.add(new CommonToken(MathParser.BINOP, seqBin));
if (pre != null)
res.addAll(pre);
return res;
}
}
}
return null;
}
/**
* Returns the expected operator type based on the previous and next token
*/
private OperatorType getOperatorType() {
if (isAfterAtom()) {
if (isBeforeAtom()) {
return OperatorType.BINARY;
}
return OperatorType.POSTFIX;
}
return OperatorType.PREFIX;
}
private enum OperatorType { BINARY, PREFIX, POSTFIX };
/**
* Checks whether the current token is a token found at the start of atom elements
* #return
*/
private boolean isBeforeAtom() {
int tokenType = nextToken.getType();
return tokenType == MathParser.INT ||
tokenType == MathParser.PLEFT;
}
/**
 * Checks whether the previous token is one found at the end of atom elements
 * (an INT or a closing parenthesis).
 * @return false when there is no previous token yet, i.e. at the start of the input
 */
private boolean isAfterAtom() {
    if (previousToken == null) {
        return false;
    }
    int tokenType = previousToken.getType();
    return tokenType == MathParser.INT ||
        tokenType == MathParser.PRIGHT;
}
private static boolean isBinaryOperator(String operator) {
return binaryOperators.contains(operator);
}
private static boolean isPrefixOperator(String operator) {
return prefixOperators.contains(operator);
}
private static boolean isPostfixOperator(String operator) {
return postfixOperators.contains(operator);
}
}
#parser::members {
static Map<String, Integer> precedenceMap = new HashMap<String, Integer>();
static {
precedenceMap.put("*", 2);
precedenceMap.put("+", 1);
precedenceMap.put("^", 4);
precedenceMap.put("-", 3);
precedenceMap.put("!", 5);
}
public static Integer getPrecedence(Token op) {
return precedenceMap.get(op.getText());
}
public static Integer getNextPrecedence(Token op) {
Integer p = getPrecedence(op);
if (op.getType() == PREOP) return p;
else if (op.getText().equals("^")) return p;
else if (op.getType() == BINOP) return p+1;
throw new IllegalArgumentException(op.getText());
}
}
prog
: expr[0]
;
expr [int _p]
: atom
( {getPrecedence(_input.LT(1)) >= $_p}? op=BINOP expr[getNextPrecedence($op)]
| {getPrecedence(_input.LT(1)) >= $_p}? POSTOP
)*
;
atom
: INT
| PLEFT expr[0] PRIGHT
| op=PREOP expr[getNextPrecedence($op)]
;
INT
: ( '0'..'9' )+
;
PLEFT : '(' ;
PRIGHT : ')' ;
WS
: [ \t\r\n]+ -> skip ; // skip spaces, tabs, newlines
SYMBOL
: .
;
Note: code is meant as an example, not as my real code (operators and precedence will be requested externally)
You can't define precedence/associativity rules for Antlr at runtime. What you can, however, is parse all of the operators (built-in in the language or user-defined) as a single chained list (like ArrayList<>) in the parse, then apply your own algorithm for precedence and associativity in a visitor (or in grammar actions, if you really want to).
The algorithm itself isn't that hard, if you iterate the list many times. For example, you can first fetch the precedence of each operator in the list, then pick the one with the highest precedence, see if it's right- or left-associative, and from there you've built your first (bottom-most) tree node. Keep applying this until the list is empty, and you've built your own "parse tree", but without the parsing (you're not working with abstract-input strings anymore).
Alternatively, at runtime make external calls to Antlr to compile the .g4 and to javac to compile the generated Antlr code, then use reflection to call it. However, this is probably much slower and arguably harder to pull off.
A parser rule that will work 'correctly' according to some runtime definition of Symbol precedence is possible. While not initially appearing to be an idiomatic choice, the standard alternative of deferring semantic analysis out of the parser would produce a very poorly differentiated parse tree -- making this a reasonable exception to the standard design rule.
In (overly simplified) form, the parser rule would be:
expr : LParen expr RParen # group
| expr Symbol expr # binary
| expr Symbol # postfix
| Symbol expr # prefix
| Int+ # value
;
To cure the ambiguity, add inline predicates:
expr : LParen expr RParen # group
| expr s=Symbol { binary($s) }? expr # binary
| expr s=Symbol { postfix($s) }? # postfix
| s=Symbol { prefix($s) }? expr # prefix
| Int+ # value
;
For any given Symbol, a single predicate method should evaluate as true.
Extending to multiple Symbol strings will add a bit of complexity (ex, differentiating a binary from a postfix followed by a prefix) but the mechanics remain largely the same.
I think your approach is the right way. I propose following grammar:
grammar Op;
options {
superClass=PrecedenceParser;
}
prog : expr[0] ;
expr[int _p] locals[Token op]: INT ({$op = _input.LT(1);} {getPrecedence($op) >= $_p}? OP expr[getPrecedence($op)])*;
INT : ( '0'..'9' )+ ;
OP : '+' | '*'; // all allowed symbols, should be extended
WS : [ \t\r\n]+ -> skip ; // skip spaces, tabs, newlines
The rule for op should contain all allowed operator symbols. My restriction to + and * is only for simplicity. The parser super class would be:
/**
 * Parser base class that keeps a table of operator precedences which can be
 * filled in after construction.
 */
public abstract class PrecedenceParser extends Parser {
    /** Operator text mapped to the precedence registered for it. */
    private final Map<String, Integer> precedences = new HashMap<>();

    /**
     * Creates the parser over the given token stream with an empty precedence table.
     *
     * @param input the token stream handed to the superclass
     */
    public PrecedenceParser(TokenStream input) {
        super(input);
    }

    /**
     * Registers (or overwrites) the precedence of an operator.
     *
     * @param op the operator text
     * @param p the precedence to associate with {@code op}
     * @return this parser, so calls can be chained
     */
    public PrecedenceParser putOperator(String op, int p) {
        precedences.put(op, p);
        return this;
    }

    /**
     * Looks up the precedence registered for the text of the given token.
     *
     * @param operator the token whose text is used as the lookup key
     * @return the registered precedence, or {@link Integer#MAX_VALUE} if none is registered
     */
    public int getPrecedence(Token operator) {
        Integer registered = precedences.get(operator.getText());
        return registered != null ? registered : Integer.MAX_VALUE;
    }
}
Results
with precedences {+ : 4, * : 3 }
(prog (expr 1 + (expr 2) * (expr 3 + (expr 4))))
with precedences {+ : 3, * : 4 }
(prog (expr 1 + (expr 2 * (expr 3) + (expr 4))))
Evaluating these sequences from left to right is equivalent to evaluating them with precedence.
This approach should work for larger sets of operators. ANTLR4 uses this approach internally for precedence climbing yet ANTLR uses constants instead of the precedences map (because it assumes that precedence is determined at parser build time).

Parenthesis Check within a Linked stack for infix to postfix

I have four classes.
One contains my linkedstack setup
One is infixtopostfix for prioritization and conversion
Parenthesis for matching
Postfix for evaluation
I have set up almost everything here, but it still returns false any way I put it.
On another note, my equals check in !stackMatch.pop().equals(c) is not working, because pop() returns an Object type and the '!' is a problem.
My programs are simple and straight forward:
LinkedStack.java
/**
 * Stack of Objects stored as a chain of Node elements, with the most recently
 * pushed element at the head of the chain.
 */
public class LinkedStack implements StackInterface {
    /** Head of the chain; null while the stack is empty. */
    private Node top;

    /** Creates an empty stack. */
    public LinkedStack() {
        top = null;
    }

    /** Returns true when no element is on the stack. */
    public boolean isEmpty() {
        return null == top;
    }

    /**
     * Places a new element on top of the stack.
     *
     * @param newItem the element to store
     */
    public void push(Object newItem) {
        Node head = new Node();
        head.setData(newItem);
        head.setNext(top);
        top = head;
    }

    /**
     * Removes and returns the element on top of the stack.
     *
     * @return the data of the removed top node
     * @throws Exception if the stack is empty
     */
    public Object pop() throws Exception {
        if (isEmpty()) {
            throw new Exception("StackException on pop: stack empty");
        }
        Node removed = top;
        top = removed.getNext();
        return removed.getData();
    }

    /**
     * Returns the element on top of the stack without removing it.
     *
     * @return the data of the top node
     * @throws Exception if the stack is empty
     */
    public Object peek() throws Exception {
        if (isEmpty()) {
            throw new Exception("StackException on peek: stack empty");
        }
        return top.getData();
    }
}
InfixToPostfix.java
import java.util.*;
public class InfixToPostfix {
Parenthesis p = new Parenthesis();
LinkedStack stack = new LinkedStack();
String token = ""; // each token of the string
String output = ""; // the string holding the postfix expression
Character topOfStackObject = null; // the top object of the stack, converted to a Character Object
char charValueOfTopOfStack = ' '; // the primitive value of the Character object
/**
* Convert an infix expression to postfix. If the expression is invalid, throws an exception.
* @param s the infix expression
* @return the postfix expression as a string
* hint: StringTokenizer is very useful to do this iteratively
*/
//public String convertToPostfix(String s) throws Exception {
//}
/**
 * Reports whether the character is an operand symbol: an ASCII digit
 * ('0'-'9') or a lowercase ASCII letter ('a'-'z').
 */
private boolean isOperand(char c) {
    boolean digit = '0' <= c && c <= '9';
    boolean lowercaseLetter = 'a' <= c && c <= 'z';
    return digit || lowercaseLetter;
}
/**
 * Pops operators off the stack and appends them to the output for as long as
 * their precedence is not lower than {@code val}. Stops early, pushing the
 * popped character back, on reaching '(' or an operator of lower precedence.
 * '+' and '-' rank 1; every other stacked operator ranks 2.
 *
 * NOTE(review): curOp is never read in this method; presumably the caller
 * pushes it after this returns - confirm.
 *
 * @param curOp the operator currently being processed (unused here)
 * @param val the precedence of the operator currently being processed
 * @throws Exception propagated from the stack's pop()
 */
public void precedence(char curOp, int val) throws Exception {
    while (!stack.isEmpty()) {
        char popped = (Character) stack.pop();
        boolean openParen = popped == '(';
        int poppedRank = (popped == '+' || popped == '-') ? 1 : 2;
        if (openParen || poppedRank < val) {
            // Not ours to emit: restore it and stop unwinding.
            stack.push(popped);
            return;
        }
        output = output + popped;
    }
}
Parenthesis.java
import java.util.*;
/**
 * Checks whether the parentheses of an expression are balanced.
 */
public class Parenthesis{
    /**
     * Determine if the expression has matching parenthesis using a stack.
     * Each '(' is pushed; each ')' must find a pending '(' to pop. Characters
     * other than '(' and ')' are ignored.
     *
     * @param expr the expression to be evaluated
     * @return returns true if the expression has matching parenthesis
     */
    public boolean match(String expr) {
        LinkedStack pendingOpen = new LinkedStack();
        for (int i = 0; i < expr.length(); i++) {
            char c = expr.charAt(i);
            if (c == '(') {
                pendingOpen.push(c);
            } else if (c == ')') {
                // A ')' with nothing open can never be matched.
                if (pendingOpen.isEmpty()) {
                    return false;
                }
                // Only '(' is ever pushed, so the popped element is the match;
                // comparing it to c (which is ')') would always fail.
                try {
                    pendingOpen.pop();
                } catch (Exception e) {
                    // pop() only throws on an empty stack, which was ruled out above.
                    throw new IllegalStateException("stack unexpectedly empty", e);
                }
            }
        }
        // Any '(' still pending has no closing partner.
        return pendingOpen.isEmpty();
    }
}
Just wanted to give you all of it so you could help me. I have tests written already just struggling with the parenthesis problem of pushing it on the stack but unable to compare it to the closing parenthesis so it can check if there is enough while checking to be sure it is not empty.
The problem is probably that you are trying to test whether a matching ( is on top of the stack when a ) arrives, but c holds the current character, ), so you are testing whether ) is on top of the stack, not ( as you should.

boolean expression parser in java

Are there any java libraries or techniques to parsing boolean expressions piecemeal?
What I mean is given an expression like this:
T && ( F || ( F && T ) )
It could be broken down into a expression tree to show which token caused the 'F' value, like so (maybe something like this):
T && <- rhs false
( F || <- rhs false
( F && T ) <- eval, false
)
I am trying to communicate boolean expression evaluations to non-programmers. I have poked around with ANTLR, but I couldn't get it to do much (it seems to have a bit of a learning curve).
I'm not opposed to writing it myself, but I'd rather not reinvent the wheel.
You could do this with MVEL or JUEL. Both are expression language libraries, examples below are using MVEL.
Example:
System.out.println(MVEL.eval("true && ( false || ( false && true ) )"));
Prints:
false
If you literally want to use 'T' and 'F' you can do this:
// Bind the names T and F to boolean values, then evaluate the expression against them.
Map<String, Object> context = new java.util.HashMap<>();
context.put("T", Boolean.TRUE);
context.put("F", Boolean.FALSE);
Object outcome = MVEL.eval("T && ( F || ( F && T ) )", context);
System.out.println(outcome);
Prints:
false
I've coded this using Javaluator.
It's not exactly the output you are looking for, but I think it could be a start point.
package test;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import net.astesana.javaluator.*;
public class TreeBooleanEvaluator extends AbstractEvaluator<String> {
/** The logical AND operator.*/
final static Operator AND = new Operator("&&", 2, Operator.Associativity.LEFT, 2);
/** The logical OR operator.*/
final static Operator OR = new Operator("||", 2, Operator.Associativity.LEFT, 1);
private static final Parameters PARAMETERS;
static {
// Create the evaluator's parameters
PARAMETERS = new Parameters();
// Add the supported operators
PARAMETERS.add(AND);
PARAMETERS.add(OR);
// Add the parentheses
PARAMETERS.addExpressionBracket(BracketPair.PARENTHESES);
}
public TreeBooleanEvaluator() {
super(PARAMETERS);
}
#Override
protected String toValue(String literal, Object evaluationContext) {
return literal;
}
private boolean getValue(String literal) {
if ("T".equals(literal) || literal.endsWith("=true")) return true;
else if ("F".equals(literal) || literal.endsWith("=false")) return false;
throw new IllegalArgumentException("Unknown literal : "+literal);
}
#Override
protected String evaluate(Operator operator, Iterator<String> operands,
Object evaluationContext) {
List<String> tree = (List<String>) evaluationContext;
String o1 = operands.next();
String o2 = operands.next();
Boolean result;
if (operator == OR) {
result = getValue(o1) || getValue(o2);
} else if (operator == AND) {
result = getValue(o1) && getValue(o2);
} else {
throw new IllegalArgumentException();
}
String eval = "("+o1+" "+operator.getSymbol()+" "+o2+")="+result;
tree.add(eval);
return eval;
}
public static void main(String[] args) {
TreeBooleanEvaluator evaluator = new TreeBooleanEvaluator();
doIt(evaluator, "T && ( F || ( F && T ) )");
doIt(evaluator, "(T && T) || ( F && T )");
}
private static void doIt(TreeBooleanEvaluator evaluator, String expression) {
List<String> sequence = new ArrayList<String>();
evaluator.evaluate(expression, sequence);
System.out.println ("Evaluation sequence for :"+expression);
for (String string : sequence) {
System.out.println (string);
}
System.out.println ();
}
}
Here is the output:
Evaluation sequence for :T && ( F || ( F && T ) )
(F && T)=false
(F || (F && T)=false)=false
(T && (F || (F && T)=false)=false)=false
Evaluation sequence for :(T && T) || ( F && T )
(T && T)=true
(F && T)=false
((T && T)=true || (F && T)=false)=true
I recently put together a library in Java specifically to manipulate boolean expressions: jbool_expressions.
It includes a tool to parse expressions out of string input:
Expression<String> expr = ExprParser.parse("( ( (! C) | C) & A & B)")
You can also do some fairly simple simplification:
// Simplify the parsed expression and print the simplified form (not the original).
Expression<String> simplified = RuleSet.simplify(expr);
System.out.println(simplified);
gives
(A & B)
If you wanted to step through the assignment then, you could assign values one by one. For the example here,
Expression<String> halfAssigned = RuleSet.assign(simplified, Collections.singletonMap("A", true));
System.out.println(halfAssigned);
shows
B
and you could resolve it by assigning B.
Expression<String> resolved = RuleSet.assign(halfAssigned, Collections.singletonMap("B", true));
System.out.println(resolved);
shows
true
Not 100% what you were asking for, but hope it helps.
Check out BeanShell. It has expression parsing that accepts Java-like syntax.
EDIT: Unless you're trying to actually parse T && F literally, though you could do this in BeanShell using the literals true and false.
Try this.
/**
 * Evaluates a boolean expression written with the literals T and F, the
 * operators !, && and ||, and parentheses. Whitespace between tokens is ignored.
 *
 * Grammar:
 *   expression = factor { "||" factor }
 *   factor     = term { "&&" term }
 *   term       = [ "!" ] element
 *   element    = "T" | "F" | "(" expression ")"
 *
 * @param s the expression text
 * @return the value of the expression
 * @throws RuntimeException if a token is unknown, a ')' is missing, or text
 *         remains after a complete expression
 */
static boolean parseBooleanExpression(String s) {
    final class Evaluator {
        private final int end = s.length();
        private int pos = 0;

        /** Skips whitespace, then consumes the token if the input continues with it. */
        private boolean accept(String token) {
            for (; pos < end && Character.isWhitespace(s.charAt(pos)); pos++) {
                // advance past whitespace
            }
            if (pos < end && s.startsWith(token, pos)) {
                pos += token.length();
                return true;
            }
            return false;
        }

        private boolean element() {
            if (accept("T")) {
                return true;
            }
            if (accept("F")) {
                return false;
            }
            if (!accept("(")) {
                throw new RuntimeException("unknown token");
            }
            boolean inner = expression();
            if (!accept(")")) {
                throw new RuntimeException("')' expected");
            }
            return inner;
        }

        private boolean term() {
            boolean negated = accept("!");
            boolean value = element();
            return negated ? !value : value;
        }

        private boolean factor() {
            boolean value = term();
            while (accept("&&")) {
                // Always parse the right operand so the input is consumed.
                boolean rhs = term();
                value = value && rhs;
            }
            return value;
        }

        private boolean expression() {
            boolean value = factor();
            while (accept("||")) {
                // Always parse the right operand so the input is consumed.
                boolean rhs = factor();
                value = value || rhs;
            }
            return value;
        }

        private boolean run() {
            boolean value = expression();
            if (pos < end) {
                throw new RuntimeException(
                        "extra string '" + s.substring(pos) + "'");
            }
            return value;
        }
    }
    return new Evaluator().run();
}
And
/** Evaluates a sample expression and prints its value. */
public static void main(String[] args) {
    System.out.println(parseBooleanExpression("T && ( F || ( F && T ) )"));
}
output:
false
The syntax is
expression = factor { "||" factor }
factor = term { "&&" term }
term = [ "!" ] element
element = "T" | "F" | "(" expression ")"
mXparser handles Boolean operators - please find few examples
Example 1:
import org.mariuszgromada.math.mxparser.*;
...
...
Expression e = new Expression("1 && (0 || (0 && 1))");
System.out.println(e.getExpressionString() + " = " + e.calculate());
Result 1:
1 && (0 || (0 && 1)) = 0.0
Example 2:
import org.mariuszgromada.math.mxparser.*;
...
...
Constant T = new Constant("T = 1");
Constant F = new Constant("F = 0");
Expression e = new Expression("T && (F || (F && T))", T, F);
System.out.println(e.getExpressionString() + " = " + e.calculate());
Result 2:
T && (F || (F && T)) = 0.0
For more details please follow mXparser tutorial.
Best regards

Categories

Resources