Dynamic operator tokens in ANTLR4

Dynamic operator tokens in ANTLR4 - java

I'm trying to make a calculator in ANTLR4 that can use almost every possible symbol as mathematical operator.
Concrete:
- The user defines operations consisting of an operator and a precedence. The operator can be any combination of symbols except for some system symbols (parentheses, commas, ...). Precedence is a positive integer number. Operations are stored in a java HashMap.
- There are three different kinds of operations: left side (unary minus, ...), right side (factorial, ...) and binary (addition, ...)
- The operations should be requested at runtime, so that operations can be (de)activated during the parse. If this is not possible, then the operators should be requested at parser creation.
- For the precedence: full dynamic precedence is preferable(at runtime the precedence of an encountered operation is requested), but if it isn't possible then there should be different precedence presets. (multiplication, addition, ...)
What I've got:
- Working code for operator recognition
- Precedence climbing code which produces a correct parse tree, but gives an error: rule expr failed predicate: (getPrecedence($op) >= $_p)?
UPDATE: fixed operator recognition code, and found code for the precedence climbing mechanism
tokens { PREOP, POSTOP, BINOP, ERROR }
#lexer::members {
private static List<String> binaryOperators;
private static List<String> prefixOperators;
private static List<String> postfixOperators;
{
binaryOperators = new ArrayList<String>();
binaryOperators.add("+");
binaryOperators.add("*");
binaryOperators.add("-");
binaryOperators.add("/");
prefixOperators = new ArrayList<String>();
prefixOperators.add("-");
postfixOperators = new ArrayList<String>();
postfixOperators.add("!");
}
private Deque<Token> deque = new LinkedList<Token>();
private Token previousToken;
private Token nextToken;
#Override
public Token nextToken() {
if (!deque.isEmpty()) {
return previousToken = deque.pollFirst();
}
Token next = super.nextToken();
if (next.getType() != SYMBOL) {
return previousToken = next;
}
StringBuilder builder = new StringBuilder();
while (next.getType() == SYMBOL) {
builder.append(next.getText());
next = super.nextToken();
}
deque.addLast(nextToken = next);
List<Token> tokens = findOperatorCombination(builder.toString(), getOperatorType());
for (int i = tokens.size() - 1; i >= 0; i--) {
deque.addFirst(tokens.get(i));
}
return deque.pollFirst();
}
private static List<Token> findOperatorCombination(String sequence, OperatorType type) {
switch (type) {
case POSTFIX:
return getPostfixCombination(sequence);
case PREFIX:
return getPrefixCombination(sequence);
case BINARY:
return getBinaryCombination(sequence);
default:
break;
}
return null;
}
private static List<Token> getPrefixCombination(String sequence) {
if (isPrefixOperator(sequence)) {
List<Token> seq = new ArrayList<Token>(1);
seq.add(0, new CommonToken(MathParser.PREOP, sequence));
return seq;
}
if (sequence.length() <= 1) {
return null;
}
for (int i = 1; i < sequence.length(); i++) {
List<Token> seq1 = getPrefixCombination(sequence.substring(0, i));
List<Token> seq2 = getPrefixCombination(sequence.substring(i, sequence.length()));
if (seq1 != null & seq2 != null) {
seq1.addAll(seq2);
return seq1;
}
}
return null;
}
private static List<Token> getPostfixCombination(String sequence) {
if (isPostfixOperator(sequence)) {
List<Token> seq = new ArrayList<Token>(1);
seq.add(0, new CommonToken(MathParser.POSTOP, sequence));
return seq;
}
if (sequence.length() <= 1) {
return null;
}
for (int i = 1; i < sequence.length(); i++) {
List<Token> seq1 = getPostfixCombination(sequence.substring(0, i));
List<Token> seq2 = getPostfixCombination(sequence.substring(i, sequence.length()));
if (seq1 != null && seq2 != null) {
seq1.addAll(seq2);
return seq1;
}
}
return null;
}
private static List<Token> getBinaryCombination(String sequence) {
for (int i = 0; i < sequence.length(); i++) { // i is number of postfix spaces
for (int j = 0; j < sequence.length() - i; j++) { // j is number of prefix spaces
String seqPost = sequence.substring(0, i);
List<Token> post = getPostfixCombination(seqPost);
String seqPre = sequence.substring(sequence.length()-j, sequence.length());
List<Token> pre = getPrefixCombination(seqPre);
String seqBin = sequence.substring(i, sequence.length()-j);
if ((post != null || seqPost.isEmpty()) &&
(pre != null || seqPre.isEmpty()) &&
isBinaryOperator(seqBin)) {
List<Token> res = new ArrayList<Token>();
if (post != null)
res.addAll(post);
res.add(new CommonToken(MathParser.BINOP, seqBin));
if (pre != null)
res.addAll(pre);
return res;
}
}
}
return null;
}
/**
* Returns the expected operator type based on the previous and next token
*/
private OperatorType getOperatorType() {
if (isValueEnd(previousToken.getType())) {
if (isValueStart(nextToken.getType())) {
return OperatorType.BINARY;
}
return OperatorType.POSTFIX;
}
return OperatorType.PREFIX;
}
private enum OperatorType { BINARY, PREFIX, POSTFIX };
/**
* Checks whether the given token is a token found at the start of value elements
* #param tokenType
* #return
*/
private static boolean isValueStart(int tokenType) {
return tokenType == MathParser.INT;
}
/**
* Checks whether the given token is a token found at the end of value elements
* #param tokenType
* #return
*/
private static boolean isValueEnd(int tokenType) {
return tokenType == MathParser.INT;
}
private static boolean isBinaryOperator(String operator) {
return binaryOperators.contains(operator);
}
private static boolean isPrefixOperator(String operator) {
return prefixOperators.contains(operator);
}
private static boolean isPostfixOperator(String operator) {
return postfixOperators.contains(operator);
}
}
Precedence climbing code:
#parser::members {
static Map<String, Integer> precedenceMap = new HashMap<String, Integer>();
static {
precedenceMap.put("*", 2);
precedenceMap.put("+", 1);
precedenceMap.put("^", 4);
precedenceMap.put("-", 3);
precedenceMap.put("!", 5);
}
public static Integer getPrecedence(Token op) {
return precedenceMap.get(op.getText());
}
public static Integer getNextPrecedence(Token op) {
Integer p = getPrecedence(op);
if (op.getType() == PREOP) return p;
else if (op.getText().equals("^")) return p;
else if (op.getType() == BINOP) return p+1;
else if (op.getType() == POSTOP) return p+1;
throw new IllegalArgumentException(op.getText());
}
}
prog
: expr[0]
;
expr [int _p]
: aexpr
( {getPrecedence(_input.LT(1)) >= $_p}? op=BINOP expr[getNextPrecedence($op)]
| {getPrecedence(_input.LT(1)) >= $_p}? POSTOP
)*
;
atom
: INT
| '(' expr[0] ')'
| op=PREOP expr[getNextPrecedence($op)]
;
So now the question is what can do about this predicate failure error

Thanks to the other contributors I have found a complete (and actually reasonably clean) solution for my problem.
Operator matching:
By looking at the tokens before and after the encountered series of symbols, it is possible to detect the fixity of the operator. After that, apply an algorithm which detects a sequence of valid operators in the symbol series. Then inject those tokens in the token stream (in nextToken() ).
Just make sure you define all hardcoded tokens before the SYMBOL definition.
Precedence climbing:
Actually this wasn't that hard, it is exactly the same as ANTLR4's internal strategy.
grammar Math;
tokens { PREOP, POSTOP, BINOP, ERROR }
#header {
import java.util.*;
}
#lexer::members {
private static List<String> binaryOperators;
private static List<String> prefixOperators;
private static List<String> postfixOperators;
{
binaryOperators = new ArrayList<String>();
binaryOperators.add("+");
binaryOperators.add("*");
binaryOperators.add("-");
binaryOperators.add("/");
System.out.println(binaryOperators);
prefixOperators = new ArrayList<String>();
prefixOperators.add("-");
System.out.println(prefixOperators);
postfixOperators = new ArrayList<String>();
postfixOperators.add("!");
System.out.println(postfixOperators);
}
private Deque<Token> deque = new LinkedList<Token>();
private Token previousToken;
private Token nextToken;
#Override
public Token nextToken() {
if (!deque.isEmpty()) {
return previousToken = deque.pollFirst();
}
Token next = super.nextToken();
if (next.getType() != SYMBOL) {
return previousToken = next;
}
StringBuilder builder = new StringBuilder();
while (next.getType() == SYMBOL) {
builder.append(next.getText());
next = super.nextToken();
}
deque.addLast(nextToken = next);
List<Token> tokens = findOperatorCombination(builder.toString(), getOperatorType());
for (int i = tokens.size() - 1; i >= 0; i--) {
deque.addFirst(tokens.get(i));
}
return deque.pollFirst();
}
private static List<Token> findOperatorCombination(String sequence, OperatorType type) {
switch (type) {
case POSTFIX:
return getPostfixCombination(sequence);
case PREFIX:
return getPrefixCombination(sequence);
case BINARY:
return getBinaryCombination(sequence);
default:
break;
}
return null;
}
private static List<Token> getPrefixCombination(String sequence) {
if (isPrefixOperator(sequence)) {
List<Token> seq = new ArrayList<Token>(1);
seq.add(0, new CommonToken(MathParser.PREOP, sequence));
return seq;
}
if (sequence.length() <= 1) {
return null;
}
for (int i = 1; i < sequence.length(); i++) {
List<Token> seq1 = getPrefixCombination(sequence.substring(0, i));
List<Token> seq2 = getPrefixCombination(sequence.substring(i, sequence.length()));
if (seq1 != null & seq2 != null) {
seq1.addAll(seq2);
return seq1;
}
}
return null;
}
private static List<Token> getPostfixCombination(String sequence) {
if (isPostfixOperator(sequence)) {
List<Token> seq = new ArrayList<Token>(1);
seq.add(0, new CommonToken(MathParser.POSTOP, sequence));
return seq;
}
if (sequence.length() <= 1) {
return null;
}
for (int i = 1; i < sequence.length(); i++) {
List<Token> seq1 = getPostfixCombination(sequence.substring(0, i));
List<Token> seq2 = getPostfixCombination(sequence.substring(i, sequence.length()));
if (seq1 != null && seq2 != null) {
seq1.addAll(seq2);
return seq1;
}
}
return null;
}
private static List<Token> getBinaryCombination(String sequence) {
for (int i = 0; i < sequence.length(); i++) { // i is number of postfix spaces
for (int j = 0; j < sequence.length() - i; j++) { // j is number of prefix spaces
String seqPost = sequence.substring(0, i);
List<Token> post = getPostfixCombination(seqPost);
String seqPre = sequence.substring(sequence.length()-j, sequence.length());
List<Token> pre = getPrefixCombination(seqPre);
String seqBin = sequence.substring(i, sequence.length()-j);
if ((post != null || seqPost.isEmpty()) &&
(pre != null || seqPre.isEmpty()) &&
isBinaryOperator(seqBin)) {
List<Token> res = new ArrayList<Token>();
if (post != null)
res.addAll(post);
res.add(new CommonToken(MathParser.BINOP, seqBin));
if (pre != null)
res.addAll(pre);
return res;
}
}
}
return null;
}
/**
* Returns the expected operator type based on the previous and next token
*/
private OperatorType getOperatorType() {
if (isAfterAtom()) {
if (isBeforeAtom()) {
return OperatorType.BINARY;
}
return OperatorType.POSTFIX;
}
return OperatorType.PREFIX;
}
private enum OperatorType { BINARY, PREFIX, POSTFIX };
/**
* Checks whether the current token is a token found at the start of atom elements
* #return
*/
private boolean isBeforeAtom() {
int tokenType = nextToken.getType();
return tokenType == MathParser.INT ||
tokenType == MathParser.PLEFT;
}
/**
* Checks whether the current token is a token found at the end of atom elements
* #return
*/
private boolean isAfterAtom() {
int tokenType = previousToken.getType();
return tokenType == MathParser.INT ||
tokenType == MathParser.PRIGHT;
}
private static boolean isBinaryOperator(String operator) {
return binaryOperators.contains(operator);
}
private static boolean isPrefixOperator(String operator) {
return prefixOperators.contains(operator);
}
private static boolean isPostfixOperator(String operator) {
return postfixOperators.contains(operator);
}
}
#parser::members {
static Map<String, Integer> precedenceMap = new HashMap<String, Integer>();
static {
precedenceMap.put("*", 2);
precedenceMap.put("+", 1);
precedenceMap.put("^", 4);
precedenceMap.put("-", 3);
precedenceMap.put("!", 5);
}
public static Integer getPrecedence(Token op) {
return precedenceMap.get(op.getText());
}
public static Integer getNextPrecedence(Token op) {
Integer p = getPrecedence(op);
if (op.getType() == PREOP) return p;
else if (op.getText().equals("^")) return p;
else if (op.getType() == BINOP) return p+1;
throw new IllegalArgumentException(op.getText());
}
}
prog
: expr[0]
;
expr [int _p]
: atom
( {getPrecedence(_input.LT(1)) >= $_p}? op=BINOP expr[getNextPrecedence($op)]
| {getPrecedence(_input.LT(1)) >= $_p}? POSTOP
)*
;
atom
: INT
| PLEFT expr[0] PRIGHT
| op=PREOP expr[getNextPrecedence($op)]
;
INT
: ( '0'..'9' )+
;
PLEFT : '(' ;
PRIGHT : ')' ;
WS
: [ \t\r\n]+ -> skip ; // skip spaces, tabs, newlines
SYMBOL
: .
;
Note: code is meant as an example, not as my real code (operators and precedence will be requested externally)

You can't define precedence/associativity rules for Antlr at runtime. What you can, however, is parse all of the operators (built-in in the language or user-defined) as a single chained list (like ArrayList<>) in the parse, then apply your own algorithm for precedence and associativity in a visitor (or in grammar actions, if you really want to).
The algorithm itself isn't that hard, if you iterate the list many times. For example, you can first fetch the precedence of each operator in the list, then check the one with highest precedence, see if its right or left-associative, and from there you've built your first (bottom-most) tree node. Keep applying until the list is empty, and you've built your own "parse tree", but without the parsing (you're not working with abstract-input strings anymore).
Alternatively, at runtime make externals calls for Antlr to compile the .g4 and to javac to compile the generated Antlr code, then use reflection to call it. However, it is probably much slower and arguably harder to pull off.

A parser rule that will work 'correctly' according to some runtime definition of Symbol precedence is possible. While not initially appearing to be an idiomatic choice, the standard alternative of deferring semantic analysis out of the parser would produce a very poorly differentiated parse tree -- making this a reasonable exception to the standard design rule.
In (overly simplified) form, the parser rule would be:
expr : LParen expr RParen # group
| expr Symbol expr # binary
| expr Symbol # postfix
| Symbol expr # prefix
| Int+ # value
;
To cure the ambiguity, add inline predicates:
expr : LParen expr RParen # group
| expr s=Symbol { binary($s) }? expr # binary
| expr s=Symbol { postfix($s) }? # postfix
| s=Symbol { prefix($s) }? expr # prefix
| Int+ # value
;
For any given Symbol, a single predicate method should evaluate as true.
Extending to multiple Symbol strings will add a bit of complexity (ex, differentiating a binary from a postfix followed by a prefix) but the mechanics remain largely the same.

I think your approach is the right way. I propose following grammar:
grammar Op;
options {
superClass=PrecedenceParser;
}
prog : expr[0] ;
expr[int _p] locals[Token op]: INT ({$op = _input.LT(1);} {getPrecedence($op) >= $_p}? OP expr[getPrecedence($op)])*;
INT : ( '0'..'9' )+ ;
OP : '+' | '*'; // all allowed symbols, should be extended
WS : [ \t\r\n]+ -> skip ; // skip spaces, tabs, newlines
The rule for op should contain all allowed operator symbols. My restriction to + and * is only for simplicity. The parser super class would be:
public abstract class PrecedenceParser extends Parser {
private Map<String, Integer> precedences;
public PrecedenceParser(TokenStream input) {
super(input);
this.precedences = new HashMap<>();
}
public PrecedenceParser putOperator(String op, int p) {
precedences.put(op, p);
return this;
}
public int getPrecedence(Token operator) {
Integer p = precedences.get(operator.getText());
if (p == null) {
return Integer.MAX_VALUE;
} else {
return p;
}
}
}
Results
with precedences {+ : 4, * : 3 }
(prog (expr 1 + (expr 2) * (expr 3 + (expr 4))))
with precedences {+ : 3, * : 4 }
(prog (expr 1 + (expr 2 * (expr 3) + (expr 4))))
Evaluating these sequences from left to right is equivalent to evaluating them with precedence.
This approach should work for larger sets of operators. ANTLR4 uses this approach internally for precedence climbing yet ANTLR uses constants instead of the precedences map (because it assumes that precedence is determined at parser build time).

Related

Parse Boolean Expression in Java

Given an arbitrary number of TextField inputs (t1, t2, t3, ...) and a custom boolean string input from a JtextArea, I need to check if lines in a file match the custom boolean expression. It needs to support nested parentheses.
Example:
User enters "str1" into t1 , "str2" into t2, "str3" into t3, "str4" into t4, "str5" into t5.
User enters the following into the JTextArea for the custom boolean:
"not ((t1 and not t3) or (t4 and t2)) or t5"
Then based on these inputs, I must filter a file and return lines in the file that match the custom boolean based on a "contains" relationship (e.g. "t1 and not t3" means a line must contain the string t1 and not contain the string t3).
For example a file with the following two lines:
str 5
str4 str2
The filter would only return str5 because it is the only line that matches the custom boolean.
I am having trouble even getting started. I have tried to think of a recursive solution but couldn't come up with anything. Also I tried non-recursive solutions but can't come up with anything either.
There is also the problem of the end result boolean needing to take in a parameter (each line in the file). I thought of maybe producing a sequence of operations to perform rather than a boolean that somehow takes in a parameter. But I can't figure out how to get this sequence in the first place.
Here is what I have now. It is very bad and I am thinking of scrapping this approach.
public class CustomInputParser {
private ArrayList<String> pairs;
private String inp;
private HashMap<Integer,String> atomMap;
public CustomInputParser() {
this.pairs = null;
this.inp = "";
this.atomMap = new HashMap<Integer,String>();
}
public void findAtoms() {
int i = 0;
for(String s : this.pairs) {
String[] indices = s.split(",");
int begin = Integer.valueOf(indices[0]);
int end = Integer.valueOf(indices[1]);
if(!inp.substring(begin+1, end).contains("(")) {
this.pairs.set(i, this.pairs.get(i) + ",#");
}
i++;
}
}
public void computeAtoms() {
int i = 0;
for(String s : this.pairs) {
if(s.contains(",#")) {
String[] indices = s.split(",");
int begin = Integer.valueOf(indices[0]);
int end = Integer.valueOf(indices[1]);
//this.pairs.set(i,this.pairs.get(i).replace(",a", ""));
this.pairs.set(i, this.pairs.get(i) + ","+inp.substring(begin+1, end));
this.atomMap.put(begin,this.pairs.get(i).split(",")[3]+"#"+String.valueOf(end));
}
i++;
}
System.out.println(this.pairs.toString());
System.out.println(this.atomMap.toString());
}
public void replaceAtoms() {
int i = 0;
for(String s : this.pairs) {
if(!(s.contains("o") || s.contains("a") || s.contains("n"))) {
String[] indices = s.split(",");
int begin = Integer.valueOf(indices[0])+1;
int end = Integer.valueOf(indices[1]);
for(int j = begin; j < end; j++) {
if(inp.charAt(j) == '(') {
if(atomMap.containsKey(j)) {
this.pairs.set(i, this.pairs.get(i) + ","+j+"#"+atomMap.get(j).split("#")[1]+">"+atomMap.get(j).split("#")[0]);
}
else {
this.pairs.set(i,"!"+ this.pairs.get(i));
}
}
}
}
i++;
}
System.out.println(this.pairs.toString());
}
public ArrayList<String> getPairs(String str){
this.inp = str;
ArrayList<String> res = new ArrayList<String>();
char[] arr = str.toCharArray();
Stack<Integer> s = new Stack<Integer>();
for(int i = 0; i < arr.length; i++) {
if(arr[i] == '(') {
s.push(i);
}
if(arr[i] == ')') {
if(s.empty()) {
return null;
}
else {
Integer start = s.pop();
Integer end = Integer.valueOf(i);
res.add(start.toString() + "," + end.toString());
}
}
}
if(!s.empty()) {
return null;
}
this.pairs = res;
return res;
}
public static void main(String[] args) {
String x = "((not t1 and ((not t2 or t4) or (t3 or t4))) or (t5 and not t6)) and t7";
x = x.replace("not", "n").replace("and","a").replace("or", "o").replace("t", "").replace(" ", "");
System.out.println(x);
CustomInputParser c = new CustomInputParser();
System.out.println(c.getPairs(x).toString());
c.findAtoms();
c.computeAtoms();
c.replaceAtoms();
}
}

The first step is to tokenize the input. Define
enum Token {VAR, LP, RP, NOT, AND, OR, END}
LP and RP are parentheses. Now define a tokenizer class that looks something like this:
class Tokenizer {
Tokenizer(String input) {...}
void reset() {...}
Token getNext() {...}
String getVarName() {...}
}
Calling getNext() on your example in a loop should return
LP LP NOT VAR AND LP LP NOT VAR OR VAR RP OR LP VAR OR VAR RP RP RP OR LP VAR AND NOT VAR RP RP AND VAR END
Calling getVarName() immediately after a VAR has been returned by getNext() gives you the name of the variable (e.g. "t42").
There are many ways to implement little scanners like this. You should do this first and make sure it's bulletproof by testing. Trying to build a parser on top of a flaky scanner is torture.
As I said in comments, I'd consider recursive descent parsing. If you have a suitable grammar, writing an RD parser is a very short step as the Dragon Book (also mentioned above) shows.
A reasonable grammar (using tokens as above) is
Expr -> Term AND Term
| Term OR Term
| Term END
Term -> NOT Term
| Opnd
Opnd -> VAR
| LP Expr RP
For example, here is how you'd get started. It shows the first rule converted to a function:
class Evaluator {
final Tokenizer tokenizer = ...; // Contains the expression text.
final Map<String, Boolean> env = ... // Environment: variables to values.
Token lookAhead; // Holds the token we're parsing right now.
Evaluator(Tokenizer tokenizer, Map<String, Boolean> env) { ... }
void advance() {
lookAhead = tokenizer.getNext();
}
boolean expr() {
boolean leftHandSide = term(); // Parse the left hand side recursively.
Token op = lookAhead; // Remember the operator.
if (op == Token.END) return leftHandSide; // Oops. That's all.
advance(); // Skip past the operator.
boolean rightHandSide = term(); // Parse the right hand side recursively.
if (op == Token.AND) return leftHandSide && rightHandSide; // Evaluate!
if (op == Token.OR) return leftHandSide || rightHandSide;
dieWithSyntaxError("Expected op, found " + op);
}
boolean term() {...}
boolean opnd() {...}
}
The environment is used when a VAR is parsed. Its boolean value is env.get(tokenizer.getVarName()).
So to process the file, you'll
For each line
For each variable tX in the expression
See if the line contains the string tX is bound to in its text field.
If so, put the mapping tX -> true in the environment
else put tX -> false
Reset the tokenizer
Call Evaluator.evaluate(tokenizer, environment)
If it returns true, print the line, else skip it.
This is the simplest approach I can think of. About 150 lines. Many optimizations are possible.
Added
Well since I can no longer take away the thrill of discovery, here is my version:
import static java.lang.Character.isDigit;
import static java.lang.Character.isWhitespace;
import java.util.HashMap;
import java.util.Map;
import static java.util.stream.Collectors.toMap;
public class TextExpressionSearch {
enum Token { VAR, LP, RP, NOT, AND, OR, END }
static class Tokenizer {
final String input;
int pos = 0;
String var;
Tokenizer(String input) {
this.input = input;
}
void reset() {
pos = 0;
var = null;
}
String getRead() {
return input.substring(0, pos);
}
Token getNext() {
var = null;
while (pos < input.length() && isWhitespace(input.charAt(pos))) {
++pos;
}
if (pos >= input.length()) {
return Token.END;
}
int start = pos++;
switch (input.charAt(start)) {
case 't':
while (pos < input.length() && isDigit(input.charAt(pos))) {
++pos;
}
var = input.substring(start, pos);
return Token.VAR;
case '(':
return Token.LP;
case ')':
return Token.RP;
case 'n':
if (input.startsWith("ot", pos)) {
pos += 2;
return Token.NOT;
}
break;
case 'a':
if (input.startsWith("nd", pos)) {
pos += 2;
return Token.AND;
}
break;
case 'o':
if (input.startsWith("r", pos)) {
pos += 1;
return Token.OR;
}
break;
}
throw new AssertionError("Can't tokenize: " + input.substring(start));
}
}
static class Evaluator {
final Tokenizer tokenizer;
final Map<String, Boolean> env;
Token lookAhead;
Evaluator(Tokenizer tokenizer, Map<String, Boolean> env) {
this.tokenizer = tokenizer;
this.env = env;
advance();
}
boolean die(String msg) {
throw new AssertionError(msg + "\nRead: " + tokenizer.getRead());
}
void advance() {
lookAhead = tokenizer.getNext();
}
void match(Token token) {
if (lookAhead != token) {
die("Expected " + token + ", found " + lookAhead);
}
advance();
}
boolean evaluate() {
boolean exprVal = expr();
match(Token.END);
return exprVal;
}
boolean expr() {
boolean lhs = negated();
switch (lookAhead) {
case AND:
advance();
return negated() && lhs;
case OR:
advance();
return negated() || lhs;
case END:
return lhs;
}
return die("Expected expr, found " + lookAhead);
}
boolean negated() {
switch (lookAhead) {
case NOT:
advance();
return !negated();
default:
return operand();
}
}
boolean operand() {
switch (lookAhead) {
case VAR:
if (!env.containsKey(tokenizer.var)) {
die("Undefined variable: " + tokenizer.var);
}
boolean varVal = env.get(tokenizer.var);
advance();
return varVal;
case LP:
advance();
boolean exprVal = expr();
match(Token.RP);
return exprVal;
}
return die("Expected operand, found " + lookAhead);
}
}
public static void main(String [] args) {
String expr = "((not t1 and ((not t2 or t4) or (t3 or t4))) or (t5 and not t6)) and t7";
Map<String, String> bindings = new HashMap<>();
bindings.put("t1", "str1");
bindings.put("t2", "str2");
bindings.put("t3", "str3");
bindings.put("t4", "str4");
bindings.put("t5", "str5");
bindings.put("t6", "str6");
bindings.put("t7", "str7");
String [] lines = {"str5 str7", "str4 str2"};
Tokenizer tokenizer = new Tokenizer(expr);
for (String line : lines) {
Map<String, Boolean> env =
bindings.entrySet().stream()
.collect(toMap(e -> e.getKey(), e -> line.contains(e.getValue())));
tokenizer.reset();
if (new Evaluator(tokenizer, env).evaluate()) {
System.out.println(line);
}
}
}
}

You can define a parser that returns a Predicate<String> that tests if a given string satisfies a conditional expression.
static Predicate<String> parse(String s, Map<String, String> map) {
return new Object() {
String[] tokens = Pattern.compile("[()]|[a-z][a-z0-9]*")
.matcher(s).results()
.map(MatchResult::group)
.toArray(String[]::new);
int length = tokens.length;
int index = 0;
String token = get();
String get() {
return token = index < length ? tokens[index++] : null;
}
boolean eat(String expect) {
if (expect.equals(token)) {
get();
return true;
}
return false;
}
Predicate<String> identifier() {
String id = token;
return s -> {
String value = map.get(id);
if (value == null)
throw new RuntimeException(
"identifier '" + id + "' undefined");
return s.contains(value);
};
}
Predicate<String> factor() {
boolean not = false;
Predicate<String> p;
if (eat("not"))
not = true;
switch (token) {
case "(":
get();
p = expression();
if (!eat(")"))
throw new RuntimeException("')' expected");
break;
case ")": case "not": case "and": case "or":
throw new RuntimeException("syntax error at '" + token + "'");
default:
p = identifier();
get();
break;
}
if (not)
p = p.negate();
return p;
}
Predicate<String> term() {
Predicate<String> p = factor();
while (eat("and"))
p = p.and(factor());
return p;
}
Predicate<String> expression() {
Predicate<String> p = term();
while (eat("or"))
p = p.or(term());
return p;
}
Predicate<String> parse() {
Predicate<String> p = expression();
if (token != null)
throw new RuntimeException("extra tokens string");
return p;
}
}.parse();
}
test case:
#Test
public void testParse() {
String s = "not ((t1 and not t3) or (t4 and t2)) or t5";
Map<String, String> map = new HashMap<>(Map.of(
"t1", "str1",
"t2", "str2",
"t3", "str3",
"t4", "str4",
"t5", "str5"));
Predicate<String> p = parse(s, map);
assertTrue(p.test("str5"));
assertTrue(p.test("str3"));
assertTrue(p.test("str1 str3"));
assertFalse(p.test("str1"));
assertFalse(p.test("str2 str4"));
// you can change value of variables.
assertFalse(p.test("str1 FOO"));
map.put("t5", "FOO");
assertTrue(p.test("str1 FOO"));
}
syntax:
expression = term { "or" term }
term = factor { "and" factor }
factor = [ "not" ] ( "(" expression ")" | identifier )
identifier = letter { letter | digit }
letter = "a" | "b" | ... | "z"
digit = "0" | "1" | ... | "9"

For posterity, here is my shunting yard solution which includes input validation:
public class CustomInputParser {
private Stack<Character> ops;
private LinkedList<Character> postFix;
private HashMap<Character, Integer> precedence;
private Stack<Boolean> eval;
private HashMap<Integer, String> termsMap;
private String customBool;
public CustomInputParser(HashMap<Integer, String> tMap, String custBool) {
this.ops = new Stack<Character>();
this.eval = new Stack<Boolean>();
this.postFix = new LinkedList<Character>();
this.termsMap = tMap;
this.customBool = custBool;
this.precedence = new HashMap<Character, Integer>();
precedence.put('n', 1);
precedence.put('a', 2);
precedence.put('o',3);
precedence.put('(', 4);
}
private int inToPost() {
char[] expr = convertToArr(this.customBool);
char c;
for(int i = 0; i < expr.length; i++) {
c = expr[i];
if(isOp(c)) {
if(processOp(c) != 0) return -1;
}
else {
if(!Character.isDigit(c)) {
return -1;
}
//I made the mistake of using a queue of Characters for postfix initially
//This only worked for up to 9 operands (multi digit would add mutiple chars to
// postfix for a single reference.
//This loops is a lazy workaround:
// 1. get the string of the reference (e.g. "11")
// 2. convert it to int
// 3. store the char value of the int in postfix
// 4. when evaluating operands in postfix eval, convert char back to int to get the termsMap key
String num = "";
while(i < expr.length) {
if(!Character.isDigit(expr[i])) {
i--;
break;
}
c = expr[i];
num += c;
i++;
}
int j = Integer.valueOf(num);
c = (char) j;
postFix.offer(c); //enqueue
}
}
while(!ops.empty()) {
if(ops.peek() == '(')return -1; //no matching close paren for the open paren
postFix.offer(ops.pop()); //pop and enqueue all remaining ops from stack
}
return 0;
}
private boolean isOp(char c) {
if(c == '(' || c == ')' || c =='n' || c=='a' || c=='o') {
return true;
}
return false;
}
private int processOp(char c) {
if (ops.empty() || c == '(') {
ops.push(c);
}
else if(c == ')') {
while(ops.peek() != '(') {
postFix.offer(ops.pop()); //pop and equeue ops wrapped in parens
if(ops.empty()) return -1; //no matching open paren for the close paren
}
ops.pop(); // don't enqueue open paren, just remove it from stack
}
else if(precedence.get(c) > precedence.get(ops.peek())) {
postFix.offer(ops.pop()); //pop and enqueue the higher precedence op
ops.push(c);
}
else {
ops.push(c);
}
return 0;
}
public boolean evaluate(String s) {
while(!postFix.isEmpty()) {
char c = postFix.poll();
boolean op1, op2;
switch(c) {
case 'n':
op1 = eval.pop();
eval.push(!op1);
break;
case 'a':
op1 = eval.pop();
op2 = eval.pop();
eval.push(op1 && op2);
break;
case 'o':
op1 = eval.pop();
op2 = eval.pop();
eval.push(op1 || op2);
break;
default:
int termKey = (int) c;
String term = this.termsMap.get(termKey);
eval.push(s.contains(String.valueOf(term)));
break;
}
}
return eval.pop();
}
private char[] convertToArr(String x) {
x = x.replace("not", "n").replace("and","a").replace("or", "o").replace("t", "").replace(" ", "");
return x.toCharArray();
}
public static void main(String[] args) {
String customBool = "(t1 and not (t2 and t3)) or (t4 and not t5)";
HashMap<Integer,String> termsMap = new HashMap<Integer, String>();
termsMap.put(1,"str1");
termsMap.put(2,"str2");
termsMap.put(3,"str3");
termsMap.put(4,"str4");
termsMap.put(5,"str5");
CustomInputParser c = new CustomInputParser(termsMap, customBool);
if(c.inToPost() != 0) {
System.out.println("invalid custom boolean");
}
else {
System.out.println(c.evaluate("str1str5"));
}
}
}

Java Evaluating RPN using Shunting Yard Algorithm

I have the shunting yard algorithm that I found online:
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
public class ShuntingYardAlgorithm {
private enum Operator {
ADD(1), SUBTRACT(2), MULTIPLY(3), DIVIDE(4);
final int precedence;
Operator(int p) {
precedence = p;
}
}
private Map<String, Operator> operatorMap = new HashMap<String, Operator>() {/**
*
*/
private static final long serialVersionUID = 1L;
{
put("+", Operator.ADD);
put("-", Operator.SUBTRACT);
put("*", Operator.MULTIPLY);
put("/", Operator.DIVIDE);
}};
private boolean isHigherPrec(String op, String sub) {
return (operatorMap.containsKey(sub) &&
operatorMap.get(sub).precedence >= operatorMap.get(op).precedence);
}
public String shuntingYard(String infix) {
StringBuilder output = new StringBuilder();
Stack<String> stack = new Stack<String>();
for (String token : infix.split("")) {
//operator
if (operatorMap.containsKey(token)) {
while ( ! stack.isEmpty() && isHigherPrec(token, stack.peek())) {
output.append(stack.pop()).append(' ');
}
stack.push(token);
}
//left parenthesis
else if (token.equals("(")) {
stack.push(token);
}
//right parenthesis
else if (token.equals(")")) {
while ( ! stack.peek().equals("(")) {
output.append(stack.pop()).append(' ');
}
stack.pop();
}
//digit
else {
output.append(token).append(' ');
}
}
while ( ! stack.isEmpty()) {
output.append(stack.pop()).append(' ');
}
return output.toString();
}
}
And the evaluator:
private static int evalRPN(String[] tokens) {
int returnValue = 0;
String operators = "+-*/";
Stack<String> stack = new Stack<String>();
for (String t : tokens) {
if (!operators.contains(t)) {
stack.push(t);
} else {
int a = Integer.valueOf(stack.pop());
int b = Integer.valueOf(stack.pop());
switch (t) {
case "+":
stack.push(String.valueOf(a + b));
break;
case "-":
stack.push(String.valueOf(b - a));
break;
case "*":
stack.push(String.valueOf(a * b));
break;
case "/":
stack.push(String.valueOf(b / a));
break;
}
}
}
returnValue = Integer.valueOf(stack.pop());
return returnValue;
}
And they work good so far but I have a problem with the evaluation where the delimiter is split by "", which does not allow two digit numbers, such as 23, or above. What can you suggest that I can do to improve the evaluation method?
String output = new ShuntingYardAlgorithm().shuntingYard(algExp);
algExp = output.replaceAll(" ", "");
String[] outputArray = algExp.split("");
return evalRPN(outputArray);
Such as I input: 256+3
result: 2 5 6 3 +
Evaluation: 6 + 3 = 9, ignores 2 and 5

Your shuntingYard function is discarding the contents of output when an operator or a parenthesis is encountered.
You need to add checks for contents of output before processing the current character.
if (operatorMap.containsKey(token)) {
// TODO: Check output here first, and create a token as necessary
while ( ! stack.isEmpty() && isHigherPrec(token, stack.peek())) {
output.append(stack.pop()).append(' ');
}
stack.push(token);
}
//left parenthesis
else if (token.equals("(")) {
// TODO: Check output here first, and create a token as necessary
stack.push(token);
}
//right parenthesis
else if (token.equals(")")) {
// TODO: Check output here first, and create a token as necessary
while ( ! stack.peek().equals("(")) {
output.append(stack.pop()).append(' ');
}
stack.pop();
}
Also, splitting using the empty string is equivalent to just iterating the String one character at a time. Iterating infix using toCharArray() might be more readable
for (char c : infix.toCharArray())

Having trouble checking to see if a string is balanced or not

package edu.bsu.cs121.mamurphy;
import java.util.Stack;
public class Checker {
char openPara = '(';
char openBracket = '[';
char openCurly = '{';
char openArrow = '<';
char closePara = ')';
char closeBracket = ']';
char closeCurly = '}';
char closeArrow = '>';
public boolean checkString(String stringToCheck) {
Stack<Character> stack = new Stack<Character>();
for (int i = 0; i < stringToCheck.length(); i++) {
char c = stringToCheck.charAt(i);
if (c == openPara || c == openBracket || c == openCurly || c == openArrow) {
stack.push(c);
System.out.println(stack);
;
}
if (c == closePara) {
if (stack.isEmpty()) {
System.out.println("Unbalanced");
break;
} else if (stack.peek() == openPara) {
stack.pop();
} else if (stack.size() > 0) {
System.out.println("Unbalanced");
break;
}
}
if (c == closeBracket) {
if (stack.isEmpty()) {
System.out.println("Unbalanced");
break;
} else if (stack.peek() == openBracket) {
stack.pop();
} else if (stack.size() > 0) {
System.out.println("Unbalanced");
break;
}
}
if (c == closeCurly) {
if (stack.isEmpty()) {
System.out.println("Unbalanced");
break;
} else if (stack.peek() == openCurly) {
stack.pop();
} else if (stack.size() > 0) {
System.out.println("Unbalanced");
break;
}
}
if (c == closeArrow) {
if (stack.isEmpty()) {
System.out.println("Unbalanced");
break;
} else if (stack.peek() == openArrow) {
stack.pop();
} else if (stack.size() > 0) {
System.out.println("Unbalanced");
break;
}
}
}
return false;
}
}
I am currently trying to create a program where I check to see if a string is balanced or not. A string is balanced if and only if each opening character: (, {, [, and < have a matching closing character: ), }, ], and > respectively.
What happens is when checking through the string, if an opening character is found, it is pushed into a stack, and it checks to see if there is the appropriate closing character.
If there is a closing character before the opening character, then that automatically means that the string is unbalanced. Also, the string is automatically unbalanced if after going to the next character there is something still inside of the stack.
I tried to use
else if (stack.size() > 0) {
System.out.println("Unbalanced");
break;
}
as a way of seeing if the stack still had anything in it, but it still isn't working for me. Any advice on what to do?
For example, if the string input were ()<>{() then the program should run through like normal until it gets to the single { and then the code should realize that the string is unbalanced and output Unbalanced.
For whatever reason, my code does not do this.

The following logic is flawed (emphasis mine):
For example, if the string input were ()<>{() then the program should run through like normal until it gets to the single { and then the code should realize that the string is unbalanced and output Unbalanced.
In fact, the code can't conclude that the string is unbalanced until it has scanned the entire string and established that the { has no matching }. For all it knows, the full input could be ()<>{()} and be balanced.
To achieve this, you need to add a check that ensures that the stack is empty after the entire string has been processes. In your example, it would still contain the {, indicating that the input is not balanced.

I took a shot at answering this. My solutions returns true if the string is balanced and enforces opening/closing order (ie ({)} returns false). I started with your code and tried to slim it down.
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
public class mamurphy {
private static final char openPara = '(';
private static final char openBracket = '[';
private static final char openCurly = '{';
private static final char openArrow = '<';
private static final char closePara = ')';
private static final char closeBracket = ']';
private static final char closeCurly = '}';
private static final char closeArrow = '>';
public static void main(String... args) {
System.out.println(checkString("{}[]()90<>"));//true
System.out.println(checkString("(((((())))"));//false
System.out.println(checkString("((())))"));//false
System.out.println(checkString(">"));//false
System.out.println(checkString("["));//false
System.out.println(checkString("{[(<>)]}"));//true
System.out.println(checkString("{[(<>)}]"));//false
System.out.println(checkString("( a(b) (c) (d(e(f)g)h) I (j<k>l)m)"));//true
}
public static boolean checkString(String stringToCheck) {
final Map<Character, Character> closeToOpenMap = new HashMap<>();
closeToOpenMap.put(closePara, openPara);
closeToOpenMap.put(closeBracket, openBracket);
closeToOpenMap.put(closeCurly, openCurly);
closeToOpenMap.put(closeArrow, openArrow);
Stack<Character> stack = new Stack<>();
final char[] stringAsChars = stringToCheck.toCharArray();
for (int i = 0; i < stringAsChars.length; i++) {
final char current = stringAsChars[i];
if (closeToOpenMap.values().contains(current)) {
stack.push(current); //found an opening char, push it!
} else if (closeToOpenMap.containsKey(current)) {
if (stack.isEmpty() || closeToOpenMap.get(current) != stack.pop()) {
return false;//found closing char without correct opening char on top of stack
}
}
}
if (!stack.isEmpty()) {
return false;//still have opening chars after consuming whole string
}
return true;
}
}

Here's an alternate approach:
private static final char[] openParens = "[({<".toCharArray();
private static final char[] closeParens = "])}>".toCharArray();
public static boolean isBalanced(String expression){
Deque<Character> stack = new ArrayDeque<>();
for (char c : expression.toCharArray()){
for (int i = 0; i < openParens.length; i++){
if (openParens[i] == c){
// This is an open - put it in the stack
stack.push(c);
break;
}
if (closeParens[i] == c){
// This is a close - check the open is at the top of the stack
if (stack.poll() != openParens[i]){
return false;
}
break;
}
}
}
return stack.isEmpty();
}
It simplifies the logic to have two corresponding arrays of open and close symbols. You could also do this with even and odd positions in one array - ie. "{}<>", for example:
private static final char[] symbols = "[](){}<>".toCharArray();
public static boolean isBalanced(String expression){
Deque<Character> stack = new ArrayDeque<>();
for (char c : expression.toCharArray()){
for (int i = 0; i < symbols.length; i += 2){
if (symbols[i] == c){
// This is an open - put it in the stack
stack.push(c);
break;
}
if (symbols[i + 1] == c){
// This is a close - check the open is at the top of the stack
if (stack.poll() != symbols[i]){
return false;
}
break;
}
}
}
return stack.isEmpty();
}
Note that poll returns null if the stack is empty, so will correctly fail the equality comparison if we run out of stack.

For example, if the string input were ()<>{() then the program should run through like normal until it gets to the single { and then the code should realize that the string is unbalanced and output Unbalanced.
It is not clear by your example whether the boundaries can be nested like ([{}]). If they can, that logic will not work, as the whole string has to be consumed to be sure any missing closing-chars aren't at the end, and so, the string cannot be reliably deemed unbalanced at the point you indicate.
Here is my take on your problem:
BalanceChecker class:
package so_q33378870;
import java.util.Stack;
public class BalanceChecker {
private final char[] opChars = "([{<".toCharArray();
private final char[] edChars = ")]}>".toCharArray();
//<editor-fold defaultstate="collapsed" desc="support functions">
public boolean isOPChar(char c) {
for (char checkChar : opChars) {
if (c == checkChar) {
return true;
}
}
return false;
}
public boolean isEDChar(char c) {
for (char checkChar : edChars) {
if (c == checkChar) {
return true;
}
}
return false;
}
//NOTE: Unused.
// public boolean isBoundaryChar(char c) {
// boolean result;
// if (result = isOPChar(c) == false) {
// return isEDChar(c);
// } else {
// return result;
// }
// }
public char getOpCharFor(char c) {
for (int i = 0; i < edChars.length; i++) {
if (c == edChars[i]) {
return opChars[i];
}
}
throw new IllegalArgumentException("The character (" + c + ") received is not recognized as a closing boundary character.");
}
//</editor-fold>
public boolean isBalanced(char[] charsToCheck) {
Stack<Character> checkStack = new Stack<>();
for (int i = 0; i < charsToCheck.length; i++) {
if (isOPChar(charsToCheck[i])) {
//beginning char found. Add to top of stack.
checkStack.push(charsToCheck[i]);
} else if (isEDChar(charsToCheck[i])) {
if (checkStack.isEmpty()) {
//ending char found without beginning chars on the stack. UNBALANCED.
return false;
} else if (getOpCharFor(charsToCheck[i]) == checkStack.peek()) {
//ending char found matches last beginning char on the stack. Pop and continue.
checkStack.pop();
} else {
//ending char found, but doesn't match last beginning char on the stack. UNBALANCED.
return false;
}
}
}
//the string is balanced if and only if the stack is empty at the end.
return checkStack.empty();
}
public boolean isBalanced(String stringToCheck) {
return isBalanced(stringToCheck.toCharArray());
}
}
Main class (used for testing):
package so_q33378870;
public class main {
private static final String[] tests = {
//Single - Balanced.
"()",
//Single - Unbalanced by missing end.
"(_",
//Multiple - Balanced.
"()[]{}<>",
//Multiple - Unbalanced by missing beginning.
"()[]_}<>",
//Nested - Balanced.
"([{<>}])",
//Nested - Unbalanced by missing end.
"([{<>}_)",
//Endurance test - Balanced.
"the_beginning (abcd) divider (a[bc]d) divider (a[b{c}d]e) divider (a[b{c<d>e}f]g) the_end"
};
/**
* #param args the command line arguments
*/
public static void main(String[] args) {
BalanceChecker checker = new BalanceChecker();
for (String s : tests) {
System.out.println("\"" + s + "\" is " + ((checker.isBalanced(s)) ? "BALANCED!" : "UNBALANCED!"));
}
}
}

Parenthesis Check within a Linked stack for infix to postfix

I have four classes.
One contains my linkedstack setup
One is infixtopostfix for prioritization and conversion
Parenthesis for matching
Postfix for evaluation
I have setup almost everything here but it is still returning false anyway I put it.
On another note my equals on !stackMatch.pop().equals(c) is not working due to it being a object type with '!' being a problem.
My programs are simple and straight forward:
LinkedStack.java
public class LinkedStack implements StackInterface {
private Node top;
public LinkedStack() {
top = null;
} // end default constructor
public boolean isEmpty() {
return top == null;
} // end isEmpty
public void push(Object newItem) {
Node n = new Node();
n.setData(newItem);
n.setNext(top);
top = n;
} // end push
public Object pop() throws Exception {
if (!isEmpty()) {
Node temp = top;
top = top.getNext();
return temp.getData();
} else {
throw new Exception("StackException on pop: stack empty");
} // end if
} // end pop
public Object peek() throws Exception {
if (!isEmpty()) {
return top.getData();
} else {
throw new Exception("StackException on peek: stack empty");
} // end if
} // end peek
} // end LinkedStack
InfixToPostfix.java
import java.util.*;
public class InfixToPostfix {
Parenthesis p = new Parenthesis();
LinkedStack stack = new LinkedStack();
String token = ""; // each token of the string
String output = ""; // the string holding the postfix expression
Character topOfStackObject = null; // the top object of the stack, converted to a Character Object
char charValueOfTopOfStack = ' '; // the primitive value of the Character object
/**
* Convert an infix expression to postfix. If the expression is invalid, throws an exception.
* #param s the infix expression
* #return the postfix expression as a string
* hint: StringTokenizer is very useful to this iteratively
*/
//public String convertToPostfix(String s) throws Exception {
//}
private boolean isOperand (char c){
return ((c>= '0' && c <= '9') || (c >= 'a' && c<= 'z'));
}
public void precedence(char curOp, int val) throws Exception {
while (!stack.isEmpty()) {
char topOp = (Character) stack.pop();
// charValueOfTopOfStack = topOfStackObject.charValue();
if (topOp == '(') {
stack.push(topOp);
break;
}// it's an operator
else {// precedence of new op
int prec2;
if (topOp == '+' || topOp == '-') {
prec2 = 1;
} else {
prec2 = 2;
}
if (prec2 < val) // if prec of new op less
{ // than prec of old
stack.push(topOp); // save newly-popped op
break;
} else // prec of new not less
{
output = output + topOp; // than prec of old
}
}
}
}
Parenthesis.java
import java.util.*;
public class Parenthesis{
private LinkedStack stack = new LinkedStack();
private Object openBrace;
private String outputString;
/**
* Determine if the expression has matching parenthesis using a stack
*
* #param expr the expression to be evaluated
* #return returns true if the expression has matching parenthesis
*/
public boolean match(String expr) {
LinkedStack stackMatch = new LinkedStack();
for(int i=0; i < expr.length(); i++) {
char c = expr.charAt(i);
if(c == '(')
stackMatch.push(c);
else if(c == ')'){
if (stackMatch.isEmpty() || !stackMatch.pop().equals(c))
return false;
}
}
return stackMatch.isEmpty();
}
}
Just wanted to give you all of it so you could help me. I have tests written already just struggling with the parenthesis problem of pushing it on the stack but unable to compare it to the closing parenthesis so it can check if there is enough while checking to be sure it is not empty.

The problem probably is, that you are trying to test if matching ( is currently on top of the stack when ) comes, but in c is acctual character, ), so you test if ) is on top of stack, not ( as you should.

Java Expression Parser & Calculator Shunting Yard Algorithm

So the task is to create our own parser for a expression calculator. For Example:
Input: 3+2*1-6/3
Output: 3
Input: 3++2
Output: Invalid Expression
Input: -5+2
Output: -3
Input: 5--2
Output: 7
The code here solves a part of the problem except that it has a fixed input and negative values cannot be solved, And I'm not quite sure yet if it really does solve the expression with operator precedence.
but I already modified it to get an input expression from the user.
and I've been wondering for hours how to implement the solving for negative values. help anyone?
NO JAVASCRIPT ENGINE PLEASE.
here's the current code
import java.util.*;
public class ExpressionParser
{
// Associativity constants for operators
private static final int LEFT_ASSOC = 0;
private static final int RIGHT_ASSOC = 1;
// Operators
private static final Map<String, int[]> OPERATORS = new HashMap<String, int[]>();
static
{
// Map<"token", []{precendence, associativity}>
OPERATORS.put("+", new int[] { 0, LEFT_ASSOC });
OPERATORS.put("-", new int[] { 0, LEFT_ASSOC });
OPERATORS.put("*", new int[] { 5, LEFT_ASSOC });
OPERATORS.put("/", new int[] { 5, LEFT_ASSOC });
}
// Test if token is an operator
private static boolean isOperator(String token)
{
return OPERATORS.containsKey(token);
}
// Test associativity of operator token
private static boolean isAssociative(String token, int type)
{
if (!isOperator(token))
{
throw new IllegalArgumentException("Invalid token: " + token);
}
if (OPERATORS.get(token)[1] == type) {
return true;
}
return false;
}
// Compare precedence of operators.
private static final int cmpPrecedence(String token1, String token2)
{
if (!isOperator(token1) || !isOperator(token2))
{
throw new IllegalArgumentException("Invalid tokens: " + token1
+ " " + token2);
}
return OPERATORS.get(token1)[0] - OPERATORS.get(token2)[0];
}
// Convert infix expression format into reverse Polish notation
public static String[] expToRPN(String[] inputTokens)
{
ArrayList<String> out = new ArrayList<String>();
Stack<String> stack = new Stack<String>();
// For each token
for (String token : inputTokens)
{
// If token is an operator
if (isOperator(token))
{
// While stack not empty AND stack top element
// is an operator
while (!stack.empty() && isOperator(stack.peek()))
{
if ((isAssociative(token, LEFT_ASSOC) &&
cmpPrecedence(token, stack.peek()) <= 0) ||
(isAssociative(token, RIGHT_ASSOC) &&
cmpPrecedence(token, stack.peek()) < 0))
{
out.add(stack.pop());
continue;
}
break;
}
// Push the new operator on the stack
stack.push(token);
}
// If token is a left bracket '('
else if (token.equals("("))
{
stack.push(token); //
}
// If token is a right bracket ')'
else if (token.equals(")"))
{
while (!stack.empty() && !stack.peek().equals("("))
{
out.add(stack.pop());
}
stack.pop();
}
// If token is a number
else
{
// if(!isOperator(stack.peek())){
// out.add(String.valueOf(token*10));
// }
out.add(token);
}
}
while (!stack.empty())
{
out.add(stack.pop());
}
String[] output = new String[out.size()];
return out.toArray(output);
}
public static double RPNtoDouble(String[] tokens)
{
Stack<String> stack = new Stack<String>();
// For each token
for (String token : tokens) //for each
{
// If the token is a value push it onto the stack
if (!isOperator(token))
{
stack.push(token);
}
else
{
// Token is an operator: pop top two entries
Double d2 = Double.valueOf( stack.pop() );
Double d1 = Double.valueOf( stack.pop() );
//Get the result
Double result = token.compareTo("*") == 0 ? d1 * d2 :
token.compareTo("/") == 0 ? d1 / d2 :
token.compareTo("+") == 0 ? d1 + d2 :
d1 - d2;
// Push result onto stack
stack.push( String.valueOf( result ));
}
}
return Double.valueOf(stack.pop());
}
public static void main(String[] args) throws Exception{
Scanner in = new Scanner(System.in);
String reg = "((?<=[<=|>=|==|\\+|\\*|\\-|<|>|/|=])|(?=[<=|>=|==|\\+|\\*|\\-|<|>|/|=]))";
while(true){
try{
System.out.println("Enter Your Expression");
//String[] input = "( 1 + 2 ) * ( 3 / 4 ) - ( 5 + 6 )".split(" ");
String[] input = in.nextLine() .split(reg);
String[] output = expToRPN(input);
// Build output RPN string minus the commas
System.out.print("Stack: ");
for (String token : output) {
System.out.print("[ ");System.out.print(token + " "); System.out.print("]");
}
System.out.println(" ");
// Feed the RPN string to RPNtoDouble to give result
Double result = RPNtoDouble( output );
System.out.println("Answer= " + result);
}catch (NumberFormatException | EmptyStackException nfe){
System.out.println("INVALID EXPRESSION"); }
}
}
}
UPDATED CODE:
Added: unaryToexp() function.
what I wanted to do was that everytime a " - " occurs, the code treats it as a binary by changing it to " _ " as another operator and this operator solves multiplies thing by -1 (what I wanted first was to add [-1] and [*] to the rpn stack). still got problems here.
compiler says:
Enter Your Expression
-5+3
Stack: [ ][ 5 ][ - ][ 3 ][ + ]
Exception in thread "main" java.lang.NumberFormatException: empty String
at sun.misc.FloatingDecimal.readJavaFormatString(FloatingDecimal.java:10 11)
at java.lang.Double.valueOf(Double.java:504)
at ExpressionParser.RPNtoDouble(ExpressionParser.java:160)
at ExpressionParser.main(ExpressionParser.java:194)*
I think it has something to do with the Double d1 = Double.valueOf( stack.pop() ); cause it still pops another two values, where I only need one for a solving a unary operator. any help?
public class ExpressionParser
{
// Associativity constants for operators
private static final int LEFT_ASSOC = 0;
private static final int RIGHT_ASSOC = 1;
// Operators
private static final Map<String, int[]> OPERATORS = new HashMap<String, int[]>();
static
{
// Map<"token", []{precendence, associativity}>
OPERATORS.put("-", new int[] { 0, LEFT_ASSOC });
OPERATORS.put("+", new int[] { 0, LEFT_ASSOC });
OPERATORS.put("*", new int[] { 5, LEFT_ASSOC });
OPERATORS.put("/", new int[] { 5, LEFT_ASSOC });
OPERATORS.put("_", new int[] { 5, RIGHT_ASSOC });
}
// Test if token is an operator
private static boolean isOperator(String token)
{
return OPERATORS.containsKey(token);
}
// Test associativity of operator token
private static boolean isAssociative(String token, int type)
{
if (!isOperator(token))
{
throw new IllegalArgumentException("Invalid token: " + token);
}
if (OPERATORS.get(token)[1] == type) {
return true;
}
return false;
}
// Compare precedence of operators.
private static final int cmpPrecedence(String token1, String token2)
{
if (!isOperator(token1) || !isOperator(token2))
{
throw new IllegalArgumentException("Invalid tokens: " + token1
+ " " + token2);
}
return OPERATORS.get(token1)[0] - OPERATORS.get(token2)[0];
}
// CONVERT UNARY OPERATORS
public static String[] unaryToexp(String[] inputTokens)
{
ArrayList<String> out = new ArrayList<String>();
Stack<String> stack = new Stack<String>();
//if token is an unary minus
for (String token : inputTokens)
{
if( ((token == "-") && (isOperator(stack.peek()) || stack.empty() ))){ //
token = "_";
}
else if (token == "-"){
token = "-";
}
out.add(token);
while (!stack.empty())
{
out.add(stack.pop());
}
}
String[] output = new String[out.size()];
return out.toArray(output);
}
// Convert infix expression format into reverse Polish notation
public static String[] expToRPN(String[] inputTokens)
{
ArrayList<String> out = new ArrayList<String>();
Stack<String> stack = new Stack<String>();
// For each token
for (String token : inputTokens)
{
// If token is an operator
if (isOperator(token))
{
// While stack not empty AND stack top element
// is an operator
while (!stack.empty() && isOperator(stack.peek()))
{
if ((isAssociative(token, LEFT_ASSOC) &&
cmpPrecedence(token, stack.peek()) <= 0) ||
(isAssociative(token, RIGHT_ASSOC) &&
cmpPrecedence(token, stack.peek()) < 0))
{
out.add(stack.pop());
continue;
}
break;
}
// Push the new operator on the stack
stack.push(token);
}
// If token is a left bracket '('
else if (token.equals("("))
{
stack.push(token); //
}
// If token is a right bracket ')'
else if (token.equals(")"))
{
while (!stack.empty() && !stack.peek().equals("("))
{
out.add(stack.pop());
}
stack.pop();
}
// If token is a number
else
{
out.add(token);
}
}
while (!stack.empty())
{
out.add(stack.pop());
}
String[] output = new String[out.size()];
return out.toArray(output);
}
public static double RPNtoDouble(String[] tokens)
{
Stack<String> stack = new Stack<String>();
// For each token
for (String token : tokens)
{
// If the token is a value push it onto the stack
if (!isOperator(token))
{
stack.push(token);
}
else
{
// Token is an operator: pop top two entries
Double d2 = Double.valueOf( stack.pop() );
Double d1 = Double.valueOf( stack.pop() );
//Get the result
Double result = token.compareTo("_") == 0 ? d2 * -1 :
token.compareTo("*") == 0 ? d1 * d2 :
token.compareTo("/") == 0 ? d1 / d2 :
token.compareTo("+") == 0 ? d1 + d2 :
d1 - d2;
// Push result onto stack
stack.push( String.valueOf( result ));
}
}
return Double.valueOf(stack.pop());
}
public static void main(String[] args) throws Exception{
Scanner in = new Scanner(System.in);
String reg = "((?<=[<=|>=|==|\\+|\\*|\\-|\\_|<|>|/|=])|(?=[<=|>=|==|\\+|\\*|\\-|<|>|/|=]))";
while(true){
//try{
System.out.println("Enter Your Expression");
//String[] input = "( 1 + 2 ) * ( 3 / 4 ) - ( 5 + 6 )".split(" ");
String[] input = in.nextLine() .split(reg);
String[] unary = unaryToexp(input); //.split(reg);
String[] output = expToRPN(unary);
// Build output RPN string minus the commas
System.out.print("Stack: ");
for (String token : output) {
System.out.print("[ ");System.out.print(token); System.out.print(" ]");
}
System.out.println(" ");
// Feed the RPN string to RPNtoDouble to give result
Double result = RPNtoDouble( output );
System.out.println("Answer= " + result);
//}catch (){
//System.out.println("INVALID EXPRESSION"); }
}
}
}

Here you are:
private static final ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");
public static String eval(String matlab_expression){
if(matlab_expression == null){
return "NULL";
}
String js_parsable_expression = matlab_expression
.replaceAll("\\((\\-?\\d+)\\)\\^(\\-?\\d+)", "(Math.pow($1,$2))")
.replaceAll("(\\d+)\\^(\\-?\\d+)", "Math.pow($1,$2)");
try{
return engine.eval(js_parsable_expression).toString();
}catch(javax.script.ScriptException e1){
return null; // Invalid Expression
}
}

Couldn't you use the javascript scripting engine? (you would need a bit of tweaking for the 5--2 expression) The code below outputs:
3+2*1-6/3 = 3.0
3++2 = Invalid Expression
-5+2 = -3.0
5--2 = 7.0
Code:
public class Test1 {
static ScriptEngine engine;
public static void main(String[] args) throws Exception {
engine = new ScriptEngineManager().getEngineByName("JavaScript");
printValue("3+2*1-6/3");
printValue("3++2");
printValue("-5+2");
printValue("5--2");
}
private static void printValue(String expression) {
String adjustedExpression = expression.replaceAll("--", "- -");
try {
System.out.println(expression + " = " + engine.eval(adjustedExpression));
} catch (ScriptException e) {
System.out.println(expression + " = Invalid Expression");
}
}
}

Rather than re-invent the wheel you could use a parser generator such as JavaCC or antlr, which is specifically designed for this kind of task. This is a nice example of a simple expression parser and evaluator in a couple of dozen lines of JavaCC.

Take a look at some examples and try to find a rule how to distinguish negative values from operators.
A rule like:
if (token is + or -) and next token is a number
and
(the previous token was empty
or the prvious token was ')' or another operator)
then it is a sign to the current value.
You could iterate through your original token list and create a new token list based on this rules.
I have just written such an expression evaluator and have an iterator for tokenizing expressions at hand. plan to publish it after some extensions on GitHub.
EDIT: Here is the iterator, the references and calls should be clear, it is a bit more complex because of support for variables/functions and multi-character operators:
private class Tokenizer implements Iterator<String> {
private int pos = 0;
private String input;
private String previousToken;
public Tokenizer(String input) {
this.input = input;
}
#Override
public boolean hasNext() {
return (pos < input.length());
}
private char peekNextChar() {
if (pos < (input.length() - 1)) {
return input.charAt(pos + 1);
} else {
return 0;
}
}
#Override
public String next() {
StringBuilder token = new StringBuilder();
if (pos >= input.length()) {
return previousToken = null;
}
char ch = input.charAt(pos);
while (Character.isWhitespace(ch) && pos < input.length()) {
ch = input.charAt(++pos);
}
if (Character.isDigit(ch)) {
while ((Character.isDigit(ch) || ch == decimalSeparator)
&& (pos < input.length())) {
token.append(input.charAt(pos++));
ch = pos == input.length() ? 0 : input.charAt(pos);
}
} else if (ch == minusSign
&& Character.isDigit(peekNextChar())
&& ("(".equals(previousToken) || ",".equals(previousToken)
|| previousToken == null || operators
.containsKey(previousToken))) {
token.append(minusSign);
pos++;
token.append(next());
} else if (Character.isLetter(ch)) {
while (Character.isLetter(ch) && (pos < input.length())) {
token.append(input.charAt(pos++));
ch = pos == input.length() ? 0 : input.charAt(pos);
}
} else if (ch == '(' || ch == ')' || ch == ',') {
token.append(ch);
pos++;
} else {
while (!Character.isLetter(ch) && !Character.isDigit(ch)
&& !Character.isWhitespace(ch) && ch != '('
&& ch != ')' && ch != ',' && (pos < input.length())) {
token.append(input.charAt(pos));
pos++;
ch = pos == input.length() ? 0 : input.charAt(pos);
if (ch == minusSign) {
break;
}
}
if (!operators.containsKey(token.toString())) {
throw new ExpressionException("Unknown operator '" + token
+ "' at position " + (pos - token.length() + 1));
}
}
return previousToken = token.toString();
}
#Override
public void remove() {
throw new ExpressionException("remove() not supported");
}
}

Develop Reference

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

Dynamic operator tokens in ANTLR4 - java

Related

Parse Boolean Expression in Java

Java Evaluating RPN using Shunting Yard Algorithm

Having trouble checking to see if a string is balanced or not

Parenthesis Check within a Linked stack for infix to postfix

Java Expression Parser & Calculator Shunting Yard Algorithm

Categories

Resources