Skip to content

Commit 6a17ca8

Browse files
author
Maarten
committed
Fix typos
1 parent bbd4825 commit 6a17ca8

File tree

25 files changed

+107
-109
lines changed

25 files changed

+107
-109
lines changed

README.md

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,24 +26,14 @@ For example:
2626

2727
This library allows you to do these things [efficiently](https://github.com/digitalheir/probabilistic-earley-parser-javascript#runtime-complexity), as long as you can describe the rules as a [Context-free Grammar](https://en.wikipedia.org/wiki/Context-free_grammar) (CFG).
2828

29-
The innovation of this library with respect to the many other parsing libraries is that this one allows the production rules in your grammar to have a probability attached to them. That is: it parses [Stochastic Context-free Grammars](https://en.wikipedia.org/wiki/Stochastic_context-free_grammar). This allows us to make better choices in case of ambiguous sentences: we can order them by probability.
29+
The innovation of this library with respect to the many other parsing libraries is that this one allows the production rules in your grammar to have a probability attached to them. That is: it parses [Stochastic Context-free Grammars](https://en.wikipedia.org/wiki/Stochastic_context-free_grammar). This allows us to make better choices in case of ambiguous sentences: we can order them by probability. Furthermore, this parser does not limit token types to strings.
3030

31-
The parser seems to work correctly and efficiently, but is still < v1.0 because I have not added as much utility functions and tests as I would like.
31+
The parser seems to work correctly and efficiently, but is still < v1.0 because I still
32+
want some extra utility functions and tests.
3233

3334
For a theoretical grounding of this work, refer to [*Stolcke, An Efficient Probabilistic Context-Free
3435
Parsing Algorithm that Computes Prefix
3536
Probabilities*](http://www.aclweb.org/anthology/J95-2002).
36-
37-
## Motivation
38-
I made this library because I could not find an existing Java
39-
implementation of the Probabilistic Earley Parser.
40-
41-
I have made a stochastic CYK parser before, but I wanted something
42-
more top down that makes it easier to intervene in the parsing process,
43-
for instance when an unexpected token is encountered.
44-
45-
Furthermore, I needed a efficient parser that does not limit token types
46-
to strings.
4737

4838
## Usage
4939
You can use this project as a library in your Java application or as a standalone command-line app.
@@ -118,8 +108,8 @@ public class Example {
118108
// Note that tokens can be of multiple terminal types (homographs: "bank" as a noun or "bank" as a verb)
119109
private static final Terminal transitiveVerb = (StringTerminal) token -> token.obj.matches("(hit|chased)");
120110
// Some utility terminal types are pre-defined:
121-
private static final Terminal the = new CaseInsenstiveStringTerminal("the");
122-
private static final Terminal a = new CaseInsenstiveStringTerminal("a");
111+
private static final Terminal the = new CaseInsensitiveStringTerminal("the");
112+
private static final Terminal a = new CaseInsensitiveStringTerminal("a");
123113
private static final Terminal man = new ExactStringTerminal("man");
124114
private static final Terminal stick = new ExactStringTerminal("stick");
125115
private static final Terminal with = new ExactStringTerminal("with");
@@ -194,7 +184,7 @@ new ParseCallbacks.Builder()
194184
```
195185

196186
## Some notes on implementation
197-
The probability of a parse is defined as the product of the probalities all the applied rules. Usually,
187+
The probability of a parse is defined as the product of the probabilities of all the applied rules. Usually,
198188
we define probability as a number between 0 and 1 inclusive, and use common algebraic notions of addition and
199189
multiplication.
200190

src/main/java/org/leibnizcenter/cfg/algebra/semiring/Semiring.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
@SuppressWarnings("unused")
1515
public interface Semiring<T> {
1616
// @Deprecated
17-
// static boolean isRighSemiring(Semiring semiring) {
17+
// static boolean isRightSemiring(Semiring semiring) {
1818
// return isRightSemiring(semiring);
1919
// }
2020
//

src/main/java/org/leibnizcenter/cfg/algebra/semiring/dbl/ExpressionSemiring.java

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,17 @@ public abstract class ExpressionSemiring implements DblSemiring {
1414

1515
public final Atom ZERO_EXPRESSION = new Atom(zero());
1616

17-
public Resolvable times(Resolvable x, Resolvable y, Resolvable z) {
18-
if (isMultiplicativeIdentity(x)) return times(y, z);
19-
else if (isMultiplicativeIdentity(y)) return times(x, z);
20-
else if (isMultiplicativeIdentity(z)) return times(x, y);
21-
return new Times(x, y, z);
17+
public Resolvable times(Resolvable r1, Resolvable r2, Resolvable r3) {
18+
if (isMultiplicativeIdentity(r1)) return times(r2, r3);
19+
else if (isMultiplicativeIdentity(r2)) return times(r1, r3);
20+
else if (isMultiplicativeIdentity(r3)) return times(r1, r2);
21+
return new Times(r1, r2, r3);
2222
}
2323

24-
private Resolvable times(Resolvable x, Resolvable y) {
25-
if (isMultiplicativeIdentity(x)) return y;
26-
else if (isMultiplicativeIdentity(y)) return x;
27-
return new Times(x, y);
24+
private Resolvable times(Resolvable r1, Resolvable r2) {
25+
if (isMultiplicativeIdentity(r1)) return r2;
26+
else if (isMultiplicativeIdentity(r2)) return r1;
27+
return new Times(r1, r2);
2828
}
2929

3030
private boolean isMultiplicativeIdentity(Resolvable r) {
@@ -35,13 +35,14 @@ private boolean isAdditiveIdentity(Resolvable x) {
3535
return x instanceof Atom && ((Atom) x).value == this.zero();
3636
}
3737

38-
public Resolvable plus(Resolvable x, Resolvable y) {
39-
if (isAdditiveIdentity(x)) return y;
40-
else if (isAdditiveIdentity(y)) return x;
41-
else return new Plus(x, y);
38+
public Resolvable plus(Resolvable r1, Resolvable r2) {
39+
if (isAdditiveIdentity(r1)) return r2;
40+
else if (isAdditiveIdentity(r2)) return r1;
41+
else return new Plus(r1, r2);
4242
}
4343

4444

45+
@SuppressWarnings("WeakerAccess")
4546
public final class Plus extends Resolvable {
4647
private final Resolvable right;
4748
private final Resolvable left;
@@ -67,6 +68,7 @@ public String toString() {
6768
}
6869

6970

71+
@SuppressWarnings("WeakerAccess")
7072
public final class Times extends Resolvable {
7173
private final Resolvable right;
7274
private final Resolvable left;
@@ -102,11 +104,8 @@ public boolean equals(Object o) {
102104

103105
Times times = (Times) o;
104106

105-
if (!right.equals(times.right)) return false;
106-
if (!left.equals(times.left)) return false;
107-
if (right2 != null ? !right2.equals(times.right2) : times.right2 != null) return false;
107+
return right.equals(times.right) && left.equals(times.left) && (right2 != null ? right2.equals(times.right2) : times.right2 == null);
108108

109-
return true;
110109
}
111110

112111
@Override

src/main/java/org/leibnizcenter/cfg/category/Category.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
/**
1111
* A category in a grammar, also known as a type.
12-
* Categories are the atomic subparts that make up
12+
* Categories are the atomic sub-parts that make up
1313
* {@link Rule grammar rules}.
1414
* <p>
1515
* Categories can either be <em>terminal</em> or <em>non-terminal</em>. A
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,17 @@
99
* <p>
1010
* Created by maarten on 11-6-16.
1111
*/
12-
public class CaseInsenstiveStringTerminal implements StringTerminal {
12+
public class CaseInsensitiveStringTerminal implements StringTerminal {
1313
public final String string;
14+
@SuppressWarnings("WeakerAccess")
1415
public final Locale locale;
1516

16-
public CaseInsenstiveStringTerminal(String s) {
17+
public CaseInsensitiveStringTerminal(String s) {
1718
this.locale = Locale.ROOT;
1819
this.string = s.toLowerCase(locale);
1920
}
2021

21-
public CaseInsenstiveStringTerminal(String s, Locale locale) {
22+
public CaseInsensitiveStringTerminal(String s, Locale locale) {
2223
this.locale = locale;
2324
this.string = s.toLowerCase(locale);
2425
}
@@ -33,7 +34,7 @@ public boolean equals(Object o) {
3334
if (this == o) return true;
3435
if (o == null || getClass() != o.getClass()) return false;
3536

36-
CaseInsenstiveStringTerminal that = (CaseInsenstiveStringTerminal) o;
37+
CaseInsensitiveStringTerminal that = (CaseInsensitiveStringTerminal) o;
3738

3839
if (!string.equals(that.string)) return false;
3940
if (!locale.equals(that.locale)) return false;

src/main/java/org/leibnizcenter/cfg/earleyparser/CommandLine.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import java.util.stream.Stream;
1717

1818
/**
19+
* Interface for runnable jar
1920
* Created by maarten on 27-1-17.
2021
*/
2122
public class CommandLine {
@@ -34,19 +35,19 @@ public class CommandLine {
3435
*/
3536
public static void main(String[] args) {
3637
HandleArguments handleArguments = new HandleArguments(args).invoke();
37-
ParseTreeWithScore parse = Parser.getViterbiParseWithScore(
38-
handleArguments.getGoal(),
39-
handleArguments.getGrammar(),
40-
Stream.of(handleArguments.getTokens()).map(Token::of).collect(Collectors.toList())
41-
);
38+
ParseTreeWithScore parse = new Parser<>(handleArguments.getGrammar())
39+
.getViterbiParseWithScore(
40+
handleArguments.getGoal(),
41+
Stream.of(handleArguments.getTokens()).map(Token::of).collect(Collectors.toList())
42+
);
4243

4344
System.out.println(parse.score.semiring.toProbability(parse.score.getScore()));
4445
System.out.println(parse.parseTree);
4546
}
4647

4748

4849
private static class HandleArguments {
49-
private String[] args;
50+
final private String[] args;
5051
private String[] tokens;
5152
private Grammar<String> grammar;
5253
private NonTerminal goal;

src/main/java/org/leibnizcenter/cfg/earleyparser/Complete.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ private void completeNoViterbi(final int position,
130130
/* */
131131
Collection<State> newCompletedStates = null;
132132
for (Delta delta : deltas) {
133-
//todo these plus operation may be parallelized a little?
133+
//todo these plus operations may be parallelized a little?
134134
addForwardScores.plus(delta.state, delta.addForward);
135135
addInnerScores.plus(delta.state, delta.addInner);
136136

src/main/java/org/leibnizcenter/cfg/earleyparser/DeferredStateScoreComputations.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ public void plus(State s, Resolvable addValue) {
3838
this.states.put(s, current);
3939
}
4040

41-
public Complete.Delta addForward(Complete.Delta delta) {
41+
Complete.Delta addForward(Complete.Delta delta) {
4242
plus(delta.state, delta.addForward);
4343
return delta;
4444
}

src/main/java/org/leibnizcenter/cfg/earleyparser/ParseTree.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,11 @@ public String toString() {
9696
}
9797

9898
private void toString(StringBuilder sb, String prefix, boolean isTail) {
99-
sb.append(prefix + (isTail ? "└── " : "├── ") + (
100-
category.toString() + ((this instanceof Token) ? (" (" + ((Token) this).token + ")") : "")
101-
) + "\n");
99+
sb.append(prefix)
100+
.append(isTail ? "└── " : "├── ")
101+
.append(category.toString())
102+
.append((this instanceof Token) ? (" (" + ((Token) this).token + ")") : "")
103+
.append("\n");
102104
if (children != null) {
103105
for (int i = 0; i < children.size() - 1; i++) {
104106
children.get(i).toString(sb, prefix + (isTail ? " " : "│ "), false);

src/main/java/org/leibnizcenter/cfg/earleyparser/Parser.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public Parser(Grammar<T> grammar) {
4040
* @param goal Goal category, typically S for Sentence
4141
* @param grammar Grammar to apply to tokens
4242
* @param tokens list of tokens to parse
43-
* @return Probability that given string of tokens mathces gven non-terminal with given grammar
43+
* @return Probability that given string of tokens matches given non-terminal with given grammar
4444
*/
4545
@Deprecated
4646
public static <T> double recognize(NonTerminal goal,
@@ -55,7 +55,7 @@ public static <T> double recognize(NonTerminal goal,
5555
* @param goal Goal category, typically S for Sentence
5656
* @param grammar Grammar to apply to tokens
5757
* @param tokens list of tokens to parse
58-
* @return Probability that given string of tokens mathces gven non-terminal with given grammar
58+
* @return Probability that given string of tokens matches given non-terminal with given grammar
5959
*/
6060
@Deprecated
6161
public static <T> double recognize(NonTerminal goal,
@@ -207,7 +207,7 @@ public static ParseTree getViterbiParse(State state, Chart chart) {
207207
*
208208
* @param goal Goal category, typically S for Sentence
209209
* @param tokens list of tokens to parse
210-
* @return Probability that given string of tokens mathces gven non-terminal with given grammar
210+
* @return Probability that given string of tokens matches given non-terminal with given grammar
211211
*/
212212
public double recognize(NonTerminal goal, Iterable<Token<T>> tokens) {
213213
return recognize(goal, tokens, null);
@@ -218,7 +218,7 @@ public double recognize(NonTerminal goal, Iterable<Token<T>> tokens) {
218218
*
219219
* @param goal Goal category, typically S for Sentence
220220
* @param tokens list of tokens to parse
221-
* @return Probability that given string of tokens mathces gven non-terminal with given grammar
221+
* @return Probability that given string of tokens matches given non-terminal with given grammar
222222
*/
223223
public double recognize(NonTerminal goal,
224224
Iterable<Token<T>> tokens,
@@ -318,8 +318,8 @@ public ChartWithInputPosition<T> parseAndCountTokens(NonTerminal S,
318318
int i = 0;
319319

320320
final Complete<T> complete = new Complete<>(chart.stateSets, true);
321-
final Scan<T> scan = new Scan<T>(chart.stateSets);
322-
final Predict<T> predict = new Predict<T>(chart.stateSets);
321+
final Scan<T> scan = new Scan<>(chart.stateSets);
322+
final Predict<T> predict = new Predict<>(chart.stateSets);
323323

324324
for (TokenWithCategories<T> token : TokenWithCategories.from(tokens, grammar)) {
325325
predict.predict(callbacks, chart, i, token);

0 commit comments

Comments
 (0)