diff --git a/README.md b/README.md
index a68201fe..bf4ce09d 100644
--- a/README.md
+++ b/README.md
@@ -46,6 +46,7 @@ coqc coqlib.v
```
Our system assigns semantics to CCG structures. At the moment, we support C&C for English, and Jigg for Japanese.
+If you are working with templates that require semantic tags, you will also need a universal semantic tagger.
### Installing [C&C parser](http://www.cl.cam.ac.uk/~sc609/candc-1.00.html) (for English)
@@ -72,6 +73,19 @@ Simply do:
The command above will download Jigg, its models, and create the file `ja/jigg_location.txt` where the path to Jigg is specified. That is all.
+### Installing [semtagger](https://github.com/ginesam/semtagger) (for English, optional)
+
+You can optionally download and install a semantic tagger by running the following
+script from the ccg2lambda directory:
+
+```bash
+./en/install_semtagger.sh
+```
+
+This will generate a file `en/semtagger_location.txt` with the path to the semantic tagger.
+Note that after downloading, you must follow the instructions given [here](https://github.com/ginesam/semtagger) in order to train a
+tagging model.
+
## Using the Semantic Parser
Let's assume that we have a file `sentences.txt` with one sentence per line,
diff --git a/en/emnlp2015exp.sh b/en/emnlp2015exp.sh
index 6d37b568..868be9ad 100755
--- a/en/emnlp2015exp.sh
+++ b/en/emnlp2015exp.sh
@@ -65,6 +65,12 @@ parser_cmd="${parser_dir}/bin/candc \
--candc-printer xml \
--input"
+# Location of the semtagger tool; stays empty when the location file is absent.
+semtagger_dir=""
+if [ -f en/semtagger_location.txt ]; then
+  semtagger_dir="$(cat en/semtagger_location.txt)"
+fi
+
# These variables contain the names of the directories where intermediate
# results will be written.
plain_dir=${dataset}"_plain"
@@ -121,6 +127,25 @@ for f in ${plain_dir}/*.tok; do
python en/candc2transccg.py ${parsed_dir}/${base_filename}.candc.xml \
> ${parsed_dir}/${base_filename/.tok/}.xml
fi
+  # inject semantic tag information when using semtagger
+  if [ -n "$semtagger_dir" ] && [ -f "$semtagger_dir/run.sh" ]; then
+    # common path prefix of the parsed XML and its temporary companions
+    sem_base="${parsed_dir}/${base_filename/.tok/}"
+    cp "${sem_base}.xml" "${sem_base}.xml.old"
+    python scripts/xml2conll.py "${sem_base}.xml.old" > "${sem_base}.off"
+    # NOTE(review): run.sh is sourced, so an exit/cd inside it would affect
+    # this script as well -- confirm that sourcing is required
+    . "${semtagger_dir}/run.sh" --predict \
+      --input "${sem_base}.off" \
+      --output "${sem_base}.sem"
+    # rewrite the parsed XML in place with stag/costag attributes added
+    python scripts/xml_add_stag.py \
+      "${sem_base}.xml.old" \
+      "${sem_base}.sem" \
+      "${sem_base}.xml"
+    # drop the temporary files
+    rm -f "${sem_base}.xml.old" "${sem_base}.off" "${sem_base}.sem"
+  fi
done
echo
diff --git a/en/fracas.md b/en/fracas.md
index b69d1230..951b56c4 100644
--- a/en/fracas.md
+++ b/en/fracas.md
@@ -1,6 +1,6 @@
# Running the RTE pipeline on FraCas.
-First, ensure that you have downloaded C&C parser and wrote its location in the file `en/candc_location.txt`.
+First, ensure that you have downloaded the C&C parser and written its location in the file `en/candc_location.txt`. If you want to use semantic templates that rely on semantic tags, also ensure that you have downloaded semtagger, written its location in the file `en/semtagger_location.txt`, and trained a tagging model.
Second, you need to download the copy of [FraCaS provided by MacCartney and Manning (2007)](http://www-nlp.stanford.edu/~wcmac/downloads/fracas.xml):
@@ -16,7 +16,14 @@ git checkout tags/fracas
./en/emnlp2015exp.sh en/semantic_templates_en_emnlp2015.yaml fracas.xml
```
-This script will:
+If you are using semantic tags in your templates, you can similarly do:
+
+```bash
+git checkout semtag-fracas
+./en/emnlp2015exp.sh en/semantic_templates_en_semtags_emnlp2015.yaml fracas.xml
+```
+
+The scripts will:
1. Extract the plain text corresponding to the hypotheses and conclusions of all fracas problems. These hypotheses and conclusions are stored in a different file for each fracas problem, under the directory `fracas.xml_plain`. The gold entailment judgment is stored in files `fracas.xml_plain/*.answer`.
2. Parse the hypotheses and conclusions using C&C parser, and save them under the directory `fracas.xml_parsed`.
diff --git a/en/install_semtagger.sh b/en/install_semtagger.sh
new file mode 100755
index 00000000..bb5d26af
--- /dev/null
+++ b/en/install_semtagger.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+#
+# Download semtagger from https://github.com/ginesam/semtagger into ./semtagger
+# and record its path in en/semtagger_location.txt (run from the ccg2lambda dir).
+
+semtagger_url="https://github.com/ginesam/semtagger.git"
+semtagger_dir="$(pwd)/semtagger"
+# reuse an existing clone; never record a location after a failed download
+[ -d "${semtagger_dir}/.git" ] || git clone "$semtagger_url" "$semtagger_dir" || exit 1
+echo "$semtagger_dir" > en/semtagger_location.txt
diff --git a/en/semantic_templates_en_semtags_emnlp2015.yaml b/en/semantic_templates_en_semtags_emnlp2015.yaml
new file mode 100644
index 00000000..c38d8648
--- /dev/null
+++ b/en/semantic_templates_en_semtags_emnlp2015.yaml
@@ -0,0 +1,570 @@
+#
+# Copyright 2015 Koji Mineshima
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+##### Semantic templates for English using semantic tags #####
+
+## NOTE: In some cases we do still check the surface form of words due to the
+## existence of certain specialized predicates that are hard to invoke using
+## semantic tags in isolation.
+
+
+##### Unary lexical rules #####
+
+# existential type-raising from N to NP
+- category: NP
+ rule: lex
+ semantics: \E F1 F2. exists x. (E(x) & F1(x) & F2(x))
+
+# Unary rule for negative NPs
+- category: NP
+ rule: lex
+ semantics: \C F1 F2. - exists x. (C(x) & F1(x) & F2(x))
+ child_any_stag: NOT
+
+# Unary rule for proportional NPs
+- category: NP
+ rule: lex
+ semantics: \C. C
+ child_any_base: most
+
+# existential type-raising for proper nouns from N to NP
+- category: NP
+ rule: lex
+ semantics: \E F1 F2. exists x. ((x = E) & F1(E) & F2(E))
+ child0_costag: NAM
+
+- category: NP
+ rule: lex
+ semantics: \E F1 F2. exists x. ((x = E) & F1(E) & F2(E))
+ child_any_costag: NAM
+ child_any_stag: DIS
+
+- category: N\N
+ rule: lex
+ semantics: \V F x. (V(\G1 G2.G2(x)) & F(x))
+
+- category: NP\NP
+ rule: lex
+ semantics: \V Q F1. Q(\x.(V(\F2 F3.F3(x)) & F1(x)))
+
+- category: S[X=true]/(S[X=true]\NP)
+ rule: tr
+ semantics: \Q V. V(Q)
+
+
+##### Binary lexical rules #####
+
+## universal interpretation of plural definite descriptions
+- category: NP
+ rule: fa
+ semantics: \L F1 F2 F3. forall x. (F1(x) -> (F2(x) -> F3(x)))
+ child0_stag: DEF
+ child_any_pos: NNPS
+
+## Conjunction
+- category: S\S
+ rule: conj
+ semantics: \L S1 S2. (S1 & S2)
+ child0_stag: AND
+
+- category: NP\NP
+ rule: conj
+ semantics: \L Q1 Q2 F1 F2. (Q2(F1, F2) & Q1(F1, F2))
+ child0_stag: GRP
+
+- category: NP\NP
+ rule: conj
+ semantics: \L Q1 Q2 F1 F2. (Q2(F1, F2) & Q1(F1, F2))
+ child0_stag: AND
+
+- category: N\N
+ rule: conj
+ semantics: \L F1 F2 x. (F1(x) & F2(x))
+ child0_stag: AND
+
+- category: (S\NP)\(S\NP)
+ rule: conj
+ semantics: \L V1 V2 Q. Q(\w.TrueP, \x.(V1(\F1 F2.F2(x)) & V2(\F1 F2.F2(x))))
+ child0_stag: COO
+
+- category: (N/N)\(N/N)
+ rule: conj
+ semantics: \L M1 M2 F x. M1(M2(F),x)
+ child0_stag: AND
+
+- category: (N/N)/(N/N)
+ rule: conj
+ semantics: \L M1 M2 F x. M1(M2(F),x)
+ child0_stag: AND
+
+## Disjunction
+- category: S\S
+  rule: conj
+  semantics: \L S1 S2. (S1 | S2)
+  child0_stag: DIS
+
+- category: NP\NP
+ rule: conj
+ semantics: \L Q1 Q2 F1 F2. (Q2(F1, F2) | Q1(F1, F2))
+ child0_stag: DIS
+
+- category: N/N
+ rule: conj
+ semantics: \L F1 F2 x. (F1(x) | F2(x))
+ child0_stag: DIS
+
+- category: N\N
+ rule: conj
+ semantics: \L F1 F2 x. (F1(x) | F2(x))
+ child0_stag: DIS
+
+- category: (S\NP)\(S\NP)
+ rule: conj
+ semantics: \L V1 V2 Q. Q(\w.TrueP, \x.(V1(\F1 F2.F2(x)) | V2(\F1 F2.F2(x))))
+ child0_stag: DIS
+
+- category: (N/N)\(N/N)
+ rule: conj
+ semantics: \L M1 M2 F x. (M1(F,x) | M2(F,x))
+ child0_stag: DIS
+
+- category: (N/N)/(N/N)
+ rule: conj
+ semantics: \L M1 M2 F x. M1(M2(F),x)
+ child0_stag: DIS
+
+## sentence final particle
+- category: S
+ rule: rp
+ semantics: \S D. S
+
+## Rules for commas
+- category: S
+ rule: lp
+ semantics: \L S. S
+ child0_stag: NIL
+
+- category: NP
+ rule: rp
+ semantics: \L R. L
+ child0_stag: NIL
+
+- category: S\NP
+ rule: rp
+ semantics: \L R. L
+ child0_stag: NIL
+
+- category: NP\NP
+ rule: conj
+ semantics: \L Q1 Q2 F1 F2. (Q2(F1, F2) & Q1(F1, F2))
+ child0_stag: EQU
+
+- category: NP\NP
+ rule: conj
+ semantics: \L Q1 Q2 F1 F2. (Q2(F1, F2) | Q1(F1, F2))
+ child0_stag: NIL
+ child_any_stag: DIS
+
+- category: S\S
+ rule: conj
+ semantics: \L S. S
+ child0_stag: NIL
+
+
+##### Noun Phrases and Pronouns #####
+
+## Common nouns
+- category: N
+ semantics: \E x. E(x)
+ coq_type: Entity -> Prop
+
+## Proper nouns
+- category: N
+ semantics: \E. E
+ costag: NAM
+
+- category: N
+ semantics: \E. E
+ costag: UNE
+ pos: NNP
+
+# default existential interpretation
+- category: NP
+ semantics: \E F1 F2. exists x. (F1(x) & F2(x))
+
+- category: NP
+ semantics: \E F1 F2. forall x. (F1(x) -> F2(x))
+ stag: AND
+
+- category: NP
+ semantics: \E F1 F2. forall x. (_people(x) -> (F1(x) -> F2(x)))
+ surf: everyone
+
+- category: NP
+ semantics: \E F1 F2. exists x. (two(x) & F1(x) & F2(x))
+ surf: both
+
+- category: NP
+ semantics: \E F1 F2. (exists x. (F1(x) & two(x)) & forall x. (F1(x) -> - F2(x)))
+ surf: neither
+
+
+##### Determiners #####
+
+# default existential interpretation
+- category: NP[nb=true]/N
+ semantics: \E F1 F2 F3. exists x. (F1(x) & F2(x) & F3(x))
+
+- category: NP[nb=true]/N
+ semantics: \E F1 F2 F3. forall x. (F1(x) -> (F2(x) -> F3(x)))
+ stag: AND
+
+- category: NP[nb=true]/N
+ semantics: \E F1 F2 F3. exists x. (F1(x) & F2(x) & F3(x))
+ stag: DIS
+
+- category: NP[nb=true]/N
+ semantics: \E F1 F2 F3. exists x. (F1(x) & F2(x) & F3(x))
+ stag: DEF
+
+- category: NP[nb=true]/N
+ semantics: \E F1 F2 F3. forall x. (F1(x) -> (F2(x) -> - F3(x)))
+ stag: NOT
+
+- category: NP[nb=true]/N
+ semantics: \E F1 F2 F3. Most(\x(F1(x) & F2(x)), F3)
+ surf: most
+
+- category: NP[nb=true]/N
+ semantics: \E F1 F2 F3. exists x. (two(x) & F1(x) & F2(x) & F3(x))
+ surf: both
+
+- category: NP[nb=true]/N
+ semantics: \E F1 F2 F3. (exists x. (F1(x) & two(x)) & forall x. (F1(x) -> (F2(x) -> - F3(x))))
+ surf: neither
+
+- category: NP/NP
+ semantics: \E Q F1 F2. forall x. (Q(\w.TrueP, \y.((x = y) & F1(y))) -> F2(x))
+ stag: AND
+
+- category: NP\NP
+ semantics: \E Q F1 F2. forall x. (Q(\w.TrueP, \y.((x = y) & F1(y))) -> F2(x))
+ stag: AND
+
+
+##### Adverbs #####
+
+- category: (N/N)/(N/N)
+ semantics: \E A F x. (E(x) & A(F)(x))
+ coq_type: Entity -> Prop
+
+- category: (S\NP)\(S\NP)
+ semantics: \E V Q. Q(\w.TrueP, \x.(E(x) & V(\F1 F2.F2(x))))
+ coq_type: Entity -> Prop
+
+- category: (S\NP)/(S\NP)
+ semantics: \E V Q. Q(\w.TrueP, \x.(E(x) & V(\F1 F2.F2(x))))
+ coq_type: Entity -> Prop
+
+- category: (S/S)/NP
+ semantics : \E Q S. (E(Q(\w.TrueP, \w.TrueP),S) & S)
+ coq_type: Prop -> Prop -> Prop
+
+- category: ((S\NP)\(S\NP))/S[dcl=true]
+ semantics: \E S V Q. Q(\w.TrueP, \x.(S & V(\F1 F2.F2(x)) & E(S, V(\F1 F2.F2(x)))))
+ coq_type: Entity -> Prop
+
+- category: ((S\NP)\(S\NP))/((S\NP)\(S\NP))
+ semantics: \E A V Q. Q(\w.TrueP, \x.(A(V)(\F1 F2.(F2(x) & E(x)))))
+ coq_type: Entity -> Prop
+
+
+##### Modifiers #####
+- category: N/N
+ semantics: \E F x. (E(x) & F(x))
+ coq_type: Entity -> Prop
+
+- category: N/N
+ semantics: \E F1 F2 F3. Most(\x.(F1(x) & F2(x)), F3)
+ surf: most
+
+- category: N\N
+ semantics: \E F x. (E(x) & F(x))
+ coq_type: Entity -> Prop
+
+- category: (N/N)\NP
+ semantics: \E Q F x. Q(\w.TrueP, \y.(E(x,y) & F(x)))
+ coq_type: Entity -> Entity -> Prop
+
+
+##### Prepositions #####
+
+- category: (NP\NP)/NP
+ semantics: \E Q1 Q2 F1. Q2(\x.(Q1(\w.TrueP, \y.E(x,y)) & F1(x)))
+ coq_type: Entity -> Entity -> Prop
+
+- category: PP/NP
+ semantics: \E Q x. Q(\w.TrueP, \y.E(x,y))
+ coq_type: Entity -> Entity -> Prop
+
+- category: PP/(S[ng=true]\NP)
+ semantics: \E V x. V(\F1 F2.F2(x))
+ coq_type: Entity -> Entity -> Prop
+
+- category: (NP\NP)/S
+ semantics: \E S Q F1 F2. (Q(F1,F2) & S)
+
+- category: ((S\NP)\(S\NP))/NP
+ semantics: \E Q1 V Q2. Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.(E(x,y) & V(\F1 F2.F2(x)))))
+ coq_type: Entity -> Entity -> Prop
+
+- category: ((S\NP)\(S\NP))/(S[ng=true]\NP)
+ semantics: \E V1 V2 Q. Q(\w.TrueP, \x.V1(\F1 F2.(V2(\G1 G2.G2(x)) & F2(x))))
+
+- category: ((S\NP)\(S\NP))/(S[ng=true]\NP)
+ semantics: \E V1 V2 Q. Q(\w.TrueP, \x.V1(\F1 F2.(V2(\G1 G2.G2(x)) & -F2(x))))
+ stag: NOT
+
+- category: (NP\NP)/(S[ng=true]\NP)
+ semantics: \E V Q F. Q(\x.V(\G1 G2.(G2(x) & F(x))))
+
+
+##### Verb phrases #####
+
+## intransitive verbs
+- category: S\NP
+ semantics: \E Q. Q(\w.TrueP, \x.E(x))
+ coq_type: Entity -> Prop
+
+- category: S\NP
+ semantics: \E Q. Q(\w.TrueP, \x.(Prog(E(x))))
+ coq_type: Entity -> Prop
+ stag: EXG
+
+## transitive verbs
+- category: (S\NP)/NP
+ semantics: \E Q1 Q2. Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.E(x,y)))
+ coq_type: Entity -> Entity -> Prop
+
+- category: (S\NP)/NP
+ semantics: \E Q1 Q2. Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.Prog(E(x,y))))
+ coq_type: Entity -> Entity -> Prop
+ stag: EXG
+
+- category: ((S\NP)/PP)/NP
+ semantics: \E Q1 F Q2. Q2(\w.TrueP, \x.(Q1(\w.TrueP, \y.E(x,y)) & F(x)))
+ coq_type: Entity -> Entity -> Prop
+
+- category: (S[dcl=true]\NP)/(S[to=true]\NP)
+ semantics: \E V Q. Q(\w.TrueP, \x.V(\F1 F2.E(x,F2(x))))
+ coq_type: Entity -> Prop -> Prop
+
+- category: ((S\NP)/(S[ng=true]\NP))/NP
+ semantics: \E Q1 V Q2. Q2(\w.TrueP, \x.E(x,Q1(\w.TrueP, \y.V(\F1 F2.F2(y)))))
+ coq_type: Entity -> Entity -> Entity -> Prop
+
+- category: ((S[dcl=true]\NP)/(S[to=true]\NP))/NP
+ semantics: \E Q1 V Q2. Q2(\w.TrueP, \x.E(x,Q1(\w.TrueP, \y.V(\F1 F2.F2(y)))))
+ coq_type: Entity -> Prop -> Prop
+
+- category: ((S\NP)/NP)/NP
+ semantics: \E Q1 Q2 Q3. Q3(\w.TrueP,\x1.Q2(\w.TrueP,\x2.Q1(\w.TrueP,\x3.E(x1,x2,x3))))
+ coq_type: Entity -> Entity -> Entity -> Prop
+
+- category: (S\NP)/PP
+ semantics: \E F Q. Q(\w.TrueP, \x.(E(x) & F(x)))
+ coq_type: Entity -> Prop
+
+- category: (S\NP)/S
+ semantics: \E S Q. Q(\w.TrueP, \x.E(x,S))
+ coq_type: Entity -> Prop -> Prop
+
+- category: (S\NP)/S[em=true]
+ semantics: \E S Q. Q(\w.TrueP, \x.E(x,S))
+ costag: EVE
+
+- category: (S\NP)/S[qem=true]
+ semantics: \E S Q. Q(\w.TrueP, \x.E(x,S))
+ costag: EVE
+
+- category: ((S[dcl=true]\NP)/(S[b=true]\NP))/NP
+ semantics: \E Q1 V Q2. Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.(E(x,y) & V(\F1 F2.F2(y)))))
+ coq_type: Entity -> Entity -> Prop
+
+
+##### Copula #####
+
+- category: (S\NP)/NP
+ semantics: \E Q1 Q2. Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.(x = y)))
+ base: be
+
+- category: (S[dcl=true]\NP)/PP
+ semantics: \E F Q. Q(\w.TrueP, F)
+ costag: TNS
+
+- category: (S\NP)/(S[pss=true]\NP)
+ semantics: \E V Q. Q(\w.TrueP, \x.V(\F1 F2.F2(x)))
+
+- category: (S\NP)/(S[adj=true]\NP)
+ semantics: \E X. X
+ costag: TNS
+
+- category: (S\NP)/(S[adj=true]\NP)
+ semantics: \E X. X
+ costag: TNS
+
+- category: ((S[dcl=true]\NP[expl=true])/S[em=true])/(S[adj=true]\NP)
+ semantics: \E V S Q. V(\F1 F2.F2(S))
+
+- category: (S[dcl=true]\(S[adj=true]\NP))/NP
+ semantics: \E Q1 Q2. Q2(\w.TrueP, \x.Q1(\w.TrueP, \y.E(x,y)))
+ coq_type: Entity -> Entity -> Prop
+
+
+##### Negation #####
+
+- category: (S\NP)\(S\NP)
+ semantics: \E V Q. Q(\w.TrueP, \x.-V(\F1 F2.F2(x)))
+ stag: NOT
+
+- category: (S[adj=true]\NP)/(S[adj=true]\NP)
+ semantics: \E V Q. Q(\w.TrueP, \x.-V(\F1 F2.F2(x)))
+ stag: NOT
+
+
+##### Adjectives #####
+
+- category: S[adj=true]\NP
+ semantics: \E Q. Q(\w.TrueP, \x.E(x))
+ coq_type: Entity -> Prop
+
+- category: S[adj=true]\NP
+ semantics: \E Q. Q(\w.TrueP, \x.E(x))
+ stag: IST
+
+- category: S[adj=true]\NP
+ semantics: \E Q. Q(\w.TrueP, \x.E(x))
+ stag: SST
+
+- category: N/N
+ semantics: \E F x. E(F(x))
+ stag: PST
+
+- category: (S[adj=true]\NP)/S[em=true]
+ semantics: \E S Q. Q(\w.TrueP, \x.E(x,S))
+ coq_type: Entity -> Prop -> Prop
+
+
+##### Auxiliary verbs #####
+
+- category: (S[dcl=true]\NP)/(S[b=true]\NP)
+ semantics: \E V Q. Q(\w.TrueP, \x.V(\F1 F2.E(F2(x))))
+ coq_type: Prop -> Prop
+
+- category: (S[dcl=true]\NP)/(S[b=true]\NP)
+ semantics: \E V. V
+ costag: TNS
+
+
+##### Floating quantifiers #####
+
+- category: (S\NP)/(S\NP)
+ semantics: \E V Q. forall x. (Q(\w.TrueP, \y.(x = y)) -> V(\F1 F2.F2(x)))
+ stag: AND
+
+- category: (S\NP)\(S\NP)
+ semantics: \E V Q. forall x. (Q(\w.TrueP, \y.(x = y)) -> V(\F1 F2.F2(x)))
+ stag: AND
+
+
+##### Relative clauses #####
+
+- category: (NP\NP)/(S[dcl=true]\NP)
+ semantics: \E V Q F1. Q(\x. (V(\F2 F3. F3(x)) & F1(x)))
+
+- category: NP/(S[dcl=true]/NP)
+ semantics: \E V F1 F2. exists x. (V(\G1 G2.G2(x)) & F1(x) & F2(x))
+
+- category: (NP\NP)/(S[dcl=true]/NP)
+ semantics: \E V Q F1. Q(\x. (V(\F2 F3. F3(x)) & F1(x)))
+
+
+##### Complementizers #####
+
+- category: S[em=true]/S[dcl=true]
+ semantics: \E X. X
+
+
+##### Connectives #####
+
+## default conjunctive interpretation
+- category: (S/S)/S[dcl=true]
+ semantics: \E S1 S2. (S1 & S2 & E(S1, S2))
+ coq_type: Prop -> Prop -> Prop
+
+- category: (S/S)/S[dcl=true]
+ semantics: \E S1 S2. (S1 -> S2)
+ stag: IMP
+
+- category: (S/S)/S[dcl=true]
+ semantics: \E S1 S2. (S1 & S2)
+ costag: DSC
+
+- category: ((S\NP)\(S\NP))/S[dcl=true]
+ semantics: \E S V Q. E(S, V(Q))
+ stag: REL
+
+- category: (S\NP)/(S\NP)
+ semantics: \E V Q. V(Q)
+ stag: ALT
+
+- category : N/N
+ semantics: \E X.X
+ costag: LOG
+
+
+##### Semantically empty expressions #####
+
+- category: NP[thr=true]
+ semantics: \E F1 F2. exists x.F2(x)
+
+- category: (S[to=true]\NP)/(S[b=true]\NP)
+ semantics: \E X. X
+
+- category: S[asup=true]\NP
+ semantics: \E X. X
+
+- category: (S/S)/(S[asup=true]\NP)
+ semantics: \E X Y. Y
+
+
+##### Possessive particle #####
+
+- category: (NP[nb=true]/N)\NP
+ semantics: \E Q F1 F2 F3. exists x.((Q(\w.TrueP, \y.Rel(x, y)) & F1(x)) & F2(x) & F3(x))
+ stag: HAS
+
+
+##### Sentence final particle #####
+
+- category: .
+ semantics: \S X. X
+ stag: NIL
+
+- category: =true,
+ semantics: \S X. X
+ stag: NIL
+
+
diff --git a/scripts/visualization_tools.py b/scripts/visualization_tools.py
index f9ff1573..aa63c91a 100644
--- a/scripts/visualization_tools.py
+++ b/scripts/visualization_tools.py
@@ -36,6 +36,7 @@
kLexicalColor = 'Black'
kEntityColor = 'Green'
kPosColor = 'Green'
+kStagColor = 'Fuchsia'
# The full list of colors is:
# Black Green Silver Lime Gray Olive White Maroon Red Purple Fuchsia Yellow Navy
# Blue Teal Aqua
@@ -97,6 +98,13 @@ def get_pos_mathml(pos):
+ pos \
+ "\n"
+def get_stag_mathml(stag):
+ return "" \
+ + stag \
+ + "\n"
+
def get_semantics_mathml(semantics):
return "" + pos3_mathml
if pos == '.':
- mathml_str = get_fraction_mathml(category_mathml, surf_mathml, '0')
+ if stag:
+ mathml_stag_str = get_fraction_mathml(category_mathml, stag_mathml, '0')
+ mathml_str = get_fraction_mathml(mathml_stag_str, surf_mathml, '0')
+ else:
+ mathml_str = get_fraction_mathml(category_mathml, surf_mathml, '0')
else:
mathml_pos_str = get_fraction_mathml(category_mathml, pos_mathml, '0')
- mathml_str = get_fraction_mathml(mathml_pos_str, surf_mathml, '0')
+ if stag:
+ mathml_stag_str = get_fraction_mathml(mathml_pos_str, stag_mathml, '0')
+ mathml_str = get_fraction_mathml(mathml_stag_str, surf_mathml, '0')
+ else:
+ mathml_str = get_fraction_mathml(mathml_pos_str, surf_mathml, '0')
elif len(ccg_node) == 1:
mathml_str_child = convert_node_to_mathml(ccg_node[0], sem_tree, tokens)
rule = ccg_node.get('rule')
diff --git a/scripts/xml2conll.py b/scripts/xml2conll.py
new file mode 100644
index 00000000..6f668b1a
--- /dev/null
+++ b/scripts/xml2conll.py
@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2018 Joan Gines i Ametlle
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import string
+from lxml import etree
+
+# source xml file to extract tokens from
+ifile = sys.argv[1]
+
+# emit one token surface form per line; sentences are separated by a
+# single empty line (none before the first, none after the last)
+root = etree.parse(ifile).getroot()
+first_sentence = True
+
+for sentence in root.iter('sentence'):
+    if not first_sentence:
+        print('')
+    first_sentence = False
+    for tok in sentence[0].findall('token'):
+        print(tok.get('surf'))
+
diff --git a/scripts/xml_add_stag.py b/scripts/xml_add_stag.py
new file mode 100644
index 00000000..b7dbdaf9
--- /dev/null
+++ b/scripts/xml_add_stag.py
@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2018 Joan Gines i Ametlle
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import codecs
+import string
+from lxml import etree
+
+# mapping from each fine-grained sem-tag to its coarse-grained class
+fine2coarse = {
+
+    # anaphoric
+    'PRO': 'ANA',
+    'DEF': 'ANA',
+    'HAS': 'ANA',
+    'REF': 'ANA',
+    'EMP': 'ANA',
+
+    # speech act
+    'GRE': 'ACT',
+    'ITJ': 'ACT',
+    'HES': 'ACT',
+    'QUE': 'ACT',
+
+    # attribute
+    'QUC': 'ATT',
+    'QUV': 'ATT',
+    'COL': 'ATT',
+    'IST': 'ATT',
+    'SST': 'ATT',
+    'PRI': 'ATT',
+    'DEG': 'ATT',
+    'INT': 'ATT',
+    'REL': 'ATT',
+    'SCO': 'ATT',
+
+    # comparative
+    'EQU': 'COM',
+    'MOR': 'COM',
+    'LES': 'COM',
+    'TOP': 'COM',
+    'BOT': 'COM',
+    'ORD': 'COM',
+
+    # unnamed entity
+    'CON': 'UNE',
+    'ROL': 'UNE',
+    'GRP': 'UNE',
+
+    # deixis
+    'DXP': 'DXS',
+    'DXT': 'DXS',
+    'DXD': 'DXS',
+
+    # logical
+    'ALT': 'LOG',
+    'XCL': 'LOG',
+    'NIL': 'LOG',
+    'DIS': 'LOG',
+    'IMP': 'LOG',
+    'AND': 'LOG',
+
+    # modality
+    'NOT': 'MOD',
+    'NEC': 'MOD',
+    'POS': 'MOD',
+
+    # discourse
+    'SUB': 'DSC',
+    'COO': 'DSC',
+    'APP': 'DSC',
+    'BUT': 'DSC',
+
+    # named entity
+    'PER': 'NAM',
+    'GPE': 'NAM',
+    'GPO': 'NAM',
+    'GEO': 'NAM',
+    'ORG': 'NAM',
+    'ART': 'NAM',
+    'HAP': 'NAM',
+    'UOM': 'NAM',
+    'CTC': 'NAM',
+    'URL': 'NAM',
+    'LIT': 'NAM',
+    'NTH': 'NAM',
+
+    # events
+    'EXS': 'EVE',
+    'ENS': 'EVE',
+    'EPS': 'EVE',
+    'EXG': 'EVE',
+    'EXT': 'EVE',
+
+    # tense and aspect
+    'NOW': 'TNS',
+    'PST': 'TNS',
+    'FUT': 'TNS',
+    'PRG': 'TNS',
+    'PFT': 'TNS',
+
+    # temporal entity
+    'DAT': 'TIM',
+    'DOM': 'TIM',
+    'YOC': 'TIM',
+    'DOW': 'TIM',
+    'MOY': 'TIM',
+    'DEC': 'TIM',
+    'CLO': 'TIM',
+}
+# source xml file to inject tokens to
+ifile = sys.argv[1]
+
+# source file with tagged sentences
+semfile = sys.argv[2]
+
+# output file
+ofile = sys.argv[3]
+
+# extract semantic tags: stags[i] lists the tags of the i-th sentence; the
+# tag is the first of two tab-separated fields, a blank line ends a sentence
+stags = [[]]
+with codecs.open(semfile, mode = 'r', errors = 'ignore', encoding = 'utf-8') as sem:
+    for line in sem:
+        # drop the line terminator; rstrip also handles CRLF separator lines
+        line = line.rstrip('\r\n')
+        if line:
+            tag, _ = line.split('\t')
+            stags[-1].append(tag)
+        else:
+            stags.append([])
+
+# navigate the tags contained in the xml tree
+tree = etree.parse(ifile)
+root = tree.getroot()
+
+
+def annotate(node, stag):
+    """Store the fine tag in 'stag' and its coarse class in 'costag'.
+
+    Tags missing from fine2coarse get the coarse class 'UNK'.
+    """
+    node.set('stag', stag)
+    node.set('costag', fine2coarse.get(stag, 'UNK'))
+
+
+# the i-th sentence of the XML takes the i-th tag list of stags
+for sent_index, sent in enumerate(root.iter('sentence')):
+    # tokens of the first child: the i-th token takes the i-th tag
+    word_index = 0
+    for token in sent[0].findall('token'):
+        annotate(token, stags[sent_index][word_index])
+        word_index += 1
+
+    # spans of the second child: only spans with a surface form take a tag
+    word_index = 0
+    for span in sent[1].findall('span'):
+        if span.get('surf'):
+            annotate(span, stags[sent_index][word_index])
+            word_index += 1
+
+# write out result
+tree.write(ofile)
+