Commit 3b4a732: 01_Aspect_Based_Sentiment_analysis.ipynb on colab
# The above did not work, so use the code below on Colab instead.
# Code adapted from: https://colab.research.google.com/github/stanfordnlp/stanza/blob/master/demo/Stanza_CoreNLP_Interface.ipynb#scrollTo=LS4OKnqJ8wui
"""
# Install stanza
!pip install stanza
# Import stanza
import stanza
# Download the Stanford CoreNLP package with Stanza's installation command
# This'll take several minutes, depending on the network speed
corenlp_dir = './corenlp'
stanza.install_corenlp(dir=corenlp_dir)
# Set the CORENLP_HOME environment variable to point to the installation location
import os
os.environ["CORENLP_HOME"] = corenlp_dir
# Examine the CoreNLP installation folder to make sure the installation is successful
!ls $CORENLP_HOME
# Import client module
from stanza.server import CoreNLPClient
# Construct a CoreNLPClient with some basic annotators, a memory allocation of 4GB, and port number 9001
client = CoreNLPClient(
    annotators=['tokenize', 'ssplit', 'pos', 'parse', 'lemma', 'ner', 'sentiment'],
    memory='4G',
    endpoint='http://localhost:9001',
    be_quiet=True)
print(client)
# Start the background server and wait for some time
# Note that in practice this is totally optional, as by default the server will be started when the first annotation is performed
client.start()
import time; time.sleep(10)
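# A hedged alternative sketch: CoreNLPClient can also be used as a context
# manager, which starts the server on entry and shuts it down on exit, so the
# explicit start()/stop() calls in this cell become unnecessary. The annotator
# list and port below are illustrative only (9002 avoids clashing with the
# server already running on 9001).
# with CoreNLPClient(annotators=['tokenize', 'ssplit', 'sentiment'],
#                    memory='4G',
#                    endpoint='http://localhost:9002',
#                    be_quiet=True) as ctx_client:
#     ann = ctx_client.annotate("Stanza makes CoreNLP easy to use from Python.")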
# Print background processes and look for java
# You should be able to see a StanfordCoreNLPServer java process running in the background
!ps -o pid,cmd | grep java
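# The three input strings below are assumed to be defined earlier in the
# notebook; the sample reviews here are illustrative placeholders so this
# cell can run on its own.
positive = "The battery life is amazing and the camera takes superb photos."
negative = "The screen cracked within a week and customer support was useless."
ambiguous = "The food was great, but the service was painfully slow."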
document2 = client.annotate(positive)
# Iterate over all tokens in all sentences, and print out the word, lemma, POS, NER, and sentiment tags
print("{:12s}\t{:12s}\t{:6s}\t{}\t\t{}".format("Word", "Lemma", "POS", "NER", "Sentiment"))
for i, sent in enumerate(document2.sentence):
    print("[Sentence {}]".format(i+1))
    for t in sent.token:
        print("{:12s}\t{:12s}\t{:6s}\t{}\t\t{}".format(t.word, t.lemma, t.pos, t.ner, t.sentiment))
    print("")
document3 = client.annotate(negative)
# Iterate over all tokens in all sentences, and print out the word, lemma, POS, NER, and sentiment tags
print("{:12s}\t{:12s}\t{:6s}\t{}\t\t{}".format("Word", "Lemma", "POS", "NER", "Sentiment"))
for i, sent in enumerate(document3.sentence):
    print("[Sentence {}]".format(i+1))
    for t in sent.token:
        print("{:12s}\t{:12s}\t{:6s}\t{}\t\t{}".format(t.word, t.lemma, t.pos, t.ner, t.sentiment))
    print("")
document4 = client.annotate(ambiguous)
# Iterate over all tokens in all sentences, and print out the word, lemma, POS, NER, and sentiment tags
print("{:12s}\t{:12s}\t{:6s}\t{}\t\t{}".format("Word", "Lemma", "POS", "NER", "Sentiment"))
for i, sent in enumerate(document4.sentence):
    print("[Sentence {}]".format(i+1))
    for t in sent.token:
        print("{:12s}\t{:12s}\t{:6s}\t{}\t\t{}".format(t.word, t.lemma, t.pos, t.ner, t.sentiment))
    print("")
# Shut down the background CoreNLP server
client.stop()
time.sleep(10)
!ps -o pid,cmd | grep java
"""1 parent 00b8b02 commit 3b4a732
1 file changed
+2
-1
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
281 | 281 | | |
282 | 282 | | |
283 | 283 | | |
284 | | - | |
| 284 | + | |
| 285 | + | |
285 | 286 | | |
286 | 287 | | |
287 | 288 | | |
| |||
0 commit comments