Skip to content

Commit 26405a9

Browse files
committed
New divvun-suggest mode -a/--autocorrect to auto-apply corrections
Implements #37 - Autocorrect mode for divvun-checker
1 parent 68d591c commit 26405a9

File tree

4 files changed

+67
-10
lines changed

4 files changed

+67
-10
lines changed

src/main_suggest.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ int main(int argc, char ** argv)
3131

3232
options.add_options()
3333
("j,json", "Use JSON output format (default: CG)")
34+
("a,autocorrect", "Use Autocorrect output format (default: CG)")
3435
("g,generator", "Generator (HFSTOL format)", cxxopts::value<std::string>(), "BIN")
3536
#ifdef HAVE_LIBPUGIXML
3637
("m,messages", "ERROR messages (XML format)", cxxopts::value<std::string>(), "FILE")
@@ -84,7 +85,17 @@ int main(int argc, char ** argv)
8485
}
8586

8687
const auto& genfile = options["generator"].as<std::string>();
87-
bool json = options.count("j");
88+
divvun::RunMode mode = divvun::RunCG;
89+
if(options.count("j")) {
90+
mode = divvun::RunJson;
91+
if(options.count("a")) {
92+
std::cerr << argv[0] << " ERROR: Pick just one of --json/--autocorrect" << std::endl;
93+
return (EXIT_FAILURE);
94+
}
95+
};
96+
if(options.count("a")) {
97+
mode = divvun::RunAutoCorrect;
98+
};
8899
bool genall = options.count("A");
89100
bool verbose = options.count("v");
90101

@@ -123,7 +134,7 @@ int main(int argc, char ** argv)
123134
#endif
124135

125136
divvun::Suggest s(t, m, locale, verbose, genall);
126-
s.run(std::cin, std::cout, json);
137+
s.run(std::cin, std::cout, mode);
127138
}
128139
catch (const cxxopts::OptionException& e)
129140
{

src/pipeline.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ SuggestCmd::SuggestCmd (const string& gen_path, const string& msg_path, const st
130130
}
131131
void SuggestCmd::run(stringstream& input, stringstream& output) const
132132
{
133-
suggest->run(input, output, true);
133+
suggest->run(input, output, RunJson);
134134
}
135135
vector<Err> SuggestCmd::run_errs(stringstream& input) const
136136
{

src/suggest.cpp

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,39 @@ RunState Suggest::run_json(std::istream& is, std::ostream& os)
879879
return sentence.runstate;
880880
}
881881

882+
/**
 * Read one sentence from `is` and write its text to `os` with corrections
 * applied in place.
 *
 * For every error returned by mk_errs(), the untouched text between the
 * previous error and this one is copied through, followed by the error's
 * first replacement. An error without any replacement is written back as its
 * original form. Whatever follows the last applied error is copied unchanged.
 *
 * Errors that start before the end of an already-applied error are skipped:
 * applying them too would emit text for the same span twice.
 *
 * If the sentence ended in the `flushing` state, a NUL byte is written and
 * the stream is flushed before returning.
 *
 * @param is  stream the sentence is read from (through run_sentence)
 * @param os  stream that receives the corrected text as UTF-8
 * @return the run state of the processed sentence
 */
RunState Suggest::run_autocorrect(std::istream& is, std::ostream& os)
{
	json::sanity_test();
	Sentence sentence = run_sentence(is, *generator, msgs, generate_all_readings);
	const vector<Err> errs = mk_errs(sentence);

	// Error offsets index into the UTF-16 form of the sentence text.
	const u16string text = fromUtf8(sentence.text.str());
	size_t offset = 0;	// first position of `text` that has not been written yet
	for(const auto& e : errs) {
		if(e.beg < offset) {
			// This span (or part of it) has already been written by an
			// earlier error; emitting another replacement here would
			// duplicate text and could move `offset` backwards.
			// NOTE(review): assumes mk_errs yields errors in text order -- confirm.
			continue;
		}
		if(e.beg > offset) {
			os << toUtf8(text.substr(offset, e.beg - offset));
		}
		// Apply only the first suggestion; fall back to the original form
		// when the error has no suggestion at all.
		const auto first = e.rep.begin();
		if(first != e.rep.end()) {
			os << toUtf8(*first);
		}
		else {
			os << toUtf8(e.form);
		}
		offset = e.end;
	}
	os << toUtf8(text.substr(offset));

	if(sentence.runstate == flushing) {
		os << '\0';
		os.flush();
		os.clear();
	}
	return sentence.runstate;
}
914+
882915

883916
void print_cg_reading(const Casing& inputCasing, const string& readinglines, std::ostream& os, const hfst::HfstTransducer& t, bool generate_all_readings) {
884917
os << readinglines;
@@ -938,7 +971,7 @@ void run_cg(std::istream& is, std::ostream& os, const hfst::HfstTransducer& t, b
938971
}
939972
}
940973

941-
void Suggest::run(std::istream& is, std::ostream& os, bool json)
974+
void Suggest::run(std::istream& is, std::ostream& os, RunMode mode)
942975
{
943976
try {
944977
auto _old = std::locale::global(std::locale(""));
@@ -947,11 +980,17 @@ void Suggest::run(std::istream& is, std::ostream& os, bool json)
947980
{
948981
std::cerr << "WARNING: Couldn't set global locale \"\" (locale-specific native environment): " << e.what() << std::endl;
949982
}
950-
if(json) {
951-
while(run_json(is, os) == flushing);
952-
}
953-
else {
954-
run_cg(is, os, *generator, generate_all_readings); // ignores ignores
983+
switch(mode) {
984+
case RunJson:
985+
while(run_json(is, os) == flushing);
986+
break;
987+
case RunAutoCorrect:
988+
while(run_autocorrect(is, os) == flushing);
989+
break;
990+
case RunCG:
991+
default:
992+
run_cg(is, os, *generator, generate_all_readings); // ignores ignores
993+
break;
955994
}
956995
}
957996

src/suggest.hpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@ enum RunState {
7373
eof
7474
};
7575

76+
/**
 * Output mode passed to Suggest::run().
 *
 * Kept as an unscoped enum: callers refer to the enumerators directly
 * (e.g. divvun::RunCG, RunJson).
 */
enum RunMode {
	RunCG = 0,		// CG output via run_cg(); also used by run() for any unlisted value
	RunJson = 1,		// JSON output via run_json()
	RunAutoCorrect = 2	// corrected text via run_autocorrect()
};
81+
7682
using rel_id = size_t;
7783
using relations = std::unordered_map<string, rel_id>;
7884

@@ -187,7 +193,7 @@ class Suggest {
187193
Suggest(const string& gen_path, const string& locale, bool verbose);
188194
~Suggest() = default;
189195

190-
void run(std::istream& is, std::ostream& os, bool json);
196+
void run(std::istream& is, std::ostream& os, RunMode mode);
191197

192198
vector<Err> run_errs(std::istream& is);
193199
void setIgnores(const std::set<ErrId>& ignores);
@@ -200,6 +206,7 @@ class Suggest {
200206
private:
201207
const SortedMsgLangs sortedmsglangs; // invariant: contains all and only the keys of msgs
202208
RunState run_json(std::istream& is, std::ostream& os);
209+
RunState run_autocorrect(std::istream& is, std::ostream& os);
203210
std::unique_ptr<const hfst::HfstTransducer> generator;
204211
std::set<ErrId> ignores;
205212
bool generate_all_readings = false;

0 commit comments

Comments
 (0)