Skip to content

Commit 8d6ef52

Browse files
committed
Prevent false nicknames due to multiple quotes
Certain Anglicized names, such as those from some Hawaiian, Samoan, and Kenyan traditions, include multiple single quotation marks. This adjusts the quoted_word regex to only capture single quote marks that are not inside words. Without this fix, false nicknames are extracted from inside names like Ng'ang'a and Kawai'ae'a. Tests are included to cover these cases; the existing Benjamin 'Ben' Franklin test ensures that the typical nickname case is unchanged.
1 parent 9b8c30b commit 8d6ef52

File tree

3 files changed

+32
-4
lines changed

3 files changed

+32
-4
lines changed

nameparser/config/regexes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
("word", re.compile(r"(\w|\.)+", re.U)),
2424
("mac", re.compile(r'^(ma?c)(\w{2,})', re.I | re.U)),
2525
("initial", re.compile(r'^(\w\.|[A-Z])?$', re.U)),
26-
("quoted_word", re.compile(r'\'([^\s]*?)\'', re.U)),
26+
("quoted_word", re.compile(r'(?<!\w)\'([^\s]*?)\'(?!\w)', re.U)),
2727
("double_quotes", re.compile(r'\"(.*?)\"', re.U)),
2828
("parenthesis", re.compile(r'\((.*?)\)', re.U)),
2929
("roman_numeral", re.compile(r'^(X|IX|IV|V?I{0,3})$', re.I | re.U)),

nameparser/parser.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -413,9 +413,9 @@ def parse_nicknames(self):
413413
The content of parentheses or quotes in the name will be added to the
414414
nicknames list. This happens before any other processing of the name.
415415
416-
Single quotes cannot span white space characters to allow for single
417-
quotes in names like O'Connor. Double quotes and parenthesis can span
418-
white space.
416+
Single quotes cannot span white space characters, and the opening
417+
quote must not follow, nor the closing quote precede, a word character.
418+
This allows for quotes in names like O'Connor and Kawai'ae'a. Double quotes and parentheses can span white space.
419419
420420
Loops through 3 :py:data:`~nameparser.config.regexes.REGEXES`;
421421
`quoted_word`, `double_quotes` and `parenthesis`.

tests.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,6 +1431,34 @@ def test_single_quotes_on_end_of_last_name_not_treated_as_nickname(self):
14311431
self.m(hn.last, "Aube'", hn)
14321432
self.m(hn.nickname, "", hn)
14331433

1434+
def test_okina_inside_name_not_treated_as_nickname(self):
1435+
hn = HumanName("Harrieta Keōpūolani Nāhiʻenaʻena")
1436+
self.m(hn.first, "Harrieta", hn)
1437+
self.m(hn.middle, "Keōpūolani", hn)
1438+
self.m(hn.last, "Nāhiʻenaʻena", hn)
1439+
self.m(hn.nickname, "", hn)
1440+
1441+
def test_single_quotes_not_treated_as_nickname_Hawaiian_example(self):
1442+
hn = HumanName("Harietta Keopuolani Nahi'ena'ena")
1443+
self.m(hn.first, "Harietta", hn)
1444+
self.m(hn.middle, "Keopuolani", hn)
1445+
self.m(hn.last, "Nahi'ena'ena", hn)
1446+
self.m(hn.nickname, "", hn)
1447+
1448+
def test_single_quotes_not_treated_as_nickname_Kenyan_example(self):
1449+
hn = HumanName("Naomi Wambui Ng'ang'a")
1450+
self.m(hn.first, "Naomi", hn)
1451+
self.m(hn.middle, "Wambui", hn)
1452+
self.m(hn.last, "Ng'ang'a", hn)
1453+
self.m(hn.nickname, "", hn)
1454+
1455+
def test_single_quotes_not_treated_as_nickname_Samoan_example(self):
1456+
hn = HumanName("Va'apu'u Vitale")
1457+
self.m(hn.first, "Va'apu'u", hn)
1458+
self.m(hn.middle, "", hn)
1459+
self.m(hn.last, "Vitale", hn)
1460+
self.m(hn.nickname, "", hn)
1461+
14341462
# http://code.google.com/p/python-nameparser/issues/detail?id=17
14351463
def test_parenthesis_are_removed_from_name(self):
14361464
hn = HumanName("John Jones (Unknown)")

0 commit comments

Comments
 (0)