From 94f2b4c3ef72eb91d32a46e284384f83cac7b873 Mon Sep 17 00:00:00 2001 From: Andre Wyrwa Date: Tue, 12 Nov 2019 04:23:36 +1100 Subject: [PATCH] Handle combined initials --- README.md | 20 ++++++++++++ src/Mapper/InitialMapper.php | 20 ++++++++++-- src/Parser.php | 28 +++++++++++++++-- tests/Mapper/InitialMapperTest.php | 29 ++++++++++++++++-- tests/ParserTest.php | 49 ++++++++++++++++++++++-------- 5 files changed, 127 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index bf836af..c3d3122 100644 --- a/README.md +++ b/README.md @@ -154,6 +154,26 @@ This defaults to half the amount of words in the input string, meaning that effectively the salutation may occur within the first half of the name parts. +### Adjusting combined initials support +```php +$parser = new TheIconic\NameParser\Parser(); +$parser->setMaxCombinedInitials(3); +``` +Combined initials are combinations of several +uppercased letters, e.g. `DJ` or `J.T.` without +separating spaces. The parser will treat such sequences +of uppercase letters (with optional dots) as combined +initials and parse them into individual initials. +This value adjusts the maximum number of uppercase letters +in a single name part that are recognised as combined initials. +Parts with more than the specified maximum number of letters +will not be parsed into initials and hence will most likely +be parsed into first or middle names. + +The default value is 2. 
+ +To disable combined initials support, set this value to 1; + ## Tips ### Provide clean input strings If your input string consists of more than just the name and diff --git a/src/Mapper/InitialMapper.php b/src/Mapper/InitialMapper.php index 5041fd9..3a75dc4 100644 --- a/src/Mapper/InitialMapper.php +++ b/src/Mapper/InitialMapper.php @@ -12,9 +12,12 @@ class InitialMapper extends AbstractMapper { protected $matchLastPart = false; - public function __construct(bool $matchLastPart = false) + private $combinedMax = 2; + + public function __construct(int $combinedMax = 2, bool $matchLastPart = false) { $this->matchLastPart = $matchLastPart; + $this->combinedMax = $combinedMax; } /** @@ -27,7 +30,9 @@ public function map(array $parts): array { $last = count($parts) - 1; - foreach ($parts as $k => $part) { + for ($k = 0; $k < count($parts); $k++) { + $part = $parts[$k]; + if ($part instanceof AbstractPart) { continue; } @@ -36,6 +41,17 @@ public function map(array $parts): array continue; } + if (strtoupper($part) === $part) { + $stripped = str_replace('.', '', $part); + $length = strlen($stripped); + + if (1 < $length && $length <= $this->combinedMax) { + array_splice($parts, $k, 1, str_split($stripped)); + $last = count($parts) - 1; + $part = $parts[$k]; + } + } + if ($this->isInitial($part)) { $parts[$k] = new Initial($part); } diff --git a/src/Parser.php b/src/Parser.php index 6f86c5d..f2040b0 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -38,6 +38,11 @@ class Parser */ protected $maxSalutationIndex = 0; + /** + * @var int + */ + protected $maxCombinedInitials = 2; + public function __construct(array $languages = []) { if (empty($languages)) { @@ -125,7 +130,7 @@ protected function getSecondSegmentParser(): Parser new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()), new SuffixMapper($this->getSuffixes(), true, 1), new NicknameMapper($this->getNicknameDelimiters()), - new InitialMapper(true), + new 
InitialMapper($this->getMaxCombinedInitials(), true), new FirstnameMapper(), new MiddlenameMapper(true), ]); @@ -156,7 +161,7 @@ public function getMappers(): array new NicknameMapper($this->getNicknameDelimiters()), new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()), new SuffixMapper($this->getSuffixes()), - new InitialMapper(), + new InitialMapper($this->getMaxCombinedInitials()), new LastnameMapper($this->getPrefixes()), new FirstnameMapper(), new MiddlenameMapper(), @@ -299,4 +304,23 @@ public function setMaxSalutationIndex(int $maxSalutationIndex): Parser return $this; } + + /** + * @return int + */ + public function getMaxCombinedInitials(): int + { + return $this->maxCombinedInitials; + } + + /** + * @param int $maxCombinedInitials + * @return Parser + */ + public function setMaxCombinedInitials(int $maxCombinedInitials): Parser + { + $this->maxCombinedInitials = $maxCombinedInitials; + + return $this; + } } diff --git a/tests/Mapper/InitialMapperTest.php b/tests/Mapper/InitialMapperTest.php index 5f1dbb4..92448f6 100644 --- a/tests/Mapper/InitialMapperTest.php +++ b/tests/Mapper/InitialMapperTest.php @@ -71,14 +71,39 @@ public function provider() new Initial('B'), ], 'arguments' => [ + 2, true ], + ], + [ + 'input' => [ + 'JM', + 'Walker', + ], + 'expectation' => [ + new Initial('J'), + new Initial('M'), + 'Walker' + ] + ], + [ + 'input' => [ + 'JM', + 'Walker', + ], + 'expectation' => [ + 'JM', + 'Walker' + ], + 'arguments' => [ + 1 + ] ] ]; } - protected function getMapper($matchLastPart = false) + protected function getMapper($maxCombined = 2, $matchLastPart = false) { - return new InitialMapper($matchLastPart); + return new InitialMapper($maxCombined, $matchLastPart); } } diff --git a/tests/ParserTest.php b/tests/ParserTest.php index 1b1c488..05e79ca 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -51,7 +51,8 @@ public function provider() [ 'J.B. 
Hunt', [ - 'firstname' => 'J.B.', + 'firstname' => 'J', + 'initials' => 'B', 'lastname' => 'Hunt', ] ], @@ -534,24 +535,31 @@ public function provider() 'firstname' => 'Etje', 'lastname' => 'Heijdanus-De Boer', ] - ] - ]; - } - - /** - * @return array - */ - public function dysfunctionalFirstnameProvider() - { - return [ - // fails. both initials should be capitalized + ], [ 'JB Hunt', [ - 'firstname' => 'JB', + 'firstname' => 'J', + 'initials' => 'B', 'lastname' => 'Hunt', ] ], + [ + 'Charles Philip Arthur George Mountbatten-Windsor', + [ + 'firstname' => 'Charles', + 'middlename' => 'Philip Arthur George', + 'lastname' => 'Mountbatten-Windsor', + ] + ], + [ + 'Ella Marija Lani Yelich-O\'Connor', + [ + 'firstname' => 'Ella', + 'middlename' => 'Marija Lani', + 'lastname' => 'Yelich-O\'Connor', + ] + ] ]; } @@ -605,6 +613,21 @@ public function testSetMaxSalutationIndex() $this->assertSame('Mr.', $parser->parse('Francis Mr')->getSalutation()); } + public function testSetMaxCombinedInitials() + { + $parser = new Parser(); + $this->assertSame(2, $parser->getMaxCombinedInitials()); + $parser->setMaxCombinedInitials(1); + $this->assertSame(1, $parser->getMaxCombinedInitials()); + $this->assertSame('', $parser->parse('DJ Westbam')->getInitials()); + + $parser = new Parser(); + $this->assertSame(2, $parser->getMaxCombinedInitials()); + $parser->setMaxCombinedInitials(3); + $this->assertSame(3, $parser->getMaxCombinedInitials()); + $this->assertSame('P A G', $parser->parse('Charles PAG Mountbatten-Windsor')->getInitials()); + } + public function testParserAndSubparsersProperlyHandleLanguages() { $parser = new Parser([