forked from jasonpriem/HumanNameParser.php
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Name.php
102 lines (94 loc) · 2.92 KB
/
Name.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
<?php
/**
 * Holds a single name string and performs the cutting, matching and
 * flipping operations used to pick it apart.
 *
 * The string has to be UTF-8 encoded: every regex in this class runs with
 * the `u` modifier. After each mutating operation the string is
 * re-normalized (see norm()).
 */
class HumanNameParser_Name {

    /** @var string the current, normalized name string */
    private $str;

    /**
     * @param string $str a UTF-8 encoded name string
     * @throws Exception if $str is not valid UTF-8
     */
    public function __construct($str)
    {
        $this->setStr($str);
    }

    /**
     * Checks encoding, normalizes whitespace/punctuation, and sets the name string.
     *
     * @param string $str a UTF-8 encoded string
     * @return bool true on success
     * @throws Exception if $str is not valid UTF-8
     */
    public function setStr($str)
    {
        // Name the encoding explicitly: when the second argument is omitted,
        // mb_check_encoding() validates against mb_internal_encoding(), which
        // is configurable and therefore not guaranteed to be UTF-8.
        if (!mb_check_encoding($str, 'UTF-8')) {
            throw new Exception("Name is not encoded in UTF-8");
        }

        $this->str = $str;
        $this->norm();

        return true;
    }

    /**
     * @return string the current, normalized name string
     */
    public function getStr()
    {
        return $this->str;
    }

    /**
     * Uses a regex to chop off and return part of the name string.
     *
     * The return value is the requested submatch, but what gets removed from
     * the name string is the *whole* regex match (it is replaced by a single
     * space and the string is then normalized). If the regex does not match,
     * or the requested submatch is empty, the name string is left untouched.
     *
     * @param string     $regex         delimited pattern matching the part to chop off;
     *                                  the modifiers "ui" are always appended
     * @param int|string $submatchIndex which of the parenthesized submatches to return
     * @param string     $regexFlags    optional extra modifiers, appended after "ui"
     * @return string the part of the name string that got chopped off, or '' if nothing matched
     * @throws Exception if the regex matches more than once, or if the replacement fails;
     *                   in both cases the name string is left unchanged
     */
    public function chopWithRegex($regex, $submatchIndex = 0, $regexFlags = '')
    {
        $regex = $regex . "ui" . $regexFlags; // unicode + case-insensitive

        if (!preg_match($regex, $this->str, $m)) {
            return '';
        }

        $subset = isset($m[$submatchIndex]) ? $m[$submatchIndex] : '';

        // Compare against '' rather than relying on truthiness: the string
        // "0" is falsy in PHP but is a perfectly valid match.
        if ($subset === '') {
            return '';
        }

        // Work on a copy so that a failure below does not leave the object
        // holding a half-chopped, un-normalized string.
        $remaining = preg_replace($regex, ' ', $this->str, -1, $numReplacements);

        if ($remaining === null) {
            throw new Exception("Regex replacement failed (PCRE error code " . preg_last_error() . ").");
        }
        if ($numReplacements > 1) {
            throw new Exception("The regex being used to find the name has multiple matches.");
        }

        $this->str = $remaining;
        $this->norm();

        return $subset;
    }

    /**
     * Flips the front and back parts of a name with one another.
     *
     * Front and back are determined by a specified character somewhere in the
     * middle of the string. The character(s) are matched literally. If they
     * do not occur in the name string, nothing changes.
     *
     * @param string $flipAroundChar the character(s) demarcating the two halves you want to flip
     * @return bool true on success (including when there was nothing to flip)
     * @throws Exception if $flipAroundChar occurs more than once
     */
    public function flip($flipAroundChar)
    {
        // Escape the delimiter so regex metacharacters such as "." or "|"
        // (and the "/" pattern delimiter itself) are treated as plain text.
        $delimiter  = preg_quote($flipAroundChar, '/');
        $substrings = preg_split("/$delimiter/u", $this->str);
        $pieces     = count($substrings);

        if ($pieces > 2) {
            throw new Exception("Can't flip around multiple '$flipAroundChar' characters in namestring.");
        }

        if ($pieces == 2) {
            $this->str = $substrings[1] . " " . $substrings[0];
            $this->norm();
        }

        return true; // if there's 1 or 0 $flipAroundChar found
    }

    /**
     * Removes extra whitespace and punctuation from $this->str.
     *
     * Strips whitespace from both ends, collapses each run of whitespace
     * into a single " ", and drops one trailing comma (together with any
     * whitespace in front of it).
     *
     * @return bool true on success
     */
    private function norm()
    {
        $this->str = preg_replace('#^\s*#u', '', $this->str);
        $this->str = preg_replace('#\s*$#u', '', $this->str);

        // The collapse step is skipped entirely when the string contains the
        // UTF-8 byte sequence for a non-breaking space (U+00A0).
        // NOTE(review): the reason for this guard is not evident from this
        // file; it is preserved as-is -- confirm whether it is still needed.
        if (substr_count($this->str, "\xc2\xa0") == 0) {
            $this->str = preg_replace('#\s+#u', ' ', $this->str);
        }

        // Remove the trailing comma outright. Swapping it for a space (as was
        // done previously) re-introduced trailing whitespace after the string
        // had already been trimmed.
        $this->str = preg_replace('#\s*,$#u', '', $this->str);

        return true;
    }
}
?>