You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

472 lines
13 KiB
PHP

<?php
/**
 * Normalizes US street names and address lines.
 *
 * Two strategies are available: an external script (configured through the
 * global $SETTINGS["normalize_python_script"]) whose JSON output is used when
 * it is present and produces a usable result, and a built-in table-driven
 * fallback that upper-cases the input and abbreviates the street suffix and
 * compass directions.
 */
class StreetNormalizer {
    /*
     * Machine-readable form of the chart at
     * https://pe.usps.com/text/pub28/28apc_002.htm
     * with loop->lp added
     *
     * Keys are matched against whole, upper-cased words, so they must be
     * upper-case and must not carry any leading or trailing whitespace.
     */
    const SUFFIX_TABLE = [
        'ALLEE' => 'ALY',
        'ALLEY' => 'ALY',
        'ALLY' => 'ALY',
        'ANEX' => 'ANX',
        'ANNEX' => 'ANX',
        'ANNX' => 'ANX',
        'ARC' => 'ARC',
        'ARCADE' => 'ARC',
        'AV' => 'AVE',
        'AVEN' => 'AVE',
        'AVENU' => 'AVE',
        'AVENUE' => 'AVE',
        'AVN' => 'AVE',
        'AVNUE' => 'AVE',
        'BAYOO' => 'BYU',
        'BAYOU' => 'BYU',
        'BEACH' => 'BCH',
        'BEND' => 'BND',
        'BLUF' => 'BLF',
        'BLUFF' => 'BLF',
        'BLUFFS' => 'BLFS',
        'BOT' => 'BTM',
        'BOTTM' => 'BTM',
        'BOTTOM' => 'BTM',
        'BOUL' => 'BLVD',
        'BOULEVARD' => 'BLVD',
        'BOULV' => 'BLVD',
        'BRNCH' => 'BR',
        'BRANCH' => 'BR',
        'BRDGE' => 'BRG',
        'BRIDGE' => 'BRG',
        'BROOK' => 'BRK',
        'BROOKS' => 'BRKS',
        'BURG' => 'BG',
        'BURGS' => 'BGS',
        'BYPA' => 'BYP',
        'BYPAS' => 'BYP',
        'BYPASS' => 'BYP',
        'BYPS' => 'BYP',
        'CAMP' => 'CP',
        'CMP' => 'CP',
        'CANYN' => 'CYN',
        'CANYON' => 'CYN',
        'CNYN' => 'CYN',
        'CAPE' => 'CPE',
        'CAUSEWAY' => 'CSWY',
        'CAUSWA' => 'CSWY',
        'CEN' => 'CTR',
        'CENT' => 'CTR',
        'CENTER' => 'CTR',
        'CENTR' => 'CTR',
        'CENTRE' => 'CTR',
        'CNTER' => 'CTR',
        'CNTR' => 'CTR',
        'CENTERS' => 'CTRS',
        'CIRC' => 'CIR',
        'CIRCL' => 'CIR',
        'CIRCLE' => 'CIR',
        'CRCL' => 'CIR',
        'CRCLE' => 'CIR',
        'CIRCLES' => 'CIRS',
        'CLIFF' => 'CLF',
        'CLIFFS' => 'CLFS',
        'CLUB' => 'CLB',
        'COMMON' => 'CMN',
        'COMMONS' => 'CMNS',
        'CORNER' => 'COR',
        'CORNERS' => 'CORS',
        'COURSE' => 'CRSE',
        'COURT' => 'CT',
        'COURTS' => 'CTS',
        'COVE' => 'CV',
        'COVES' => 'CVS',
        'CREEK' => 'CRK',
        'CRESCENT' => 'CRES',
        'CRSENT' => 'CRES',
        'CRSNT' => 'CRES',
        'CREST' => 'CRST',
        'CROSSING' => 'XING',
        'CRSSNG' => 'XING',
        'XING' => 'XING',
        'CROSSROAD' => 'XRD',
        'CROSSROADS' => 'XRDS',
        'CURVE' => 'CURV',
        'DALE' => 'DL',
        'DL' => 'DL',
        'DAM' => 'DM',
        'DM' => 'DM',
        'DIV' => 'DV',
        'DIVIDE' => 'DV',
        'DVD' => 'DV',
        'DRIV' => 'DR',
        'DRIVE' => 'DR',
        'DRV' => 'DR',
        'DRIVES' => 'DRS',
        'ESTATE' => 'EST',
        'ESTATES' => 'ESTS',
        'EXP' => 'EXPY',
        'EXPR' => 'EXPY',
        'EXPRESS' => 'EXPY',
        'EXPRESSWAY' => 'EXPY',
        'EXPW' => 'EXPY',
        'EXTENSION' => 'EXT',
        'EXTN' => 'EXT',
        'EXTNSN' => 'EXT',
        'FALLS' => 'FLS',
        'FERRY' => 'FRY',
        'FRRY' => 'FRY',
        'FIELD' => 'FLD',
        'FIELDS' => 'FLDS',
        'FLAT' => 'FLT',
        'FLATS' => 'FLTS',
        'FORD' => 'FRD',
        'FORDS' => 'FRDS',
        'FOREST' => 'FRST',
        'FORESTS' => 'FRST',
        'FORG' => 'FRG',
        'FORGE' => 'FRG',
        'FORGES' => 'FRGS',
        'FORK' => 'FRK',
        'FORKS' => 'FRKS',
        'FORT' => 'FT',
        'FRT' => 'FT',
        'FREEWAY' => 'FWY',
        'FREEWY' => 'FWY',
        'FRWAY' => 'FWY',
        'FRWY' => 'FWY',
        'GARDEN' => 'GDN',
        'GARDN' => 'GDN',
        'GRDEN' => 'GDN',
        'GRDN' => 'GDN',
        'GARDENS' => 'GDNS',
        'GRDNS' => 'GDNS',
        'GATEWAY' => 'GTWY',
        'GATEWY' => 'GTWY',
        'GATWAY' => 'GTWY',
        'GTWAY' => 'GTWY',
        'GLEN' => 'GLN',
        'GLENS' => 'GLNS',
        'GREEN' => 'GRN',
        'GREENS' => 'GRNS',
        'GROV' => 'GRV',
        'GROVE' => 'GRV',
        'GROVES' => 'GRVS',
        'HARB' => 'HBR',
        'HARBOR' => 'HBR',
        'HARBR' => 'HBR',
        'HRBOR' => 'HBR',
        'HARBORS' => 'HBRS',
        'HAVEN' => 'HVN',
        'HT' => 'HTS',
        'HIGHWAY' => 'HWY',
        'HIGHWY' => 'HWY',
        'HIWAY' => 'HWY',
        'HIWY' => 'HWY',
        'HWAY' => 'HWY',
        'HILL' => 'HL',
        'HILLS' => 'HLS',
        'HLLW' => 'HOLW',
        'HOLLOW' => 'HOLW',
        'HOLLOWS' => 'HOLW',
        'HOLWS' => 'HOLW',
        'ISLAND' => 'IS',
        'ISLND' => 'IS',
        'ISLANDS' => 'ISS',
        'ISLNDS' => 'ISS',
        'ISLES' => 'ISLE',
        'JCTION' => 'JCT',
        'JCTN' => 'JCT',
        'JUNCTION' => 'JCT',
        'JUNCTN' => 'JCT',
        'JUNCTON' => 'JCT',
        'JCTNS' => 'JCTS',
        'JUNCTIONS' => 'JCTS',
        'KEY' => 'KY',
        'KEYS' => 'KYS',
        'KNL' => 'KNL',
        'KNOL' => 'KNL',
        'KNOLL' => 'KNL',
        'KNOLLS' => 'KNLS',
        'LAKE' => 'LK',
        'LAKES' => 'LKS',
        'LANDING' => 'LNDG',
        'LNDNG' => 'LNDG',
        'LANE' => 'LN',
        'LIGHT' => 'LGT',
        'LIGHTS' => 'LGTS',
        'LOAF' => 'LF',
        'LOCK' => 'LCK',
        'LOCKS' => 'LCKS',
        'LDGE' => 'LDG',
        'LODG' => 'LDG',
        'LODGE' => 'LDG',
        // Local addition to the USPS chart (see the note at the top of the table).
        'LOOP' => 'LP',
        'MANOR' => 'MNR',
        'MANORS' => 'MNRS',
        'MEADOW' => 'MDW',
        'MDW' => 'MDWS',
        'MEADOWS' => 'MDWS',
        'MEDOWS' => 'MDWS',
        'MILL' => 'ML',
        'MILLS' => 'MLS',
        'MISSN' => 'MSN',
        'MSSN' => 'MSN',
        'MOTORWAY' => 'MTWY',
        'MNT' => 'MT',
        'MOUNT' => 'MT',
        'MNTAIN' => 'MTN',
        'MNTN' => 'MTN',
        'MOUNTAIN' => 'MTN',
        'MOUNTIN' => 'MTN',
        'MTIN' => 'MTN',
        'MNTNS' => 'MTNS',
        'MOUNTAINS' => 'MTNS',
        'NECK' => 'NCK',
        'ORCHARD' => 'ORCH',
        'ORCHRD' => 'ORCH',
        'OVL' => 'OVAL',
        'OVERPASS' => 'OPAS',
        'PRK' => 'PARK',
        'PARKS' => 'PARK',
        'PARKWAY' => 'PKWY',
        'PARKWY' => 'PKWY',
        'PKWAY' => 'PKWY',
        'PKY' => 'PKWY',
        'PARKWAYS' => 'PKWY',
        'PKWYS' => 'PKWY',
        'PASSAGE' => 'PSGE',
        'PATHS' => 'PATH',
        'PIKES' => 'PIKE',
        'PINE' => 'PNE',
        'PINES' => 'PNES',
        'PLAIN' => 'PLN',
        'PLAINS' => 'PLNS',
        'PLAZA' => 'PLZ',
        'PLZA' => 'PLZ',
        'POINT' => 'PT',
        'POINTS' => 'PTS',
        'PORT' => 'PRT',
        'PORTS' => 'PRTS',
        'PRAIRIE' => 'PR',
        'PRR' => 'PR',
        'RAD' => 'RADL',
        'RADIAL' => 'RADL',
        'RADIEL' => 'RADL',
        'RANCH' => 'RNCH',
        'RANCHES' => 'RNCH',
        'RNCHS' => 'RNCH',
        'RAPID' => 'RPD',
        'RAPIDS' => 'RPDS',
        'REST' => 'RST',
        'RDGE' => 'RDG',
        'RIDGE' => 'RDG',
        'RIDGES' => 'RDGS',
        'RIVER' => 'RIV',
        'RVR' => 'RIV',
        'RIVR' => 'RIV',
        'ROAD' => 'RD',
        'ROADS' => 'RDS',
        'ROUTE' => 'RTE',
        'SHOAL' => 'SHL',
        'SHOALS' => 'SHLS',
        'SHOAR' => 'SHR',
        'SHORE' => 'SHR',
        'SHOARS' => 'SHRS',
        'SHORES' => 'SHRS',
        'SKYWAY' => 'SKWY',
        'SPNG' => 'SPG',
        'SPRING' => 'SPG',
        'SPRNG' => 'SPG',
        'SPNGS' => 'SPGS',
        'SPRINGS' => 'SPGS',
        'SPRNGS' => 'SPGS',
        'SPURS' => 'SPUR',
        'SQR' => 'SQ',
        'SQRE' => 'SQ',
        'SQU' => 'SQ',
        'SQUARE' => 'SQ',
        'SQRS' => 'SQS',
        'SQUARES' => 'SQS',
        'STATION' => 'STA',
        'STATN' => 'STA',
        'STN' => 'STA',
        'STRAV' => 'STRA',
        'STRAVEN' => 'STRA',
        'STRAVENUE' => 'STRA',
        'STRAVN' => 'STRA',
        'STRVN' => 'STRA',
        'STRVNUE' => 'STRA',
        'STREAM' => 'STRM',
        'STREME' => 'STRM',
        'STREET' => 'ST',
        'STRT' => 'ST',
        'STR' => 'ST',
        'STREETS' => 'STS',
        'SUMIT' => 'SMT',
        'SUMITT' => 'SMT',
        'SUMMIT' => 'SMT',
        'TERR' => 'TER',
        'TERRACE' => 'TER',
        'THROUGHWAY' => 'TRWY',
        'TRACE' => 'TRCE',
        'TRACES' => 'TRCE',
        'TRACK' => 'TRAK',
        'TRACKS' => 'TRAK',
        'TRK' => 'TRAK',
        'TRKS' => 'TRAK',
        'TRAFFICWAY' => 'TRFY',
        'TRAIL' => 'TRL',
        'TRAILS' => 'TRL',
        'TRLS' => 'TRL',
        'TRAILER' => 'TRLR',
        'TRLRS' => 'TRLR',
        'TUNEL' => 'TUNL',
        'TUNLS' => 'TUNL',
        'TUNNEL' => 'TUNL',
        'TUNNELS' => 'TUNL',
        'TUNNL' => 'TUNL',
        'TRNPK' => 'TPKE',
        'TURNPIKE' => 'TPKE',
        'TURNPK' => 'TPKE',
        'UNDERPASS' => 'UPAS',
        'UNION' => 'UN',
        'UNIONS' => 'UNS',
        'VALLEY' => 'VLY',
        'VALLY' => 'VLY',
        'VLLY' => 'VLY',
        'VALLEYS' => 'VLYS',
        'VDCT' => 'VIA',
        'VIADCT' => 'VIA',
        'VIADUCT' => 'VIA',
        'VIEW' => 'VW',
        'VIEWS' => 'VWS',
        'VILL' => 'VLG',
        'VILLAG' => 'VLG',
        'VILLAGE' => 'VLG',
        'VILLG' => 'VLG',
        'VILLIAGE' => 'VLG',
        'VILLAGES' => 'VLGS',
        'VILLE' => 'VL',
        'VIST' => 'VIS',
        'VISTA' => 'VIS',
        'VST' => 'VIS',
        'VSTA' => 'VIS',
        'WALKS' => 'WALK',
        'WY' => 'WAY',
        'WELL' => 'WL',
        'WELLS' => 'WLS'
    ];

    /*
     * Spelled-out compass directions and their abbreviations.
     */
    const CARDINAL_TABLE = [
        "NORTH" => "N",
        "SOUTH" => "S",
        "EAST" => "E",
        "WEST" => "W",
        "NORTHWEST" => "NW",
        "SOUTHWEST" => "SW",
        "NORTHEAST" => "NE",
        "SOUTHEAST" => "SE"
    ];

    /**
     * Normalize a street name (ex. Street Road)
     * @param string $street
     * @param bool $python Set to false to use built-in less-accurate code,
     * or true to use normalize.py and https://github.com/mcmire/address_standardization
     * @return string
     */
    public static function normalizeStreet(string $street, bool $python = true): string {
        // Give the script a dummy house number so it doesn't get lost
        $filler_address = "10000001 ";
        // Forward $python so the caller's choice of normalizer is honoured.
        $normalized = static::normalizeAddress($filler_address . $street, $python);
        return str_replace($filler_address, "", $normalized);
    }

    /**
     * Normalize an address line (ex. 1234 street road)
     *
     * When $python is true the external script is tried first. If it is not
     * configured, is missing, or does not return a usable "address_line_1",
     * the built-in code is applied to the ORIGINAL input instead.
     *
     * @param string $address
     * @param bool $python Set to false to use built-in less-accurate code,
     * or true to use normalize.py and https://github.com/mcmire/address_standardization
     * @return string
     */
    public static function normalizeAddress(string $address, bool $python = true): string {
        if ($python) {
            $fromScript = self::normalizeWithScript($address);
            if ($fromScript !== null) {
                return $fromScript;
            }
        }

        // Built-in fallback: upper-case, then abbreviate the suffix and directions.
        $builtin = strtoupper(trim($address));
        $builtin = static::normalizeSuffix($builtin);
        return static::normalizeCardinals($builtin);
    }

    /**
     * Run the configured external normalizer script on an address line.
     *
     * The script receives the address as its single shell-escaped argument
     * and its output is decoded as JSON; the "address_line_1" entry is used.
     *
     * @param string $address
     * @return string|null The normalized line, or null when the script is not
     * configured, does not exist, or produced no usable result.
     */
    private static function normalizeWithScript(string $address) {
        global $SETTINGS;

        $script = $SETTINGS["normalize_python_script"] ?? "";
        if (!is_string($script) || $script === "" || !file_exists($script)) {
            return null;
        }

        // Escape the script path as well so paths containing spaces still run.
        $json = shell_exec(escapeshellarg($script) . " " . escapeshellarg($address));
        if (!is_string($json)) {
            return null;
        }

        $decoded = json_decode($json, true);
        if (!is_array($decoded) || !isset($decoded["address_line_1"])) {
            return null;
        }

        $line = $decoded["address_line_1"];
        // An empty or non-string line is treated as a failed normalization.
        return (is_string($line) && $line !== "") ? $line : null;
    }

    /**
     * Look up a single word in a replacement table.
     * @param string $str Word to look up (exact match on the table key)
     * @param array $lookuptable Map of word => replacement
     * @return string The replacement, or $str unchanged when there is no entry
     */
    private static function findReplace(string $str, array $lookuptable): string {
        return $lookuptable[$str] ?? $str;
    }

    /**
     * Apply findReplace() to every word of a string.
     * @param string $str
     * @param array $lookuptable Map of word => replacement
     * @param string $separator Word delimiter used to split and re-join $str
     * @return string
     */
    private static function findReplaceAll(string $str, array $lookuptable, string $separator = " "): string {
        $words = explode($separator, $str);
        foreach ($words as $i => $word) {
            $words[$i] = self::findReplace($word, $lookuptable);
        }
        return implode($separator, $words);
    }

    /**
     * Replace cardinal/compass directions with abbreviations
     * @param string $street
     * @return string
     */
    public static function normalizeCardinals(string $street): string {
        $street = strtoupper(trim($street));
        return self::findReplaceAll($street, static::CARDINAL_TABLE);
    }

    /**
     * Replace the street suffix with the standard abbreviation.
     *
     * Only the last space-separated word is treated as the suffix.
     *
     * @param string $street
     * @return string
     */
    public static function normalizeSuffix(string $street): string {
        $parts = explode(" ", strtoupper(trim($street)));
        $last = count($parts) - 1;
        $parts[$last] = self::findReplace($parts[$last], static::SUFFIX_TABLE);
        return implode(" ", $parts);
    }
}