Punycode.php (9354B)
<?php

namespace Finwo\Punycode;

/**
 * Class Punycode
 *
 * Fully static Punycode en-/decoder based on https://www.ietf.org/rfc/rfc3492.txt
 * Unlike https://github.com/true/php-punycode, this encoder does not limit string sizes.
 *
 * The input is treated as one single label: it is not split on dots.
 *
 * @package Finwo\Punycode
 */
class Punycode
{
    /**
     * Bootstring parameter values
     *
     */
    const BASE         = 36;
    const DAMP         = 700;
    const DELIMITER    = '-';
    const INITIAL_BIAS = 72;
    const INITIAL_N    = 128;
    const PREFIX       = 'xn--';
    const SKEW         = 38;
    const TMAX         = 26;
    const TMIN         = 1;

    /**
     * Maps every base-36 digit character to its numeric value.
     * See page 9 of the RFC
     *
     * Note: PHP stores the keys '0'..'9' as integer keys 0..9.
     *
     * @var array
     */
    protected static $decodeTable = array(
        'a' => 0, 'b' => 1, 'c' => 2, 'd' => 3, 'e' => 4, 'f' => 5,
        'g' => 6, 'h' => 7, 'i' => 8, 'j' => 9, 'k' => 10, 'l' => 11,
        'm' => 12, 'n' => 13, 'o' => 14, 'p' => 15, 'q' => 16, 'r' => 17,
        's' => 18, 't' => 19, 'u' => 20, 'v' => 21, 'w' => 22, 'x' => 23,
        'y' => 24, 'z' => 25, '0' => 26, '1' => 27, '2' => 28, '3' => 29,
        '4' => 30, '5' => 31, '6' => 32, '7' => 33, '8' => 34, '9' => 35,
    );

    /**
     * Maps a numeric digit value back to its character.
     * Built lazily by buildEncodeTable() from the keys of $decodeTable.
     *
     * @var array
     */
    protected static $encodeTable = array();

    /**
     * Whether init() has already run.
     *
     * @var bool
     */
    protected static $initialized = false;

    /**
     * Character encoding handed to the mb_* functions.
     * charToCodePoint() always interprets the bytes of a character as UTF-8.
     *
     * @var string
     */
    public static $encoding = 'UTF-8';

    /**
     * Populate the encode table on first use and return it.
     *
     * @return array
     */
    protected static function buildEncodeTable()
    {
        if (!count(self::$encodeTable)) {
            self::$encodeTable = array_keys(self::$decodeTable);
        }

        return self::$encodeTable;
    }

    /**
     * Initialize the encoder if needed
     */
    protected static function init()
    {
        if (self::$initialized) {
            return;
        }
        self::buildEncodeTable();
        // Remember that the work is done so the guard above can short-circuit.
        self::$initialized = true;
    }

    /**
     * List code points for a given input
     *
     * @param string $input
     *
     * @return array Multi-dimension array with basic, non-basic and aggregated code points
     *               (keys 'all', 'basic' and 'nonBasic'; 'all' keeps the input order)
     */
    protected static function listCodePoints($input)
    {
        $codePoints = array(
            'all'      => array(),
            'basic'    => array(),
            'nonBasic' => array(),
        );

        $length = mb_strlen($input, self::$encoding);
        for ($i = 0; $i < $length; $i++) {
            $char = mb_substr($input, $i, 1, self::$encoding);
            $code = self::charToCodePoint($char);
            // Code points below 128 are the "basic" ones that are copied literally.
            if ($code < 128) {
                $codePoints['all'][] = $codePoints['basic'][] = $code;
            } else {
                $codePoints['all'][] = $codePoints['nonBasic'][] = $code;
            }
        }

        return $codePoints;
    }

    /**
     * Convert a single or multi-byte character to its code point
     *
     * The length of the sequence (1 to 4 bytes) is derived from the first byte.
     *
     * @param string $char
     *
     * @return integer
     */
    protected static function charToCodePoint($char)
    {
        $code = ord($char[0]);
        if ($code < 128) {
            return $code;
        } elseif ($code < 224) {
            return (($code - 192) * 64) + (ord($char[1]) - 128);
        } elseif ($code < 240) {
            return (($code - 224) * 4096) + ((ord($char[1]) - 128) * 64) + (ord($char[2]) - 128);
        } else {
            return (($code - 240) * 262144) + ((ord($char[1]) - 128) * 4096) + ((ord($char[2]) - 128) * 64) + (ord($char[3]) - 128);
        }
    }

    /**
     * Convert a code point to its single or multi-byte character
     *
     * @param integer $code
     *
     * @return string
     */
    protected static function codePointToChar($code)
    {
        if ($code <= 0x7F) {
            return chr($code);
        } elseif ($code <= 0x7FF) {
            return chr(($code >> 6) + 192) . chr(($code & 63) + 128);
        } elseif ($code <= 0xFFFF) {
            return chr(($code >> 12) + 224) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
        } else {
            return chr(($code >> 18) + 240) . chr((($code >> 12) & 63) + 128) . chr((($code >> 6) & 63) + 128) . chr(($code & 63) + 128);
        }
    }

    /**
     * Calculate the bias threshold to fall between TMIN and TMAX
     *
     * @param integer $k
     * @param integer $bias
     *
     * @return integer
     */
    protected static function calculateThreshold($k, $bias)
    {
        if ($k <= $bias + static::TMIN) {
            return static::TMIN;
        } elseif ($k >= $bias + static::TMAX) {
            return static::TMAX;
        }

        return $k - $bias;
    }

    /**
     * Bias adaptation
     *
     * @param integer $delta
     * @param integer $numPoints
     * @param boolean $firstTime
     *
     * @return integer
     */
    protected static function adapt($delta, $numPoints, $firstTime)
    {
        $delta = (int)(
            ($firstTime)
                ? $delta / static::DAMP
                : $delta / 2
        );
        $delta += (int)($delta / $numPoints);

        $k = 0;
        while ($delta > ((static::BASE - static::TMIN) * static::TMAX) / 2) {
            $delta = (int)($delta / (static::BASE - static::TMIN));
            $k     = $k + static::BASE;
        }
        $k = $k + (int)(((static::BASE - static::TMIN + 1) * $delta) / ($delta + static::SKEW));

        return $k;
    }

    /**
     * Encode a string to Punycode.
     *
     * A string that consists of basic (ASCII) code points only is returned
     * unchanged; every other string is returned with the PREFIX prepended.
     *
     * @param string $input
     *
     * @return string $encodedString
     */
    public static function encode($input)
    {
        self::init();
        $codePoints = self::listCodePoints($input);

        $n     = static::INITIAL_N;
        $bias  = static::INITIAL_BIAS;
        $delta = 0;
        $h     = $b = count($codePoints['basic']);

        // The basic code points are copied to the output first, in input order.
        $output = '';
        foreach ($codePoints['basic'] as $code) {
            $output .= self::codePointToChar($code);
        }
        if ($input === $output) {
            return $output;
        }
        if ($b > 0) {
            $output .= static::DELIMITER;
        }

        // Non-basic code points are handled in ascending order, each value once.
        $codePoints['nonBasic'] = array_unique($codePoints['nonBasic']);
        sort($codePoints['nonBasic']);

        $i      = 0;
        $length = count($codePoints['all']); // one entry per input character
        while ($h < $length) {
            $m     = $codePoints['nonBasic'][$i++];
            $delta = $delta + ($m - $n) * ($h + 1);
            $n     = $m;

            foreach ($codePoints['all'] as $c) {
                // $n never drops below INITIAL_N, so this also counts every basic code point.
                if ($c < $n) {
                    $delta++;
                }
                if ($c === $n) {
                    // Emit $delta as a generalized variable-length integer.
                    $q = $delta;
                    for ($k = static::BASE; ; $k += static::BASE) {
                        $t = self::calculateThreshold($k, $bias);
                        if ($q < $t) {
                            break;
                        }

                        $code    = $t + (($q - $t) % (static::BASE - $t));
                        $output .= static::$encodeTable[$code];

                        // Integer division: a float here would reach the modulo and the
                        // array offset above/below and trigger implicit-conversion
                        // deprecations on PHP 8.1+.
                        $q = (int)(($q - $t) / (static::BASE - $t));
                    }

                    $output .= static::$encodeTable[$q];
                    $bias    = self::adapt($delta, $h + 1, ($h === $b));
                    $delta   = 0;
                    $h++;
                }
            }

            $delta++;
            $n++;
        }

        return static::PREFIX . $output;
    }

    /**
     * Decode a Punycode string.
     *
     * The input is returned unchanged when isPunycode() rejects it or when the
     * encoded digits turn out to be malformed (invalid or truncated digit
     * sequence, integer overflow, or a code point above U+10FFFF).
     * Digits are accepted in upper and lower case.
     *
     * @param string $encodedString
     *
     * @return string $decodedString
     */
    public static function decode($encodedString)
    {
        self::init();
        if (!self::isPunycode($encodedString)) {
            return $encodedString;
        }

        $payload = (string)substr($encodedString, strlen(static::PREFIX));
        $n       = static::INITIAL_N;
        $i       = 0;
        $bias    = static::INITIAL_BIAS;
        $output  = '';

        // Everything before the LAST delimiter is the literal basic part; without a
        // delimiter the whole payload consists of encoded digits.
        $pos = strrpos($payload, static::DELIMITER);
        if ($pos !== false) {
            $output = (string)substr($payload, 0, $pos);
            $pos++;
        } else {
            $pos = 0;
        }

        $outputLength = strlen($output);
        $inputLength  = strlen($payload);
        while ($pos < $inputLength) {
            $oldi = $i;
            $w    = 1;

            // Read one generalized variable-length integer into $i.
            for ($k = static::BASE; ; $k += static::BASE) {
                // An empty string stands for "ran past the end" and is never a valid digit.
                $char = ($pos < $inputLength) ? strtolower($payload[$pos++]) : '';
                if (!isset(static::$decodeTable[$char])) {
                    return $encodedString;
                }

                $digit = static::$decodeTable[$char];
                $i     = $i + ($digit * $w);
                $t     = self::calculateThreshold($k, $bias);

                if ($digit < $t) {
                    break;
                }

                $w = $w * (static::BASE - $t);
            }

            // PHP silently promotes an overflowing integer to float.
            if (!is_int($i)) {
                return $encodedString;
            }

            $bias = self::adapt($i - $oldi, ++$outputLength, ($oldi === 0));
            $n    = $n + (int)($i / $outputLength);
            if ($n > 0x10FFFF) {
                return $encodedString;
            }

            // Insert the decoded character at position $i of the output.
            $i      = $i % ($outputLength);
            $output = mb_substr($output, 0, $i, self::$encoding)
                . self::codePointToChar($n)
                . mb_substr($output, $i, $outputLength - 1, self::$encoding);

            $i++;
        }

        return $output;
    }

    /**
     * Check whether a string looks like Punycode produced by encode().
     *
     * That is the case when it starts with PREFIX, has at least one character
     * after it, and the part after the last DELIMITER (or everything after the
     * prefix when there is no delimiter) consists of base-36 digits only.
     * A delimiter is NOT required: a string without basic code points is
     * encoded without one.
     *
     * @param string $stringToCheck
     *
     * @return bool
     */
    public static function isPunycode($stringToCheck)
    {
        $prefixLength = strlen(static::PREFIX);
        if (strncmp($stringToCheck, static::PREFIX, $prefixLength) !== 0) {
            return false;
        }

        $payload = (string)substr($stringToCheck, $prefixLength);
        if ($payload === '') {
            return false;
        }

        $pos    = strrpos($payload, static::DELIMITER);
        $digits = ($pos === false) ? $payload : (string)substr($payload, $pos + 1);

        // An empty digit part (payload ends with the delimiter) is accepted.
        $alphabet = implode('', array_keys(static::$decodeTable));

        return strspn(strtolower($digits), $alphabet) === strlen($digits);
    }
}