短网址整体算是个功能单一的平台化做事。产品需求包含天生短网址、短网址去重、管理平台。但技能难点不小包含高并发、热点网址、短网址重复率。下面是我总结的一些短网址天生算法和技能难点的办理方案。
1、crc32算法
function short_url_crc32($strUrl) {$strShortUrl = ""; //求字符串crc32数字$intCrc32 = floatval(sprintf('%u', crc32($strUrl, '%u'))); //循环取余求ascii码while($intCrc32) {$intfMod = fmod($intCrc32, 62);if ($intfMod >= 0 && $intfMod <= 9) {$intfMod = chr($intfMod + 48);} elseif ($intfMod >= 10 && $intfMod <= 35) {$intfMod = chr($intfMod + 55);} elseif ($intfMod >= 36 && $intfMod <= 62) {$intfMod = chr($intfMod + 61);}$strShortUrl .= $intfMod;$intCrc32= floor($intCrc32 / 62);}return $strShortUrl;}$strUrl = "https://www.baidu.com/a/b/c";var_dump(short_url_crc32($strUrl));运行结果:string(5) "MajOz"
2、crc64算法

function crc64Table(){ $crc64tab = []; // ECMA polynomial $poly64rev = (0xC96C5795 << 32) | 0xD7870F42; // ISO polynomial // $poly64rev = (0xD8 << 56); for ($i = 0; $i < 256; $i++) { for ($part = $i, $bit = 0; $bit < 8; $bit++) { if ($part & 1) { $part = (($part >> 1) & ~(0x8 << 60)) ^ $poly64rev; } else { $part = ($part >> 1) & ~(0x8 << 60); } } $crc64tab[$i] = $part; } return $crc64tab;}/ @param string $string @param string $format @return mixed Formats: crc64('php'); // afe4e823e7cef190 crc64('php', '0x%x'); // 0xafe4e823e7cef190 crc64('php', '0x%X'); // 0xAFE4E823E7CEF190 crc64('php', '%d'); // -5772233581471534704 signed int crc64('php', '%u'); // 12674510492238016912 unsigned int/function crc64($string, $format = '%x'){ static $crc64tab; if ($crc64tab === null) { $crc64tab = crc64Table(); } $crc = 0; for ($i = 0; $i < strlen($string); $i++) { $crc = $crc64tab[($crc ^ ord($string[$i])) & 0xff] ^ (($crc >> 8) & ~(0xff << 56)); } return sprintf($format, $crc);}function short_url_crc64($strUrl) {$strShortUrl = ""; //求字符串crc64数字 $intCrc64 = floatval(sprintf('%u', crc64($strUrl, '%u'))); //循环取余求ascii码while($intCrc64) {$intfMod = fmod($intCrc64, 62);if ($intfMod >= 0 && $intfMod <= 9) {$intfMod = chr($intfMod + 48);} elseif ($intfMod >= 10 && $intfMod <= 35) {$intfMod = chr($intfMod + 55);} elseif ($intfMod >= 36 && $intfMod <= 62) {$intfMod = chr($intfMod + 61);}$strShortUrl .= $intfMod;$intCrc64= floor($intCrc64 / 62);} return ((strlen($strShortUrl) > 6) ? substr($strShortUrl, -6) : $strShortUrl);}$strUrl = "https://www.baidu.com/a/b/c";var_dump(short_url_crc64($strUrl));运行结果:string(6) "vlm2UF"
3、Hash MD5算法
function short_url_hashmd5($strUrl) { $arrChars = array( 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '7','8','9','A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q', 'R','S','T','U','V','W','X','Y','Z' ); $arrShortUrl = array(); //求字符串md5 32字节值 $strMd5 = md5($strUrl); for ($i = 0; $i < 4; $i++) { $strShortUrl = ''; //每段8字节 $subHex = substr($strMd5, $i 8, 8); //取8字节的前30位 $intHex = 0x3FFFFFFF & (1 bin2hex('0x' . $subHex)); for ($j = 0; $j < 6; $j++) { //与0x0000003E进行位与运算,取得字符数组chars索引 $intIndex = 0x0000003E & $intHex; $strShortUrl .= $arrChars[$intIndex]; $intHex = $intHex >> 5; } $arrShortUrl[] = $strShortUrl; } $intRandIndex = array_rand($arrShortUrl, 1); return $arrShortUrl[$intRandIndex];}$strUrl = "https://www.baidu.com/a/b/c";var_dump(short_url_hashmd5($strUrl));运行结果:string(6) "aaVqLw"
4、自增id+openssl加密算法
function short_url_openssl($intUrlId) {$strEncodeUrl = openssl_encrypt($intUrlId, 'aes128', "2022", false, "1234567890123456");return substr($strEncodeUrl, 0, 6);}$intUrlId = 112;var_dump(short_url_openssl($intUrlId));运行结果:string(6) "W7noUA"
5、随机字符串算法
function short_url_randchar() {$strChars= "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz";$strRandChar= substr(str_shuffle($strChars), mt_rand(0, strlen($strChars) - 11), 6);return $strRandChar;}var_dump(short_url_randchar());运行结果:string(6) "OZYzkH"
6、数据效验(效验位)
function short_url_valid($strShortUrl) {$intSum = 0;$intLen = strlen($strShortUrl);for($i = 0; $i < $intLen; ++$i) {$intSum += ord($strShortUrl[$i]);}$strOdd = "X";$intOdd = $intSum % 62;if ($intOdd >= 0 && $intOdd <= 9) {$strOdd = chr($intOdd + 48);} elseif ($intOdd >= 10 && $intOdd <= 35) {$strOdd = chr($intOdd + 55);} elseif ($intOdd >= 36 && $intOdd <= 62) {$strOdd = chr($intOdd + 61);}return $strOdd;}$strShortUrl = 'W7noUA';var_dump(short_url_valid($strShortUrl));运行结果:string(1) "H"
7、Nginx Rewrite配置
location ~ /[a-zA-Z0-9]+$ { root /home/work/shorturl; fastcgi_pass 127.0.0.1:9000; fastcgi_index shorturl.php; fastcgi_param SCRIPT_FILENAME $document_root/$fastcgi_script_name; include fastcgi_params; rewrite ^/([a-zA-Z0-9]+)$ /shorturl.php last;}
8、技能难点
高并发:
水平扩容Web做事器分布式Redis集群MySQL一主多从(写少读多)热点网址
增加Web做事器确当地缓存+管理平台同步热点网址到Redis集群ALL做事器+管理平台热点网址监控
Redis SortSet凑集排序(短韶光内访问量达到一定阈值的key才入Set)大数据实时数据监控(前端打点日志)。9、总结
根据存储量和重复率剖析5种天生算法个人推举利用crc64算法。缘故原由是扩展性强、重复率低。短网址增加数据效验位减少恶意流量的无效业务逻辑处理。Redis内存利用量根据产品前期须要打算清楚。感谢大家的评论、点赞、分享。。。