php生成sitemap
由于工作的原因,最近需要生成网站的sitemap.xml,谷歌百度了很多地方,并没有发现合适可用的代码,三思之后还是决定自己写吧!虽然可能写得有所缺陷,但毕竟是认认真真写的,希望对一些后来者有所帮助......
1、为什么要自己写脚本生成sitemap.xml?
很多人会说,网上有现成的工具,扫一下就可以了,没有必要自己写。是的,的确是这样。但是假设我们的网站经常更新,那么是不是每次都要手动更新sitemap呢?我很懒,那么有没有更好的方案呢?肯定是有的:可以起一个定时任务,每天晚上更新一次,此时脚本就有用武之地了。
2、文档目录:
配置文件 - config/config.ini.php sitemap主文件 - SiteMap.class.php
3、配置文件的代码
true, 'CookiePath' => '/tmp/sitemapcookie' ); //sitemap文件的保存地址 $SITEMAPPATH = './sitemap.xml'; //根据链接关键字设置priority $PRIORITYLIST = array( 'product' => '0.8', 'device' => '0.6', 'intelligent' => '0.4', 'course' => '0.2' ); //根据链接关键字设置CHANGEFREQ $CHANGEFREQLIST = array( 'product' => 'Always', 'device' => 'Hourly', 'intelligent' => 'Daily', 'course' => 'Weekly', 'login' => 'Monthly', 'about' => 'Yearly' );?>
4、sitemap主文件
<?php
/**
 * Command-line crawler that walks a website starting from the configured
 * DOMAIN_NAME and writes the discovered pages to a sitemap.xml file.
 *
 * Settings are read from config/config.ini.php next to this file.
 *
 * @version 1.0
 */
namespace Maweibinguo\SiteMap;

class SiteMap
{
    const SCHEMA = 'http://www.sitemaps.org/schemas/sitemap/0.9';

    /**
     * URLs that have already been queued or visited during the crawl.
     *
     * @var array
     * @access public
     */
    public $webUrlList = array();

    /**
     * Collected sitemap entries; each item has the keys
     * Url, Title, Priority and ChangeFreq.
     *
     * @var array
     * @access public
     */
    public $siteMapList = array();

    /**
     * @var bool
     * @access public
     */
    public $isUseCookie = false;

    /**
     * @var string
     * @access public
     */
    public $cookieFilePath = '';

    /**
     * @var \XMLWriter
     * @access private
     */
    private $_xmlWriter = null;

    /**
     * Variables defined by the config file, loaded on first use.
     *
     * @var array|null
     * @access private
     */
    private $_configCache = null;

    /**
     * Create the XML writer and make sure the script runs from the CLI.
     *
     * @access public
     */
    public function __construct()
    {
        $this->_xmlWriter = new \XMLWriter();
        $this->_enviromentTest();
    }

    /**
     * Abort unless the script is executed from the command line.
     *
     * @access private
     */
    private function _enviromentTest()
    {
        $sapiType = \php_sapi_name();
        if (strtolower($sapiType) != 'cli') {
            echo ' The Script Must Run In Command Lines ', "\r\n";
            exit();
        }
    }

    /**
     * Return the value of one variable defined in config/config.ini.php.
     *
     * The config file is parsed only once per instance; later calls are
     * served from the cache. The script terminates when the file is missing.
     *
     * @param string $configName name of the config variable (without "$")
     * @return mixed the configured value, or null when it is not defined
     * @access public
     */
    public function loadConfig($configName)
    {
        if ($this->_configCache === null) {
            $configPath = __DIR__ . '/config/config.ini.php';
            if (!file_exists($configPath)) {
                echo "Can not find config file", "\r\n";
                exit();
            }
            $this->_configCache = $this->_readConfigFile($configPath);
        }

        return isset($this->_configCache[$configName]) ? $this->_configCache[$configName] : null;
    }

    /**
     * Include the config file in an isolated scope and collect its variables.
     *
     * @param string $configPath
     * @return array variable name => value
     * @access private
     */
    private function _readConfigFile($configPath)
    {
        require $configPath;

        // Everything in scope except our own parameter came from the config file.
        return array_diff_key(get_defined_vars(), array('configPath' => true));
    }

    /**
     * Write the given entries to the file configured as SITEMAPPATH.
     *
     * @param array $siteMapList entries with Url, Title, Priority, ChangeFreq
     * @return bool true when the document was written, false on invalid
     *              input or when the target cannot be opened
     * @access public
     */
    public function generateSiteMapXml($siteMapList)
    {
        if (!is_array($siteMapList) || count($siteMapList) <= 0) {
            return false;
        }

        $siteMapPath = $this->loadConfig('SITEMAPPATH');
        if (!is_string($siteMapPath) || $siteMapPath === '') {
            return false;
        }

        // touch() instead of exec('touch ...'): no shell, no injection risk.
        if (!file_exists($siteMapPath)) {
            touch($siteMapPath);
        }
        if (!is_writable($siteMapPath)) {
            echo 'Is Not Writeable', "\r\n";
            exit();
        }

        $writer = $this->_xmlWriter;
        if (!$writer->openURI($siteMapPath)) {
            return false;
        }
        $writer->setIndent(true);
        $writer->startDocument('1.0', 'UTF-8');
        $writer->startElement('urlset');
        $writer->writeAttribute('xmlns', self::SCHEMA);

        foreach ($siteMapList as $siteMapItem) {
            if (!is_array($siteMapItem) || empty($siteMapItem['Url'])) {
                continue;
            }

            $title = isset($siteMapItem['Title']) ? (string) $siteMapItem['Title'] : '';
            $changefreq = !empty($siteMapItem['ChangeFreq']) ? $siteMapItem['ChangeFreq'] : 'Daily';
            $priority = !empty($siteMapItem['Priority']) ? $siteMapItem['Priority'] : 0.5;

            $writer->startElement('url');
            $writer->writeElement('loc', (string) $siteMapItem['Url']);
            // NOTE(review): <title> is not part of the sitemaps.org 0.9 schema;
            // it is kept because the original output contained it.
            $writer->writeElement('title', $title);
            // The sitemap protocol only defines lowercase changefreq values.
            $writer->writeElement('changefreq', strtolower((string) $changefreq));
            $writer->writeElement('priority', (string) $priority);
            $writer->endElement();
        }

        // Close <urlset>, finish the document and push the buffer to disk.
        $writer->endElement();
        $writer->endDocument();

        return $writer->flush() !== false;
    }

    /**
     * Fetch a URL with cURL and return the response body.
     *
     * @param string $url
     * @return string|false body on HTTP 200, false for an invalid URL,
     *                      a transport error or any other status code
     * @access public
     */
    public function sendRequest($url)
    {
        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            return false;
        }

        $connectTimeOut = $this->loadConfig('CURLOPT_CONNECTTIMEOUT');
        if ($connectTimeOut === false) {
            return false;
        }
        $timeOut = $this->loadConfig('CURLOPT_TIMEOUT');
        if ($timeOut === false) {
            return false;
        }

        $handle = curl_init();
        if ($handle === false) {
            return false;
        }

        curl_setopt($handle, CURLOPT_URL, $url);
        curl_setopt($handle, CURLOPT_HEADER, false);
        curl_setopt($handle, CURLOPT_AUTOREFERER, true);
        curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, (int) $connectTimeOut);
        curl_setopt($handle, CURLOPT_TIMEOUT, (int) $timeOut);
        curl_setopt($handle, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; MSIE 5.01; Windows NT 5.0)");
        curl_setopt($handle, CURLOPT_HTTPHEADER, array(
            'Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Connection: Keep-Alive'
        ));
        curl_setopt($handle, CURLOPT_FOLLOWLOCATION, 1);

        $cookieList = $this->loadConfig('COOKIELIST');
        if (is_array($cookieList) && !empty($cookieList['IsUseCookie']) && !empty($cookieList['CookiePath'])) {
            $cookieFilePath = $cookieList['CookiePath'];
            if (!file_exists($cookieFilePath)) {
                touch($cookieFilePath);
            }
            curl_setopt($handle, CURLOPT_COOKIEFILE, $cookieFilePath);
            curl_setopt($handle, CURLOPT_COOKIEJAR, $cookieFilePath);
        }

        $responseData = curl_exec($handle);
        $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE);
        curl_close($handle);

        if ($httpCode != 200) {
            return false;
        }

        return $responseData;
    }

    /**
     * Crawl $url, record it as a sitemap entry and recurse into every
     * same-site link that has not been seen and is not on the skip list.
     *
     * @param string $url
     * @return void
     * @access public
     */
    public function generateSiteMapList($url)
    {
        // Remember the page itself so a link back to it is not crawled again.
        $visitKey = rtrim(trim($url), '/');
        if (!in_array($visitKey, $this->webUrlList, true)) {
            $this->webUrlList[] = $visitKey;
        }

        $content = $this->sendRequest($url);
        if ($content === false) {
            return;
        }

        $tagsList = $this->_parseContent($content);

        $pageUrl = trim($url);
        $this->siteMapList[] = array(
            'Url' => $pageUrl,
            'Title' => trim($tagsList['Title']),
            'Priority' => $this->_calculatePriority($pageUrl),
            'ChangeFreq' => $this->_calculateChangefreq($pageUrl)
        );

        $skipUrlList = $this->loadConfig('SKIP_URLLIST');
        foreach ($tagsList['UrlItem'] as $nextUrl) {
            if (in_array($nextUrl, $this->webUrlList, true)) {
                continue;
            }
            if ($this->_isSkippedUrl($nextUrl, $skipUrlList)) {
                continue;
            }

            $this->webUrlList[] = $nextUrl;
            echo $nextUrl, "\r\n";
            $this->generateSiteMapList($nextUrl);
        }
    }

    /**
     * Tell whether a URL contains one of the configured skip keywords.
     *
     * @param string $url
     * @param mixed $skipUrlList list of keywords; anything else means "skip nothing"
     * @return bool
     * @access private
     */
    private function _isSkippedUrl($url, $skipUrlList)
    {
        if (!is_array($skipUrlList)) {
            return false;
        }
        foreach ($skipUrlList as $keyWords) {
            $keyWords = (string) $keyWords;
            if ($keyWords !== '' && stripos($url, $keyWords) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Return the sitemap entries collected so far.
     *
     * @access public
     * @return array $siteMapList
     */
    public function getSiteMapList()
    {
        return $this->siteMapList;
    }

    /**
     * Pick the priority of a URL from PRIORITYLIST (first matching keyword).
     *
     * @param string $targetUrl
     * @return mixed configured priority, or 0.5 when nothing matches
     * @access private
     */
    private function _calculatePriority($targetUrl)
    {
        return $this->_matchKeywordSetting($targetUrl, 'PRIORITYLIST', 0.5);
    }

    /**
     * Pick the change frequency of a URL from CHANGEFREQLIST
     * (first matching keyword).
     *
     * @param string $targetUrl
     * @return string configured frequency, or 'Daily' when nothing matches
     * @access private
     */
    private function _calculateChangefreq($targetUrl)
    {
        return $this->_matchKeywordSetting($targetUrl, 'CHANGEFREQLIST', 'Daily');
    }

    /**
     * Look up a keyword => value map in the config and return the value of
     * the first keyword that occurs (case-insensitively) in the URL.
     *
     * @param string $targetUrl
     * @param string $configName name of the config variable holding the map
     * @param mixed $default returned for invalid URLs or when nothing matches
     * @return mixed
     * @access private
     */
    private function _matchKeywordSetting($targetUrl, $configName, $default)
    {
        if (!filter_var($targetUrl, FILTER_VALIDATE_URL)) {
            return $default;
        }

        $keywordMap = $this->loadConfig($configName);
        if (!is_array($keywordMap)) {
            return $default;
        }

        foreach ($keywordMap as $keyword => $value) {
            $keyword = (string) $keyword;
            if ($keyword !== '' && stripos($targetUrl, $keyword) !== false) {
                return $value;
            }
        }

        return $default;
    }

    /**
     * Strip surrounding quotes, slashes, hashes and spaces from a raw href.
     *
     * @param string $url
     * @return string cleaned value, '' for empty input
     * @access private
     */
    private function _formatUrl($url)
    {
        if (!$url) {
            return '';
        }

        $formatUrl = trim($url, '"\'');
        $formatUrl = trim($formatUrl, '/');
        $formatUrl = trim($formatUrl, '# ');

        return $formatUrl;
    }

    /**
     * Check that a URL belongs to the configured DOMAIN_NAME.
     *
     * The URL has to start with DOMAIN_NAME; a mere occurrence somewhere in
     * the URL (for example inside a query string of another site) is rejected.
     *
     * @param string $url
     * @return bool
     * @access private
     */
    private function _checkDomain($url)
    {
        if (!$url) {
            return false;
        }

        $domainName = (string) $this->loadConfig('DOMAIN_NAME');
        if ($domainName === '') {
            return false;
        }

        return stripos($url, $domainName) === 0;
    }

    /**
     * Extract the same-site links and the page title from an HTML document.
     *
     * @param string $content HTML source
     * @return array array('UrlItem' => list of absolute URLs, 'Title' => string);
     *               both keys are always present
     * @access public
     */
    public function _parseContent($content)
    {
        $tagsList = array('UrlItem' => array(), 'Title' => '');
        if (empty($content)) {
            return $tagsList;
        }

        $baseUrl = rtrim((string) $this->loadConfig('DOMAIN_NAME'), '/');

        /* get the attribute of href for <a> tags (quoted or unquoted value) */
        $regStrForTagA = '#<a\s+(?:[^>]*?\s)?href\s*=\s*("[^"]*"|\'[^\']*\'|[^\s>]+)#isu';
        $urlItem = array();
        if (preg_match_all($regStrForTagA, $content, $matches)) {
            foreach ($matches[1] as $rawUrl) {
                if (stripos($rawUrl, 'javascript') !== false) {
                    continue;
                }

                // hrefs are HTML-escaped in the page source (&amp; etc.).
                $url = $this->_formatUrl(html_entity_decode($rawUrl, ENT_QUOTES, 'UTF-8'));
                if (empty($url)) {
                    continue;
                }

                // Only a leading scheme makes a link absolute; everything
                // else is resolved against the site root.
                if (preg_match('#^https?://#i', $url)) {
                    $targetUrl = $url;
                } else {
                    $targetUrl = $baseUrl . '/' . $url;
                }

                if ($this->_checkDomain($targetUrl) === false) {
                    continue;
                }
                if (!filter_var($targetUrl, FILTER_VALIDATE_URL)) {
                    continue;
                }

                $urlItem[] = $targetUrl;
            }
        }
        $tagsList['UrlItem'] = array_values(array_unique($urlItem));

        /* get the title tag content */
        $regStrForTitle = '#<title[^>]*>(.*?)</title>#isu';
        if (preg_match($regStrForTitle, $content, $titleMatch)) {
            $tagsList['Title'] = html_entity_decode($titleMatch[1], ENT_QUOTES, 'UTF-8');
        }

        return $tagsList;
    }
}

/* here is an example */
$startTime = microtime(true);
echo "/ */", "\r\n";
echo "/* start to run {$startTime} */", "\r\n";
echo "/ */", "\r\n\r\n";

$siteMap = new SiteMap();
$domain = $siteMap->loadConfig('DOMAIN_NAME');
$siteMap->generateSiteMapList($domain);
$siteMapList = $siteMap->getSiteMapList();
$siteMap->generateSiteMapXml($siteMapList);

$endTime = microtime(true);
$takeTime = $endTime - $startTime;
echo "/ */", "\r\n";
echo "/* Had Done, \t it total take {$takeTime} */", "\r\n";
echo "/ */", "\r\n";
?>
5、获取源码包
单击下载源代码(提取码:44fc)
关键字:php
版权声明
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处。如若内容有涉嫌抄袭侵权/违法违规/事实不符,请点击 举报 进行投诉反馈!