| <?php
/**
 * Wave Framework <http://github.com/kristovaher/Wave-Framework>
 * Sitemap Handler
 *
 * Sitemap Handler is used to return sitemap.xml files, if a request is made to such a file. This 
 * handler either returns the existing /sitemap.xml file, or generates a new one based on sitemap 
 * files in /resources/ folder and the languages defined in configuration.
 *
 * @package    Index Gateway
 * @author     Kristo Vaher <[email protected] >
 * @copyright  Copyright (c) 2012, Kristo Vaher
 * @license    GNU Lesser General Public License Version 3
 * @tutorial   /doc/pages/handler_sitemap.htm
 * @since      1.5.0
 * @version    3.5.0
 */
// INITIALIZATION
	// This handler is expected to be included by Index Gateway, which is detected through
	// the presence of $resourceAddress; every other request is rejected
	if(isset($resourceAddress)===false){
		header('HTTP/1.1 403 Forbidden');
		exit;
	}
	// Response is always declared as UTF-8 encoded XML
	header('Content-Type: text/xml;charset=utf-8;');
	
	// Tracks whether the response was served from cache, this value is handed to logger later
	$cacheUsed=false;
	// Cache timeout falls back to four hours (14400 seconds) if configuration does not define one
	$config['sitemap-cache-timeout']=(isset($config['sitemap-cache-timeout']))?$config['sitemap-cache-timeout']:14400;
// GENERATING SITEMAP
	// A static sitemap.xml in root always takes precedence, sitemap is generated only if that file does not exist
	if(!file_exists(__ROOT__.'sitemap.xml')){
	
		// ASSIGNING PARAMETERS FROM REQUEST
			// Requested filename can carry additional flags separated by & symbol (such as 'nocache')
			$parameters=array_unique(explode('&',$resourceFile));
			// This flag is true if the request asked for cache to be bypassed
			$sitemapNoCache=in_array('nocache',$parameters,true);
			// Cache filename depends on system version, API version and the request string
			$cacheFilename=md5('sitemap.xml&'.$config['version-system'].'&'.$config['version-api'].'&'.$resourceRequest).'.tmp';
			// Cache files are stored in subfolders named after the first two characters of the cache filename
			$cacheDirectory=__ROOT__.'filesystem'.DIRECTORY_SEPARATOR.'cache'.DIRECTORY_SEPARATOR.'resources'.DIRECTORY_SEPARATOR.substr($cacheFilename,0,2).DIRECTORY_SEPARATOR;
			// Existence of cache is tracked explicitly instead of being deduced from timestamps
			$sitemapCacheExists=file_exists($cacheDirectory.$cacheFilename);
			if($sitemapCacheExists){
				// If cache file exists then its modification time is the last-modified time
				$lastModified=filemtime($cacheDirectory.$cacheFilename);
			} else {
				// Otherwise it is server request time
				$lastModified=$_SERVER['REQUEST_TIME'];
			}
			
		// GENERATING NEW SITEMAP OR LOADING FROM CACHE
			// Sitemap is generated if cache is turned off, if cache does not exist or if cache is older than the timeout
			if($sitemapNoCache || !$sitemapCacheExists || $lastModified<($_SERVER['REQUEST_TIME']-$config['sitemap-cache-timeout'])){
			
				// STATE AND DATABASE
					// State stores a lot of settings that are taken into account during Sitemap generation
					require(__ROOT__.'engine'.DIRECTORY_SEPARATOR.'class.www-state.php');
					$state=new WWW_State($config);
					// Connecting to database, if configuration is set
					// Uncomment this if you actually need to use database connection for sitemap.xml file
					// if(isset($config['database-name']) && $config['database-name']!='' && isset($config['database-type']) && isset($config['database-host']) && isset($config['database-username']) && isset($config['database-password'])){
						// require(__ROOT__.'engine'.DIRECTORY_SEPARATOR.'class.www-database.php');
						// $databaseConnection=new WWW_Database($config['database-type'],$config['database-host'],$config['database-name'],$config['database-username'],$config['database-password'],((isset($config['database-errors']))?$config['database-errors']:false),((isset($config['database-persistent']))?$config['database-persistent']:false));
					// }
					
				// GENERATING SITEMAP STRING 
					// Sitemap XML string is stored here
					$siteMapXML='';
					// Sitemap can only be generated if system actually uses languages and sitemap files
					if(!empty($state->data['languages'])){
						// XML header
						$siteMapXML.='<?xml version="1.0" encoding="utf-8"?>';
						
						// Defining sitemap schema, schemaLocation has to be a pair of namespace and schema address
						$siteMapXML.='<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">';
						
						// Root depends on whether website is forced to work on HTTPS or not
						// NOTE(review): HTTP_HOST is supplied by the client and the generated output is cached, consider using a configured hostname instead
						$root=((isset($config['limiter-https']) && $config['limiter-https']==true)?'https://':'http://').$_SERVER['HTTP_HOST'].$state->data['url-web'];
						
						// Every language defined in state is generated for sitemap
						foreach($state->data['languages'] as $language){
						
							// Overrides can be used if they are stored in /overrides/resources/ subfolder
							$siteMapFile=__ROOT__.'overrides'.DIRECTORY_SEPARATOR.'resources'.DIRECTORY_SEPARATOR.$language.'.sitemap.ini';
							if(!file_exists($siteMapFile)){
								// If there was no override, the URL Map is loaded from /resources/
								$siteMapFile=__ROOT__.'resources'.DIRECTORY_SEPARATOR.$language.'.sitemap.ini';
							}
							// URL Map always starts out empty, otherwise a language without a sitemap file would repeat the nodes of the previous language
							$siteMap=array();
							if(file_exists($siteMapFile)){
								$siteMap=parse_ini_file($siteMapFile,true,INI_SCANNER_RAW);
							}
							
							// If first language does not require language node in URL's then it is ignored
							if($language==$state->data['language'] && $state->data['enforce-first-language-url']==false){
								$langRoot=$root;
							} else {
								$langRoot=$root.$language.'/';
							}
							
							// As long as sitemap file is not empty (and was parsed without failure), the nodes are added to output
							if(!empty($siteMap)){
							
								// Read more about $node values from Sitemap files
								foreach($siteMap as $url=>$node){
								
									// URL's with permission requirements, hidden URL's and URL's with redirects are not placed in sitemap
									if((!isset($node['permissions']) || $node['permissions']=='*' || $node['permissions']=='') && (!isset($node['hidden']) || $node['hidden']!=true) && !isset($node['temporary-redirect']) && !isset($node['permanent-redirect'])){
									
										// Location is the full URL of the page
										if(strpos($url,':')!==false){
											// Only the part before the first colon is used as the address
											$urlParts=explode(':',$url);
											$location=$langRoot.$urlParts[0];
										} else {
											$location=$langRoot.$url.'/';
										}
									
										// Building single URL node
										// Every value is entity-escaped to keep the document well-formed, entities that are already escaped are not escaped twice
										$siteMapXML.='<url>';
											$siteMapXML.='<loc>'.htmlspecialchars($location,ENT_QUOTES,'UTF-8',false).'</loc>';
											// Priority is a value from 0.0 to 1.0 (default is 0.5). This tells how important this URL is in relation to other URL's
											if(isset($node['priority'])){
												$siteMapXML.='<priority>'.htmlspecialchars($node['priority'],ENT_QUOTES,'UTF-8',false).'</priority>';
											}
											// This can be 'always','hourly','daily','weekly','monthly','yearly','never' and tell robots how often this URL changes
											if(isset($node['change-frequency'])){
												$siteMapXML.='<changefreq>'.htmlspecialchars($node['change-frequency'],ENT_QUOTES,'UTF-8',false).'</changefreq>';
											}
											// It is possible to state in Sitemap when the URL was last modified
											if(isset($node['last-modified'])){
												// This should be in YYYY-MM-DD format
												$siteMapXML.='<lastmod>'.htmlspecialchars($node['last-modified'],ENT_QUOTES,'UTF-8',false).'</lastmod>';
											}
										$siteMapXML.='</url>';
									}
									
								}
								
							}
						}
						
						// Closing the sitemap tag
						$siteMapXML.='</urlset>';
						
					}
					
				// WRITING TO CACHE
				
					// Resource cache is cached in subdirectories, if directory does not exist then it is created together with missing parent folders
					if(!is_dir($cacheDirectory)){
						if(!mkdir($cacheDirectory,0755,true)){
							trigger_error('Cannot create cache folder',E_USER_ERROR);
						}
					}
					
					// Data is written to cache file, only an actual failure is an error since writing zero bytes is valid for an empty sitemap
					if(file_put_contents($cacheDirectory.$cacheFilename,$siteMapXML)===false){
						trigger_error('Cannot create resource cache',E_USER_ERROR);
					}
					
					// Content was generated during this request, so modification time of an expired cache file must not be reported
					$lastModified=$_SERVER['REQUEST_TIME'];
				
			} else {
				// Notifying logger that cache was used
				$cacheUsed=true;
			}
			
		// HEADERS
			// Cache headers depend on whether the request asked for cache to be bypassed
			if($sitemapNoCache){
				// User agent is told not to cache or store the result
				header('Cache-Control: no-cache,no-store');
				header('Expires: '.gmdate('D, d M Y H:i:s',$_SERVER['REQUEST_TIME']).' GMT');
			} else {
				// User agent is told to cache these results for set duration
				header('Cache-Control: public,max-age='.$config['sitemap-cache-timeout']);
				header('Expires: '.gmdate('D, d M Y H:i:s',($_SERVER['REQUEST_TIME']+$config['sitemap-cache-timeout'])).' GMT');
			}
			// Last modified header is sent in both cases
			header('Last-Modified: '.gmdate('D, d M Y H:i:s',$lastModified).' GMT');
			
			// Content length of the file
			$contentLength=filesize($cacheDirectory.$cacheFilename);
			// Content length is defined that can speed up website requests, letting user agent to determine file size
			header('Content-Length: '.$contentLength);
			
		// OUTPUT
			// Returning the file to user agent
			readfile($cacheDirectory.$cacheFilename);
			// File is deleted if cache was requested to be off
			if($sitemapNoCache){
				unlink($cacheDirectory.$cacheFilename);
			}
	} else {
	
		// RETURNING EXISTING SITEMAP
		
			// This is technically considered as using cache
			$cacheUsed=true;
		
			// Cache headers
			header('Cache-Control: public,max-age='.$config['sitemap-cache-timeout']);
			header('Expires: '.gmdate('D, d M Y H:i:s',($_SERVER['REQUEST_TIME']+$config['sitemap-cache-timeout'])).' GMT');
			// Last modified header
			header('Last-Modified: '.gmdate('D, d M Y H:i:s',filemtime(__ROOT__.'sitemap.xml')).' GMT');
			// Content length of the file
			$contentLength=filesize(__ROOT__.'sitemap.xml');
			// Content length is defined that can speed up website requests, letting user agent to determine file size
			header('Content-Length: '.$contentLength);
			// Since sitemap.xml did exist in root, it is simply returned
			readfile(__ROOT__.'sitemap.xml');
		
	}
	
// WRITING TO LOG
	// Request is logged only if a logger object is defined, the entry can be used for performance review later
	if(isset($logger)){
		// Query count is read from $databaseConnection if it is defined, otherwise zero queries are reported
		$sitemapQueryCount=0;
		if(isset($databaseConnection)){
			$sitemapQueryCount=$databaseConnection->queryCounter;
		}
		// Custom log data describes this request: category, cache usage, response size and query count
		$sitemapLogData=array(
			'category'=>'sitemap',
			'cache-used'=>$cacheUsed,
			'content-length-used'=>$contentLength,
			'database-query-count'=>$sitemapQueryCount
		);
		$logger->setCustomLogData($sitemapLogData);
		// Writing log entry
		$logger->writeLog('sitemap');
	}
?>
 |