posts | commentsAdd to Google
22
Jul

Sitemap Builder

commentsNo comments

Site maps are important tools that give webmasters some control over how their sites are indexed. They do not give ultimate control by any stretch of the imagination: Google and Yahoo will still index websites however they see fit. A site map is more like a webmaster’s ‘tip’ to crawler bots, telling them which pages they should crawl, how often those pages change, and what priority the bots should give to certain pages.

What are they?

Site maps are nothing more than simple XML files based on the sitemap protocol. They are easy to create by hand, but most sites these days are dynamic and change based on user input, so the sitemap has to change along with them. For that you need a sitemap-building script. Here is a simple PHP class that allows you to create and modify sitemap.xml files.

The Script

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
<?php
/**
 * Creates and edits a sitemap.xml file that follows the sitemaps.org 0.9 protocol.
 *
 * The file named in the constructor is loaded when it exists and created (with an
 * empty <urlset> root) when it does not. Every successful AddURL()/RemoveURL()
 * call writes the document back to that same file.
 *
 * Error handling: the public methods return false on failure and true on success.
 * (Earlier versions returned null on success; a "=== false" check keeps working.)
 */
class sitemap
{
	/** Namespace URI that every element of a sitemap document lives in. */
	const SITEMAP_NS = 'http://www.sitemaps.org/schemas/sitemap/0.9';

	/** date() format used for <lastmod> values (W3C datetime with UTC offset). */
	const DATE_FORMAT = 'Y-m-d\TH:i:sP';

	/** <changefreq> values accepted by AddURL(). */
	private static $frequencies = array('always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never');

	/** Comparison operators accepted by RemoveURL() when searching on priority. */
	private static $operators = array('=', '>=', '<=', '>', '<', '!=');

	/** DOMDocument holding the sitemap. */
	private $DOM;

	/** Path of the sitemap file, exactly as passed to the constructor. */
	private $url;

	/** Return value of the last DOMDocument::save() call: bytes written, or false. */
	private $strXML;

	/**
	 * Opens the sitemap at $SiteMap, or creates a new empty one there.
	 *
	 * A constructor cannot report failure through a return value. If an existing
	 * file cannot be parsed, DOMDocument::load() emits its usual warning and the
	 * document is left without a <urlset>; AddURL() and RemoveURL() then return
	 * false instead of touching the file.
	 *
	 * @param string $SiteMap Path of the sitemap file.
	 */
	public function __construct($SiteMap)
	{
		$this->url = $SiteMap;
		$existing = realpath($this->url);

		if ($existing)
		{
			$this->DOM = new DOMDocument();
			$this->DOM->preserveWhiteSpace = false;
			$this->DOM->load($existing);
		}
		else
		{
			$this->DOM = new DOMDocument("1.0", "UTF-8");
			$this->DOM->appendChild($this->DOM->createElementNS(self::SITEMAP_NS, 'urlset'));
			$this->persist();
		}
	}

	/**
	 * Appends a <url> entry and saves the file.
	 *
	 * All arguments are validated before the document is modified, so a rejected
	 * call leaves both the in-memory document and the file unchanged.
	 *
	 * @param string $Location         Page URL, written to <loc>.
	 * @param string $LastModified     Optional date string understood by strtotime().
	 * @param string $ChangeFrequencey Optional; one of always, hourly, daily, weekly,
	 *                                 monthly, yearly, never.
	 * @param string $Priority         Optional; numeric, 0.0 to 1.0 inclusive.
	 * @return bool True when the entry was added and saved; false when an argument
	 *              is invalid, the document has no <urlset>, or saving failed.
	 */
	public function AddURL($Location, $LastModified = "", $ChangeFrequencey = "", $Priority = "")
	{
		$urlset = $this->urlSet();
		if ($urlset === null)
		{
			return false;
		}

		$lastmodText = null;
		if ($LastModified != "")
		{
			// strtotime() returns false for text it cannot parse; without this
			// check date() would silently write the 1970 epoch.
			$timestamp = strtotime($LastModified);
			if ($timestamp === false)
			{
				return false;
			}
			$lastmodText = date(self::DATE_FORMAT, $timestamp);
		}

		$hasFrequency = ($ChangeFrequencey != "");
		if ($hasFrequency && !in_array($ChangeFrequencey, self::$frequencies, true))
		{
			return false;
		}

		$hasPriority = ($Priority !== "" && $Priority !== null && $Priority !== false);
		if ($hasPriority)
		{
			// The protocol allows the end points 0.0 and 1.0 as well.
			if (!is_numeric($Priority) || (float)$Priority < 0 || (float)$Priority > 1)
			{
				return false;
			}
		}

		$url = $this->DOM->createElementNS(self::SITEMAP_NS, 'url');
		$url->appendChild($this->element('loc', $Location));

		if ($lastmodText !== null)
		{
			$url->appendChild($this->element('lastmod', $lastmodText));
		}

		if ($hasFrequency)
		{
			$url->appendChild($this->element('changefreq', $ChangeFrequencey));
		}

		if ($hasPriority)
		{
			$url->appendChild($this->element('priority', $Priority));
		}

		$urlset->appendChild($url);

		return $this->persist();
	}

	/**
	 * Removes every <url> entry that has a matching child element and saves the file.
	 *
	 * - 'loc':        entries whose <loc> contains $Search as a substring.
	 * - 'lastmod':    entries whose <lastmod> equals $Search after it is converted
	 *                 with strtotime() to the format AddURL() writes.
	 * - 'changefreq': entries whose <changefreq> equals $Search.
	 * - 'priority':   entries whose <priority>, compared as a number, satisfies
	 *                 "<priority> $Operator $Search".
	 *
	 * @param string $Search     Value to search for.
	 * @param string $SearchElem 'loc' (default), 'lastmod', 'changefreq' or 'priority'.
	 * @param string $Operator   Used for 'priority' only: =, >=, <=, >, < or !=.
	 * @return bool True when at least one entry was removed and the file was saved;
	 *              false when nothing matched, an argument is invalid, the document
	 *              has no <urlset>, or saving failed.
	 */
	public function RemoveURL($Search, $SearchElem = 'loc', $Operator = '=')
	{
		if ($this->urlSet() === null)
		{
			return false;
		}

		switch ($SearchElem)
		{
			case 'loc':
				$predicate = "contains(., " . $this->xpathLiteral($Search) . ")";
				break;

			case 'lastmod':
				$timestamp = strtotime($Search);
				if ($timestamp === false)
				{
					return false;
				}
				// date() output contains no quote characters, so plain quoting is safe.
				$predicate = ". = '" . date(self::DATE_FORMAT, $timestamp) . "'";
				break;

			case 'changefreq':
				$predicate = ". = " . $this->xpathLiteral($Search);
				break;

			case 'priority':
				// Both pieces end up inside the XPath expression, so only a
				// whitelisted operator and a numeric operand are let through.
				if (!in_array($Operator, self::$operators, true) || !is_numeric($Search))
				{
					return false;
				}
				$predicate = "number(.) " . $Operator . " number('" . trim($Search) . "')";
				break;

			default:
				return false;
		}

		$xpath = new DOMXPath($this->DOM);
		$xpath->registerNamespace("lib", self::SITEMAP_NS);
		$matches = $xpath->query("//lib:" . $SearchElem . "[" . $predicate . "]");

		if ($matches === false || $matches->length == 0)
		{
			return false;
		}

		// Collect the owning entries first; removing nodes while walking the
		// query result would modify the tree under the iteration.
		$entries = array();
		foreach ($matches as $node)
		{
			$entries[] = $node->parentNode;
		}

		foreach ($entries as $entry)
		{
			// An entry may already be detached if two matches shared a parent.
			if ($entry !== null && $entry->parentNode !== null)
			{
				$entry->parentNode->removeChild($entry);
			}
		}

		return $this->persist();
	}

	/**
	 * @return string The fixed text "sitemap".
	 */
	public function __toString()
	{
		return "sitemap";
	}

	/**
	 * Returns the first <urlset> element, or null when the document has none
	 * (for example after an existing file failed to load).
	 */
	private function urlSet()
	{
		if (!($this->DOM instanceof DOMDocument))
		{
			return null;
		}

		return $this->DOM->getElementsByTagName('urlset')->item(0);
	}

	/**
	 * Builds a sitemap-namespaced element holding $text as a text node.
	 *
	 * The element is created in the sitemap namespace so that the namespaced
	 * XPath queries in RemoveURL() also match entries added during this session.
	 */
	private function element($name, $text)
	{
		$node = $this->DOM->createElementNS(self::SITEMAP_NS, $name);
		$node->appendChild($this->DOM->createTextNode((string)$text));

		return $node;
	}

	/**
	 * Writes the document to the sitemap file.
	 *
	 * @return bool False when DOMDocument::save() reported failure.
	 */
	private function persist()
	{
		$this->strXML = $this->DOM->save($this->url);

		return $this->strXML !== false;
	}

	/**
	 * Quotes $value as an XPath 1.0 string literal.
	 *
	 * XPath 1.0 has no escape character, so a value containing both quote styles
	 * is assembled with concat() from single-quoted pieces joined by "'".
	 */
	private function xpathLiteral($value)
	{
		$value = (string)$value;

		if (strpos($value, "'") === false)
		{
			return "'" . $value . "'";
		}

		if (strpos($value, '"') === false)
		{
			return '"' . $value . '"';
		}

		return "concat('" . implode("', \"'\", '", explode("'", $value)) . "')";
	}
}
?>

Usage

To use the PHP class to add a url:

1
2
// Open sitemap.xml (it is created if it does not exist yet) and register one page.
$sitemap = new sitemap("sitemap.xml");
$sitemap->AddURL("http://www.mysite.com/mypage.html", "07/22/2008", "monthly", "0.8");

A couple of things to note. First, the script automatically converts the date into the format required by the sitemap protocol, so you can put in pretty much any valid date string and it will be transformed for you. Second, the only required parameter is the location; all other parameters are optional.

To remove the url:

1
2
// Open sitemap.xml and drop every entry whose <loc> contains "mysite".
$sitemap = new sitemap("sitemap.xml");
$sitemap->RemoveURL("mysite");

The only required argument for RemoveURL is $Search, and the default element that will be searched is ‘loc’. When ‘loc’ is searched, the function matches any string contained within the loc element (e.g. ‘mysite’ matches ‘http://www.mysite.com’). If you want to remove urls based on the last modified date, change the $SearchElem parameter to ‘lastmod’ and pass a valid date string in $Search. To remove urls based on the change frequency, change $SearchElem to ‘changefreq’ and pass ‘daily’, ‘weekly’, ‘monthly’ or ‘yearly’ in the $Search parameter. Finally, if you want to remove urls based on priority, change the $SearchElem parameter to ‘priority’. The last parameter ($Operator) is only used when you are removing based on priority. The valid values for $Operator are ‘=’ (default), ‘>=’, ‘<=’, ‘>’, ‘<’ and ‘!=’.

Quick Reference

AddURL
$Location required Datatype: string. The url to the page to be indexed.
$LastModified optional Datatype: string. The last time the url was changed.
$ChangeFrequencey optional Datatype: string. How often the page changes. (‘always’, ‘hourly’, ‘daily’, ‘weekly’, ‘monthly’, ‘yearly’, ‘never’)
$Priority optional Datatype: string. The priority of the page. Accepts values between 0.0 and 1.0.
RemoveURL
$Search required The search text that will be used to match the url.
$SearchElem optional The element that will be searched (‘loc’ is the default; ‘lastmod’, ‘changefreq’ and ‘priority’ are also accepted)
$Operator optional Only valid when searching ‘priority’.
Categories: PHP
Bookmark and Share

Tuesday, July 22nd, 2008 at 6:57 pm and is filed under PHP. You can follow any responses to this entry through the RSS 2.0 feed. You can leave a response, or trackback from your own site.

Leave a reply