Site maps are important tools that give webmasters some control over how their sites are indexed. They do not give ultimate control by any stretch of the imagination: Google and Yahoo will still index websites however they see fit. A site map is more like a webmaster’s ‘tip’ to crawler bots, telling them which pages they should crawl, how often those pages change, and what priority the bots should give to certain pages.
What are they?
Site maps are nothing more than simple XML files based on the Sitemap protocol. They are easy to create by hand, but most sites these days are dynamic and change based on user input, so to keep the site map up to date you need a site map building script. Here is a simple PHP class that allows you to create and modify sitemap.xml files.
The Script
<?php

/**
 * Creates and maintains a sitemap.xml file that follows the sitemaps.org protocol.
 *
 * The sitemap is held in memory as a DOMDocument and written back to the file
 * passed to the constructor after every successful AddURL() / RemoveURL() call.
 * Both methods return true on success and false on failure.
 */
class sitemap
{
    /** Namespace given to the root element of newly created sitemaps. */
    const SITEMAP_NS = 'http://www.sitemaps.org/schemas/sitemap/0.9';

    /** @var string[] Values the sitemap protocol allows inside <changefreq>. */
    private static $frequencies = ['always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never'];

    /** @var DOMDocument In-memory copy of the sitemap. */
    private $DOM;

    /** @var string Path of the sitemap file, exactly as passed to the constructor. */
    private $url;

    /** @var int|false Result of the most recent DOMDocument::save() call. */
    private $strXML;

    /**
     * Opens the sitemap at $SiteMap, or creates an empty one when no such file exists.
     *
     * An existing file that cannot be parsed is left untouched on disk; AddURL()
     * and RemoveURL() then return false instead of overwriting it.
     *
     * @param string $SiteMap Path of the sitemap file.
     */
    public function __construct($SiteMap)
    {
        $this->url = $SiteMap;
        $this->DOM = new DOMDocument('1.0', 'UTF-8');
        $this->DOM->preserveWhiteSpace = false;

        $existing = realpath($this->url);
        if ($existing !== false) {
            $this->DOM->load($existing);
            return;
        }

        $root = $this->DOM->createElementNS(self::SITEMAP_NS, 'urlset');
        $this->DOM->appendChild($root);
        $this->strXML = $this->DOM->save($this->url);
    }

    /**
     * Appends a <url> entry and saves the sitemap.
     *
     * All input is validated before the document is touched, so a rejected call
     * leaves both the in-memory document and the file unchanged.
     *
     * @param string $Location         URL of the page (required, must not be empty).
     * @param string $LastModified     Any date string strtotime() understands; stored in W3C format.
     * @param string $ChangeFrequencey One of always, hourly, daily, weekly, monthly, yearly, never.
     * @param string $Priority         Decimal between 0.0 and 1.0 inclusive. A value outside that
     *                                 range (or not a plain decimal) is ignored and the entry is
     *                                 still added without a <priority> element.
     *
     * @return bool True when the entry was added and saved; false on an empty location,
     *              an unparsable date, an unknown change frequency, a missing <urlset>
     *              root, or a failed save.
     */
    public function AddURL($Location, $LastModified = "", $ChangeFrequencey = "", $Priority = "")
    {
        $root = $this->rootElement();
        if ($root === null || trim((string) $Location) === '') {
            return false;
        }

        // Collected in the order the protocol lists the child elements.
        $children = ['loc' => (string) $Location];

        if (!$this->isBlank($LastModified)) {
            $timestamp = strtotime((string) $LastModified);
            if ($timestamp === false) {
                // Without this check an unparsable date would be written as 1970-01-01.
                return false;
            }
            $children['lastmod'] = date('Y-m-d\TH:i:sP', $timestamp);
        }

        if (!$this->isBlank($ChangeFrequencey)) {
            if (!in_array((string) $ChangeFrequencey, self::$frequencies, true)) {
                return false;
            }
            $children['changefreq'] = (string) $ChangeFrequencey;
        }

        if (!$this->isBlank($Priority)) {
            $priority = trim((string) $Priority);
            // Plain non-negative decimal only; both bounds 0.0 and 1.0 are valid priorities.
            if (preg_match('/^\d*\.?\d+$/', $priority) === 1 && (float) $priority <= 1.0) {
                $children['priority'] = $priority;
            }
        }

        // Attach <url> to the root first, then fill it, so every new element is
        // placed in the root's namespace without redundant xmlns declarations.
        $entry = $root->appendChild($this->newElement($root, 'url'));
        foreach ($children as $name => $text) {
            $child = $entry->appendChild($this->newElement($root, $name));
            $child->appendChild($this->DOM->createTextNode($text));
        }

        return $this->persist();
    }

    /**
     * Removes every <url> entry that has a matching child element and saves the sitemap.
     *
     * @param string $Search     Text to look for. For 'loc' any entry whose location contains
     *                           the text matches; for 'lastmod' a date string that must equal the
     *                           stored timestamp once converted to W3C format; for 'changefreq'
     *                           the exact value; for 'priority' a number.
     * @param string $SearchElem Element to inspect: 'loc' (default), 'lastmod', 'changefreq' or 'priority'.
     * @param string $Operator   Only used for 'priority'; one of =, !=, <, <=, >, >=. The comparison
     *                           is numeric, so '0.8' also matches a stored '0.80'.
     *
     * @return bool True when at least one entry was removed and the file was saved; false when
     *              nothing matched, the arguments were invalid or empty, the <urlset> root is
     *              missing, or the save failed.
     */
    public function RemoveURL($Search, $SearchElem = 'loc', $Operator = '=')
    {
        $root = $this->rootElement();
        $needle = (string) $Search;
        // An empty needle would match (and delete) every entry, so it is rejected.
        if ($root === null || $needle === '') {
            return false;
        }

        $element = (string) $SearchElem;
        switch ($element) {
            case 'loc':
                $predicate = 'contains(., ' . $this->xpathLiteral($needle) . ')';
                break;

            case 'lastmod':
                $timestamp = strtotime($needle);
                if ($timestamp === false) {
                    return false;
                }
                $predicate = '. = ' . $this->xpathLiteral(date('Y-m-d\TH:i:sP', $timestamp));
                break;

            case 'changefreq':
                $predicate = '. = ' . $this->xpathLiteral($needle);
                break;

            case 'priority':
                // The operator is checked against a fixed list because it is
                // spliced directly into the XPath expression.
                if (!in_array($Operator, ['=', '!=', '<', '<=', '>', '>='], true) || !is_numeric($needle)) {
                    return false;
                }
                // %F is locale-independent, giving a valid XPath number literal.
                $predicate = 'number(.) ' . $Operator . ' ' . sprintf('%F', (float) $needle);
                break;

            default:
                return false;
        }

        $xpath = new DOMXPath($this->DOM);
        $prefix = '';
        if ($root->namespaceURI !== null && $root->namespaceURI !== '') {
            $xpath->registerNamespace('sm', $root->namespaceURI);
            $prefix = 'sm:';
        }

        // Restricting the match to direct children of <url> guarantees that the
        // parent of every hit is the entry to delete.
        $matches = $xpath->query('//' . $prefix . 'url/' . $prefix . $element . '[' . $predicate . ']');
        if ($matches === false || $matches->length === 0) {
            return false;
        }

        // Snapshot the entries before mutating the tree.
        $entries = [];
        foreach ($matches as $node) {
            $entries[] = $node->parentNode;
        }
        foreach ($entries as $entry) {
            // An entry with two matching children appears twice; skip it once detached.
            if ($entry->parentNode !== null) {
                $entry->parentNode->removeChild($entry);
            }
        }

        return $this->persist();
    }

    /**
     * @return string Always the literal string "sitemap".
     */
    public function __toString()
    {
        return "sitemap";
    }

    /** Returns the <urlset> root element, or null when the document has none. */
    private function rootElement()
    {
        $root = $this->DOM->documentElement;

        return ($root !== null && $root->localName === 'urlset') ? $root : null;
    }

    /** Creates an element in the same namespace as the root element. */
    private function newElement($root, $name)
    {
        $namespace = $root->namespaceURI;
        if ($namespace === null || $namespace === '') {
            return $this->DOM->createElement($name);
        }

        return $this->DOM->createElementNS($namespace, $name);
    }

    /** Writes the document to disk and reports whether the write succeeded. */
    private function persist()
    {
        $this->strXML = $this->DOM->save($this->url);

        return $this->strXML !== false;
    }

    /** Tells whether an optional argument was left out (null, false or empty string). */
    private function isBlank($value)
    {
        return $value === null || $value === false || $value === '';
    }

    /** Quotes a string as an XPath 1.0 literal, using concat() when it holds both quote kinds. */
    private function xpathLiteral($value)
    {
        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }

        return "concat('" . implode("', \"'\", '", explode("'", $value)) . "')";
    }
}
Usage
To use the PHP class to add a url:
// Open sitemap.xml and append one entry to it.
$siteMap = new sitemap("sitemap.xml");
$siteMap->AddURL(
    "http://www.mysite.com/mypage.html", // location
    "07/22/2008",                        // last modified
    "monthly",                           // change frequency
    "0.8"                                // priority
);
A couple of things to note. First, the script automatically converts the date into the format required by the sitemap protocol, so you can pass in pretty much any valid date string and it will be transformed for you. Second, the only required parameter is the location; all other parameters are optional.
To remove the url:
// Open sitemap.xml and remove entries, searching the 'loc' element for "mysite".
$siteMap = new sitemap("sitemap.xml");
$siteMap->RemoveURL("mysite");
The only required argument for RemoveURL is $Search, and the default element that will be searched is ‘loc’. When ‘loc’ is searched, the function matches any string within the loc element (for example, ‘mysite’ matches ‘http://www.mysite.com’). If you want to remove URLs based on the last modified date, change the $SearchElem parameter to ‘lastmod’ and pass a valid date string in $Search. To remove URLs based on the change frequency, change $SearchElem to ‘changefreq’ and pass a change frequency value such as ‘daily’, ‘weekly’, ‘monthly’ or ‘yearly’ in the $Search parameter. Last of all, if you want to remove URLs based on priority, change the $SearchElem parameter to ‘priority’. The last parameter ($Operator) is only valid when you are removing based on priority. The valid values for $Operator are ‘=’ (default), ‘>=’, ‘<=’, ‘>’, ‘<’ and ‘!=’.
Quick Reference
| AddURL | ||
|---|---|---|
| $Location | required | Datatype: string. The url to the page to be indexed. |
| $LastModified | optional | Datatype: string. The last time the url was changed. |
| $ChangeFrequencey | optional | Datatype: string. How often the page changes. (‘always’, ‘hourly’, ‘daily’, ‘weekly’, ‘monthly’, ‘yearly’, ‘never’) |
| $Priority | optional | Datatype: string. The priority of the page. Accepts values between 0.0 and 1.0. |
| RemoveURL | ||
|---|---|---|
| $Search | required | The search text that will be used to match the url. |
| $SearchElem | optional | The element that will be searched (‘loc’ is default, ‘lastmod’, ‘changefreq’, ‘priority’) |
| $Operator | optional | Only valid when searching ‘priority’. |
Leave a Reply
You must be logged in to post a comment.
