Automatically Redirect 404s in HubSpot CMS to the Closest Page
I needed to automatically redirect thousands of "Not Found" pages for one of our clients using JavaScript, even though server-side redirects are traditionally the preferred method.
These were not critical top-level pages, but automatically generated pages or variations that no longer exist.
This script activates on your 404 page. If a user hits a non-existent page, the script checks your sitemap, finds the most similar existing page URL, and redirects them there.
For instance, if "/how-to-write-javascript-code/" no longer exists and has been changed to "/blog/write-javascript-code/", the script identifies and redirects to the correct page by matching keywords. It also includes customizable settings for improved flexibility, like excluding certain keywords and setting a minimum word length.
Here's the script:
<script>
// Settings: the only values you normally need to edit for your own site.
const baseUrl = 'https://domain.com/'; // Your website root. It is stripped from each sitemap URL before keyword matching, so it must match how URLs are written in the sitemap.
const sitemapURL = 'https://domain.com/sitemap.xml'; // Your sitemap URL. The script downloads it and reads every <loc> entry.
const notFoundTitleKeyword = 'Not Found'; // Text that appears in the <title> of your "Not Found" page (e.g. "404" or "Not Found"). The script only runs when document.title contains it.
const ignoreWords = ['blog', 'feed', 'guide']; // Common words to ignore. These should be low-value words that are present in most of your URLs. They are compared against whole path segments of the current URL.
const minWordLength = 3; // Words must be LONGER than this many characters to count as keywords (the code compares with ">"), so 3 ignores low-value words like "to", "a", "the", etc.
// Only edit below here if you know what you're doing:
document.addEventListener('DOMContentLoaded', function() {
	if (document.title.includes(notFoundTitleKeyword)) {
		/**
		 * Download a sitemap and return the page URLs it lists.
		 *
		 * @param {string} url - Address of the sitemap XML document.
		 * @returns {Promise<string[]>} The trimmed, non-empty text of every <loc>
		 *   element. Resolves to an empty array (after logging the error) when the
		 *   request fails, the server answers with an HTTP error status, or the
		 *   response cannot be read.
		 */
		async function fetchSitemap(url) {
			try {
				const response = await fetch(url);
				// fetch() only rejects on network failure; an HTTP error page must
				// not be parsed as if it were the sitemap.
				if (!response.ok) {
					throw new Error(`Sitemap request failed with status ${response.status}`);
				}
				const text = await response.text();
				const xmlDoc = new DOMParser().parseFromString(text, "text/xml");
				// Pretty-printed sitemaps often wrap the URL in whitespace/newlines,
				// which would otherwise end up in the redirect target.
				return Array.from(xmlDoc.querySelectorAll('loc'))
					.map(node => node.textContent.trim())
					.filter(loc => loc.length > 0);
			} catch (error) {
				console.error('Error fetching sitemap:', error);
				return [];
			}
		}
		/**
		 * Turn the current page's path into a list of search keywords.
		 *
		 * The path is lower-cased, split on "/" and "-", and filtered so that
		 * purely numeric segments, entries of `ignoreWords` (whether they appear
		 * as a whole segment or as one word of a hyphenated slug) and words that
		 * are not longer than `minWordLength` are dropped.
		 *
		 * @returns {string[]} Lower-case keywords in path order; empty when the
		 *   path contains nothing usable.
		 */
		function processCurrentURL() {
			// Compare case-insensitively so "/Blog/Some-Post/" behaves like "/blog/some-post/".
			const ignored = new Set(ignoreWords.map(word => word.toLowerCase()));
			const segments = window.location.pathname
				.toLowerCase()
				.split('/')
				// Purely numeric segments (IDs, years, page numbers) carry no keyword value.
				.filter(segment => segment.length > 0 && !ignored.has(segment) && !/^\d+$/.test(segment));
			// Re-joining with "-" lets a single split break every slug into words.
			return segments
				.join('-')
				.split('-')
				.filter(word => word.length > minWordLength && !ignored.has(word));
		}
		/**
		 * Pick the sitemap URL whose path shares the most keywords with the
		 * current page.
		 *
		 * A keyword counts once per URL when it occurs as a substring of any of
		 * that URL's "/"-separated parts (after `baseUrl` has been removed).
		 *
		 * @param {string[]} currentKeywords - Keywords extracted from the current path.
		 * @param {string[]} urls - Candidate URLs from the sitemap.
		 * @returns {string} The highest-scoring URL (the earliest one on a tie),
		 *   or an empty string when no URL matches any keyword.
		 */
		function findClosestMatch(currentKeywords, urls) {
			let winner = '';
			let topScore = 0;
			for (const candidate of urls) {
				const pieces = candidate
					.replace(baseUrl, '')
					.split('/')
					.filter(piece => piece.length > 0);
				const hits = currentKeywords
					.filter(keyword => pieces.some(piece => piece.includes(keyword)))
					.length;
				// Strictly greater: on equal scores the earlier URL stays the winner.
				if (hits > topScore) {
					topScore = hits;
					winner = candidate;
				}
			}
			return winner;
		}
		/**
		 * Fetch the sitemap, find the URL closest to the current (missing) page
		 * and navigate there, replacing the current history entry.
		 *
		 * Nothing happens when the sitemap is empty or unavailable, when no URL
		 * shares a keyword with the current path, or when the best match is the
		 * page already being viewed.
		 *
		 * @returns {Promise<void>}
		 */
		async function redirectToClosestMatch() {
		  const sitemapUrls = await fetchSitemap(sitemapURL);
		  if (sitemapUrls.length === 0) {
			  console.log('Sitemap is empty or could not be fetched.');
			  return;
		  }
		  const currentKeywords = processCurrentURL();
		  const closestMatch = findClosestMatch(currentKeywords, sitemapUrls);
		  if (!closestMatch) {
			  console.log('No close match found.');
			  return;
		  }
		  // A stale sitemap can still list the very URL that now returns 404.
		  // Redirecting to it would reload this page and run the script again, forever.
		  const withoutTrailingSlash = path => path.replace(/\/+$/, '');
		  let isCurrentPage = false;
		  try {
			  const target = new URL(closestMatch, window.location.href);
			  isCurrentPage = target.origin === window.location.origin &&
				  withoutTrailingSlash(target.pathname) === withoutTrailingSlash(window.location.pathname);
		  } catch (error) {
			  // The sitemap entry is not a parseable URL; it cannot be compared, so
			  // fall through and let the browser handle it as before.
			  isCurrentPage = false;
		  }
		  if (isCurrentPage) {
			  console.log('Closest match is the current page; not redirecting.');
			  return;
		  }
		  console.log('Redirecting to:', closestMatch);
		  window.location.replace(closestMatch);
		}
		redirectToClosestMatch();
	} 
	else {
		console.log('Incorrect page title:', document.title);
	}
});
</script>
To add this to your site:
- Click the gear in the upper right
- Click the "Website" dropdown under "Tools" in the bottom left
- Click "Pages"
- Add to your site footer HTML.
All done! Now, go try visiting the wrong URL or adding a couple of extra characters after one of your blog post links. You'll see it gracefully redirects to the closest page.
While this was designed specifically for a client on HubSpot, it can theoretically work with just about any CMS.
What do you think? Leave me a comment below!
 
  
  
  
  
  
  
  
  
  
 
Comments