Metadata API

How to create a simple API that fetches metadata from a URL.

Based on the repo from Dub.

They also have a hosted version at https://dub.co/tools/metatags

Create a SvelteKit endpoint

src/routes/api/metatags/+server.ts

import { json } from '@sveltejs/kit';
import { getMetaTags } from './utils';

/**
 * GET handler: looks up the meta tags of a fixed example URL and sends the
 * resulting object back as a JSON response.
 */
export const GET = async () => json(await getMetaTags('https://example.com'));

Create utils

src/routes/api/metatags/utils.ts

import he from 'he';
import { parse } from 'node-html-parser';

/**
 * Reports whether `url` can be parsed by the `URL` constructor without a base.
 *
 * @param url - Candidate string to check.
 * @returns `true` when parsing succeeds, `false` when the constructor throws.
 */
export const isValidUrl = (url: string) => {
	let parses = true;
	try {
		// Constructed only for its validation side effect; the instance is discarded.
		new URL(url);
	} catch {
		parses = false;
	}
	return parses;
};

/**
 * Downloads the body of `url` as text, identifying itself with a bot User-Agent.
 *
 * @param url - Address to request.
 * @returns The response body, or `null` if the request or the body read fails.
 */
export const getHtml = async (url: string) => {
	const requestOptions = {
		headers: {
			'User-Agent': 'Metatag Bot (https://example.com)'
		}
	};

	try {
		const response = await fetch(url, requestOptions);
		return await response.text();
	} catch {
		// Best effort: any failure is reported to the caller as "no HTML".
		return null;
	}
};

/**
 * Parses an HTML string and collects the pieces needed to build a link preview.
 *
 * The whole parsed document is queried, so `<meta>`, `<link>` and `<title>`
 * elements are picked up wherever they appear in the markup.
 *
 * @param html - Raw HTML source.
 * @returns `metaTags` (`property`/`content` pairs), `linkTags` (`rel`/`href`
 *   pairs) and the inner text of the first `<title>` element, if any.
 */
export const getHeadChildNodes = (html: string) => {
	// node-html-parser turns the markup into a queryable tree.
	const root = parse(html);

	const title = root.querySelector('title')?.innerText;

	const metaTags = root.querySelectorAll('meta').map((node) => {
		const attrs = node.attributes;
		// The tag's key is taken from the first non-empty of these attributes.
		return {
			property: attrs.property || attrs.name || attrs.href,
			content: attrs.content
		};
	});

	const linkTags = root.querySelectorAll('link').map((node) => {
		const attrs = node.attributes;
		return { rel: attrs.rel, href: attrs.href };
	});

	return { metaTags, title, linkTags };
};

/**
 * Turns an image reference found on a page into an absolute URL.
 *
 * @param url - Absolute URL of the page the reference was found on.
 * @param imageUrl - Image reference as written in the page; may be absolute,
 *   protocol-relative, root-relative or document-relative.
 * @returns `null` when `imageUrl` is empty, `imageUrl` unchanged when it is
 *   already absolute, otherwise the reference resolved against `url`.
 * @throws TypeError if `imageUrl` is relative and `url` is not a valid
 *   absolute URL.
 */
export const getRelativeUrl = (url: string, imageUrl: string) => {
	if (!imageUrl) {
		return null;
	}

	try {
		// Parses without a base only when already absolute; return it untouched.
		new URL(imageUrl);
		return imageUrl;
	} catch {
		// Not absolute: resolve it against the page below.
	}

	// Resolve against the full page URL rather than just its origin, so that
	// document-relative paths ("images/a.png") keep the page's directory.
	// Root-relative ("/a.png") and protocol-relative ("//cdn/a.png") references
	// resolve the same way as they would against the origin.
	return new URL(imageUrl, url).toString();
};

/**
 * Fetches a page and condenses its meta/link tags into a link-preview object.
 *
 * @param url - Address of the page to inspect.
 * @returns An object with:
 *   - `title`: `og:title`, `twitter:title` or the `<title>` text, else `url`.
 *   - `description`: `description`, `og:description` or `twitter:description`,
 *     else `'No description'`.
 *   - `image`: the first of `og:image`, `twitter:image`, `image_src`, `icon`,
 *     `shortcut icon`, passed through `getRelativeUrl`; `null` when none is found.
 *   - `color`: `theme-color` or `msapplication-TileColor`, else `null`.
 *   When the page cannot be fetched (or is empty) the same shape is returned
 *   with fallback values only.
 */
export const getMetaTags = async (url: string) => {
	const html = await getHtml(url);
	if (!html) {
		// Keep the fallback shape identical to the success path (including
		// `color`) so consumers can rely on every key being present.
		return {
			title: url,
			description: 'No description',
			image: null,
			color: null
		};
	}
	const { metaTags, title: titleTag, linkTags } = getHeadChildNodes(html);

	const tags: Record<string, string> = {};

	for (const { property, content } of metaTags) {
		// Only the first usable occurrence of a tag is kept; later duplicates
		// are ignored. Content is HTML-entity-decoded before storing.
		if (property && !tags[property]) {
			tags[property] = content && he.decode(content);
		}
	}

	for (const { rel, href } of linkTags) {
		// Same first-one-wins rule; a meta tag with the same key takes precedence.
		if (rel && !tags[rel]) {
			tags[rel] = href;
		}
	}

	const title = tags['og:title'] || tags['twitter:title'] || titleTag;

	const description =
		tags['description'] || tags['og:description'] || tags['twitter:description'];

	const image =
		tags['og:image'] ||
		tags['twitter:image'] ||
		tags['image_src'] ||
		tags['icon'] ||
		tags['shortcut icon'];

	const color = tags['theme-color'] || tags['msapplication-TileColor'];

	return {
		title: title || url,
		description: description || 'No description',
		image: getRelativeUrl(url, image),
		color: color || null
	};
};

TODO

  • Make sure to change the User-Agent to your own
  • Add caching to the endpoint to prevent unnecessary requests
  • Cache images to prevent unnecessary requests