chore: refactor domain parsing to be more general (#3448)

* change the API of domain parsing to just parseDomain and getCookieDomain
* add getBaseDomain as the function that returns the app's domain with any reserved subdomains (e.g. "app") removed
This commit is contained in:
Nan Yu
2022-05-31 18:48:23 -07:00
committed by GitHub
parent 876f788f59
commit 41e425756d
16 changed files with 216 additions and 237 deletions

View File

@@ -1,149 +1,180 @@
import { stripSubdomain, parseDomain, isCustomSubdomain } from "./domains";
import env from "@shared/env";
import { parseDomain, getCookieDomain } from "./domains";
// test suite is based on subset of parse-domain module we want to support
// https://github.com/peerigon/parse-domain/blob/master/test/parseDomain.test.js
describe("#parseDomain", () => {
beforeEach(() => {
env.URL = "https://example.com";
});
it("should remove the protocol", () => {
expect(parseDomain("http://example.com")).toMatchObject({
subdomain: "",
domain: "example",
tld: "com",
teamSubdomain: "",
host: "example.com",
custom: false,
});
expect(parseDomain("//example.com")).toMatchObject({
subdomain: "",
domain: "example",
tld: "com",
teamSubdomain: "",
host: "example.com",
custom: false,
});
expect(parseDomain("https://example.com")).toMatchObject({
subdomain: "",
domain: "example",
tld: "com",
teamSubdomain: "",
host: "example.com",
custom: false,
});
});
it("should remove sub-domains", () => {
it("should find team sub-domains", () => {
expect(parseDomain("myteam.example.com")).toMatchObject({
teamSubdomain: "myteam",
host: "myteam.example.com",
custom: false,
});
});
it("should ignore reserved sub-domains", () => {
expect(parseDomain("www.example.com")).toMatchObject({
subdomain: "www",
domain: "example",
tld: "com",
teamSubdomain: "",
host: "www.example.com",
custom: false,
});
});
it("should remove the path", () => {
expect(parseDomain("example.com/some/path?and&query")).toMatchObject({
subdomain: "",
domain: "example",
tld: "com",
teamSubdomain: "",
host: "example.com",
custom: false,
});
expect(parseDomain("example.com/")).toMatchObject({
subdomain: "",
domain: "example",
tld: "com",
teamSubdomain: "",
host: "example.com",
custom: false,
});
});
it("should remove the query string", () => {
expect(parseDomain("example.com?and&query")).toMatchObject({
subdomain: "",
domain: "example",
tld: "com",
expect(parseDomain("www.example.com?and&query")).toMatchObject({
teamSubdomain: "",
host: "www.example.com",
custom: false,
});
});
it("should remove special characters", () => {
expect(parseDomain("http://m.example.com\r")).toMatchObject({
subdomain: "m",
domain: "example",
tld: "com",
expect(parseDomain("http://example.com\r")).toMatchObject({
teamSubdomain: "",
host: "example.com",
custom: false,
});
});
it("should remove the port", () => {
expect(parseDomain("example.com:8080")).toMatchObject({
subdomain: "",
domain: "example",
tld: "com",
teamSubdomain: "",
host: "example.com",
custom: false,
});
});
it("should allow @ characters in the path", () => {
expect(parseDomain("https://medium.com/@username/")).toMatchObject({
subdomain: "",
domain: "medium",
tld: "com",
teamSubdomain: "",
host: "medium.com",
custom: true,
});
});
it("should also work with three-level domains like .co.uk", () => {
expect(parseDomain("www.example.co.uk")).toMatchObject({
subdomain: "www",
domain: "example",
tld: "co.uk",
});
});
it("should not include private domains like blogspot.com by default", () => {
it("should recognize include private domains like blogspot.com as custom", () => {
expect(parseDomain("foo.blogspot.com")).toMatchObject({
subdomain: "foo",
domain: "blogspot",
tld: "com",
teamSubdomain: "",
host: "foo.blogspot.com",
custom: true,
});
});
it("should also work with the minimum", () => {
expect(parseDomain("example.com")).toMatchObject({
subdomain: "",
domain: "example",
tld: "com",
teamSubdomain: "",
host: "example.com",
custom: false,
});
});
it("should return null if the given value is not a string", () => {
expect(parseDomain(undefined)).toBe(null);
expect(parseDomain("")).toBe(null);
it("should throw a TypeError if the given value is not a valid string", () => {
expect(() => parseDomain("")).toThrow(TypeError);
});
it("should also work with three-level domains like .co.uk", () => {
env.URL = "https://example.co.uk";
expect(parseDomain("myteam.example.co.uk")).toMatchObject({
teamSubdomain: "myteam",
host: "myteam.example.co.uk",
custom: false,
});
});
it("should work with custom top-level domains (eg .local)", () => {
expect(parseDomain("mymachine.local")).toMatchObject({
subdomain: "",
domain: "mymachine",
tld: "local",
env.URL = "mymachine.local";
expect(parseDomain("myteam.mymachine.local")).toMatchObject({
teamSubdomain: "myteam",
host: "myteam.mymachine.local",
custom: false,
});
});
it("should work with localhost", () => {
env.URL = "http://localhost:3000";
expect(parseDomain("https://localhost:3000/foo/bar?q=12345")).toMatchObject(
{
teamSubdomain: "",
host: "localhost",
custom: false,
}
);
});
it("should work with localhost subdomains", () => {
env.URL = "http://localhost:3000";
expect(parseDomain("https://www.localhost:3000")).toMatchObject({
teamSubdomain: "",
host: "www.localhost",
custom: false,
});
expect(parseDomain("https://myteam.localhost:3000")).toMatchObject({
teamSubdomain: "myteam",
host: "myteam.localhost",
custom: false,
});
});
});
describe("#stripSubdomain", () => {
test("to work with localhost", () => {
expect(stripSubdomain("localhost")).toBe("localhost");
describe("#getCookieDomain", () => {
beforeEach(() => {
env.URL = "https://example.com";
env.SUBDOMAINS_ENABLED = true;
});
test("to return domains without a subdomain", () => {
expect(stripSubdomain("example")).toBe("example");
expect(stripSubdomain("example.com")).toBe("example.com");
expect(stripSubdomain("example.org:3000")).toBe("example.org");
it("returns the normalized app host when on the host domain", () => {
expect(getCookieDomain("subdomain.example.com")).toBe("example.com");
expect(getCookieDomain("www.example.com")).toBe("example.com");
expect(getCookieDomain("http://example.com:3000")).toBe("example.com");
expect(getCookieDomain("myteam.example.com/document/12345?q=query")).toBe(
"example.com"
);
});
test("to remove subdomains", () => {
expect(stripSubdomain("test.example.com")).toBe("example.com");
expect(stripSubdomain("test.example.com:3000")).toBe("example.com");
});
});
describe("#isCustomSubdomain", () => {
test("to work with localhost", () => {
expect(isCustomSubdomain("localhost")).toBe(false);
});
test("to return false for domains without a subdomain", () => {
expect(isCustomSubdomain("example")).toBe(false);
expect(isCustomSubdomain("example.com")).toBe(false);
expect(isCustomSubdomain("example.org:3000")).toBe(false);
});
test("to return false for www", () => {
expect(isCustomSubdomain("www.example.com")).toBe(false);
expect(isCustomSubdomain("www.example.com:3000")).toBe(false);
});
test("to return true for subdomains", () => {
expect(isCustomSubdomain("test.example.com")).toBe(true);
expect(isCustomSubdomain("test.example.com:3000")).toBe(true);
it("returns the input if not on the host domain", () => {
expect(getCookieDomain("www.blogspot.com")).toBe("www.blogspot.com");
expect(getCookieDomain("anything else")).toBe("anything else");
});
it("always returns the input when subdomains are not enabled", () => {
env.SUBDOMAINS_ENABLED = false;
expect(getCookieDomain("example.com")).toBe("example.com");
expect(getCookieDomain("www.blogspot.com")).toBe("www.blogspot.com");
expect(getCookieDomain("anything else")).toBe("anything else");
});
});

View File

@@ -1,85 +1,72 @@
import { trim } from "lodash";
import env from "../env";
type Domain = {
tld: string;
subdomain: string;
domain: string;
teamSubdomain: string;
host: string;
custom: boolean;
};
/**
 * Reduces a URL-like string to its leading host portion.
 *
 * The first occurrence of an optional `http:` / `https:` followed by `//` is
 * removed, surrounding whitespace is trimmed, and everything from the first
 * `/`, `:` or `?` onwards (path, port, query string) is discarded.
 *
 * @param url The URL or host string to normalize.
 * @returns The text that precedes the first `/`, `:` or `?` separator.
 */
function normalizeUrl(url: string) {
  const withoutProtocol = url.replace(/(https?:)?\/\//, "");
  const [host] = trim(withoutProtocol).split(/[/:?]/);
  return host;
}
/**
 * Returns the base domain derived from `env.URL`.
 *
 * The base domain is where root cookies are set in hosted mode. It is also
 * appended to a team's hosted subdomain to form their app URL.
 *
 * @returns The normalized `env.URL` host, with its first label dropped when
 * that label is a reserved subdomain (such as "app") and at least one more
 * label follows it.
 */
export function getBaseDomain() {
  const envHost = normalizeUrl(env.URL);
  const [firstLabel, ...remainingLabels] = envHost.split(".");

  // Only strip the leading label when something is left to form a domain.
  if (remainingLabels.length > 0 && RESERVED_SUBDOMAINS.includes(firstLabel)) {
    return remainingLabels.join(".");
  }

  return envHost;
}
// we originally used the parse-domain npm module however this includes
// a large list of possible TLD's which increase the size of the bundle
// unnecessarily for our usecase of trusted input.
export function parseDomain(url?: string): Domain | null | undefined {
if (typeof url !== "string") {
return null;
}
if (url === "") {
return null;
export function parseDomain(url: string): Domain {
if (!url) {
throw new TypeError("a non-empty url is required");
}
// strip extermeties and whitespace from input
const normalizedDomain = trim(url.replace(/(https?:)?\/\//, ""));
const parts = normalizedDomain.split(".");
const host = normalizeUrl(url);
const baseDomain = getBaseDomain();
// ensure the last part only includes something that looks like a TLD
function cleanTLD(tld = "") {
return tld.split(/[/:?]/)[0];
// if the url doesn't include the base url, then it must be a custom domain
const baseUrlStart = host === baseDomain ? 0 : host.indexOf(`.${baseDomain}`);
if (baseUrlStart === -1) {
return { teamSubdomain: "", host, custom: true };
}
// simplistic subdomain parse, we don't need to take into account subdomains
// with "." characters as these are not valid in Outline
if (parts.length >= 3) {
return {
subdomain: parts[0],
domain: parts[1],
tld: cleanTLD(parts.slice(2).join(".")),
};
}
// we consider anything in front of the baseUrl to be the subdomain
const subdomain = host.substring(0, baseUrlStart);
const isReservedSubdomain = RESERVED_SUBDOMAINS.includes(subdomain);
if (parts.length === 2) {
return {
subdomain: "",
domain: parts[0],
tld: cleanTLD(parts.slice(1).join(".")),
};
}
// one-part domain handler for things like localhost
if (parts.length === 1) {
return {
subdomain: "",
domain: cleanTLD(parts.slice(0).join()),
tld: "",
};
}
return null;
return {
teamSubdomain: isReservedSubdomain ? "" : subdomain,
host,
custom: false,
};
}
export function stripSubdomain(hostname: string) {
const parsed = parseDomain(hostname);
if (!parsed) {
return hostname;
}
if (parsed.tld) {
return `${parsed.domain}.${parsed.tld}`;
}
return parsed.domain;
}
export function getCookieDomain(domain: string) {
// always use the base URL for cookies when in hosted mode
// and the domain is not custom
if (env.SUBDOMAINS_ENABLED) {
const parsed = parseDomain(domain);
export function isCustomSubdomain(hostname: string) {
const parsed = parseDomain(hostname);
if (
!parsed ||
!parsed.subdomain ||
parsed.subdomain === "app" ||
parsed.subdomain === "www"
) {
return false;
if (!parsed.custom) {
return getBaseDomain();
}
}
return true;
return domain;
}
export const RESERVED_SUBDOMAINS = [

View File

@@ -1,32 +1,32 @@
import env from "../env";
import { parseDomain } from "./domains";
const env = typeof window !== "undefined" ? window.env : process.env;
/**
 * Builds a CDN URL by prefixing the given path with `env.CDN_URL`.
 *
 * No separator is inserted between the two parts.
 *
 * @param path The asset path to append to the CDN base.
 * @returns The concatenation of `env.CDN_URL` and `path`.
 */
export function cdnPath(path: string): string {
  const cdnBase = env.CDN_URL;
  return `${cdnBase}${path}`;
}
// TODO: HACK: if this is called server-side, it will always return false.
// - The only call sites to this function and isExternalUrl are on the client
// - The reason this is in a shared util is because it's used in an editor plugin
// which is also in the shared code
export function isInternalUrl(href: string) {
// empty strings are never internal
if (href === "") {
return false;
}
// relative paths are always internal
if (href[0] === "/") {
return true;
}
const outline =
typeof window !== "undefined"
? parseDomain(window.location.href)
: undefined;
const parsed = parseDomain(href);
if (
parsed &&
outline &&
parsed.subdomain === outline.subdomain &&
parsed.domain === outline.domain &&
parsed.tld === outline.tld
) {
return true;
}
return false;
const domain = parseDomain(href);
return outline?.host === domain.host;
}
export function isExternalUrl(href: string) {