fall back to GET when HEAD fails for redirect detection

Some URL shorteners (e.g. amzn.eu) return 404 for HEAD requests
but 301 for GET requests. Try HEAD first and fall back to GET on 404/405.
Also replace the old test guard hack with proper fetchMock usage.
This commit is contained in:
m5r
2026-02-08 15:50:41 +01:00
parent fd6dafb4db
commit 1210123684
2 changed files with 128 additions and 33 deletions

View File

@@ -29,41 +29,40 @@ export async function cleanUrl(inputUrl: string, rules: ClearURLsRules, maxRedir
} }
} }
async function followRedirect(url: string) { function extractRedirectLocation(originalUrl: string, response: Response) {
// @ts-ignore - Skip redirect following in tests to avoid external HTTP calls if (response.status < 300 || response.status >= 400) return null;
if (typeof global !== "undefined" && global.process?.env?.NODE_ENV === "test") { const location = response.headers.get("Location");
return null; if (!location) return null;
if (location.startsWith("/")) {
const base = new URL(originalUrl);
return `${base.protocol}//${base.host}${location}`;
} }
return location;
}
async function followRedirect(url: string) {
const headers: HeadersInit = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Gecko/20100101 Firefox/143.0",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br, zstd",
"Sec-GPC": "1",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
Connection: "keep-alive",
};
try { try {
const response = await fetch(url, { const headResponse = await fetch(url, { method: "HEAD", redirect: "manual", headers });
method: "HEAD", const redirect = extractRedirectLocation(url, headResponse);
redirect: "manual", if (redirect) return redirect;
headers: {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Gecko/20100101 Firefox/143.0",
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br, zstd",
"Sec-GPC": "1",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
Connection: "keep-alive",
},
});
if (response.status >= 300 && response.status < 400) { if (headResponse.status === 404 || headResponse.status === 405) {
const location = response.headers.get("Location"); const getResponse = await fetch(url, { method: "GET", redirect: "manual", headers });
if (location) { return extractRedirectLocation(url, getResponse);
// Handle relative redirects
if (location.startsWith("/")) {
const baseUrl = new URL(url);
return `${baseUrl.protocol}//${baseUrl.host}${location}`;
}
return location;
}
} }
return null; return null;

View File

@@ -1,5 +1,5 @@
import { env, createExecutionContext, waitOnExecutionContext, SELF } from "cloudflare:test"; import { env, createExecutionContext, waitOnExecutionContext, fetchMock, SELF } from "cloudflare:test";
import { describe, it, expect, beforeAll } from "vitest"; import { describe, it, expect, beforeAll, beforeEach, afterEach } from "vitest";
import worker, { RulesCache } from "../src/index"; import worker, { RulesCache } from "../src/index";
import type { ClearURLsRules } from "../src/types"; import type { ClearURLsRules } from "../src/types";
@@ -93,6 +93,24 @@ beforeAll(() => {
}); });
describe("URL Cleaner worker", () => { describe("URL Cleaner worker", () => {
// Before each test: switch the fetch mock on and refuse real network connections.
beforeEach(() => {
  fetchMock.activate();
  fetchMock.disableNetConnect();

  // Each of these origins gets a persistent 200 reply for a HEAD request on any path.
  const stubbedOrigins = [
    "https://example.com",
    "https://youtube.com",
    "https://amazon.com",
    "https://google.com",
    "https://tiktok.com",
    "https://unknown-site.com",
    "https://nonexistent.com",
  ];
  stubbedOrigins.forEach((origin) => {
    fetchMock.get(origin).intercept({ path: /.*/, method: "HEAD" }).reply(200).persist();
  });
});

// After each test: switch the fetch mock off again.
afterEach(() => {
  fetchMock.deactivate();
});
it("cleans global tracking parameters", async () => { it("cleans global tracking parameters", async () => {
const testUrl = "https://example.com?utm_source=test&utm_medium=email&normal=keep"; const testUrl = "https://example.com?utm_source=test&utm_medium=email&normal=keep";
const request = new IncomingRequest(`http://example.com/?url=${encodeURIComponent(testUrl)}`); const request = new IncomingRequest(`http://example.com/?url=${encodeURIComponent(testUrl)}`);
@@ -206,3 +224,81 @@ describe("URL Cleaner worker", () => {
expect(await putResponse.text()).toBe("Method not allowed"); expect(await putResponse.text()).toBe("Method not allowed");
}); });
}); });
// Redirect-resolution tests: each case registers mocked upstream replies, sends a URL
// through the worker, and checks the text the worker responds with.
describe("Redirect following", () => {
  // Passes `targetUrl` to the worker via the `url` query parameter and returns the response body.
  const cleanViaWorker = async (targetUrl: string): Promise<string> => {
    const workerResponse = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(targetUrl)}`);
    return workerResponse.text();
  };

  beforeEach(() => {
    fetchMock.activate();
    fetchMock.disableNetConnect();
  });

  afterEach(() => {
    fetchMock.deactivate();
  });

  it("follows redirect when HEAD returns 3xx", async () => {
    const shortener = fetchMock.get("https://short.test");
    shortener
      .intercept({ path: "/abc", method: "HEAD" })
      .reply(301, "", { headers: { Location: "https://no-redirect.test/target" } });

    const cleaned = await cleanViaWorker("https://short.test/abc");

    expect(cleaned).toBe("https://no-redirect.test/target");
  });

  it("falls back to GET when HEAD returns 404", async () => {
    // HEAD is answered with 404; only the GET reply carries the Location header.
    const shortener = fetchMock.get("https://amzn-short.test");
    shortener.intercept({ path: "/d/abc123", method: "HEAD" }).reply(404);
    shortener
      .intercept({ path: "/d/abc123", method: "GET" })
      .reply(301, "", { headers: { Location: "https://amazon.com/dp/B123?ref_=tracking&tag=mytag" } });

    const cleaned = await cleanViaWorker("https://amzn-short.test/d/abc123");

    expect(cleaned).toBe("https://amazon.com/dp/B123");
  });

  it("falls back to GET when HEAD returns 405", async () => {
    // HEAD is answered with 405; only the GET reply carries the Location header.
    const upstream = fetchMock.get("https://head-unsupported.test");
    upstream.intercept({ path: "/link", method: "HEAD" }).reply(405);
    upstream
      .intercept({ path: "/link", method: "GET" })
      .reply(302, "", { headers: { Location: "https://no-redirect.test/destination" } });

    const cleaned = await cleanViaWorker("https://head-unsupported.test/link");

    expect(cleaned).toBe("https://no-redirect.test/destination");
  });

  it("does not fall back to GET when HEAD returns 200", async () => {
    // Only a HEAD interceptor is registered for this URL; no GET reply exists.
    const upstream = fetchMock.get("https://no-redirect.test");
    upstream.intercept({ path: "/page?utm_source=test", method: "HEAD" }).reply(200);

    const cleaned = await cleanViaWorker("https://no-redirect.test/page?utm_source=test");

    expect(cleaned).toBe("https://no-redirect.test/page");
  });

  it("cleans parameters on the redirect target", async () => {
    const redirector = fetchMock.get("https://redir.test");
    redirector
      .intercept({ path: "/go", method: "HEAD" })
      .reply(301, "", { headers: { Location: "https://youtube.com/watch?v=xyz&feature=share&si=track" } });

    const cleaned = await cleanViaWorker("https://redir.test/go");

    expect(cleaned).toBe("https://youtube.com/watch?v=xyz");
  });

  it("handles relative redirect locations", async () => {
    // The Location header is a bare path; the expected result places it on the original origin.
    const upstream = fetchMock.get("https://relative.test");
    upstream
      .intercept({ path: "/short", method: "HEAD" })
      .reply(301, "", { headers: { Location: "/full/path" } });
    upstream.intercept({ path: "/full/path", method: "HEAD" }).reply(200);

    const cleaned = await cleanViaWorker("https://relative.test/short");

    expect(cleaned).toBe("https://relative.test/full/path");
  });
});