From 12101236849ccaea2af0299cc4ce0ac8b3a69c35 Mon Sep 17 00:00:00 2001
From: m5r
Date: Sun, 8 Feb 2026 15:50:41 +0100
Subject: [PATCH] fall back to GET when HEAD fails for redirect detection

some URL shorteners (e.g. amzn.eu) return 404 for HEAD requests but 301
for GET. try HEAD first, fall back to GET on 404/405.

also replace the old test guard hack with proper fetchMock usage.
---

Notes (review; this section is not part of the commit message):

- The line structure of this patch was restored from a whitespace-collapsed
  copy. Hunk line counts match the @@ headers and the diffstat, but the
  indentation is reconstructed (tabs assumed) -- TODO confirm against the
  target tree, or apply with `git apply --ignore-whitespace`.
- extractRedirectLocation() resolves only Location values that start with
  "/". A protocol-relative value ("//host/path") also passes that check and
  is appended to the original host, and a path-relative value ("next") is
  returned unresolved. `new URL(location, originalUrl)` would cover all of
  these forms.
- The GET fallback only inspects status and headers; the response body is
  neither consumed nor cancelled.
- followRedirect() retries with GET only on 404/405; any other non-3xx HEAD
  status still yields null.

 src/cleaner.ts     |  61 ++++++++++++++-------------
 test/index.spec.ts | 100 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 128 insertions(+), 33 deletions(-)

diff --git a/src/cleaner.ts b/src/cleaner.ts
index d0aaf60..b20d624 100644
--- a/src/cleaner.ts
+++ b/src/cleaner.ts
@@ -29,41 +29,40 @@ export async function cleanUrl(inputUrl: string, rules: ClearURLsRules, maxRedir
 	}
 }
 
-async function followRedirect(url: string) {
-	// @ts-ignore - Skip redirect following in tests to avoid external HTTP calls
-	if (typeof global !== "undefined" && global.process?.env?.NODE_ENV === "test") {
-		return null;
+function extractRedirectLocation(originalUrl: string, response: Response) {
+	if (response.status < 300 || response.status >= 400) return null;
+	const location = response.headers.get("Location");
+	if (!location) return null;
+	if (location.startsWith("/")) {
+		const base = new URL(originalUrl);
+		return `${base.protocol}//${base.host}${location}`;
 	}
+	return location;
+}
+
+async function followRedirect(url: string) {
+	const headers: HeadersInit = {
+		"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Gecko/20100101 Firefox/143.0",
+		Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+		"Accept-Language": "en-US,en;q=0.5",
+		"Accept-Encoding": "gzip, deflate, br, zstd",
+		"Sec-GPC": "1",
+		"Upgrade-Insecure-Requests": "1",
+		"Sec-Fetch-Dest": "document",
+		"Sec-Fetch-Mode": "navigate",
+		"Sec-Fetch-Site": "none",
+		"Sec-Fetch-User": "?1",
+		Connection: "keep-alive",
+	};
 
 	try {
-		const response = await fetch(url, {
-			method: "HEAD",
-			redirect: "manual",
-			headers: {
-				"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Gecko/20100101 Firefox/143.0",
-				Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
-				"Accept-Language": "en-US,en;q=0.5",
-				"Accept-Encoding": "gzip, deflate, br, zstd",
-				"Sec-GPC": "1",
-				"Upgrade-Insecure-Requests": "1",
-				"Sec-Fetch-Dest": "document",
-				"Sec-Fetch-Mode": "navigate",
-				"Sec-Fetch-Site": "none",
-				"Sec-Fetch-User": "?1",
-				Connection: "keep-alive",
-			},
-		});
+		const headResponse = await fetch(url, { method: "HEAD", redirect: "manual", headers });
+		const redirect = extractRedirectLocation(url, headResponse);
+		if (redirect) return redirect;
 
-		if (response.status >= 300 && response.status < 400) {
-			const location = response.headers.get("Location");
-			if (location) {
-				// Handle relative redirects
-				if (location.startsWith("/")) {
-					const baseUrl = new URL(url);
-					return `${baseUrl.protocol}//${baseUrl.host}${location}`;
-				}
-				return location;
-			}
+		if (headResponse.status === 404 || headResponse.status === 405) {
+			const getResponse = await fetch(url, { method: "GET", redirect: "manual", headers });
+			return extractRedirectLocation(url, getResponse);
 		}
 
 		return null;
diff --git a/test/index.spec.ts b/test/index.spec.ts
index a250137..ba4176c 100644
--- a/test/index.spec.ts
+++ b/test/index.spec.ts
@@ -1,5 +1,5 @@
-import { env, createExecutionContext, waitOnExecutionContext, SELF } from "cloudflare:test";
-import { describe, it, expect, beforeAll } from "vitest";
+import { env, createExecutionContext, waitOnExecutionContext, fetchMock, SELF } from "cloudflare:test";
+import { describe, it, expect, beforeAll, beforeEach, afterEach } from "vitest";
 import worker, { RulesCache } from "../src/index";
 import type { ClearURLsRules } from "../src/types";
 
@@ -93,6 +93,24 @@ beforeAll(() => {
 });
 
 describe("URL Cleaner worker", () => {
+	beforeEach(() => {
+		fetchMock.activate();
+		fetchMock.disableNetConnect();
+		for (const origin of [
+			"https://example.com",
+			"https://youtube.com",
+			"https://amazon.com",
+			"https://google.com",
+			"https://tiktok.com",
+			"https://unknown-site.com",
+			"https://nonexistent.com",
+		]) {
+			fetchMock.get(origin).intercept({ path: /.*/, method: "HEAD" }).reply(200).persist();
+		}
+	});
+
+	afterEach(() => fetchMock.deactivate());
+
 	it("cleans global tracking parameters", async () => {
 		const testUrl = "https://example.com?utm_source=test&utm_medium=email&normal=keep";
 		const request = new IncomingRequest(`http://example.com/?url=${encodeURIComponent(testUrl)}`);
@@ -206,3 +224,81 @@ describe("URL Cleaner worker", () => {
 		expect(await putResponse.text()).toBe("Method not allowed");
 	});
 });
+
+describe("Redirect following", () => {
+	beforeEach(() => {
+		fetchMock.activate();
+		fetchMock.disableNetConnect();
+	});
+
+	afterEach(() => fetchMock.deactivate());
+
+	it("follows redirect when HEAD returns 3xx", async () => {
+		fetchMock
+			.get("https://short.test")
+			.intercept({ path: "/abc", method: "HEAD" })
+			.reply(301, "", { headers: { Location: "https://no-redirect.test/target" } });
+
+		const testUrl = "https://short.test/abc";
+		const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
+		expect(await response.text()).toBe("https://no-redirect.test/target");
+	});
+
+	it("falls back to GET when HEAD returns 404", async () => {
+		const pool = fetchMock.get("https://amzn-short.test");
+		pool.intercept({ path: "/d/abc123", method: "HEAD" }).reply(404);
+		pool
+			.intercept({ path: "/d/abc123", method: "GET" })
+			.reply(301, "", { headers: { Location: "https://amazon.com/dp/B123?ref_=tracking&tag=mytag" } });
+
+		const testUrl = "https://amzn-short.test/d/abc123";
+		const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
+		expect(await response.text()).toBe("https://amazon.com/dp/B123");
+	});
+
+	it("falls back to GET when HEAD returns 405", async () => {
+		const pool = fetchMock.get("https://head-unsupported.test");
+		pool.intercept({ path: "/link", method: "HEAD" }).reply(405);
+		pool
+			.intercept({ path: "/link", method: "GET" })
+			.reply(302, "", { headers: { Location: "https://no-redirect.test/destination" } });
+
+		const testUrl = "https://head-unsupported.test/link";
+		const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
+		expect(await response.text()).toBe("https://no-redirect.test/destination");
+	});
+
+	it("does not fall back to GET when HEAD returns 200", async () => {
+		fetchMock
+			.get("https://no-redirect.test")
+			.intercept({ path: "/page?utm_source=test", method: "HEAD" })
+			.reply(200);
+
+		const testUrl = "https://no-redirect.test/page?utm_source=test";
+		const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
+		expect(await response.text()).toBe("https://no-redirect.test/page");
+	});
+
+	it("cleans parameters on the redirect target", async () => {
+		fetchMock
+			.get("https://redir.test")
+			.intercept({ path: "/go", method: "HEAD" })
+			.reply(301, "", { headers: { Location: "https://youtube.com/watch?v=xyz&feature=share&si=track" } });
+
+		const testUrl = "https://redir.test/go";
+		const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
+		expect(await response.text()).toBe("https://youtube.com/watch?v=xyz");
+	});
+
+	it("handles relative redirect locations", async () => {
+		const pool = fetchMock.get("https://relative.test");
+		pool
+			.intercept({ path: "/short", method: "HEAD" })
+			.reply(301, "", { headers: { Location: "/full/path" } });
+		pool.intercept({ path: "/full/path", method: "HEAD" }).reply(200);
+
+		const testUrl = "https://relative.test/short";
+		const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
+		expect(await response.text()).toBe("https://relative.test/full/path");
+	});
+});