fall back to GET when HEAD fails for redirect detection
Some URL shorteners (e.g. amzn.eu) return 404 for HEAD requests but 301 for GET requests. Try HEAD first, then fall back to GET when HEAD returns 404 or 405. Also replace the old test-guard hack with proper fetchMock usage.
This commit is contained in:
@@ -29,17 +29,19 @@ export async function cleanUrl(inputUrl: string, rules: ClearURLsRules, maxRedir
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function followRedirect(url: string) {
|
function extractRedirectLocation(originalUrl: string, response: Response) {
|
||||||
// @ts-ignore - Skip redirect following in tests to avoid external HTTP calls
|
if (response.status < 300 || response.status >= 400) return null;
|
||||||
if (typeof global !== "undefined" && global.process?.env?.NODE_ENV === "test") {
|
const location = response.headers.get("Location");
|
||||||
return null;
|
if (!location) return null;
|
||||||
|
if (location.startsWith("/")) {
|
||||||
|
const base = new URL(originalUrl);
|
||||||
|
return `${base.protocol}//${base.host}${location}`;
|
||||||
}
|
}
|
||||||
|
return location;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
async function followRedirect(url: string) {
|
||||||
const response = await fetch(url, {
|
const headers: HeadersInit = {
|
||||||
method: "HEAD",
|
|
||||||
redirect: "manual",
|
|
||||||
headers: {
|
|
||||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Gecko/20100101 Firefox/143.0",
|
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:143.0) Gecko/20100101 Firefox/143.0",
|
||||||
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
"Accept-Language": "en-US,en;q=0.5",
|
"Accept-Language": "en-US,en;q=0.5",
|
||||||
@@ -51,19 +53,16 @@ async function followRedirect(url: string) {
|
|||||||
"Sec-Fetch-Site": "none",
|
"Sec-Fetch-Site": "none",
|
||||||
"Sec-Fetch-User": "?1",
|
"Sec-Fetch-User": "?1",
|
||||||
Connection: "keep-alive",
|
Connection: "keep-alive",
|
||||||
},
|
};
|
||||||
});
|
|
||||||
|
|
||||||
if (response.status >= 300 && response.status < 400) {
|
try {
|
||||||
const location = response.headers.get("Location");
|
const headResponse = await fetch(url, { method: "HEAD", redirect: "manual", headers });
|
||||||
if (location) {
|
const redirect = extractRedirectLocation(url, headResponse);
|
||||||
// Handle relative redirects
|
if (redirect) return redirect;
|
||||||
if (location.startsWith("/")) {
|
|
||||||
const baseUrl = new URL(url);
|
if (headResponse.status === 404 || headResponse.status === 405) {
|
||||||
return `${baseUrl.protocol}//${baseUrl.host}${location}`;
|
const getResponse = await fetch(url, { method: "GET", redirect: "manual", headers });
|
||||||
}
|
return extractRedirectLocation(url, getResponse);
|
||||||
return location;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { env, createExecutionContext, waitOnExecutionContext, SELF } from "cloudflare:test";
|
import { env, createExecutionContext, waitOnExecutionContext, fetchMock, SELF } from "cloudflare:test";
|
||||||
import { describe, it, expect, beforeAll } from "vitest";
|
import { describe, it, expect, beforeAll, beforeEach, afterEach } from "vitest";
|
||||||
|
|
||||||
import worker, { RulesCache } from "../src/index";
|
import worker, { RulesCache } from "../src/index";
|
||||||
import type { ClearURLsRules } from "../src/types";
|
import type { ClearURLsRules } from "../src/types";
|
||||||
@@ -93,6 +93,24 @@ beforeAll(() => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
describe("URL Cleaner worker", () => {
|
describe("URL Cleaner worker", () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
fetchMock.activate();
|
||||||
|
fetchMock.disableNetConnect();
|
||||||
|
for (const origin of [
|
||||||
|
"https://example.com",
|
||||||
|
"https://youtube.com",
|
||||||
|
"https://amazon.com",
|
||||||
|
"https://google.com",
|
||||||
|
"https://tiktok.com",
|
||||||
|
"https://unknown-site.com",
|
||||||
|
"https://nonexistent.com",
|
||||||
|
]) {
|
||||||
|
fetchMock.get(origin).intercept({ path: /.*/, method: "HEAD" }).reply(200).persist();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(() => fetchMock.deactivate());
|
||||||
|
|
||||||
it("cleans global tracking parameters", async () => {
|
it("cleans global tracking parameters", async () => {
|
||||||
const testUrl = "https://example.com?utm_source=test&utm_medium=email&normal=keep";
|
const testUrl = "https://example.com?utm_source=test&utm_medium=email&normal=keep";
|
||||||
const request = new IncomingRequest(`http://example.com/?url=${encodeURIComponent(testUrl)}`);
|
const request = new IncomingRequest(`http://example.com/?url=${encodeURIComponent(testUrl)}`);
|
||||||
@@ -206,3 +224,81 @@ describe("URL Cleaner worker", () => {
|
|||||||
expect(await putResponse.text()).toBe("Method not allowed");
|
expect(await putResponse.text()).toBe("Method not allowed");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("Redirect following", () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
fetchMock.activate();
|
||||||
|
fetchMock.disableNetConnect();
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(() => fetchMock.deactivate());
|
||||||
|
|
||||||
|
it("follows redirect when HEAD returns 3xx", async () => {
|
||||||
|
fetchMock
|
||||||
|
.get("https://short.test")
|
||||||
|
.intercept({ path: "/abc", method: "HEAD" })
|
||||||
|
.reply(301, "", { headers: { Location: "https://no-redirect.test/target" } });
|
||||||
|
|
||||||
|
const testUrl = "https://short.test/abc";
|
||||||
|
const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
|
||||||
|
expect(await response.text()).toBe("https://no-redirect.test/target");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("falls back to GET when HEAD returns 404", async () => {
|
||||||
|
const pool = fetchMock.get("https://amzn-short.test");
|
||||||
|
pool.intercept({ path: "/d/abc123", method: "HEAD" }).reply(404);
|
||||||
|
pool
|
||||||
|
.intercept({ path: "/d/abc123", method: "GET" })
|
||||||
|
.reply(301, "", { headers: { Location: "https://amazon.com/dp/B123?ref_=tracking&tag=mytag" } });
|
||||||
|
|
||||||
|
const testUrl = "https://amzn-short.test/d/abc123";
|
||||||
|
const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
|
||||||
|
expect(await response.text()).toBe("https://amazon.com/dp/B123");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("falls back to GET when HEAD returns 405", async () => {
|
||||||
|
const pool = fetchMock.get("https://head-unsupported.test");
|
||||||
|
pool.intercept({ path: "/link", method: "HEAD" }).reply(405);
|
||||||
|
pool
|
||||||
|
.intercept({ path: "/link", method: "GET" })
|
||||||
|
.reply(302, "", { headers: { Location: "https://no-redirect.test/destination" } });
|
||||||
|
|
||||||
|
const testUrl = "https://head-unsupported.test/link";
|
||||||
|
const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
|
||||||
|
expect(await response.text()).toBe("https://no-redirect.test/destination");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not fall back to GET when HEAD returns 200", async () => {
|
||||||
|
fetchMock
|
||||||
|
.get("https://no-redirect.test")
|
||||||
|
.intercept({ path: "/page?utm_source=test", method: "HEAD" })
|
||||||
|
.reply(200);
|
||||||
|
|
||||||
|
const testUrl = "https://no-redirect.test/page?utm_source=test";
|
||||||
|
const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
|
||||||
|
expect(await response.text()).toBe("https://no-redirect.test/page");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("cleans parameters on the redirect target", async () => {
|
||||||
|
fetchMock
|
||||||
|
.get("https://redir.test")
|
||||||
|
.intercept({ path: "/go", method: "HEAD" })
|
||||||
|
.reply(301, "", { headers: { Location: "https://youtube.com/watch?v=xyz&feature=share&si=track" } });
|
||||||
|
|
||||||
|
const testUrl = "https://redir.test/go";
|
||||||
|
const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
|
||||||
|
expect(await response.text()).toBe("https://youtube.com/watch?v=xyz");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("handles relative redirect locations", async () => {
|
||||||
|
const pool = fetchMock.get("https://relative.test");
|
||||||
|
pool
|
||||||
|
.intercept({ path: "/short", method: "HEAD" })
|
||||||
|
.reply(301, "", { headers: { Location: "/full/path" } });
|
||||||
|
pool.intercept({ path: "/full/path", method: "HEAD" }).reply(200);
|
||||||
|
|
||||||
|
const testUrl = "https://relative.test/short";
|
||||||
|
const response = await SELF.fetch(`https://example.com/?url=${encodeURIComponent(testUrl)}`);
|
||||||
|
expect(await response.text()).toBe("https://relative.test/full/path");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user