Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 70 additions & 1 deletion packages/gitbook/src/lib/data/urls.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { describe, expect, it } from 'bun:test';

import { getURLLookupAlternatives, normalizeURL } from './urls';
import { getURLLookupAlternatives, normalizeURL, decodeURLPath } from './urls';

describe('getURLLookupAlternatives', () => {
it('should return all URLs up to the root', () => {
Expand Down Expand Up @@ -406,3 +406,72 @@ describe('normalizeURL', () => {
);
});
});

describe('decodeURLPath', () => {
it('should decode encoded path components', () => {
const url = new URL('https://docs.mycompany.com/helloworld/tes%74');
const result = decodeURLPath(url);
expect(result.pathname).toBe('/helloworld/test');
expect(result.toString()).toBe('https://docs.mycompany.com/helloworld/test');
});

it('should handle multiple levels of encoding', () => {
// Double encoded: %2574 = %74 (letter t)
const url = new URL('https://docs.mycompany.com/helloworld/tes%74');
const result = decodeURLPath(url);
expect(result.pathname).toBe('/helloworld/test');

// Triple encoded: %252574 = %2574 = %74 (letter t)
const url2 = new URL('https://docs.mycompany.com/helloworld/tes%74');
const result2 = decodeURLPath(url2);
expect(result2.pathname).toBe('/helloworld/test');
});

it('should throw an error for invalid characters in the path', () => {
expect(() => {
decodeURLPath(new URL('https://docs.mycompany.com/hello:world'));
}).toThrow('URL path contains invalid characters');

expect(() => {
decodeURLPath(new URL('https://docs.mycompany.com/hello%3Btest'));
}).toThrow('URL path contains invalid characters');

expect(() => {
decodeURLPath(new URL('https://docs.mycompany.com/hello%40anchor'));
}).toThrow('URL path contains invalid characters');

//TODO: should we also throw for spaces? Maybe make an exception for spaces only?
expect(() => {
decodeURLPath(new URL('https://docs.mycompany.com/hello%20world'));
}).toThrow();
});

it('should limit decoding iterations for nested % encodings to prevent DoS', () => {
// 4 levels of encoding should throw an error
// %25252525 = %252525 = %2525 = %25 = %
const url = new URL('https://docs.mycompany.com/%25252525');
expect(() => {
decodeURLPath(url);
}).toThrow('URL path is malformed');

const deepUrl = new URL('https://docs.mycompany.com/%2525252525252525');
expect(() => {
decodeURLPath(deepUrl);
}).toThrow('URL path is malformed');
});

// TODO: should we do that actually?
it('should throw an error if the encoded path contains /', () => {
expect(() => {
decodeURLPath(new URL('https://docs.mycompany.com/hello%2Fworld'));
}).toThrow('URL path contains invalid characters');
});

it('should not decode search params or hash fragments', () => {
const url = new URL('https://docs.mycompany.com/helloworld/tes%74?query=%74est#sec%74ion');
const result = decodeURLPath(url);
expect(result.pathname).toBe('/helloworld/test');
expect(result.search).toBe('?query=%74est');
expect(result.hash).toBe('#sec%74ion');
});
})
43 changes: 43 additions & 0 deletions packages/gitbook/src/lib/data/urls.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { isProxyRootRequest } from '../proxy';
import { DataFetcherError } from './errors';

/**
* For a given GitBook URL, return a list of alternative URLs that could be matched against to lookup the content.
Expand Down Expand Up @@ -127,6 +128,48 @@ export function normalizeURL(url: URL) {
return result;
}

/**
* This function checks if the URL path contains invalid characters that should not be present in a valid URL path.
* If such characters are found, it indicates that the URL is malformed or potentially malicious and should be rejected.
* https://developer.mozilla.org/en-US/docs/Glossary/Percent-encoding
* "%" itself is excluded because it could be multiple encoding.
*/
function containInvalidURLCharacters(segment: string): boolean {
const invalidCharacters = [':', '/', '?', '#', '[', ']', '@', '!', '$', '&', "'", '(', ')', '*', '+', ',', ';', '='];
return invalidCharacters.some((char) => segment.includes(char));
}

/**
* Decode the url path component, we redirect URLs with encoded path components
* so that we don't end up with multiple URLs for the same content (especially important because of caching).
* If after decoding the path contains invalid characters, we throw an error.
* We limit the number of decoding iterations to avoid potential DoS attacks with double-encoding.
*/
export function decodeURLPath(url: URL, i = 0): URL {
const decoded = new URL(url);
decoded.pathname = url.pathname.split('/').map(segment => {
const result = decodeURIComponent(segment)
if (containInvalidURLCharacters(result)) {
throw new DataFetcherError(`URL path contains invalid characters: ${url.toString()}`, 400);
}
return result;
}).join('/');
let result = decoded;
if(decoded.pathname !== url.pathname) {
result = normalizeURL(decoded);
}
//TODO: Do we want to allow more than 3 iterations? Maybe even only 1?
// We limit to 3 iterations of decoding to avoid potential DoS attacks with double-encoding
if(result.pathname.includes('%') && i < 3) {
return decodeURLPath(result, i + 1);
}
// If we are over 3 iterations and still have encoded characters, we consider the URL invalid
if(result.pathname.includes('%') && i >= 3) {
throw new DataFetcherError(`URL path is malformed: ${url.toString()}`, 400);
}
return result;
}

/**
* Strip the search params from a URL
*/
Expand Down
7 changes: 7 additions & 0 deletions packages/gitbook/src/middleware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { getContentSecurityPolicy } from '@/lib/csp';
import { validateSerializedCustomization } from '@/lib/customization';
import {
DataFetcherError,
decodeURLPath,
getVisitorAuthBasePath,
lookupPublishedContentByUrl,
normalizeURL,
Expand Down Expand Up @@ -46,6 +47,12 @@ export async function middleware(request: NextRequest) {
return NextResponse.redirect(normalized.toString());
}

// If the URL path is encoded, decode it and redirect to the decoded URL
const decoded = decodeURLPath(requestURL);
if (decoded.toString() !== requestURL.toString()) {
return NextResponse.redirect(decoded.toString());
}

// Reject malicious requests
const rejectResponse = await validateServerActionRequest(request);
if (rejectResponse) {
Expand Down
Loading