From 5061879649bb59df593423b1bc782dff734fd13d Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan <60182103+abhu85@users.noreply.github.com> Date: Tue, 5 May 2026 22:53:16 +0530 Subject: [PATCH] fix: update fromDataURI regex to match RFC 2397 (#10829) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: update fromDataURI regex to match RFC 2397 Update the DATA_URL_PATTERN regex to correctly match all valid RFC 2397 data URIs. The previous regex required a semicolon-terminated media type segment, which rejected valid data URIs like `data:;base64,MTIz` and `data:application/octet-stream,123`. Fixes #10808 * fix: normalize omitted mediatype to text/plain per RFC 2397 When a data URI has parameters but no mediatype (e.g. data:;charset=UTF-8,...), prepend text/plain as the default per RFC 2397 section 3. * fix: use stricter RFC 2397 regex and expand test matrix - Switch to type/subtype-aware regex from #10808 - Require name=value parameters, separate ;base64 group - Add tests: charset param, URL-encoded body, Blob type preservation, datax: rejection, missing comma rejection - Normalize omitted mediatype to text/plain per RFC 2397 §3 * chore: apply small nits --------- Co-authored-by: Abhishek Chauhan Co-authored-by: Jay --- lib/helpers/fromDataURI.js | 23 ++++++++--- tests/unit/fromDataURI.test.js | 75 ++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 5 deletions(-) diff --git a/lib/helpers/fromDataURI.js b/lib/helpers/fromDataURI.js index 989a4178..73195889 100644 --- a/lib/helpers/fromDataURI.js +++ b/lib/helpers/fromDataURI.js @@ -4,7 +4,9 @@ import AxiosError from '../core/AxiosError.js'; import parseProtocol from './parseProtocol.js'; import platform from '../platform/index.js'; -const DATA_URL_PATTERN = /^(?:([^;]+);)?(?:[^;]+;)?(base64|),([\s\S]*)$/; +// RFC 2397: data:[<mediatype>][;base64],<data> +// mediatype = type/subtype followed by optional ;name=value parameters +const DATA_URL_PATTERN = 
/^([^,;]+\/[^,;]+)?((?:;[^,;=]+=[^,;]+)*)(;base64)?,([\s\S]*)$/; /** * Parse data uri to a Buffer or Blob @@ -33,10 +35,21 @@ export default function fromDataURI(uri, asBlob, options) { throw new AxiosError('Invalid URL', AxiosError.ERR_INVALID_URL); } - const mime = match[1]; - const isBase64 = match[2]; - const body = match[3]; - const buffer = Buffer.from(decodeURIComponent(body), isBase64 ? 'base64' : 'utf8'); + const type = match[1]; + const params = match[2]; + const encoding = match[3] ? 'base64' : 'utf8'; + const body = match[4]; + + // RFC 2397 section 3: default mediatype is text/plain;charset=US-ASCII + // Bare `data:,` leaves mime undefined; Blob normalises that to "" per spec. + let mime; + if (type) { + mime = params ? type + params : type; + } else if (params) { + mime = 'text/plain' + params; + } + + const buffer = Buffer.from(decodeURIComponent(body), encoding); if (asBlob) { if (!_Blob) { diff --git a/tests/unit/fromDataURI.test.js b/tests/unit/fromDataURI.test.js index ef4f1e37..c55172c1 100644 --- a/tests/unit/fromDataURI.test.js +++ b/tests/unit/fromDataURI.test.js @@ -10,4 +10,79 @@ describe('helpers::fromDataURI', () => { assert.deepStrictEqual(fromDataURI(dataURI, false), buffer); }); + + it('should parse data URI with no mediatype and base64', () => { + const buffer = Buffer.from('123'); + const dataURI = 'data:;base64,' + buffer.toString('base64'); + + assert.deepStrictEqual(fromDataURI(dataURI, false), buffer); + }); + + it('should parse data URI with mediatype and no base64', () => { + const buffer = Buffer.from('123'); + const dataURI = 'data:application/octet-stream,123'; + + assert.deepStrictEqual(fromDataURI(dataURI, false), buffer); + }); + + it('should parse full form data URI with text/plain and base64', () => { + const buffer = Buffer.from('hello'); + const dataURI = 'data:text/plain;base64,' + buffer.toString('base64'); + + assert.deepStrictEqual(fromDataURI(dataURI, false), buffer); + }); + + it('should parse minimal valid data 
URI', () => { + const buffer = Buffer.from(''); + const dataURI = 'data:,'; + + assert.deepStrictEqual(fromDataURI(dataURI, false), buffer); + }); + + it('should parse data URI with spaces in data', () => { + const buffer = Buffer.from('hello world'); + const dataURI = 'data:text/plain,hello world'; + + assert.deepStrictEqual(fromDataURI(dataURI, false), buffer); + }); + + it('should parse canonical RFC example with charset parameter', () => { + const buffer = Buffer.from('123'); + const dataURI = 'data:text/plain;charset=US-ASCII,123'; + + assert.deepStrictEqual(fromDataURI(dataURI, false), buffer); + }); + + it('should decode URL-encoded body', () => { + const buffer = Buffer.from('hello world'); + const dataURI = 'data:text/plain,hello%20world'; + + assert.deepStrictEqual(fromDataURI(dataURI, false), buffer); + }); + + it('should preserve full content type with parameters in Blob', () => { + const dataURI = 'data:text/plain;charset=utf-8;base64,' + Buffer.from('hello').toString('base64'); + const blob = fromDataURI(dataURI, true, { Blob }); + + assert.strictEqual(blob.type, 'text/plain;charset=utf-8'); + }); + + it('should normalize omitted mediatype to text/plain per RFC 2397', () => { + const dataURI = 'data:;charset=UTF-8,hello'; + const blob = fromDataURI(dataURI, true, { Blob }); + + assert.strictEqual(blob.type, 'text/plain;charset=utf-8'); + }); + + it('should reject data URI with unsupported protocol prefix', () => { + assert.throws(() => { + fromDataURI('datax:,hi', false); + }, (err) => err.code === 'ERR_NOT_SUPPORT' && err.message.includes('Unsupported protocol')); + }); + + it('should reject data URI without comma separator', () => { + assert.throws(() => { + fromDataURI('data:hi', false); + }, (err) => err.code === 'ERR_INVALID_URL'); + }); });