2
0
mirror of https://github.com/tenrok/axios.git synced 2026-06-17 19:21:29 +03:00

fix: update fromDataURI regex to match RFC 2397 (#10829)

* fix: update fromDataURI regex to match RFC 2397

Update the DATA_URL_PATTERN regex to correctly match all valid RFC 2397
data URIs. The previous regex required a semicolon-terminated media type
segment, which rejected valid data URIs like `data:;base64,MTIz` and
`data:application/octet-stream,123`.

Fixes #10808

* fix: normalize omitted mediatype to text/plain per RFC 2397

When a data URI carries parameters but omits the type/subtype (e.g. `data:;charset=UTF-8,...`),
prepend `text/plain` as the default type per RFC 2397 section 3.

* fix: use stricter RFC 2397 regex and expand test matrix

- Switch to type/subtype-aware regex from #10808
- Require name=value parameters, separate ;base64 group
- Add tests: charset param, URL-encoded body, Blob type
  preservation, datax: rejection, missing comma rejection
- Normalize omitted mediatype to text/plain per RFC 2397 §3

* chore: apply small nits

---------

Co-authored-by: Abhishek Chauhan <abhishek-chauhan@outlook.com>
Co-authored-by: Jay <jasonsaayman@gmail.com>
This commit is contained in:
Abhishek Chauhan
2026-05-05 22:53:16 +05:30
committed by GitHub
parent 78e8dcf875
commit 5061879649
2 changed files with 93 additions and 5 deletions
+18 -5
View File
@@ -4,7 +4,9 @@ import AxiosError from '../core/AxiosError.js';
import parseProtocol from './parseProtocol.js';
import platform from '../platform/index.js';
const DATA_URL_PATTERN = /^(?:([^;]+);)?(?:[^;]+;)?(base64|),([\s\S]*)$/;
// RFC 2397: data:[<mediatype>][;base64],<data>
// mediatype = type/subtype followed by optional ;name=value parameters
//
// Capture groups:
//   1 - `type/subtype` (undefined when omitted); may not contain `,` or `;`
//   2 - zero or more `;name=value` parameters, captured as one string
//       (empty string when there are none)
//   3 - the literal `;base64` flag (undefined when absent)
//   4 - everything after the first comma, i.e. the payload (may be empty,
//       may span multiple lines)
//
// NOTE(review): the pattern has no `data:` prefix, so it is presumably applied
// to the URI with the scheme already stripped - confirm at the call site.
const DATA_URL_PATTERN = /^([^,;]+\/[^,;]+)?((?:;[^,;=]+=[^,;]+)*)(;base64)?,([\s\S]*)$/;
/**
* Parse data uri to a Buffer or Blob
@@ -33,10 +35,21 @@ export default function fromDataURI(uri, asBlob, options) {
throw new AxiosError('Invalid URL', AxiosError.ERR_INVALID_URL);
}
const mime = match[1];
const isBase64 = match[2];
const body = match[3];
const buffer = Buffer.from(decodeURIComponent(body), isBase64 ? 'base64' : 'utf8');
const type = match[1];
const params = match[2];
const encoding = match[3] ? 'base64' : 'utf8';
const body = match[4];
// RFC 2397 section 3: default mediatype is text/plain;charset=US-ASCII
// Bare `data:,` leaves mime undefined; Blob normalises that to "" per spec.
let mime;
if (type) {
mime = params ? type + params : type;
} else if (params) {
mime = 'text/plain' + params;
}
const buffer = Buffer.from(decodeURIComponent(body), encoding);
if (asBlob) {
if (!_Blob) {
+75
View File
@@ -10,4 +10,79 @@ describe('helpers::fromDataURI', () => {
assert.deepStrictEqual(fromDataURI(dataURI, false), buffer);
});
it('should parse data URI with no mediatype and base64', () => {
  // The mediatype is omitted entirely, but the `;base64` flag is still present.
  const expected = Buffer.from('123');
  const actual = fromDataURI(`data:;base64,${expected.toString('base64')}`, false);
  assert.deepStrictEqual(actual, expected);
});
it('should parse data URI with mediatype and no base64', () => {
  // A type/subtype is given, but there is no `;base64` flag before the comma.
  const expected = Buffer.from('123');
  const actual = fromDataURI('data:application/octet-stream,123', false);
  assert.deepStrictEqual(actual, expected);
});
it('should parse full form data URI with text/plain and base64', () => {
  // Both the type/subtype and the `;base64` flag are present.
  const expected = Buffer.from('hello');
  const actual = fromDataURI(`data:text/plain;base64,${expected.toString('base64')}`, false);
  assert.deepStrictEqual(actual, expected);
});
it('should parse minimal valid data URI', () => {
  // `data:,` has no mediatype, no flag and an empty payload.
  const expected = Buffer.from('');
  const actual = fromDataURI('data:,', false);
  assert.deepStrictEqual(actual, expected);
});
it('should parse data URI with spaces in data', () => {
  // A literal (unescaped) space in the payload must survive parsing.
  const expected = Buffer.from('hello world');
  const actual = fromDataURI('data:text/plain,hello world', false);
  assert.deepStrictEqual(actual, expected);
});
it('should parse canonical RFC example with charset parameter', () => {
  // A `;name=value` parameter follows the type/subtype; no `;base64` flag.
  const expected = Buffer.from('123');
  const actual = fromDataURI('data:text/plain;charset=US-ASCII,123', false);
  assert.deepStrictEqual(actual, expected);
});
it('should decode URL-encoded body', () => {
  // `%20` in the payload is expected to come back as a space.
  const expected = Buffer.from('hello world');
  const actual = fromDataURI('data:text/plain,hello%20world', false);
  assert.deepStrictEqual(actual, expected);
});
it('should preserve full content type with parameters in Blob', () => {
  // The Blob's type should carry the type/subtype together with its parameters.
  const payload = Buffer.from('hello').toString('base64');
  const { type } = fromDataURI(`data:text/plain;charset=utf-8;base64,${payload}`, true, { Blob });
  assert.strictEqual(type, 'text/plain;charset=utf-8');
});
it('should normalize omitted mediatype to text/plain per RFC 2397', () => {
  // Parameters without a type/subtype: the resulting Blob type is expected to
  // start with text/plain (compared in lower case).
  const { type } = fromDataURI('data:;charset=UTF-8,hello', true, { Blob });
  assert.strictEqual(type, 'text/plain;charset=utf-8');
});
it('should reject data URI with unsupported protocol prefix', () => {
  // `datax:` is not the `data:` scheme, so the call must throw with the
  // "not supported" error code and message.
  const isUnsupportedProtocol = ({ code, message }) =>
    code === 'ERR_NOT_SUPPORT' && message.includes('Unsupported protocol');
  assert.throws(() => fromDataURI('datax:,hi', false), isUnsupportedProtocol);
});
it('should reject data URI without comma separator', () => {
  // Without a comma there is no payload separator, so the URI is invalid.
  const isInvalidUrl = ({ code }) => code === 'ERR_INVALID_URL';
  assert.throws(() => fromDataURI('data:hi', false), isInvalidUrl);
});
});