๐Ÿ“ฆ kitten / multitars

๐Ÿ“„ multipartInput.ts ยท 319 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319import {
  ReadableStreamBlockReader,
  bytesToSkipTable,
  readUntilBoundary,
} from './reader';
import { decodeName } from './multipartEncoding';
import { ReadableStreamLike } from './conversions';
import { MultipartHeaders, MultipartPart } from './multipartShared';

const BLOCK_SIZE = 4_096; /*4KiB*/

const CRLF = new Uint8Array([13, 10]);
const CRLF_SKIP_TABLE = bytesToSkipTable(CRLF);
const MAX_PREAMBLE_SIZE = 16_000; /*16kB*/
const MAX_HEADER_SIZE = 16_000; /*16kB*/
const MAX_HEADERS_SIZE = 32_000; /*32kB*/
const boundaryHeaderRe = /boundary="?([^=";]+)"?/i;
const encoder = new TextEncoder();

function utf8Encode(
  content: string | ArrayBufferView | ArrayBufferLike
): Uint8Array {
  return typeof content === 'string'
    ? encoder.encode(content)
    : new Uint8Array('buffer' in content ? content.buffer : content);
}

function parseContentLength(contentLength: string | undefined): number | null {
  if (contentLength) {
    const size = parseInt(contentLength, 10);
    return Number.isSafeInteger(size) && size > 0 ? size : null;
  } else {
    return null;
  }
}

interface ContentDisposition {
  name: string | null;
  filename: string | null;
}

function parseContentDisposition(
  contentDisposition: string | undefined
): ContentDisposition | null {
  let startIdx = 0,
    endIdx = -1;
  if (
    !contentDisposition ||
    (startIdx = contentDisposition.indexOf(';')) < 0 ||
    contentDisposition.slice(0, startIdx).trimEnd() !== 'form-data'
  ) {
    return null;
  }
  const disposition: ContentDisposition = { name: null, filename: null };
  do {
    endIdx = contentDisposition.indexOf(';', startIdx);
    const entry = contentDisposition.slice(
      startIdx,
      endIdx > -1 ? endIdx : undefined
    );
    startIdx = endIdx + 1;
    const equalIdx = entry.indexOf('=');
    if (equalIdx > -1) {
      const key = entry.slice(0, equalIdx).trim();
      let value = entry.slice(equalIdx + 1).trim();
      if (key !== 'name' && key !== 'filename') {
        // NOTE: We don't handle the special `filename*=UTF-8` case
        continue;
      } else if (value[0] === '"' && value[value.length - 1] === '"') {
        disposition[key] = decodeName(value.slice(1, -1));
      } else {
        disposition[key] = decodeName(value);
      }
    }
  } while (endIdx > 0);
  return disposition;
}

interface Boundary {
  /** Initial boundary at the beginning of the multipart stream after the preamble */
  raw: Uint8Array;
  rawSkipTable: Uint8Array;
  /** Trailer boundary after every multipart part.
   * @remarks
   * After every multipart part that isn't the initial one we expect a leading CRLF sequence
   */
  trailer: Uint8Array;
  trailerSkipTable: Uint8Array;
}

/** Create boundary patterns from `contentType` boundary parameter.
 * @remarks
 * - The leading boundary is `--${boundary}\r\n` followed by the first set of headers (leading CRLF is optional)
 * - Subsequent boundary trailers are `\r\n--${boundary}\r\n` followed by headers
 * - The closing boundary is `\r\n--${boundary}--\r\n`
 * Because of the variability of `--` or `\r\n` following the main boundary, we don't add it
 * to the pattern. Instead, we handle `--` in `decodeHeaders`.
 * We have two patterns because the leading CRLF is optional in the first boundary. Afterwards,
 * we use `Boundary.trailer` to include the leading CRLF to remove it from content parts.
 */
function convertToBoundaryBytes(contentType: string): Boundary {
  const boundaryHeader = contentType.match(boundaryHeaderRe);
  const boundaryRaw = `--${boundaryHeader?.[1] || '-'}`;
  const boundaryTrailer = `\r\n${boundaryRaw}`;
  const raw = utf8Encode(boundaryRaw);
  const trailer = utf8Encode(boundaryTrailer);
  return {
    raw,
    rawSkipTable: bytesToSkipTable(raw),
    trailer,
    trailerSkipTable: bytesToSkipTable(trailer),
  };
}

async function expectPreamble(
  reader: ReadableStreamBlockReader,
  boundary: Boundary
): Promise<void> {
  let byteLength = 0;
  for await (const chunk of readUntilBoundary(
    reader,
    boundary.raw,
    boundary.rawSkipTable
  )) {
    if (chunk == null) {
      throw new Error('Invalid Multipart Preamble: Unexpected EOF');
    } else if ((byteLength += chunk?.byteLength) > MAX_PREAMBLE_SIZE) {
      throw new Error(
        'Invalid Multipart Preamble: Boundary not found within the first 16kB'
      );
    }
  }
}

async function expectTrailer(
  reader: ReadableStreamBlockReader,
  boundary: Boundary
): Promise<void> {
  const chunk = await reader.pull(boundary.trailer.byteLength);
  for (let idx = 0; idx < boundary.trailer.byteLength; idx++) {
    if (chunk == null || chunk[idx] !== boundary.trailer[idx]) {
      throw new Error('Invalid Multipart Part: Expected trailing boundary');
    }
  }
}

async function decodeHeaders(
  reader: ReadableStreamBlockReader
): Promise<MultipartHeaders | null> {
  // NOTE: The characters we're decoding in headers is restricted, and we're therefore
  // more strict here. The `stream` option is also omitted below
  let byteLength = 0;
  const decoder = new TextDecoder('utf-8', { fatal: true, ignoreBOM: true });
  const headers: MultipartHeaders = Object.create(null);
  while (byteLength < MAX_HEADERS_SIZE) {
    let header = '';
    for await (const chunk of readUntilBoundary(
      reader,
      CRLF,
      CRLF_SKIP_TABLE
    )) {
      if (chunk == null) {
        throw new Error('Invalid Multipart Headers: Unexpected EOF');
      } else if (
        !byteLength &&
        !header &&
        chunk[0] === 45 /*'-'*/ &&
        chunk[1] === 45 /*'-'*/
      ) {
        // If the first chunk we receive here is `--`, this is the multipart data's closing delimiter
        // This means the multipart stream has ended
        return null;
      } else {
        header += decoder.decode(chunk);
        if (header.length > MAX_HEADER_SIZE)
          throw new Error(
            'Invalid Multipart Headers: A header exceeded its maximum length of 16kB'
          );
      }
    }

    if (header) {
      const colonIdx = header.indexOf(':');
      if (colonIdx > -1) {
        const headerName = header.slice(0, colonIdx).trim().toLowerCase();
        const headerValue = header.slice(colonIdx + 1).trim();
        if (headerValue) headers[headerName] = headerValue;
        byteLength += header.length + CRLF.byteLength;
      } else {
        throw new Error(
          'Invalid Multipart Headers: Invalid header value missing `:`'
        );
      }
    } else if (byteLength > 0) {
      // The first and last CRLF sections are expected to be empty (headerByteLength === 0)
      // since headers start and end with an additional CRLF sequence
      // Hence, if we have already seen some header bytes (byteLenght > 0) we stop decoding headers
      break;
    }
  }
  if (byteLength > MAX_HEADER_SIZE) {
    throw new Error(
      'Invalid Multipart Headers: Headers exceeded their maximum length of 32kB'
    );
  }
  return headers;
}

interface ParseMultipartParams {
  /** The `Content-Type` header value */
  contentType: string;
}

/** Provide tar entry iterator */
export async function* parseMultipart(
  stream: ReadableStreamLike<Uint8Array>,
  params: ParseMultipartParams
): AsyncGenerator<MultipartPart> {
  const boundary = convertToBoundaryBytes(params.contentType);
  const reader = new ReadableStreamBlockReader(stream, BLOCK_SIZE);
  const streamParams = new ByteLengthQueuingStrategy({ highWaterMark: 0 });

  await expectPreamble(reader, boundary);

  let headers: MultipartHeaders | null;
  while ((headers = await decodeHeaders(reader))) {
    const type = headers['content-type'];
    const disposition = parseContentDisposition(headers['content-disposition']);
    const name = disposition?.filename || disposition?.name;
    const size = parseContentLength(headers['content-length']);
    if (!name) {
      throw new Error(
        'Invalid Multipart Part: Missing Content-Disposition name or filename parameter'
      );
    }

    let reachedEnd = false;
    let remaining = 0;
    let stream: ReadableStream;
    let cancel: () => Promise<void>;
    if (size !== null) {
      // With a known size, we output a sized stream (similar to tar files)
      remaining = size;
      stream = new ReadableStream(
        {
          expectedLength: size,
          cancel: (cancel = async function cancel() {
            if (remaining > 0) {
              remaining = await reader.skip(remaining);
              if (remaining > 0)
                throw new Error('Invalid Multipart Part: Unexpected EOF');
            }
            if (!reachedEnd) {
              await expectTrailer(reader, boundary);
              reachedEnd = true;
            }
          }),
          async pull(controller) {
            if (remaining) {
              const buffer = await reader.pull(remaining);
              if (!buffer)
                throw new Error('Invalid Multipart Part: Unexpected EOF');
              remaining -= buffer.byteLength;
              controller.enqueue(buffer.slice());
            }
            if (!remaining) {
              await expectTrailer(reader, boundary);
              reachedEnd = true;
              controller.close();
            }
          },
        },
        streamParams
      );
    } else {
      // Without a size, we instead output a stream that ends at the multipart boundary
      const iterator = readUntilBoundary(
        reader,
        boundary.trailer,
        boundary.trailerSkipTable
      );
      stream = new ReadableStream(
        {
          cancel: (cancel = async function cancel() {
            for await (const chunk of iterator) {
              if (!chunk) {
                throw new Error('Invalid Multipart Part: Unexpected EOF');
              }
            }
            reachedEnd = true;
          }),
          async pull(controller) {
            const result = await iterator.next();
            if (result.done) {
              controller.close();
              reachedEnd = true;
            } else if (!result.value) {
              throw new Error('Invalid Multipart Part: Unexpected EOF');
            } else {
              controller.enqueue(result.value.slice());
            }
          },
        },
        streamParams
      );
    }

    yield new MultipartPart(stream, name, {
      type: type ?? undefined,
      size: size ?? undefined,
      headers,
    });

    if (remaining > 0 || !reachedEnd) {
      await (stream.locked ? cancel() : stream.cancel());
    }
  }
}