diff --git a/README.md b/README.md index 904696a..884a02e 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,31 @@ It's an implementation based on the `ustar` format. This package only provides low-level API's. +### Usage + +```ts +// packing +import { createPack, createExtract } from 'tar-mini' + +const pack = createPack() + +pack.add(new Uint8Array(512), { + // options +}) + +pack.done() + +// extracting + +const extract = createExtract() + +extract.on('entry', (head, file) => { + // todo +}) + +pack.receiver.pipe(extract.receiver) +``` + ### Sponsors

diff --git a/__tests__/header.spec.ts b/__tests__/header.spec.ts index 87a017f..18a9de6 100644 --- a/__tests__/header.spec.ts +++ b/__tests__/header.spec.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from 'vitest' -import { ERROR_MESSAGES, F_MODE, TypeFlag, encode } from '../src' +import { ERROR_MESSAGES, F_MODE, TypeFlag, decodePax, decode as decodeTar, encode, encodePax } from '../src' import type { EncodingHeadOptions } from '../src' function randomDir(len: number) { @@ -57,7 +57,7 @@ describe('Headers', () => { const decode = decoder.decode.bind(decoder) const mtime = Math.floor(Date.now() / 1000) it('Normal', () => { - const header = { + const header = { name: 'foo.tsx', uid: 0, gid: 0, @@ -70,9 +70,8 @@ describe('Headers', () => { mode: F_MODE, uname: 'nonzzz', gname: 'admin' - - } - + } + const block = encode(header) expect(block.length).toBe(512) expect(decode(block.subarray(0, 100)).replace(/\0+$/, '')).toBe('foo.tsx') @@ -80,7 +79,7 @@ describe('Headers', () => { expect(decode(block.subarray(297, 297 + 32)).replace(/\0+$/, '')).toBe('admin') }) it('Directory', () => { - const header = { + const header = { name: 'nao', uid: 0, gid: 0, @@ -101,7 +100,7 @@ describe('Headers', () => { }) it('Long Name File But Not Direcotry', () => { const filename = 'a'.repeat(98) + '.tsx' - const header = { + const header = { name: filename, uid: 0, gid: 0, @@ -121,7 +120,7 @@ describe('Headers', () => { const dir = randomDir(100) const filename = 'nonzzz.tsx' const { prefix, name } = getPrefixAndName(dir + filename) - const header = { + const header = { name: dir + filename, uid: 0, gid: 0, @@ -135,12 +134,39 @@ describe('Headers', () => { uname: 'nonzzz', gname: 'admin' } - + const block = encode(header) expect(block.length).toBe(512) expect(decode(block.subarray(0, 100)).replace(/\0+$/, '')).toBe(name) expect(decode(block.subarray(345, 345 + 155)).replace(/\0+$/, '')).toBe(prefix) }) + it('Large File', () => { + const size = Math.pow(2, 33) + const header = { + 
name: 'nonzzz.tsx', + uid: 0, + gid: 0, + size, + mtime, + typeflag: TypeFlag.AREG_TYPE, + linkname: '', + devmajor: 0, + devminor: 0, + mode: F_MODE, + uname: 'nonzzz', + gname: 'admin' + } + const block = encode(header) + const { size: decodeSize } = decodeTar(block) + expect(decodeSize).toBe(size) + }) + it('Pax Header', () => { + const binary = encodePax({ name: 'nonzzz.tsx', linkname: '1', pax: { kanno: 'hello world' } }) + const pax = decodePax(binary) + expect(pax.path).toBe('nonzzz.tsx') + expect(pax.kanno).toBe('hello world') + expect(pax.linkpath).toBe('1') + }) }) }) }) diff --git a/__tests__/stream.spec.ts b/__tests__/stream.spec.ts index bc05e85..286bc09 100644 --- a/__tests__/stream.spec.ts +++ b/__tests__/stream.spec.ts @@ -48,6 +48,8 @@ describe('Stream', () => { }) } + pack.done() + extract.on('entry', (head, file) => { const content = assets[head.name] expect(content).toBe(textDecode.decode(file)) diff --git a/dprint.json b/dprint.json index 5f99566..0c26d79 100644 --- a/dprint.json +++ b/dprint.json @@ -1,18 +1,31 @@ { "json": { }, + "lineWidth": 140, + "typescript": { + "semiColons": "asi", + "indentWidth": 2, + "quoteStyle": "preferSingle", + "useTabs": false, + "trailingCommas": "never", + "module.sortImportDeclarations": "maintain", + "importDeclaration.sortNamedImports": "maintain", + "operatorPosition": "maintain", + "jsx.quoteStyle": "preferDouble", + "jsx.bracketPosition": "maintain", + "functionDeclaration.spaceBeforeParentheses": false + }, "markdown": { }, "toml": { }, "excludes": [ "**/node_modules", - "**/*-lock.json", - ".yarn/*" + "**/*-lock.json" ], "plugins": [ - "https://plugins.dprint.dev/json-0.19.3.wasm", - "https://plugins.dprint.dev/markdown-0.17.1.wasm", - "https://plugins.dprint.dev/toml-0.6.2.wasm" + "https://plugins.dprint.dev/typescript-0.90.5.wasm", + "https://plugins.dprint.dev/json-0.19.2.wasm", + "https://plugins.dprint.dev/markdown-0.17.0.wasm" ] } diff --git a/src/head.ts b/src/head.ts index c56a28c..8ec19d2 
100644 --- a/src/head.ts +++ b/src/head.ts @@ -1,3 +1,4 @@ +/* eslint-disable stylistic/indent */ /* eslint-disable no-labels */ // https://www.gnu.org/software/tar/manual/html_node/Standard.html // https://www.gnu.org/software/tar/manual/html_node/Portability.html#Portability @@ -23,6 +24,11 @@ export interface Head { prefix: uint8 // 155 } +export interface PaxHead extends Head { + atime: uint8 + linkpath: uint8 +} + export const Mode = { TS_UID: 0o4000, TS_GID: 0o2000, @@ -39,10 +45,10 @@ export const Mode = { } as const export const F_MODE = Mode.TU_READ | Mode.TU_WRITE | Mode.TG_READ | Mode.TO_READ - + export const D_MODE = Mode.TU_READ | Mode.TU_WRITE | Mode.TU_EXEC | Mode.TG_READ | Mode.TG_EXEC | Mode.TO_READ | - Mode.TO_EXEC - + Mode.TO_EXEC + export type Mode = typeof Mode[keyof typeof Mode] export const TypeFlag = { @@ -54,43 +60,56 @@ export const TypeFlag = { BLK_TYPE: '4', DIR_TYPE: '5', FIFO_TYPE: '6', - CONT_TYPE: '7' + CONT_TYPE: '7', + // For Pax + XHD_TYPE: 'x', + XGL_TYPE: 'g' } as const - + export type TypeFlag = typeof TypeFlag[keyof typeof TypeFlag] export const Magic = { T_MAGIC: 'ustar', T_VERSION: '00', WHITE_SPACE: 32, // ascii code + EQ_CHAR: 61, // ascii code NULL_CHAR: 0, // ascii code + NEW_LINE: 10, // ascii code NEGATIVE_256: 0xFF, POSITIVE_256: 0x80 } export interface EncodingHeadOptions { name: string - mode: number, - uid: number, - gid: number, - size: number, - mtime: number, - typeflag: TypeFlag, - linkname?: string, - uname?: string, - gname?: string, - devmajor: number, - devminor: number, + mode: number + uid: number + gid: number + size: number + mtime: number + typeflag: TypeFlag + linkname?: string + uname?: string + gname?: string + devmajor: number + devminor: number +} + +export interface EncodingHeadPaxOptions { + name: string + linkname: string + pax?: Record } +export type EncodingHeadOptionsWithPax = EncodingHeadOptions & Pick + export interface DecodingHeadOptions { filenameEncoding?: string } export const 
const enc = /* @__PURE__ */ new TextEncoder()

const encodeString = enc.encode.bind(enc)

/**
 * Build a single pax extended-header record: "%d %s=%s\n" (length, keyword, value).
 *
 * The leading decimal is the size in bytes of the whole record, and that size
 * includes the digits of the length field itself. Adding the digit count can
 * therefore push the total into one more digit (e.g. a 9 byte payload needs a
 * 2 digit length, giving 11 rather than 10), so the length is resolved by
 * iterating until it is self-consistent.
 *
 * @param keyword - Record keyword, e.g. `path` or `linkpath`.
 * @param value - Record value; measured as UTF-8 bytes, not UTF-16 code units.
 * @returns The record as a string, ready to be concatenated with other records.
 */
function paxTemplate(keyword: string, value: string) {
  const template = ' ' + keyword + '=' + value + '\n'
  // Byte length of everything that follows the length digits.
  const payloadLen = encodeString(template).length
  let total = payloadLen + String(payloadLen).length
  // Re-evaluate while the digit count of `total` disagrees with the digits already counted.
  while (payloadLen + String(total).length !== total) {
    total = payloadLen + String(total).length
  }
  return total + template
}
But when the size is over 2^33, it will fallback to the posix pax format. + +// | ustar Header[typeFlag=g] | +// | Global Extended Header Data | +// | ustar Header[typeFlag=x] | +// | Extended Header Data | +// | ustar Header[typeFlag=0] | +// | File Data | +// ... export function encode(options: EncodingHeadOptions) { const block = new Uint8Array(512) let name = options.name @@ -175,7 +212,7 @@ export function encode(options: EncodingHeadOptions) { } let prefix = '' let invalidate = false - loop: + loop: while (name.length > 100) { const spec = name.indexOf('/') switch (spec) { @@ -193,6 +230,7 @@ export function encode(options: EncodingHeadOptions) { if (invalidate) { throw new Error(ERROR_MESSAGES.INVALID_ENCODING_NAME) } + const binaryName = encodeString(name) if (binaryName.length + prefix.length > 255) { throw new Error(ERROR_MESSAGES.INVALID_ENCODING_NAME_LEN) @@ -206,13 +244,19 @@ export function encode(options: EncodingHeadOptions) { block.set(encodeString(encodeOctal(options.gid, 6)), 116) // size - - if (encodeOctal(options.size).length > 11) { - throw new Error('Invalid size. Please check \'size\' is less than 8 byte.') + // octal max is 7777777... 
/**
 * Serialize the data block of a pax extended header.
 *
 * Always emits a `path` record (from `options.name`) and a `linkpath` record
 * (from `options.linkname`), followed by one record per entry of `options.pax`.
 *
 * @param options - Name, link name and optional extra keyword/value pairs.
 * @returns The concatenated records as encoded bytes (not padded to a 512 byte block).
 */
export function encodePax(options: EncodingHeadPaxOptions) {
  let p = paxTemplate('path', options.name) + paxTemplate('linkpath', options.linkname)
  if (options.pax && typeof options.pax === 'object') {
    // Own keys only: a `for...in` loop would also serialize enumerable
    // properties inherited through the prototype chain.
    for (const keyword of Object.keys(options.pax)) {
      p += paxTemplate(keyword, options.pax[keyword])
    }
  }
  return encodeString(p)
}
/**
 * Decode the data block of a pax extended header into a keyword/value map.
 *
 * Each record has the form "%d %s=%s\n": a decimal byte length covering the
 * whole record, a space, the keyword, '=', the value and a trailing newline.
 * Records are walked by their declared length rather than split on '\n', so a
 * value may itself contain newlines.
 *
 * Parsing stops at the first position that does not start with a usable length
 * prefix (for example zero padding after the last record). Records without a
 * keyword or without '=' are skipped. When the same keyword occurs twice the
 * later record wins.
 *
 * @param b - Raw bytes of the extended header data.
 * @returns Map of keyword to decoded value; empty when `b` holds no valid record.
 */
export function decodePax(b: Uint8Array) {
  const pax: Record<string, string> = {}
  let offset = 0
  while (offset < b.length) {
    // The decimal length prefix is terminated by a single space.
    const spacePos = b.indexOf(Magic.WHITE_SPACE, offset)
    if (spacePos === -1) break
    const prefixLen = spacePos - offset
    const recordLen = parseInt(decodeString(b, offset, prefixLen), 10)
    // A record must be longer than its own prefix plus the space; this also
    // rejects NaN and guarantees `offset` always advances.
    if (Number.isNaN(recordLen) || recordLen <= prefixLen + 1) break

    let end = Math.min(offset + recordLen, b.length)
    if (b[end - 1] !== Magic.NEW_LINE) {
      // The declared length does not land on the terminating newline.
      // Resynchronise on the next newline (or the end of the buffer) so that
      // one inaccurate length cannot corrupt every following record.
      const newLinePos = b.indexOf(Magic.NEW_LINE, end)
      end = newLinePos === -1 ? b.length : newLinePos + 1
    }
    // Exclude the trailing newline from the "keyword=value" payload.
    const contentEnd = b[end - 1] === Magic.NEW_LINE ? end - 1 : end
    const content = b.subarray(spacePos + 1, contentEnd)
    const eqPos = content.indexOf(Magic.EQ_CHAR)
    if (eqPos > 0) {
      const value = content.subarray(eqPos + 1)
      pax[decodeString(content, 0, eqPos)] = decodeString(value, 0, value.length)
    }
    offset = end
  }
  return pax
}
class List { length: number constructor(cap: number = 16) { this.cap = cap - this.length = 0 + this.length = 0 this.head = new Elt(this.cap) this.tail = this.head } @@ -55,7 +55,7 @@ export class List { this.head = prev.next } this.head.push(elt) - this.length++ + this.length++ } shift() { diff --git a/src/stream.ts b/src/stream.ts index 9e94386..dbf7079 100644 --- a/src/stream.ts +++ b/src/stream.ts @@ -1,12 +1,13 @@ import { Readable, Writable } from 'stream' import type { ReadableOptions, WritableOptions } from 'stream' -import { F_MODE, TypeFlag, encode } from './head' -import { DecodingHeadOptions, EncodingHeadOptions, decode } from './head' +import { F_MODE, TypeFlag, decode, decodePax, encode, encodePax } from './head' +import type { DecodingHeadOptions, EncodingHeadOptions, EncodingHeadOptionsWithPax } from './head' import { List, createList } from './list' import { noop } from './shared' export type PackOptions = Partial> & { filename: string + pax?: Record } function createReadbleStream(options?: ReadableOptions) { @@ -18,7 +19,7 @@ function createWriteableStream(options?: WritableOptions) { } const PACK_ERROR_MESSAGES = { - HAS_DONE: 'Can\'t add new entry after calling done()' + HAS_DONE: "Can't add new entry after calling done()" } // New archives should be created using REGTYPE. 
@@ -41,7 +42,7 @@ export class Pack { this.finished = false } - private resolveHeadOptions(size: number, options: PackOptions): EncodingHeadOptions { + private resolveHeadOptions(size: number, options: PackOptions): EncodingHeadOptionsWithPax { const { filename, ...rest } = options return { ...defaultPackOptions, ...rest, name: filename, mtime: Math.floor(Date.now() / 1000), size } @@ -67,12 +68,24 @@ export class Pack { if (padding > 0) this.reader.push(new Uint8Array(padding)) } - private transport(binary: Uint8Array, resolvedOptions: EncodingHeadOptions) { + private transport(binary: Uint8Array, resolvedOptions: EncodingHeadOptionsWithPax) { + const consume = (chunk: Uint8Array) => { + if (resolvedOptions.pax) { + const paxHead = encodePax({ name: resolvedOptions.name, linkname: resolvedOptions.linkname || '', pax: { ...resolvedOptions.pax } }) + const head = encode({ ...resolvedOptions, name: 'PaxHeader', typeflag: TypeFlag.XHD_TYPE, size: paxHead.length }) + this.reader.push(head) + this.reader.push(paxHead) + this.reader.push(this.fix(paxHead.length)) + resolvedOptions.name = 'PaxHeader' + } + this.reader.push(encode(resolvedOptions)) + this.reader.push(chunk) + } + const writer = createWriteableStream({ write: (chunk, _, callback) => { try { - this.reader.push(encode(resolvedOptions)) - this.reader.push(chunk) + consume(chunk) callback() } catch (error) { callback(error as Error) @@ -92,9 +105,6 @@ export class Pack { } get receiver() { - if (!this.finished) { - this.done() - } return this.reader } } @@ -111,7 +121,7 @@ class FastBytes { private queue: List bytesLen: number insertedBytesLen: number - + constructor() { this.queue = createList() this.bytesLen = 0 @@ -127,7 +137,7 @@ class FastBytes { shift(size: number) { if (size > this.bytesLen) { throw new Error(FAST_BYTES_ERROR_MESSAGES.EXCEED_BYTES_LEN) - } + } if (size === 0) { return new Uint8Array(0) } @@ -158,6 +168,8 @@ export class Extract { private flag: boolean private elt: Uint8Array | null 
private total: number + private isPax: boolean + private paxMeta: Record constructor(options: DecodingHeadOptions) { this.decodeOptions = options this.matrix = new FastBytes() @@ -167,6 +179,8 @@ export class Extract { this.offset = 0 this.elt = null this.total = 0 + this.isPax = false + this.paxMeta = Object.create(null) this.writer = createWriteableStream({ write: (chunk, _, callback) => { this.matrix.push(chunk) @@ -190,6 +204,18 @@ export class Extract { const decodeHead = () => { try { this.head = decode(this.matrix.shift(512), this.decodeOptions) + if (this.head.typeflag === TypeFlag.XHD_TYPE) { + this.isPax = true + return true + } + if (Object.keys(this.paxMeta).length > 0) { + this.head.name = this.paxMeta.path + this.head.linkname = this.paxMeta.linkpath + // @ts-expect-error + this.head.pax = { ...this.paxMeta } + this.paxMeta = Object.create(null) + } + this.missing = this.head.size this.elt = new Uint8Array(this.head.size) this.flag = true @@ -211,13 +237,25 @@ export class Extract { return } this.elt!.set(this.matrix.shift(this.missing), this.offset) - this.total += this.elt!.length + 512 this.writer.emit('entry', this.head, this.elt!) this.flag = false } + const handlePax = () => { + const c = this.matrix.shift(this.head.size) + const paxHead = decodePax(c) + this.paxMeta = { ...this.paxMeta, ...paxHead } + this.total += this.head.size + 512 + this.isPax = false + } + while (this.matrix.bytesLen > 0) { + if (this.isPax) { + handlePax() + continue + } + if (this.flag) { consume() continue