diff --git a/.gitignore b/.gitignore index b282bac..f614a28 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ # tools .vscode/ -.idea/ +.idea/* -!.idea/runConfigurations/* +!.idea/runConfigurations/ # builds lib/ lib.es2015/ diff --git a/.idea/runConfigurations/Wallaby.xml b/.idea/runConfigurations/Wallaby.xml new file mode 100644 index 0000000..983ade3 --- /dev/null +++ b/.idea/runConfigurations/Wallaby.xml @@ -0,0 +1,5 @@ + + + + + \ No newline at end of file diff --git a/README.md b/README.md index f982f77..7961c12 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,33 @@ console.log(serialization.encode([{a: "k", b: 5}, {a: "c", b: 9}])); // Ak5c9 ## Tiny compressor: can only compress 1 character values, but produces tiny payloads, +## Tiny Number Compressor +This compressor only compresses numbers, serializing them into base 36 values to save some space. +But in order to serialize and deserialize, it needs to know the number of spaces to allocate in advance. + +Tiny number compressor accepts a key-to-char-length factory, which provides the number of spaces to allocate for a given key. + +For our example, let's say our object looks like this: +`{a: 100, b: 1, c: 2}` and we know for a fact that b and c won't go above the value 35, then we can use the following: + +```typescript +import {Searilie, ValueType} from "./src/Searilie"; +import {TinyNumberCompressor} from "./src/adapters/TinyNumberCompressor" +const tinyNumberCompressor = new TinyNumberCompressor((key) => key === "a" ? 
2 : 1); +const searlie = new Searilie(tinyNumberCompressor); +searlie.encode([{a: 100, b: 25, c: 9}]); // C2sp9 +searlie.decode("C2sp9", {a: ValueType.Number, b: ValueType.Number, c: ValueType.Number}); // [{a: 100, b: 25, c: 9}] +``` + +#### Choosing correct space values: +The largest value that fits in N spaces is `(36 ** N) - 1`, where N is the number of spaces; for quick reference, here are the first 5 values: + +- n = 1 gets you from 0 - 35 +- n = 2 gets you from 0 - 1295 +- n = 3 gets you from 0 - 46655 +- n = 4 gets you from 0 - 1679615 +- n = 5 gets you from 0 - 60466175 + ## CSVCompressor separates data using , and ; producing larger payloads, but it can support more than 1 character payloads: ```typescript diff --git a/src/adapters/TinyCompressor.ts b/src/adapters/TinyCompressor.ts index bea6605..4673718 100644 --- a/src/adapters/TinyCompressor.ts +++ b/src/adapters/TinyCompressor.ts @@ -1,8 +1,8 @@ import {IAdapter, IObject, ISchema, TIdentifier, ValueType} from "../Searilie"; +import {chunkText} from "../utils/ChunkText"; import {Validator} from "../validation/Validator"; export class TinyCompressor implements IAdapter { - private static isValid(object: IObject[]): boolean { return Validator.validateArray(object, (value) => value.toString().length === 1); } @@ -11,11 +11,6 @@ export class TinyCompressor implements IAdapter { return Object.keys(obj).sort().map((x) => obj[x]).join(""); } - private static chunkText(text: string, length: number): string[] { - const regexChunk = new RegExp(`.{1,${length}}`, "g"); - return text.match(regexChunk)!; - } - private static parseText(text: string, schema: ISchema): IObject { const object: IObject = {}; Object.keys(schema).sort().forEach((key: string, index: number) => { @@ -33,7 +28,7 @@ export class TinyCompressor implements IAdapter { if (text.length % numOfKeysInSchema !== 0) { throw new Error("invalid text"); } - const parts = TinyCompressor.chunkText(text, 
numOfKeysInSchema); return parts.map((part) => TinyCompressor.parseText(part, schema)); } diff --git a/src/adapters/TinyNumberCompressor.spec.ts b/src/adapters/TinyNumberCompressor.spec.ts new file mode 100644 index 0000000..700e3fc --- /dev/null +++ b/src/adapters/TinyNumberCompressor.spec.ts @@ -0,0 +1,70 @@ +import {ValueType} from "../Searilie"; +import {TinyNumberCompressor} from "./TinyNumberCompressor"; + +describe("TinyNumberCompressor", () => { + it("should be defined", () => { + expect(TinyNumberCompressor).toBeDefined(); + }); + + it("[important] should return an identifier", () => { + // DO NOT CHANGE THIS TEST CASE once we change the identifier, all old values can't be deserialized + const adapter = new TinyNumberCompressor(jest.fn()); + expect(adapter.getIdentifier()).toBe("C"); + }); + + describe("constructor", () => { + it("should accept keyLengthFactoryFunction", () => { + expect(() => new TinyNumberCompressor(jest.fn())).not.toThrow(); + }); + }); + + describe("validation", () => { + it("should throw error for string values", () => { + const mockFn = jest.fn(); + const tinyNumberCompressor = new TinyNumberCompressor(mockFn); + expect(() => tinyNumberCompressor.serialize([{a: "something"}])).toThrow(); + expect(() => tinyNumberCompressor.serialize([{a: "s"}])).toThrow(); + }); + + it("should throw error if number doesn't fit in given number of spaces", () => { + const mockFn = jest.fn(() => 1); + const tinyNumberCompressor = new TinyNumberCompressor(mockFn); + expect(() => tinyNumberCompressor.serialize([{a: 36}])).toThrow(); + expect(() => tinyNumberCompressor.serialize([{a: 35}])).not.toThrow(); + expect(mockFn).toHaveBeenCalled(); + // if mock returns 2 it can store upto 1295 + mockFn.mockImplementation(() => 2); + expect(() => tinyNumberCompressor.serialize([{a: 36}])).not.toThrow(); + expect(() => tinyNumberCompressor.serialize([{a: 1295}])).not.toThrow(); + expect(() => tinyNumberCompressor.serialize([{a: 1296}])).toThrow(); + }); + }); + 
describe("TinyNumberCompressor encoding", () => { + it("should be able to encode data", () => { + const mockFn = jest.fn(() => 1); + const tinyNumberCompressor = new TinyNumberCompressor(mockFn); + expect(tinyNumberCompressor.serialize([])).toBe(""); + expect(tinyNumberCompressor.serialize([{a: 29, b: 18, c: 23, d: 34}])).toBe("tiny"); + mockFn.mockImplementation(() => 2); + expect(tinyNumberCompressor.serialize([{a: 29, b: 18, c: 23, d: 34}])).toBe("0t0i0n0y"); + mockFn.mockImplementation((...args: any[]) => args[0] === "a" ? 2 : 1); + expect(tinyNumberCompressor.serialize([{a: 29, b: 18, c: 23, d: 34}])).toBe("0tiny"); + expect(tinyNumberCompressor.serialize([{a: 630, b: 16, c: 17, d: 5}])).toBe("high5"); + }); + }); + describe("deserialization", () => { + it("should throw error if length is mismatched", () => { + const tinyNumberCompressor = new TinyNumberCompressor(jest.fn(() => 2)); + // we have 2 spaces, if we pass 1 or 3 character, it should be invalid + expect(() => tinyNumberCompressor.deserialize("s", {a: ValueType.Number})).toThrow("invalid data"); + expect(() => tinyNumberCompressor.deserialize("dog", {a: ValueType.Number})).toThrow("invalid data"); + expect(() => tinyNumberCompressor.deserialize("dog", {a: ValueType.Number, b: ValueType.Number})).toThrow("invalid data"); + expect(() => tinyNumberCompressor.deserialize("dogs", {a: ValueType.Number, b: ValueType.Number})).not.toThrow("invalid data"); + }); + it("should deserialize correctly", () => { + const tinyNumberCompressor = new TinyNumberCompressor(jest.fn(() => 2)); + // we have 2 spaces, if we pass 1 or 3 character, it should be invalid + expect(tinyNumberCompressor.deserialize("dogs", {a: ValueType.Number, b: ValueType.Number})).toStrictEqual([{a: 492, b: 604}]); + }); + }); +}); diff --git a/src/adapters/TinyNumberCompressor.ts b/src/adapters/TinyNumberCompressor.ts new file mode 100644 index 0000000..0c4d0b0 --- /dev/null +++ b/src/adapters/TinyNumberCompressor.ts @@ -0,0 +1,72 @@ +import 
/** Maps a property name to the number of base-36 characters reserved for its value. */
type KeyLengthFactory = (key: string) => number;

/**
 * Adapter that stores non-negative integers as fixed-width base-36 text.
 *
 * Every key gets a fixed number of characters, supplied by the
 * `keyLengthFactory` passed to the constructor. Values are written in sorted
 * key order and left-padded with "0", so a record always occupies the same
 * number of characters and can be decoded without separators.
 */
export class TinyNumberCompressor implements IAdapter {
    /**
     * @param keyLengthFactory returns how many characters to reserve for a key;
     * it must return the same positive integer for a key on encode and decode.
     */
    constructor(private readonly keyLengthFactory: KeyLengthFactory) {
        // Bound once so the method can be handed directly to Array.prototype.map.
        this.encodeObject = this.encodeObject.bind(this);
    }

    /**
     * Largest integer representable with `numOfChars` base-36 digits.
     *
     * @param numOfChars number of characters available
     * @returns `36 ** numOfChars - 1`
     */
    public static getMaxNumberForNumberOfCharacters(numOfChars: number): number {
        return (36 ** numOfChars) - 1;
    }

    /**
     * Decodes text produced by {@link serialize} back into objects.
     *
     * @param text concatenated fixed-width records
     * @param schema keys expected in every record
     * @returns one object per record; an empty array for empty text
     * @throws Error("invalid data") when the record width derived from the
     * schema is not a positive integer, when the text length is not a multiple
     * of that width, or when a field holds non base-36 characters
     */
    public deserialize(text: string, schema: ISchema): IObject[] {
        const charLengthForSchema = this.getCharLengthForSchema(schema);
        const widthIsUsable = Number.isInteger(charLengthForSchema) && charLengthForSchema > 0;
        if (!widthIsUsable || text.length % charLengthForSchema !== 0) {
            throw new Error("invalid data");
        }
        // serialize([]) yields "", so empty text must decode to an empty list
        // instead of being passed on to the chunker.
        if (text.length === 0) {
            return [];
        }
        const chunks = chunkText(text, charLengthForSchema);
        return chunks.map((chunk) => this.decodeChunk(chunk, schema));
    }

    /** Identifier of this adapter; returns the constant "C". */
    public getIdentifier(): TIdentifier {
        return "C";
    }

    /**
     * Encodes every object as fixed-width base-36 text and concatenates the results.
     *
     * @param object records to encode
     * @returns the encoded text; "" for an empty array
     * @throws Error("invalid data") when any value is not a non-negative
     * integer that fits in the characters reserved for its key
     */
    public serialize(object: IObject[]): string {
        if (!this.isValid(object)) {
            throw new Error("invalid data");
        }
        return object.map(this.encodeObject).join("");
    }

    /** Total number of characters one record occupies for the given schema. */
    private getCharLengthForSchema(schema: ISchema): number {
        return Object.keys(schema)
            .map((key) => this.keyLengthFactory(key))
            .reduce((total, length) => total + length, 0);
    }

    /** Encodes one record: keys in sorted order, each value zero-padded to its reserved width. */
    private encodeObject(object: IObject): string {
        return Object.keys(object).sort().map((key) => {
            const length = this.keyLengthFactory(key);
            return (object[key] as number).toString(36).padStart(length, "0");
        }).join("");
    }

    /**
     * Checks that every value can be written losslessly into its reserved width.
     *
     * Negative and fractional numbers are rejected because their base-36 text
     * contains "-" or "." and would not fit the fixed-width layout; a reserved
     * width below 1 is rejected because even the value 0 needs one character.
     */
    private isValid(object: IObject[]): boolean {
        return Validator.validateArray(object, (value, key) => {
            if (typeof value !== "number" || !Number.isInteger(value) || value < 0) {
                return false;
            }
            const spaceAllocatedForKey = this.keyLengthFactory(key);
            if (!Number.isInteger(spaceAllocatedForKey) || spaceAllocatedForKey < 1) {
                return false;
            }
            const maxValueForKeySize = TinyNumberCompressor.getMaxNumberForNumberOfCharacters(spaceAllocatedForKey);
            return value <= maxValueForKeySize;
        });
    }

    /**
     * Decodes a single fixed-width record.
     *
     * @throws Error("invalid data") when a field contains anything other than
     * base-36 digits, which parseInt would otherwise turn into NaN or a
     * silently truncated number
     */
    private decodeChunk(chunk: string, schema: ISchema): IObject {
        const object: IObject = {};
        const textExtractor = new ExtractText(chunk);
        const keys = Object.keys(schema).sort();
        for (const key of keys) {
            const length = this.keyLengthFactory(key);
            const field = textExtractor.extract(length);
            if (!/^[0-9a-z]+$/i.test(field)) {
                throw new Error("invalid data");
            }
            object[key] = parseInt(field, 36);
        }
        return object;
    }
}
/**
 * Splits `text` into consecutive pieces of `length` characters (UTF-16 code
 * units); the final piece may be shorter.
 *
 * Every character is kept, including line terminators, and an empty string
 * yields an empty array rather than `null`.
 *
 * @param text string to split
 * @param length size of each piece; must be a positive integer
 * @returns the pieces in order
 * @throws RangeError when `length` is not a positive integer
 */
export const chunkText = (text: string, length: number): string[] => {
    if (!Number.isInteger(length) || length < 1) {
        throw new RangeError(`chunk length must be a positive integer, received ${length}`);
    }
    const chunks: string[] = [];
    for (let start = 0; start < text.length; start += length) {
        chunks.push(text.slice(start, start + length));
    }
    return chunks;
};

/**
 * Reads a string from the front in caller-sized pieces, remembering how much
 * has already been consumed.
 */
export class ExtractText {
    constructor(private text: string) {
    }

    /**
     * Removes and returns the next `length` characters.
     *
     * @param length number of characters to take; values that are not greater
     * than zero take nothing and leave the remaining text untouched
     * @returns the extracted text, shorter than `length` (possibly empty) once
     * the remaining text runs out
     */
    public extract(length: number): string {
        // Clamp so a negative or NaN length cannot move the cursor backwards.
        const count = length > 0 ? Math.floor(length) : 0;
        const extracted = this.text.slice(0, count);
        this.text = this.text.slice(count);
        return extracted;
    }
}
+type TValueValidator = (value: string | number, key: string) => boolean; // tslint:disable-next-line:no-unnecessary-class export class Validator { public static isSupported(object: IObject, valueValidator: TValueValidator = yes): boolean { return Object.keys(object).every((x) => { - return (typeof object[x] === "string" || typeof object[x] === "number") && valueValidator(object[x]); + return (typeof object[x] === "string" || typeof object[x] === "number") && valueValidator(object[x], x); }); } public static validateArray(object: IObject[], valueValidator?: TValueValidator): boolean { diff --git a/tsconfig.json b/tsconfig.json index 44a5c27..14331ca 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -3,7 +3,7 @@ "target": "ES5", "module": "commonjs", "esModuleInterop": true, - "lib": ["es5", "scripthost", "dom", "es6", "es7"], + "lib": ["es5", "scripthost", "dom", "es6", "es7", "es2017"], "downlevelIteration": true, "noImplicitAny": true, "noImplicitReturns": true,