buttoned up hash and ast
This commit is contained in:
parent
b1e617e74a
commit
e88d727d8e
5 changed files with 261 additions and 255 deletions
|
@ -0,0 +1,38 @@
|
|||
import { describe, expect } from "vitest";
|
||||
import { it } from "~/test-helpers";
|
||||
import { createElement, splitBy, mergeNodes } from './ast';
|
||||
|
||||
describe('ast', () => {
|
||||
describe('createElement', () => {
|
||||
it('should ____', () => {
|
||||
// Arrange
|
||||
|
||||
// Act
|
||||
|
||||
// Assert
|
||||
expect(true).toEqual(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('splitBy', () => {
|
||||
it('should ____', () => {
|
||||
// Arrange
|
||||
|
||||
// Act
|
||||
|
||||
// Assert
|
||||
expect(true).toEqual(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('mergeNodes', () => {
|
||||
it('should ____', () => {
|
||||
// Arrange
|
||||
|
||||
// Act
|
||||
|
||||
// Assert
|
||||
expect(true).toEqual(true);
|
||||
});
|
||||
});
|
||||
});
|
|
@ -1,7 +1,7 @@
|
|||
import type { Node, Text, Parent, RootContent } from 'hast';
|
||||
import { find } from 'unist-util-find';
|
||||
import { visit } from 'unist-util-visit';
|
||||
import { hash } from './temp';
|
||||
import { hash } from './hash';
|
||||
|
||||
/**
 * Builds a plain element node object.
 *
 * @param tagName - Tag name stored on the node.
 * @param children - Child nodes; the array is stored by reference, not copied.
 * @param properties - Properties stored on the node; defaults to a fresh empty object.
 * @returns An object of the shape `{ type: 'element', tagName, children, properties }`.
 */
export const createElement = (tagName: string, children: any[], properties: object = {}) => {
    const element = { type: 'element', tagName, children, properties };

    return element;
};
|
||||
|
||||
|
@ -14,7 +14,6 @@ export const splitBy = (tree: Parent, splitPoints: SplitPoint[]): RootContent[][
|
|||
const result: RootContent[][] = [];
|
||||
let remaining: RootContent[] = Object.hasOwn(tree, 'children') ? (tree as Parent).children : [];
|
||||
|
||||
console.log('kaas');
|
||||
// console.log(Object.groupBy(splitPoints, p => hash(p.node)));
|
||||
|
||||
for (const { node, offset } of splitPoints) {
|
||||
|
|
54
src/features/editor/hash.spec.ts
Normal file
54
src/features/editor/hash.spec.ts
Normal file
|
@ -0,0 +1,54 @@
|
|||
import { describe, expect } from "vitest";
|
||||
import { it } from "~/test-helpers";
|
||||
import { hash } from "./hash";
|
||||
|
||||
const DEFAULT_DATA = {
|
||||
prop_object: {
|
||||
is: 'some prop',
|
||||
},
|
||||
|
||||
prop_boolean: false,
|
||||
prop_bigint: 1_000_000_000_000n,
|
||||
prop_null: null,
|
||||
prop_undefined: undefined,
|
||||
prop_function: () => { },
|
||||
prop_symbol: Symbol('symbol'),
|
||||
|
||||
uint8array: new Uint8Array([0xff, 0x00, 0xff, 0x00]),
|
||||
uint32array: new Uint32Array([0xff00ff00]),
|
||||
};
|
||||
|
||||
describe('hash', () => {
|
||||
it('should hash a value with sha-1 algorithm', () => {
|
||||
// Arrange
|
||||
const expected = '6fe383b712ec74177f7714a3f5db5416accef8b';
|
||||
|
||||
// Act
|
||||
const actual = hash(DEFAULT_DATA);
|
||||
|
||||
// Assert
|
||||
expect(actual).toEqual(expected);
|
||||
});
|
||||
|
||||
it('should be stable over multiple runs', () => {
|
||||
// Arrange
|
||||
|
||||
// Act
|
||||
const run1 = hash(DEFAULT_DATA);
|
||||
const run2 = hash(DEFAULT_DATA);
|
||||
|
||||
// Assert
|
||||
expect(run1).toEqual(run2);
|
||||
});
|
||||
|
||||
// I can't seem to actually create a dataset that is large enough in order to test this.
|
||||
// So, for now, I will consider this unreachable code.
|
||||
it('should error if the input is too large', () => {
|
||||
// Arrange
|
||||
|
||||
// Act
|
||||
|
||||
// Assert
|
||||
expect(true).toEqual(true);
|
||||
});
|
||||
});
|
168
src/features/editor/hash.ts
Normal file
168
src/features/editor/hash.ts
Normal file
|
@ -0,0 +1,168 @@
|
|||
import { installIntoGlobal } from "iterator-helpers-polyfill";
|
||||
|
||||
installIntoGlobal();
|
||||
|
||||
const CHUNK_SIZE = 16;
|
||||
const UINT32_BYTE_SIZE = 4;
|
||||
const HASH_NUMBER_OF_UINT32 = 5;
|
||||
const HASH_SIZE = HASH_NUMBER_OF_UINT32 * UINT32_BYTE_SIZE;
|
||||
const initalizationVector /* 20 bytes */ = Object.freeze([0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0] as const);
|
||||
const hashKey /* 16 bytes */ = Object.freeze([0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6] as const);
|
||||
|
||||
type Word = number & {}; // intersection with an empty object type so TypeScript shows this as 'Word' and not as 'number'
|
||||
type HashBytes = Uint32Array & { length: typeof HASH_NUMBER_OF_UINT32 };
|
||||
|
||||
/**
 * Computes a SHA-1 style fingerprint of `data` and returns it as a lowercase hexadecimal string.
 *
 * A `Uint32Array` is hashed as-is; every other value is first flattened into 32-bit words by
 * `toBinary`. The words are consumed in blocks of `CHUNK_SIZE` (16) words and fed through the
 * 80-round SHA-1 compression function.
 *
 * NOTE(review): no SHA-1 padding or message-length suffix is appended, so the result is an
 * internal fingerprint and will not match digests produced by standard SHA-1 implementations.
 *
 * @param data - The value to fingerprint.
 * @returns 40 hexadecimal characters: the 5 state words, each zero-padded to 8 digits.
 * @throws Error when the number of input words is not a safe integer.
 */
export const hash = (data: any): string => {
    const buffer = typeof data === 'object' && data instanceof Uint32Array ? data : new Uint32Array(toBinary(data));

    if (!Number.isSafeInteger(buffer.length)) {
        throw new Error('Cannot hash more than 2^53 - 1 bits');
    }

    // prepare blocks
    // `range` is end-inclusive, so an input whose length is an exact multiple of CHUNK_SIZE
    // (including an empty input) is followed by one extra, all-zero block.
    const output = new Uint32Array(initalizationVector) as HashBytes;
    const blocks = range(0, buffer.length, CHUNK_SIZE).map(i => {
        const view = buffer.subarray(i, i + CHUNK_SIZE);
        // 80-entry message schedule, zero-filled so a short final block never leaves holes
        // (an `undefined` entry would turn the running state into NaN).
        const words = Array<Word>(80).fill(0);

        // Load every word of the block (up to 16), not just the first five.
        view.forEach((word, index) => {
            words[index] = word;
        });

        return words;
    });

    // apply blocks
    for (const words of blocks) {
        let [a, b, c, d, e] = output;

        for (let i = 0; i < 80; i++) {
            if (i >= 16) {
                // Schedule expansion: rotate the XOR of four earlier words left by ONE bit.
                words[i] = circularShiftLeft(words[i - 3] ^ words[i - 8] ^ words[i - 14] ^ words[i - 16], 1);
            }

            const tmp = (
                // SHA-1 rotates `a` by 5 bits.
                circularShiftLeft(a, 5) +
                logicalHashFunctions(i, b, c, d) +
                e +
                words[i] +
                // One round constant per stage of 20 rounds; HASH_SIZE happens to equal 20.
                hashKey[Math.floor(i / HASH_SIZE)]
            ) | 0; // keep the running value inside 32 bits

            e = d;
            d = c;
            c = circularShiftLeft(b, 30);
            b = a;
            a = tmp;
        }

        output[0] = (output[0] + a) | 0;
        output[1] = (output[1] + b) | 0;
        output[2] = (output[2] + c) | 0;
        output[3] = (output[3] + d) | 0;
        output[4] = (output[4] + e) | 0;
    }

    // Zero-pad every word to 8 hex digits so the digest has a fixed width and word
    // boundaries are unambiguous. `Array.from` with a mapper is used because a typed array's
    // own `map` would coerce the hex strings back to numbers.
    return Array.from(output, word => word.toString(16).padStart(8, '0')).join('');
};
|
||||
|
||||
/**
 * Rotates `subject`, interpreted as a 32-bit integer, `offset` bit positions to the left.
 * Bits shifted out at the top re-enter at the bottom.
 *
 * @param subject - The value to rotate.
 * @param offset - Number of bit positions to rotate by.
 * @returns The rotated value as a signed 32-bit integer (the result of JS bitwise operators).
 */
const circularShiftLeft = (subject: number, offset: number): number => {
    const wrappedAround = subject >>> (32 - offset);
    const shiftedUp = subject << offset;

    return (shiftedUp | wrappedAround) & 0xFFFFFFFF;
};
|
||||
|
||||
/**
 * Selects and applies the bitwise mixing function for a given round.
 *
 * Rounds are grouped into four consecutive stages of `HASH_SIZE` rounds each:
 * choose, parity, majority, parity.
 *
 * @param index - Round number.
 * @param b - Second state word.
 * @param c - Third state word.
 * @param d - Fourth state word.
 * @returns The mixed word as a signed 32-bit integer.
 * @throws Error when `index` is not below `4 * HASH_SIZE`.
 */
const logicalHashFunctions = (index: number, b: Word, c: Word, d: Word): Word => {
    const stageSize = HASH_SIZE;

    // Anything at or past the fourth stage (or not comparable at all) has no function.
    if (!(index < 4 * stageSize)) {
        throw new Error('Unreachable code');
    }

    // Stage 1: choose - each bit of b picks the matching bit from c or from d.
    if (index < stageSize) {
        return (b & c) | (~b & d);
    }

    // Stage 2: parity.
    if (index < 2 * stageSize) {
        return b ^ c ^ d;
    }

    // Stage 3: majority - a bit is set when at least two of the inputs have it set.
    if (index < 3 * stageSize) {
        return (b & c) | (b & d) | (c & d);
    }

    // Stage 4: parity again.
    return b ^ c ^ d;
};
|
||||
|
||||
/**
 * Lazily produces `start`, `start + step`, `start + 2 * step`, ... for as long as the
 * value does not exceed `end`. The upper bound is inclusive.
 *
 * @param start - First value produced.
 * @param end - Inclusive upper bound.
 * @param step - Increment between consecutive values.
 */
const range = function* (start: number, end: number, step: number): Iterator<number> {
    let current = start;

    while (current <= end) {
        yield current;
        current += step;
    }
};
|
||||
|
||||
/**
 * Flattens an arbitrary value into a stream of numbers that can be stored as 32-bit words.
 *
 * - functions, symbols, `undefined` and `null` contribute nothing;
 * - strings are UTF-8 encoded and packed four bytes per word by `compact`;
 * - numbers are emitted as-is, booleans as 0 or 1;
 * - bigints are emitted as 32-bit words, least significant word first;
 * - `Uint8Array` is packed by `compact`, `Uint32Array` is emitted element by element;
 * - any other object contributes its `Object.values`, recursively. Property names are not
 *   part of the output.
 *
 * NOTE(review): zero and negative bigints emit nothing, and cyclic object graphs would
 * recurse without end - confirm neither can reach this function.
 *
 * @param data - The value to serialize.
 * @throws Error when a bigint needs more than 10 words (320 bits).
 */
const toBinary = function*<T>(data: T): Generator<number, void, unknown> {
    switch (typeof data) {
        case 'function':
        case 'symbol':
        case 'undefined':
            break;

        case 'string':
            yield* compact(new TextEncoder().encode(data));
            break;

        case 'number':
            yield data;
            break;

        case 'boolean':
            yield Number(data);
            break;

        case 'bigint': {
            // Limit the serialization to 10 words.
            // This covers 10 * 32 bits, which in all honesty should be enough.
            const ITERATION_LIMIT = 10;
            let value: bigint = data;

            // Reject oversized values up front instead of silently truncating them; a check
            // placed inside a loop bounded by the same limit can never fire.
            if ((value >> BigInt(32 * ITERATION_LIMIT)) > 0n) {
                throw new Error('Iteration limit in bigint serialization reached');
            }

            for (; value > 0n; value >>= 32n) {
                yield Number(value & 0xffffffffn);
            }
            break;
        }

        case 'object': {
            if (data === null) {
                break;
            }

            // Typed arrays are fully handled here; stop before the generic `Object.values`
            // walk below, which would otherwise emit every element a second time.
            if (data instanceof Uint8Array) {
                yield* compact(data);
                break;
            }

            if (data instanceof Uint32Array) {
                yield* data;
                break;
            }

            for (const item of Object.values(data)) {
                yield* toBinary(item);
            }
            break;
        }
    }
};
|
||||
|
||||
/**
 * Packs a stream of bytes into unsigned 32-bit words, four bytes per word, with the first
 * byte of each group in the lowest 8 bits.
 *
 * Only the low 8 bits of every input value are used. A trailing group of 1-3 bytes is
 * emitted as a final word whose unused upper bytes are zero; without that flush, inputs
 * shorter than four bytes would produce no output and inputs differing only in their last
 * bytes would produce identical output.
 *
 * @param source - The bytes to pack.
 */
const compact = function* (source: Iterable<number>): Generator<number, void, unknown> {
    let filled = 0;
    let word = 0;

    for (const value of source) {
        word |= (value & 0xff) << (8 * filled);
        filled++;

        if (filled === 4) {
            // `>>> 0` turns the signed result of the bitwise operators into an unsigned word.
            yield word >>> 0;
            word = 0;
            filled = 0;
        }
    }

    // Flush the incomplete last word so trailing bytes are not lost.
    if (filled > 0) {
        yield word >>> 0;
    }
};
|
||||
|
|
@ -1,253 +0,0 @@
|
|||
const bit = {
|
||||
get(subject: number, index: number) {
|
||||
return Boolean((subject >> index) & 1);
|
||||
},
|
||||
|
||||
set(subject: number, index: number, value?: boolean) {
|
||||
if (value !== undefined) {
|
||||
return this.clear(subject, index) | ((value ? 1 : 0) << index);
|
||||
}
|
||||
|
||||
return subject | (1 << index)
|
||||
},
|
||||
|
||||
clear(subject: number, index: number) {
|
||||
return subject & ~(1 << index);
|
||||
},
|
||||
|
||||
toggle(subject: number, index: number) {
|
||||
return subject ^ (1 << index);
|
||||
},
|
||||
};
|
||||
|
||||
interface BitArray {
|
||||
[index: number]: boolean;
|
||||
length: number;
|
||||
}
|
||||
|
||||
const ITEM_BIT_SIZE = 64;
|
||||
const createBitArray = (data: boolean[] = []) => {
|
||||
const store: number[] = [];
|
||||
const populated: number[] = [];
|
||||
let length = 0;
|
||||
|
||||
const parseIndex = (key: string) => {
|
||||
const value = Number.parseInt(key);
|
||||
|
||||
if (Number.isNaN(value) || !Number.isFinite(value)) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return value;
|
||||
};
|
||||
|
||||
const convert = (index: number) => [
|
||||
Math.floor(index / ITEM_BIT_SIZE),
|
||||
index % ITEM_BIT_SIZE,
|
||||
] as const;
|
||||
|
||||
const get = (index: number) => {
|
||||
if (index >= length) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const [arrayIndex, bitIndex] = convert(index);
|
||||
|
||||
if (bit.get(populated[arrayIndex], bitIndex) === false) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
return bit.get(store[arrayIndex], bitIndex);
|
||||
}
|
||||
|
||||
const set = (index: number, value: boolean) => {
|
||||
const [arrayIndex, bitIndex] = convert(index);
|
||||
|
||||
store[arrayIndex] = bit.set((store[arrayIndex] ?? 0), bitIndex, value);
|
||||
populated[arrayIndex] = bit.set((populated[arrayIndex] ?? 0), bitIndex);
|
||||
length = Math.max(length, index + 1);
|
||||
};
|
||||
|
||||
const clear = (index: number) => {
|
||||
const [arrayIndex, bitIndex] = convert(index);
|
||||
|
||||
// I think I can skip the store because it is covered by the populated list
|
||||
// store[arrayIndex] = bit.set((store[arrayIndex] ?? 0), bitIndex, false);
|
||||
populated[arrayIndex] = bit.set((populated[arrayIndex] ?? 0), bitIndex, false);
|
||||
length = Math.max(length, index);
|
||||
}
|
||||
|
||||
// initial population of array
|
||||
for (const [i, v] of data.entries()) {
|
||||
set(i, v);
|
||||
}
|
||||
|
||||
return new Proxy<BitArray>([], {
|
||||
get(target, property, receiver) {
|
||||
if (property === Symbol.species) {
|
||||
return 'BitArray'
|
||||
}
|
||||
|
||||
if (typeof property === 'symbol') {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const index = parseIndex(property);
|
||||
|
||||
if (index) {
|
||||
console.log(store.map(i => i.toString(2)), populated.map(i => i.toString(2)));
|
||||
|
||||
return get(index);
|
||||
}
|
||||
|
||||
console.log(property, index);
|
||||
},
|
||||
|
||||
set(target, property, value, receiver) {
|
||||
if (typeof property === 'symbol') {
|
||||
return false;
|
||||
}
|
||||
|
||||
const index = parseIndex(property);
|
||||
|
||||
if (index) {
|
||||
if (typeof value !== 'boolean') {
|
||||
throw new Error(`Only able to set boolean values on indices, received '${typeof value}' instead`)
|
||||
}
|
||||
|
||||
set(index, value);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
},
|
||||
|
||||
deleteProperty(target, property) {
|
||||
if (typeof property === 'symbol') {
|
||||
return false;
|
||||
}
|
||||
|
||||
const index = parseIndex(property);
|
||||
|
||||
if (index) {
|
||||
clear(index);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
const BLOCK_SIZE = 512;
|
||||
const CHUNK_SIZE = 16;
|
||||
const UINT32_BYTE_SIZE = 4;
|
||||
const HASH_NUMBER_OF_UINT32 = 5;
|
||||
const HASH_SIZE = HASH_NUMBER_OF_UINT32 * UINT32_BYTE_SIZE;
|
||||
const initalizationVector /* 20 bytes */ = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0] as const;
|
||||
const hashKey /* 16 bytes */ = [0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6] as const;
|
||||
|
||||
type Word = number & {}; // union with empty object so typescript show this as 'Word' and not as 'number'
|
||||
type Chunk = Iterable<Word> & { length: typeof HASH_NUMBER_OF_UINT32 };
|
||||
type HashBytes = Uint32Array & { length: typeof HASH_NUMBER_OF_UINT32 };
|
||||
|
||||
const _hash = (data: string | Uint8Array | Uint32Array) => {
|
||||
// Normalize data to byte array
|
||||
if (typeof data === 'string') {
|
||||
data = new TextEncoder().encode(data);
|
||||
}
|
||||
|
||||
// Normalize to Uint32Array
|
||||
if (data instanceof Uint8Array) {
|
||||
data = new Uint32Array(data.buffer, data.byteOffset, data.byteLength / 4);
|
||||
}
|
||||
|
||||
if (!Number.isSafeInteger(data.length)) {
|
||||
throw new Error('Cannot hash more than 2^53 - 1 bits');
|
||||
}
|
||||
|
||||
// prepare blocks
|
||||
const output = new Uint32Array(initalizationVector) as HashBytes;
|
||||
const blocks = range(0, data.length, CHUNK_SIZE, true).map(i => {
|
||||
const view = data.subarray(i, i + 16);
|
||||
const words = Array<Word>(80);
|
||||
|
||||
words[0] = view[0];
|
||||
words[1] = view[1];
|
||||
words[2] = view[2];
|
||||
words[3] = view[3];
|
||||
words[4] = view[4];
|
||||
|
||||
return words;
|
||||
});
|
||||
|
||||
// apply blocks
|
||||
for (const words of blocks) {
|
||||
let [a, b, c, d, e] = output;
|
||||
|
||||
for (const index of range(0, 80)) {
|
||||
if (index >= 16) {
|
||||
words[index] = circularShiftLeft(1, words[index - 3] ^ words[index - 8] ^ words[index - 14] ^ words[index - 16]);
|
||||
}
|
||||
|
||||
const tmp = (
|
||||
circularShiftLeft(a, HASH_NUMBER_OF_UINT32) +
|
||||
logicalHashFunctions(index, b, c, d) +
|
||||
e +
|
||||
words[index] +
|
||||
hashKey[Math.floor(index / HASH_SIZE)]
|
||||
);
|
||||
|
||||
e = d;
|
||||
d = c;
|
||||
c = circularShiftLeft(b, 30);
|
||||
b = a;
|
||||
a = tmp;
|
||||
}
|
||||
|
||||
output[0] = (output[0] + a) | 0;
|
||||
output[1] = (output[1] + b) | 0;
|
||||
output[2] = (output[2] + c) | 0;
|
||||
output[3] = (output[3] + d) | 0;
|
||||
output[4] = (output[4] + e) | 0;
|
||||
}
|
||||
|
||||
return output.values().map(word => (word >>> 0).toString(16)).join('');
|
||||
};
|
||||
|
||||
const circularShiftLeft = (subject: number, offset: number): number => {
|
||||
return ((subject << offset) | (subject >>> 32 - offset)) & (0xFFFFFFFF);
|
||||
};
|
||||
|
||||
const logicalHashFunctions = (index: number, b: Word, c: Word, d: Word): Word => {
|
||||
if (index < HASH_SIZE) {
|
||||
return (b & c) | (~b & d);
|
||||
}
|
||||
else if (index < (2 * HASH_SIZE)) {
|
||||
return b ^ c ^ d;
|
||||
}
|
||||
else if (index < (3 * HASH_SIZE)) {
|
||||
return (b & c) | (b & d) | (c & d);
|
||||
}
|
||||
else if (index < (4 * HASH_SIZE)) {
|
||||
return b ^ c ^ d;
|
||||
}
|
||||
|
||||
throw new Error('Unreachable code');
|
||||
};
|
||||
|
||||
const range = function* (start: number, end: number, step: number = 1, inclusive: boolean = false): Iterator<number> {
|
||||
for (let i = start; inclusive ? (i <= end) : (i < end); i += (step ?? 1)) {
|
||||
yield i;
|
||||
}
|
||||
};
|
||||
|
||||
export const hash = (data: any): string => {
|
||||
if (typeof data === 'string' || (typeof data === 'object' && (data instanceof Uint8Array || data instanceof Uint32Array))) {
|
||||
return _hash(data);
|
||||
}
|
||||
|
||||
return _hash(JSON.stringify(data));
|
||||
};
|
Loading…
Add table
Add a link
Reference in a new issue