scry/lib/recognition/perceptualHash.ts
Chris Kruining 83ab4df537
Migrate from .NET MAUI to Expo + Convex
Complete rewrite of Scry using TypeScript stack:

- Expo/React Native with Expo Router (file-based routing)
- Convex backend (serverless functions + real-time database)
- Adaptive camera system (expo-camera in Expo Go, Vision Camera in production)
- React Native Skia + fast-opencv for image processing
- GDPR-compliant auth setup with Zitadel OIDC (pending configuration)

Key features:
- Card recognition pipeline ported to TypeScript
- Perceptual hashing (192-bit color pHash)
- CLAHE preprocessing for lighting normalization
- Local SQLite cache with Convex sync
- Collection management with offline support

Removes all .NET/MAUI code (src/, test/, tools/).

💘 Generated with Crush

Assisted-by: Claude Opus 4.5 via Crush <crush@charm.land>
2026-02-09 16:16:34 +01:00

211 lines
5.3 KiB
TypeScript

/**
* Perceptual hashing implementation using DCT (Discrete Cosine Transform).
* Computes a 192-bit (24 byte) color hash from an image.
*
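* The hash is computed by:
* 1. Resizing to 32x32 (done by the caller; this module expects 32x32 RGBA input)
* 2. For each RGB channel:
* - Apply 2D DCT
* - Extract 8x8 low-frequency coefficients (skip DC)
* - Compare each to median -> 63 bits per channel, stored in 8 bytes (top bit unused)
* 3. Concatenate R, G, B hashes -> 24 bytes (192 bits, of which 189 carry information)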
*/
/** Side length, in samples, of the square matrix the DCT operates on. */
const DCT_SIZE = 32;

/** Side length of the low-frequency coefficient block kept from each DCT. */
const HASH_SIZE = 8;

/** Hash bits produced per channel: the 8x8 block minus the skipped DC term. */
const BITS_PER_CHANNEL = HASH_SIZE * HASH_SIZE - 1;
/**
 * Lazily populated table of DCT basis values, indexed as
 * `cosineCache[frequency][position]`.
 */
const cosineCache: number[][] = [];

/**
 * Fill `cosineCache` on first use; later calls return immediately because
 * the table is already non-empty.
 */
function initCosineCache(): void {
  if (cosineCache.length !== 0) {
    return;
  }

  for (let frequency = 0; frequency < DCT_SIZE; frequency++) {
    const basisRow: number[] = [];
    for (let position = 0; position < DCT_SIZE; position++) {
      // cos(pi / N * (position + 1/2) * frequency)
      basisRow.push(Math.cos((Math.PI / DCT_SIZE) * (position + 0.5) * frequency));
    }
    cosineCache.push(basisRow);
  }
}
/**
 * Compute the 2D DCT of a DCT_SIZE x DCT_SIZE matrix by direct summation.
 *
 * @param matrix Input samples, indexed as `matrix[row][column]`.
 * @returns A DCT_SIZE x DCT_SIZE matrix of coefficients indexed `[u][v]`;
 *          entry `[0][0]` is the DC term.
 */
function applyDCT2D(matrix: number[][]): number[][] {
  initCosineCache();

  const coefficients: number[][] = [];
  for (let u = 0; u < DCT_SIZE; u++) {
    const rowBasis = cosineCache[u];
    // The zero-frequency basis gets the extra 1/sqrt(2) weight.
    const scaleU = u === 0 ? 1 / Math.sqrt(2) : 1;
    const outRow: number[] = [];

    for (let v = 0; v < DCT_SIZE; v++) {
      const colBasis = cosineCache[v];
      let acc = 0;

      for (let y = 0; y < DCT_SIZE; y++) {
        const samples = matrix[y];
        const weightY = rowBasis[y];
        for (let x = 0; x < DCT_SIZE; x++) {
          acc += samples[x] * weightY * colBasis[x];
        }
      }

      const scaleV = v === 0 ? 1 / Math.sqrt(2) : 1;
      outRow.push((scaleU * scaleV * acc) / 4);
    }

    coefficients.push(outRow);
  }

  return coefficients;
}
/**
 * Return the median of `values` without modifying the input array.
 *
 * For an even count the result is the mean of the two middle values.
 * An empty array yields NaN.
 */
function getMedian(values: number[]): number {
  const ordered = Array.from(values);
  ordered.sort((lhs, rhs) => lhs - rhs);

  const count = ordered.length;
  const upperMiddle = Math.floor(count / 2);

  if (count % 2 === 0) {
    return (ordered[upperMiddle - 1] + ordered[upperMiddle]) / 2;
  }
  return ordered[upperMiddle];
}
/**
 * Serialize a BigInt into `length` bytes, least-significant byte first.
 * Bits beyond `length * 8` are discarded.
 */
function bigintToBytes(value: bigint, length: number): Uint8Array {
  const out = new Uint8Array(length);
  let remaining = value;
  for (let pos = 0; pos < length; pos++) {
    out[pos] = Number(remaining & 0xFFn);
    remaining >>= 8n;
  }
  return out;
}
/**
 * Hash one color channel into 8 bytes.
 *
 * Takes the top-left HASH_SIZE x HASH_SIZE block of the channel's DCT in
 * row-major order, drops the DC term at (0,0), and sets bit `k` when the
 * k-th remaining coefficient is strictly greater than their median.
 *
 * @param channel Channel samples as a matrix accepted by `applyDCT2D`.
 * @returns 8 bytes, least-significant byte first; the top bit is never set.
 */
function computeChannelHash(channel: number[][]): Uint8Array {
  const spectrum = applyDCT2D(channel);

  // Flat index 0 is the DC component, so start at 1.
  const coefficients: number[] = [];
  const blockArea = HASH_SIZE * HASH_SIZE;
  for (let flat = 1; flat < blockArea; flat++) {
    const row = Math.floor(flat / HASH_SIZE);
    const col = flat % HASH_SIZE;
    coefficients.push(spectrum[row][col]);
  }

  const threshold = getMedian(coefficients);

  const packed = coefficients.reduce<bigint>(
    (acc, coefficient, position) =>
      coefficient > threshold ? acc | (1n << BigInt(position)) : acc,
    0n,
  );

  return bigintToBytes(packed, 8);
}
/**
 * Pull one color plane out of interleaved RGBA pixel data.
 *
 * @param pixels RGBA pixel data (width * height * 4 bytes)
 * @param width Image width in pixels
 * @param height Image height in pixels
 * @param channel 0=R, 1=G, 2=B
 * @returns Matrix indexed as `[y][x]` holding the selected channel's values.
 */
function extractChannel(
  pixels: Uint8Array | Uint8ClampedArray,
  width: number,
  height: number,
  channel: 0 | 1 | 2
): number[][] {
  const bytesPerRow = width * 4;
  const plane: number[][] = [];

  for (let y = 0, rowStart = channel; y < height; y++, rowStart += bytesPerRow) {
    const row: number[] = [];
    // Each pixel occupies 4 bytes, so step by 4 to stay on the same channel.
    for (let x = 0, offset = rowStart; x < width; x++, offset += 4) {
      row.push(pixels[offset]);
    }
    plane.push(row);
  }

  return plane;
}
/**
 * Compute the 24-byte perceptual color hash of a 32x32 RGBA image.
 * Resizing must happen before calling this function.
 *
 * @param pixels RGBA pixel data (32 * 32 * 4 = 4096 bytes)
 * @returns 24-byte hash laid out as R (bytes 0-7), G (8-15), B (16-23)
 * @throws Error if `pixels` does not contain exactly 4096 bytes
 */
export function computeColorHash(pixels: Uint8Array | Uint8ClampedArray): Uint8Array {
  if (pixels.length !== DCT_SIZE * DCT_SIZE * 4) {
    throw new Error(`Expected ${DCT_SIZE * DCT_SIZE * 4} bytes, got ${pixels.length}`);
  }

  const combined = new Uint8Array(24);
  for (const channel of [0, 1, 2] as const) {
    const plane = extractChannel(pixels, DCT_SIZE, DCT_SIZE, channel);
    // Each channel contributes 8 bytes, placed back to back in R, G, B order.
    combined.set(computeChannelHash(plane), channel * 8);
  }
  return combined;
}
/**
 * Count the bit positions at which two equal-length hashes differ.
 * A smaller result means the hashes are more similar.
 *
 * @throws Error if the two hashes have different lengths
 */
export function hammingDistance(a: Uint8Array, b: Uint8Array): number {
  if (a.length !== b.length) {
    throw new Error(`Hash length mismatch: ${a.length} vs ${b.length}`);
  }

  let differing = 0;
  for (let pos = 0; pos < a.length; pos++) {
    let diff = a[pos] ^ b[pos];
    // Clear the lowest set bit each pass; the pass count is the popcount.
    while (diff !== 0) {
      diff &= diff - 1;
      differing++;
    }
  }
  return differing;
}
/**
 * Render a hash as a lowercase hex string, two digits per byte, for
 * display/storage.
 */
export function hashToHex(hash: Uint8Array): string {
  let hex = "";
  for (let pos = 0; pos < hash.length; pos++) {
    hex += hash[pos].toString(16).padStart(2, "0");
  }
  return hex;
}
/**
 * Convert a hex string (as produced by `hashToHex`) back into hash bytes.
 *
 * @param hex Even-length string of hexadecimal digits; either case is accepted.
 * @returns One byte per pair of hex digits, in string order.
 * @throws Error if `hex` has odd length or contains a non-hex character.
 *         Without this check an odd trailing digit would be silently dropped
 *         and an unparsable pair would silently become 0.
 */
export function hexToHash(hex: string): Uint8Array {
  if (hex.length % 2 !== 0) {
    throw new Error(`Invalid hex string: odd length ${hex.length}`);
  }
  if (!/^[0-9a-fA-F]*$/.test(hex)) {
    throw new Error("Invalid hex string: contains non-hexadecimal characters");
  }

  const bytes = new Uint8Array(hex.length / 2);
  for (let i = 0; i < bytes.length; i++) {
    // slice(start, end) replaces the deprecated substr(start, length).
    bytes[i] = parseInt(hex.slice(i * 2, i * 2 + 2), 16);
  }
  return bytes;
}
// Version tag for the hash algorithm, intended for migrations.
// NOTE(review): presumably bumped whenever a change alters the produced hash
// bytes, so stored hashes can be recomputed — confirm against callers.
export const HASH_VERSION = 1;
// Matching thresholds
// Max Hamming distance (in bits, out of HASH_BITS) for two hashes to count as a match.
// NOTE(review): whether the comparison is inclusive is decided by the caller — confirm.
export const MATCH_THRESHOLD = 25; // Max Hamming distance for a match
// Total bits in the 24-byte hash (3 channels x 8 bytes). Only 63 bits per
// channel are ever set, so the top bit of each channel's last byte is always 0.
export const HASH_BITS = 192; // Total bits in hash