Got to a nice point: I can now do bi-directional transformations, and I also have spelling and grammar error markers, which are stripped out again so they don't end up in the source text.

This commit is contained in:
Chris Kruining 2025-02-14 16:19:48 +11:00
parent 8e0eee5847
commit f4d59b30f5
No known key found for this signature in database
GPG key ID: EB894A3560CCCAD2
20 changed files with 414 additions and 279 deletions

View file

@ -48,9 +48,16 @@ export function Grid(props: { class?: string, rows: Entry[], locales: string[],
id: lang,
label: lang,
renderer: ({ row, column, value, mutate }) => {
const entry = rows.value[row]!;
const lang = String(column);
const { key } = rows.value[row]!;
return <TextArea row={row} key={entry.key} lang={String(column)} value={value ?? ''} oninput={e => mutate(e.data ?? '')} />;
return <Textarea
class={css.textarea}
value={value ?? ''}
lang={lang}
oninput={next => mutate(next)}
placeholder={`${key} in ${lang}`}
/>
},
}))
]);
@ -95,7 +102,7 @@ export function Grid(props: { class?: string, rows: Entry[], locales: string[],
return <GridComp data={rows} columns={columns()} api={setApi} />;
};
const TextArea: Component<{ row: number, key: string, lang: string, value: string, oninput?: (event: InputEvent) => any }> = (props) => {
const TextArea: Component<{ row: number, key: string, lang: string, value: string, oninput?: (next: string) => any }> = (props) => {
return <Textarea
class={css.textarea}
value={props.value}

View file

@ -7,7 +7,7 @@ interface Contents extends Map<string, Map<string, string>> { }
export const read = (file: File): Promise<Map<string, string> | undefined> => {
switch (file.type) {
case 'application/json': return json.load(file.stream())
case 'application/json': return json.load(file.stream());
default: return Promise.resolve(undefined);
}

View file

@ -1,3 +1,4 @@
import { decode } from "~/utilities";
export async function load(stream: ReadableStream<Uint8Array>): Promise<Map<string, string>> {
return new Map(await Array.fromAsync(parse(stream), ({ key, value }) => [key, value]));
@ -67,7 +68,7 @@ const states = {
expect: 'key',
});
next.entry = { key: path.join('.'), value: token.value };
next.entry = { key: path.join('.'), value: decode(token.value) };
return next
}

View file

@ -1,6 +1,4 @@
export type { Source } from './source';
export { createParser as createHtmlParser } from './parser/html';
export { createParser as createMarkdownParser } from './parser/markdown';
export { createSource } from './source';

View file

@ -1,28 +0,0 @@
/**
 * Text decoration flags.
 *
 * Every non-zero member occupies its own bit, `None` is the zero value.
 */
enum Decoration {
    None = 0,
    Bold = 1 << 0,
    Italic = 1 << 1,
    Underline = 1 << 2,
    StrikeThrough = 1 << 3,
}
/** A run of text with a decoration value; children are raw strings or nested nodes. */
interface TextNode {
type: 'text';
decoration: Decoration;
nodes: (string | Node)[];
}
/** A header whose content is a list of nested nodes. */
interface HeaderNode {
type: 'header';
nodes: Node[];
}
/** Any node of the rich text tree, discriminated by its `type` field. */
type Node = TextNode | HeaderNode;
/** Root of a parsed rich text document: an ordered list of top-level nodes. */
export interface RichTextAST {
nodes: Node[];
}
/** Contract for anything that can turn a source string into a {@link RichTextAST}. */
export interface Parser {
parse(source: string): RichTextAST;
}

View file

@ -1,9 +0,0 @@
import { Parser } from "../parser";
/**
 * Creates the HTML parser.
 *
 * Parsing is not implemented yet: every input yields an empty document. The
 * result is nevertheless a well-formed AST (`nodes` is always present), so
 * consumers can iterate it without guarding against a missing property.
 *
 * @returns A parser whose `parse` always returns an AST without nodes.
 */
export function createParser(): Parser {
    return {
        parse(_value) {
            return { nodes: [] };
        },
    };
}

View file

@ -1,80 +0,0 @@
import { Parser } from "../parser";
/**
 * Creates the markdown parser.
 *
 * The parser currently only tokenizes its input and writes every token to the
 * console; the AST it returns never contains any nodes.
 *
 * @returns A parser whose `parse` logs the tokens of `source` and returns an empty AST.
 */
export function createParser(): Parser {
    const parse: Parser['parse'] = (source) => {
        // console.log(source);

        const tokens = tokenize(source);

        for (const current of tokens) {
            console.log(current);
        }

        const emptyDocument = { nodes: [] };

        return emptyDocument;
    };

    return { parse };
}
// const states = {
// none(): State {
// },
// } as const;
/**
 * A token produced by `tokenize`.
 *
 * Every token carries a `start` and a `length`; the `kind` field discriminates
 * the variants. Only `header` (with `level`) and `text` (with `value`) carry
 * extra data.
 */
type Token = { start: number, length: number } & (
| { kind: 'bold' }
| { kind: 'italic' }
| { kind: 'underline' }
| { kind: 'strikethrough' }
| { kind: 'header', level: number }
| { kind: 'text', value: string }
);
/**
 * Lazily splits `characters` into tokens.
 *
 * Characters are appended to a buffer one at a time. As soon as the whole
 * buffer matches one of the patterns below, a token is yielded and the buffer
 * is emptied. The branches are tested in order, so the first match wins.
 *
 * NOTE(review): the buffer is only emptied after a match. Input that does not
 * start with a recognised pattern keeps accumulating and can only ever match
 * the `startsWith`/`endsWith` branches — confirm whether that is intended.
 *
 * @param characters The source text to tokenize.
 * @yields Tokens in the order they are recognised.
 */
function* tokenize(characters: string): Generator<Token, void, unknown> {
let buffer: string = '';
let clearBuffer = false;
let start = 0;
// Counts loop iterations; `for...of` on a string steps per code point.
let i = 0;
for (const character of characters) {
// An empty buffer means a new token candidate begins at this iteration.
if (buffer.length === 0) {
start = i;
}
buffer += character;
// Measured on the buffer string, i.e. in UTF-16 code units.
const length = buffer.length;
if (buffer === '**') {
yield { kind: 'bold', start, length };
clearBuffer = true;
}
// NOTE(review): a character was appended just above, so the buffer cannot be
// empty here and this branch never runs. The delimiter looks lost — confirm.
else if (buffer === '') {
yield { kind: 'italic', start, length };
clearBuffer = true;
}
else if (buffer === ':') {
yield { kind: 'underline', start, length };
clearBuffer = true;
}
// NOTE(review): same condition as the underline branch above, so this branch
// is unreachable — confirm the intended strikethrough delimiter.
else if (buffer === ':') {
yield { kind: 'strikethrough', start, length };
clearBuffer = true;
}
// Anything that starts with '#' and ends with a space; the level is the
// number of characters before that trailing space.
else if (buffer.length > 1 && buffer.startsWith('#') && buffer.endsWith(' ')) {
yield { kind: 'header', start, length, level: buffer.length - 1 };
clearBuffer = true;
}
// Double-quoted run; the surrounding quotes are dropped from `value`.
else if (buffer.length > 1 && buffer.startsWith('"') && buffer.endsWith('"')) {
yield { kind: 'text', start, length, value: buffer.slice(1, buffer.length - 1) };
clearBuffer = true;
}
// Reset after a match so the next character starts a fresh candidate.
if (clearBuffer) {
buffer = '';
clearBuffer = false;
}
i++;
}
}

View file

@ -1,31 +1,136 @@
import { createEffect, createSignal, Signal } from "solid-js";
import { Parser, RichTextAST } from "./parser";
import { onMount } from "solid-js";
import { createStore } from "solid-js/store";
import { unified, Transformer } from 'unified'
import { Node, Text, Element } from 'hast'
import { visit } from "unist-util-visit";
import remarkParse from 'remark-parse'
import remarkRehype from 'remark-rehype'
import remarkStringify from 'remark-stringify'
import rehypeParse from 'rehype-dom-parse'
import rehypeRemark from 'rehype-remark'
import rehypeStringify from 'rehype-dom-stringify'
export interface Source<TIn extends Parser, TOut extends Parser> {
readonly in: Signal<string>;
readonly out: Signal<string>;
export interface Source {
in: string;
out: string;
}
export function createSource<TIn extends Parser, TOut extends Parser>(inParser: TIn, outParser: TOut, initalValue: string): Source<TIn, TOut> {
const [inValue, setIn] = createSignal<string>(initalValue);
const [outValue, setOut] = createSignal<string>('');
// TODO :: make this configurable, right now we can only do markdown <--> html.
const inToOutProcessor = unified().use(remarkParse).use(remarkRehype).use(addErrors).use(rehypeStringify);
const outToInProcessor = unified().use(rehypeParse).use(clearErrors).use(rehypeRemark).use(remarkStringify, { bullet: '-' });
const [ast, setAst] = createSignal<RichTextAST>();
export function createSource(initalValue: string): Source {
const [store, setStore] = createStore({ in: initalValue, out: '' });
createEffect(() => {
setAst(inParser.parse(inValue()));
});
createEffect(() => {
setAst(outParser.parse(outValue()));
onMount(() => {
setStore('out', String(inToOutProcessor.processSync(initalValue)));
});
return {
get in() {
return [inValue, setIn] as Signal<string>;
return store.in;
},
set in(next) {
setStore({
in: next,
out: String(inToOutProcessor.processSync(next)),
});
},
get out() {
return [outValue, setOut] as Signal<string>;
return store.out;
},
set out(next) {
setStore({
in: String(outToInProcessor.processSync(next)).trim(),
out: next,
});
},
};
}
/**
 * Transformer plugin that marks grammar and spelling errors.
 *
 * Every text node is run through the grammar checker and then, in a second
 * pass over the whole tree, through the spell checker. Flagged parts are
 * wrapped in a `span` element whose `dataMarker` property is `'grammar'` or
 * `'spelling'`; unflagged parts stay plain text nodes. Because the spelling
 * pass walks the complete tree again, it also sees text inside grammar markers.
 *
 * @returns A transformer that mutates the tree in place.
 */
function addErrors(): Transformer {
    const wrapInMarker = (text: Text, type: string): Element => ({
        type: 'element',
        tagName: 'span',
        properties: {
            dataMarker: type,
        },
        children: [
            text
        ]
    });

    /** Replaces every text node by the segments `check` reports, wrapping hits in a `type` marker. */
    const markErrors = (
        tree: Parameters<Transformer>[0],
        check: (subject: string, lang: string) => (readonly [boolean, string])[],
        type: string,
    ) => {
        visit(tree, n => n.type === 'text', (n, i, p: Element) => {
            if (typeof i !== 'number' || p === undefined) {
                return;
            }

            const errors = check(n.value, 'en-GB');

            if (errors.length === 0) {
                return;
            }

            const replacement = errors.map(([isHit, value]) => {
                const textNode: Text = { type: 'text', value };

                return isHit ? wrapInMarker(textNode, type) : textNode;
            });

            p.children.splice(i, 1, ...replacement);

            // Continue behind the nodes that were just inserted. Without this the
            // traversal resumes at `i + 1`, walks into the new markers and checks
            // their text again, nesting markers of the same type (and never
            // terminating once the checker is deterministic).
            return i + replacement.length;
        });
    };

    return function (tree) {
        markErrors(tree, grammarChecker, 'grammar');
        markErrors(tree, spellChecker, 'spelling');
    }
}
/**
 * Transformer plugin that removes error markers again.
 *
 * Every element that owns a `dataMarker` property is replaced by its own
 * children, so only the marked content remains in the tree.
 *
 * @returns A transformer that mutates the tree in place.
 */
function clearErrors(): Transformer {
    const test = (n: Node) => n.type === 'element' && Object.hasOwn(n.properties, 'dataMarker');

    return function (tree) {
        visit(tree, test, (n, i, p: Element) => {
            if (typeof i !== 'number' || p === undefined) {
                return;
            }

            p.children.splice(i, 1, ...n.children);

            // Resume at the same index: it now holds the first unwrapped child (or
            // the next sibling when the marker was empty). Resuming at `i + 1`
            // would skip that node, leaving a nested marker in first position
            // untouched in this parent.
            return i;
        })
    }
}
// Spell checker built on `checker`: every run of word characters is a candidate.
const spellChecker = checker(/\w+/gi);
// Grammar checker built on `checker`: every pair of words separated by whitespace is a candidate.
const grammarChecker = checker(/\w+\s+\w+/gi);
/**
 * Builds a placeholder checker around `regex`.
 *
 * The returned function collects all matches of `regex` in `subject`, keeps
 * each one with a chance of roughly one in two (`Math.random() >= .5`) and
 * cuts `subject` into alternating segments: `[false, text]` for the part
 * before a kept match, `[true, text]` for the match itself, and one trailing
 * `[false, text]` for whatever follows the last kept match. Segments may be
 * empty strings, and joining all segment texts gives back `subject`.
 *
 * The `lang` argument is accepted but not used.
 *
 * @param regex Pattern describing candidates; handed to `String.prototype.matchAll`.
 * @returns A function mapping a subject (and language) to its segments.
 */
function checker(regex: RegExp) {
    return (subject: string, lang: string): (readonly [boolean, string])[] => {
        const segments: (readonly [boolean, string])[] = [];
        let cursor = 0;

        // All matches are gathered up front; one random draw per match, in order.
        for (const { 0: hit, index: from } of Array.from<RegExpExecArray>(subject.matchAll(regex))) {
            if (Math.random() < .5) {
                continue;
            }

            const to = from + hit.length;

            segments.push([false, subject.slice(cursor, from)]);
            segments.push([true, subject.slice(from, to)]);

            cursor = to;
        }

        segments.push([false, subject.slice(cursor, subject.length)]);

        return segments;
    };
}