416 lines
11 KiB
JavaScript
416 lines
11 KiB
JavaScript
/**
|
|
* Handles non-English input and generates the final suggestion list.
|
|
* Translates localized words to English before parsing, then converts
|
|
* suggestion labels back to the user's language for display.
|
|
*/
|
|
|
|
import {
|
|
WEEKDAY_MAP,
|
|
MONTH_MAP,
|
|
UNIT_MAP,
|
|
WORD_NUMBER_MAP,
|
|
RELATIVE_DAY_MAP,
|
|
TIME_OF_DAY_MAP,
|
|
sanitize,
|
|
stripNoise,
|
|
normalizeDigits,
|
|
} from './tokenMaps';
|
|
|
|
import { parseDateFromText } from './parser';
|
|
import { buildSuggestionCandidates, MAX_SUGGESTIONS } from './suggestions';
|
|
|
|
// ─── English Reference Data ─────────────────────────────────────────────────
|
|
|
|
const EN_WEEKDAYS_LIST = [
|
|
'monday',
|
|
'tuesday',
|
|
'wednesday',
|
|
'thursday',
|
|
'friday',
|
|
'saturday',
|
|
'sunday',
|
|
];
|
|
|
|
const EN_MONTHS_LIST = [
|
|
'january',
|
|
'february',
|
|
'march',
|
|
'april',
|
|
'may',
|
|
'june',
|
|
'july',
|
|
'august',
|
|
'september',
|
|
'october',
|
|
'november',
|
|
'december',
|
|
];
|
|
|
|
const EN_DEFAULTS = {
|
|
UNITS: {
|
|
MINUTE: 'minute',
|
|
MINUTES: 'minutes',
|
|
HOUR: 'hour',
|
|
HOURS: 'hours',
|
|
DAY: 'day',
|
|
DAYS: 'days',
|
|
WEEK: 'week',
|
|
WEEKS: 'weeks',
|
|
MONTH: 'month',
|
|
MONTHS: 'months',
|
|
YEAR: 'year',
|
|
YEARS: 'years',
|
|
},
|
|
RELATIVE: {
|
|
TOMORROW: 'tomorrow',
|
|
DAY_AFTER_TOMORROW: 'day after tomorrow',
|
|
NEXT_WEEK: 'next week',
|
|
NEXT_MONTH: 'next month',
|
|
THIS_WEEKEND: 'this weekend',
|
|
NEXT_WEEKEND: 'next weekend',
|
|
},
|
|
TIME_OF_DAY: {
|
|
MORNING: 'morning',
|
|
AFTERNOON: 'afternoon',
|
|
EVENING: 'evening',
|
|
NIGHT: 'night',
|
|
NOON: 'noon',
|
|
MIDNIGHT: 'midnight',
|
|
},
|
|
WORD_NUMBERS: {
|
|
ONE: 'one',
|
|
TWO: 'two',
|
|
THREE: 'three',
|
|
FOUR: 'four',
|
|
FIVE: 'five',
|
|
SIX: 'six',
|
|
SEVEN: 'seven',
|
|
EIGHT: 'eight',
|
|
NINE: 'nine',
|
|
TEN: 'ten',
|
|
TWELVE: 'twelve',
|
|
FIFTEEN: 'fifteen',
|
|
TWENTY: 'twenty',
|
|
THIRTY: 'thirty',
|
|
},
|
|
ORDINALS: {
|
|
FIRST: 'first',
|
|
SECOND: 'second',
|
|
THIRD: 'third',
|
|
FOURTH: 'fourth',
|
|
FIFTH: 'fifth',
|
|
},
|
|
MERIDIEM: { AM: 'am', PM: 'pm' },
|
|
HALF: 'half',
|
|
NEXT: 'next',
|
|
THIS: 'this',
|
|
AT: 'at',
|
|
IN: 'in',
|
|
OF: 'of',
|
|
AFTER: 'after',
|
|
WEEK: 'week',
|
|
DAY: 'day',
|
|
FROM_NOW: 'from now',
|
|
NEXT_YEAR: 'next year',
|
|
};
|
|
|
|
const STRUCTURAL_WORDS = [
|
|
'at',
|
|
'in',
|
|
'next',
|
|
'this',
|
|
'from',
|
|
'now',
|
|
'after',
|
|
'half',
|
|
'same',
|
|
'time',
|
|
'weekend',
|
|
'end',
|
|
'of',
|
|
'the',
|
|
'eod',
|
|
'am',
|
|
'pm',
|
|
'week',
|
|
'day',
|
|
'first',
|
|
'second',
|
|
'third',
|
|
'fourth',
|
|
'fifth',
|
|
];
|
|
|
|
const ENGLISH_VOCAB = new Set([
|
|
...Object.keys(WEEKDAY_MAP),
|
|
...Object.keys(MONTH_MAP),
|
|
...Object.keys(UNIT_MAP),
|
|
...Object.keys(WORD_NUMBER_MAP),
|
|
...Object.keys(RELATIVE_DAY_MAP),
|
|
...Object.keys(TIME_OF_DAY_MAP),
|
|
...EN_WEEKDAYS_LIST,
|
|
...EN_MONTHS_LIST,
|
|
...STRUCTURAL_WORDS,
|
|
]);
|
|
|
|
// ─── Regex for token replacement ────────────────────────────────────────────
|
|
|
|
const MONTH_NAMES = Object.keys(MONTH_MAP).join('|');
|
|
const MONTH_NAME_RE = new RegExp(`\\b(?:${MONTH_NAMES})\\b`, 'i');
|
|
const NUM_TOD_RE =
|
|
/\b(\d{1,2}(?::\d{2})?)\s+(morning|noon|afternoon|evening|night)\b/g;
|
|
const TOD_TO_MERIDIEM = {
|
|
morning: 'am',
|
|
noon: 'pm',
|
|
afternoon: 'pm',
|
|
evening: 'pm',
|
|
night: 'pm',
|
|
};
|
|
const CJK_CHAR_RE =
|
|
/[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
|
|
|
|
// ─── Translation Cache ──────────────────────────────────────────────────────
|
|
|
|
const safeString = v => (v == null ? '' : String(v));
|
|
const MAX_PAIRS_CACHE = 20;
|
|
const pairsCache = new Map();
|
|
const CACHE_SECTIONS = [
|
|
'UNITS',
|
|
'RELATIVE',
|
|
'TIME_OF_DAY',
|
|
'WORD_NUMBERS',
|
|
'ORDINALS',
|
|
'MERIDIEM',
|
|
];
|
|
const SINGLE_KEYS = [
|
|
'HALF',
|
|
'NEXT',
|
|
'THIS',
|
|
'AT',
|
|
'IN',
|
|
'OF',
|
|
'AFTER',
|
|
'WEEK',
|
|
'DAY',
|
|
'FROM_NOW',
|
|
'NEXT_YEAR',
|
|
];
|
|
|
|
/** Create a string key from translations so we can cache results. */
|
|
const translationSignature = translations => {
|
|
if (!translations || typeof translations !== 'object') return 'none';
|
|
return [
|
|
...CACHE_SECTIONS.flatMap(section => {
|
|
const values = translations[section] || {};
|
|
return Object.keys(values)
|
|
.sort()
|
|
.map(k => `${section}.${k}:${safeString(values[k]).toLowerCase()}`);
|
|
}),
|
|
...SINGLE_KEYS.map(
|
|
k => `${k}:${safeString(translations[k]).toLowerCase()}`
|
|
),
|
|
].join('|');
|
|
};
|
|
|
|
/** Build a list of [localWord, englishWord] pairs from the translations and browser locale. */
|
|
const buildReplacementPairsUncached = (translations, locale) => {
|
|
const pairs = [];
|
|
const seen = new Set();
|
|
const t = translations || {};
|
|
|
|
const addPair = (local, en) => {
|
|
const l = sanitize(safeString(local));
|
|
const e = safeString(en).toLowerCase();
|
|
const key = `${l}\0${e}`;
|
|
if (l && e && l !== e && !seen.has(key)) {
|
|
seen.add(key);
|
|
pairs.push([l, e]);
|
|
}
|
|
};
|
|
|
|
CACHE_SECTIONS.forEach(section => {
|
|
const localSection = t[section] || {};
|
|
const enSection = EN_DEFAULTS[section] || {};
|
|
Object.keys(enSection).forEach(key => {
|
|
addPair(localSection[key], enSection[key]);
|
|
});
|
|
});
|
|
|
|
SINGLE_KEYS.forEach(key => addPair(t[key], EN_DEFAULTS[key]));
|
|
|
|
try {
|
|
const wdFmt = new Intl.DateTimeFormat(locale, { weekday: 'long' });
|
|
// Jan 1, 2024 is a Monday — aligns with EN_WEEKDAYS_LIST[0]='monday'
|
|
EN_WEEKDAYS_LIST.forEach((en, i) => {
|
|
addPair(wdFmt.format(new Date(2024, 0, i + 1)), en);
|
|
});
|
|
} catch {
|
|
/* locale not supported */
|
|
}
|
|
|
|
try {
|
|
const moFmt = new Intl.DateTimeFormat(locale, { month: 'long' });
|
|
EN_MONTHS_LIST.forEach((en, i) => {
|
|
addPair(moFmt.format(new Date(2024, i, 1)), en);
|
|
});
|
|
} catch {
|
|
/* locale not supported */
|
|
}
|
|
|
|
pairs.sort((a, b) => b[0].length - a[0].length);
|
|
return pairs;
|
|
};
|
|
|
|
/** Same as above but cached. Keeps up to 20 entries to avoid rebuilding every call. */
|
|
const buildReplacementPairs = (translations, locale) => {
|
|
const cacheKey = `${locale || ''}:${translationSignature(translations)}`;
|
|
if (pairsCache.has(cacheKey)) return pairsCache.get(cacheKey);
|
|
const pairs = buildReplacementPairsUncached(translations, locale);
|
|
if (pairsCache.size >= MAX_PAIRS_CACHE)
|
|
pairsCache.delete(pairsCache.keys().next().value);
|
|
pairsCache.set(cacheKey, pairs);
|
|
return pairs;
|
|
};
|
|
|
|
// ─── Token Replacement ──────────────────────────────────────────────────────
|
|
|
|
const escapeRegex = s => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
|
|
/** Swap localized words for their English versions in the text. */
|
|
const substituteLocalTokens = (text, pairs) => {
|
|
let r = text;
|
|
pairs.forEach(([local, en]) => {
|
|
if (CJK_CHAR_RE.test(local)) {
|
|
const re = new RegExp(escapeRegex(local), 'g');
|
|
r = r.replace(re, ` ${en} `);
|
|
} else {
|
|
const re = new RegExp(`(?<=^|\\s)${escapeRegex(local)}(?=\\s|$)`, 'g');
|
|
r = r.replace(re, en);
|
|
}
|
|
});
|
|
return r;
|
|
};
|
|
|
|
/** Drop any words the parser wouldn't understand (keeps English words and numbers). */
|
|
const filterToEnglishVocab = text =>
|
|
normalizeDigits(text)
|
|
.replace(/(\d+)h\b/g, '$1:00')
|
|
.split(/\s+/)
|
|
.filter(w => /[\d:]/.test(w) || ENGLISH_VOCAB.has(w.toLowerCase()))
|
|
.join(' ')
|
|
.replace(/\s+/g, ' ')
|
|
.trim();
|
|
|
|
/** Move "next year" to the right spot so the parser can read it (after the month, before time). */
|
|
const repositionNextYear = text => {
|
|
if (!MONTH_NAME_RE.test(text)) return text;
|
|
let r = text.replace(/\b(?:next\s+)?year\b/i, m =>
|
|
/next/i.test(m) ? m : 'next year'
|
|
);
|
|
if (!/\bnext\s+year\b/i.test(r)) return r;
|
|
const withoutNY = r.replace(/\bnext\s+year\b/i, '').trim();
|
|
const timeRe = /(?:(?:at\s+)?\d{1,2}(?::\d{2})?\s*(?:am|pm)?)\s*$/i;
|
|
const timePart = withoutNY.match(timeRe);
|
|
if (timePart) {
|
|
const beforeTime = withoutNY.slice(0, timePart.index).trim();
|
|
r = `${beforeTime} next year ${timePart[0].trim()}`;
|
|
} else {
|
|
r = `${withoutNY} next year`;
|
|
}
|
|
return r;
|
|
};
|
|
|
|
/** Run the full translation pipeline: swap tokens, filter, fix am/pm, reposition "next year". */
|
|
const replaceTokens = (text, pairs) => {
|
|
const substituted = substituteLocalTokens(text, pairs);
|
|
const filtered = filterToEnglishVocab(substituted);
|
|
const fixed = filtered.replace(
|
|
NUM_TOD_RE,
|
|
(_, t, tod) => `${t}${TOD_TO_MERIDIEM[tod]}`
|
|
);
|
|
return stripNoise(repositionNextYear(fixed));
|
|
};
|
|
|
|
/** Convert English words back to the user's language for display. */
|
|
const reverseTokens = (text, pairs) =>
|
|
pairs.reduce(
|
|
(r, [local, en]) =>
|
|
r.replace(
|
|
new RegExp(`(?<=^|\\s)${escapeRegex(en)}(?=\\s|$)`, 'g'),
|
|
local
|
|
),
|
|
text
|
|
);
|
|
|
|
// ─── Main Suggestion Generator ──────────────────────────────────────────────
|
|
|
|
/**
|
|
* Generate snooze suggestions from what the user has typed so far.
|
|
* Works with any language if translations are provided. Returns up to 5
|
|
* unique results, each with a label, date, and unix timestamp.
|
|
*
|
|
* @param {string} text - what the user typed
|
|
* @param {Date} [referenceDate] - treat as "now" (defaults to current time)
|
|
* @param {{ translations?: object, locale?: string }} [options] - i18n config
|
|
* @returns {Array<{ label: string, date: Date, unix: number }>}
|
|
*/
|
|
export const generateDateSuggestions = (
|
|
text,
|
|
referenceDate = new Date(),
|
|
{ translations, locale } = {}
|
|
) => {
|
|
if (!text || typeof text !== 'string') return [];
|
|
const normalized = sanitize(text);
|
|
if (!normalized) return [];
|
|
|
|
const stripped = stripNoise(normalized);
|
|
const pairs =
|
|
locale && locale !== 'en'
|
|
? buildReplacementPairs(translations, locale)
|
|
: [];
|
|
|
|
// Try English parse first, then translated parse if we have locale pairs.
|
|
// This avoids the problem where a single overlapping word (e.g. "in" in German)
|
|
// would skip token translation entirely.
|
|
const directParse = parseDateFromText(stripped, referenceDate);
|
|
|
|
const translated = pairs.length ? replaceTokens(normalized, pairs) : null;
|
|
const translatedParse =
|
|
translated && translated !== stripped
|
|
? parseDateFromText(translated, referenceDate)
|
|
: null;
|
|
|
|
// Prefer direct English parse; fall back to translated parse
|
|
const useTranslated = !directParse && !!translatedParse;
|
|
const englishInput = useTranslated ? translated : stripped;
|
|
|
|
const seen = new Set();
|
|
const results = [];
|
|
|
|
const exact = directParse || translatedParse;
|
|
if (exact) {
|
|
seen.add(exact.unix);
|
|
const exactLabel =
|
|
useTranslated && pairs.length
|
|
? reverseTokens(englishInput, pairs)
|
|
: englishInput;
|
|
results.push({ label: exactLabel, query: englishInput, ...exact });
|
|
}
|
|
|
|
buildSuggestionCandidates(englishInput).some(candidate => {
|
|
if (results.length >= MAX_SUGGESTIONS) return true;
|
|
const result = parseDateFromText(candidate, referenceDate);
|
|
if (result && !seen.has(result.unix)) {
|
|
seen.add(result.unix);
|
|
const label =
|
|
useTranslated && pairs.length
|
|
? reverseTokens(candidate, pairs)
|
|
: candidate;
|
|
results.push({ label, query: candidate, ...result });
|
|
}
|
|
return false;
|
|
});
|
|
|
|
return results;
|
|
};
|