From f68d09e8ac6ad49ab43a02b7be751da273c9e2e6 Mon Sep 17 00:00:00 2001 From: Grahame Grieve Date: Sat, 21 Feb 2026 16:59:42 +1100 Subject: [PATCH 1/5] draft changes to cs-api for DB based provider --- tx/cs/cs-api.js | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/tx/cs/cs-api.js b/tx/cs/cs-api.js index ea6a050..fa1cd8d 100644 --- a/tx/cs/cs-api.js +++ b/tx/cs/cs-api.js @@ -498,14 +498,27 @@ class CodeSystemProvider { * */ async doesFilter(prop, op, value) { return false; } + /** + * @return true if the cs provider handles excludes when building filters. If true, and the value set is a clean include+exclude, + * the handleExclude will be called between getPrepContext and executeFilters + */ + handlesExcludes() { + return false; + } + + handlesOffset() { + + } /** * gets a single context in which filters will be evaluated. The application doesn't make use of this context; * it's only use is to be passed back to the CodeSystem provider so it can make use of it - if it wants * * @param {boolean} iterate true if the conceptSets that result from this will be iterated, and false if they'll be used to locate a single code + * @param {int} offset if handlesOffset() and !iterate, and if the value set is a simple one that only uses this provider, then this is the applicable offset + * @param {int} count if handlesOffset() and !iterate, and if the value set is a simple one that only uses this provider, then this is the applicable count * @returns {FilterExecutionContext} filter (or null, it no use for this) * */ - async getPrepContext(iterate) { return new FilterExecutionContext(iterate); } + async getPrepContext(iterate, offset = -1, count = -1) { return new FilterExecutionContext(iterate); } /** * executes a text search filter (whatever that means) and returns a FilterConceptSet @@ -532,7 +545,7 @@ class CodeSystemProvider { } // ? must override? 
/** - * Get a FilterConceptSet for a value set filter + * inform the CS provider about a filter * * throws an exception if the search filter can't be handled * @@ -543,6 +556,20 @@ class CodeSystemProvider { **/ async filter(filterContext, prop, op, value) { throw new Error("Must override"); } // well, only if any filters are actually supported + /** + * if handlesExcludes(), then inform the CS provider about an applicable set of exclude filters + * + * this might be called more than once. For each iteration, all of the filters apply + * + * the objects each have prop, op, and value. + * + * throws an exception if the search filter can't be handled + * + * @param {FilterExecutionContext} filterContext filtering context + * @param {Object[]} prop + **/ + async filterExclude(filterContext, prop, op, value) { throw new Error("Must override"); } // well, only if any filters are actually supported + /** * called once all the filters have been handled, and iteration is about to happen. * this function returns one more filters. 
If there were multiple filters, but only From 592656ee463497fe4346e42175afb83521227065 Mon Sep 17 00:00:00 2001 From: Grahame Grieve Date: Sat, 21 Feb 2026 23:37:49 +1100 Subject: [PATCH 2/5] use new filter functionality --- tx/cs/cs-api.js | 4 +- tx/workers/expand.js | 92 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 80 insertions(+), 16 deletions(-) diff --git a/tx/cs/cs-api.js b/tx/cs/cs-api.js index fa1cd8d..bdc508c 100644 --- a/tx/cs/cs-api.js +++ b/tx/cs/cs-api.js @@ -566,9 +566,9 @@ class CodeSystemProvider { * throws an exception if the search filter can't be handled * * @param {FilterExecutionContext} filterContext filtering context - * @param {Object[]} prop + * @param {Object[]} filters **/ - async filterExclude(filterContext, prop, op, value) { throw new Error("Must override"); } // well, only if any filters are actually supported + async filterExclude(filterContext, filters) { throw new Error("Must override"); } // well, only if any filters are actually supported /** * called once all the filters have been handled, and iteration is about to happen. 
diff --git a/tx/workers/expand.js b/tx/workers/expand.js index b227e45..652396d 100644 --- a/tx/workers/expand.js +++ b/tx/workers/expand.js @@ -601,7 +601,7 @@ class ValueSetExpander { } } - async checkSource(cset, exp, filter, srcURL, ts) { + async checkSource(cset, exp, filter, srcURL, ts, vsInfo) { this.worker.deadCheck('checkSource'); Extensions.checkNoModifiers(cset, 'ValueSetExpander.checkSource', 'set'); let imp = false; @@ -628,6 +628,10 @@ class ValueSetExpander { if (cs == null) { // nothing } else { + if (vsInfo && vsInfo.isSimple) { + vsInfo.csDoExcludes = cs.handlesExcludes(); + vsInfo.csDoOffset = cs.handlesOffset(); + } if (cs.contentMode() !== 'complete') { if (cs.contentMode() === 'not-present') { throw new Issue('error', 'business-rule', null, null, 'The code system definition for ' + cset.system + ' has no content, so this expansion cannot be performed', 'invalid'); @@ -660,7 +664,7 @@ class ValueSetExpander { } } - async includeCodes(cset, path, vsSrc, filter, expansion, excludeInactive, notClosed) { + async includeCodes(cset, path, vsSrc, compose, filter, expansion, excludeInactive, notClosed, vsInfo) { this.worker.deadCheck('processCodes#1'); const valueSets = []; @@ -752,6 +756,7 @@ class ValueSetExpander { } const prep = await cs.getPrepContext(true); const ctxt = await cs.searchFilter(prep, filter, false); + await cs.filterExclude(prep, ) let set = await cs.executeFilters(prep); this.worker.opContext.log('iterate filters'); while (await cs.filterMore(ctxt, set)) { @@ -799,10 +804,15 @@ class ValueSetExpander { if (cset.filter) { this.worker.opContext.log('prepare filters'); const fcl = cset.filter; - const prep = await cs.getPrepContext(true); + const prep = vsInfo.csDoOffset ? 
await cs.getPrepContext(true, this.offset, this.count) : await cs.getPrepContext(true); if (!filter.isNull) { await cs.searchFilter(filter, prep, true); } + if (vsInfo.csDoExcludes) { + for (let exc of compose.exclude || []) { + await cs.filterExclude(prep, this.excludeFilterList(exc)); + } + } if (cs.specialEnumeration()) { Extensions.addString(expansion, "http://hl7.org/fhir/StructureDefinition/valueset-unclosed", 'The code System "' + cs.system() + " has a grammar and so has infinite members. This extension is based on " + cs.specialEnumeration()); @@ -1106,32 +1116,34 @@ class ValueSetExpander { } } - async handleCompose(source, filter, expansion, notClosed) { + async handleCompose(source, filter, expansion, notClosed, vsInfo) { this.worker.opContext.log('compose #1'); const ts = new Map(); for (const c of source.jsonObj.compose.include || []) { this.worker.deadCheck('handleCompose#2'); - await this.checkSource(c, expansion, filter, source.url, ts); + await this.checkSource(c, expansion, filter, source.url, ts, vsInfo); } for (const c of source.jsonObj.compose.exclude || []) { this.worker.deadCheck('handleCompose#3'); this.hasExclusions = true; - await this.checkSource(c, expansion, filter, source.url, ts); + await this.checkSource(c, expansion, filter, source.url, ts, null); } this.worker.opContext.log('compose #2'); - let i = 0; - for (const c of source.jsonObj.compose.exclude || []) { - this.worker.deadCheck('handleCompose#4'); - await this.excludeCodes(c, "ValueSet.compose.exclude["+i+"]", source, filter, expansion, this.excludeInactives(source), notClosed); + if (!vsInfo.csDoExcludes) { + let i = 0; + for (const c of source.jsonObj.compose.exclude || []) { + this.worker.deadCheck('handleCompose#4'); + await this.excludeCodes(c, "ValueSet.compose.exclude["+i+"]", source, source.jsonObj.compose, filter, expansion, this.excludeInactives(source), notClosed); + } } - i = 0; + let i = 0; for (const c of source.jsonObj.compose.include || []) { 
this.worker.deadCheck('handleCompose#5'); - await this.includeCodes(c, "ValueSet.compose.include["+i+"]", source, filter, expansion, this.excludeInactives(source), notClosed); + await this.includeCodes(c, "ValueSet.compose.include["+i+"]", source, filter, expansion, this.excludeInactives(source), notClosed, vsInfo); i++; } } @@ -1259,10 +1271,11 @@ class ValueSetExpander { let notClosed = { value : false}; + let vsInfo = this.scanValueSet(source.jsonObj.compose); try { if (source.jsonObj.compose && Extensions.checkNoModifiers(source.jsonObj.compose, 'ValueSetExpander.Expand', 'compose') && this.worker.checkNoLockedDate(source.url, source.jsonObj.compose)) { - await this.handleCompose(source, filter, exp, notClosed); + await this.handleCompose(source, filter, exp, notClosed, vsInfo); } const unused = new Set([...this.requiredSupplements].filter(s => !this.usedSupplements.has(s))); @@ -1338,7 +1351,7 @@ class ValueSetExpander { const c = list[i]; if (this.map.has(this.keyC(c))) { o++; - if (o > this.offset && (this.count < 0 || t < this.count)) { + if ((vsInfo.csDoOffset) || (o > this.offset && (this.count < 0 || t < this.count))) { t++; if (!exp.contains) { exp.contains = []; @@ -1533,6 +1546,57 @@ class ValueSetExpander { return undefined; } + /** + * we have a look at the value set compose to see what we have. 
+ * If it's all one code system(|version), and has no value set dependencies, + * then we call it simple - this will affect how it can be handled later + * + * @param compose + * @returns {Object} scan result with isSimple, hasExcludes, csset, csDoExcludes and csDoOffset + */ + scanValueSet(compose) { + let result = { isSimple : false, hasExcludes : true, csset : new Set(), csDoExcludes : false, csDoOffset : false}; + let simple = true; + for (let inc of compose.include) { + if (!this.isSimpleInclude(inc, result.csset, false)) { + simple = false; + } + } + for (let exc of compose.exclude) { + if (!this.isSimpleInclude(exc, result.csset, true)) { + simple = false; + } + result.hasExcludes = true; + } + if (simple && result.csset.size == 1) { + result.isSimple = true; + } + return result; + } + + isSimpleInclude(inc, set, isExclude) { + set.add(inc.system+"|"+inc.version); + return (!inc.valueset || inc.valueset.length == 0) && ((inc.filter && inc.filter.length > 0) || (isExclude && inc.concept && inc.filter.concept > 0)); + } + + excludeFilterList(exc) { + const results = []; + + for (const f of exc.filter || []) { + results.push({ prop: f.property, op: f.op, value: f.value }); + } + + + if (exc.concept && exc.concept.length > 0) { + results.push({ + prop: 'code', + op: 'in', + value: exc.concept.map(c => c.code).join(',') + }); + } + + return results; + } } class ExpandWorker extends TerminologyWorker { From 6e7d39039bac740702d3cc3ac3acf29fb41f5e42 Mon Sep 17 00:00:00 2001 From: Grahame Grieve Date: Sun, 22 Feb 2026 08:37:36 +1100 Subject: [PATCH 3/5] clarifications --- tx/cs/cs-api.js | 33 ++++++++++++++++++++++++++------- tx/workers/expand.js | 4 +++- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/tx/cs/cs-api.js b/tx/cs/cs-api.js index bdc508c..524bdd6 100644 --- a/tx/cs/cs-api.js +++ b/tx/cs/cs-api.js @@ -510,15 +510,34 @@ class CodeSystemProvider { } /** - * gets a single context in which filters will be evaluated. 
The application doesn't make use of this context; - it's only use is to be passed back to the CodeSystem provider so it can make use of it - if it wants + * gets a single context in which filters will be evaluated. The server doesn't make use of this context; + * its only use is to be passed back to the CodeSystem provider so it can make use of it to organise the filter process + * + * The function is passed several pieces of information about the use of the filters that can help it optimise the + * behaviour: + * - iterate: whether the value set is being expanded, or instead that membership is just being checked (expand vs validate-code). + * But note, though, that when iterating, only the first filter set (see executeFilters) will be iterated - the rest will + * have filterCheck called + * - excludeInactive: whether to exclude inactive codes from the results. Note that the expand worker will check this anyway, + * so it can be ignored, but it's more efficient to never return inactive codes if they're going to be ignored + * - params: a handle to the parameters passed from the client. The provider doesn't need to do anything because of these + * but it might decide how to optimise loading based on e.g. languages, properties, designations, etc. The server will + * reprocess these anyway, so it can be ignored, but again, efficiency + * - offset & count: if the user is paging through the expansion, their offset and count request. Note that if the + * provider does anything with these, it needs to return true from handlesOffset() so the expand worker doesn't try + * to reprocess the offset and count. 
Note that there is information in the params about offset and count, but + * the provider should ignore these, as it only gets to check offset and count when the conditions are correct * * @param {boolean} iterate true if the conceptSets that result from this will be iterated, and false if they'll be used to locate a single code - * @param {int} offset if handlesOffset() and !iterate, and if the value set is a simple one that only uses this provider, then this is the applicable offset - * @param {int} count if handlesOffset() and !iterate, and if the value set is a simple one that only uses this provider, then this is the applicable count - * @returns {FilterExecutionContext} filter (or null, it no use for this) - * */ - async getPrepContext(iterate, offset = -1, count = -1) { return new FilterExecutionContext(iterate); } + * @param {TxParameters} params: information from the request that the user made, to help optimise loading + * @param {boolean} excludeInactive: whether the server will use inactive codes or not + * @param {int} offset if handlesOffset() and !iterate, and if the value set is a simple one that only uses this provider, then this is the applicable offset. -1 if not applicable + * @param {int} count if handlesOffset() and !iterate, and if the value set is a simple one that only uses this provider, then this is the applicable count. -1 if not applicable + * @returns {FilterExecutionContext} filter + * + **/ + async getPrepContext(iterate, params, excludeInactive, offset = -1, count = -1) { return new FilterExecutionContext(iterate); } + /** * executes a text search filter (whatever that means) and returns a FilterConceptSet diff --git a/tx/workers/expand.js b/tx/workers/expand.js index 652396d..d5578ab 100644 --- a/tx/workers/expand.js +++ b/tx/workers/expand.js @@ -804,7 +804,9 @@ class ValueSetExpander { if (cset.filter) { this.worker.opContext.log('prepare filters'); const fcl = cset.filter; - const prep = vsInfo.csDoOffset ? 
await cs.getPrepContext(true, this.offset, this.count) : await cs.getPrepContext(true); + const prep = await cs.getPrepContext(true, + this.params, excludeInactive, vsInfo.csDoOffset ? this.offset : -1, cs.handlesOffset() && vsInfo.csDoExcludes ? this.count : -1); + if (!filter.isNull) { await cs.searchFilter(filter, prep, true); } From c4d4590f160b75a3b047aefaeb15cde13a671027 Mon Sep 17 00:00:00 2001 From: Grahame Grieve Date: Sun, 22 Feb 2026 08:57:31 +1100 Subject: [PATCH 4/5] more clarifications --- tx/cs/cs-api.js | 10 +++++++++- tx/workers/expand.js | 20 ++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/tx/cs/cs-api.js b/tx/cs/cs-api.js index 524bdd6..2448bad 100644 --- a/tx/cs/cs-api.js +++ b/tx/cs/cs-api.js @@ -587,7 +587,15 @@ class CodeSystemProvider { * @param {FilterExecutionContext} filterContext filtering context * @param {Object[]} filters **/ - async filterExclude(filterContext, filters) { throw new Error("Must override"); } // well, only if any filters are actually supported + async filterExcludeFilters(filterContext, filters) { throw new Error("Must override"); } // well, only if any filters are actually supported + + /** + * if handlesExcludes(), then inform the CS provider about an applicable set of excluded codes + * + * @param {FilterExecutionContext} filterContext - filter context + * @param {String[]} code list of codes to exclude + */ + async filterExcludeConcepts(filterContext, code) { throw new Error("Must override"); } // well, only if any filters are actually supported /** * called once all the filters have been handled, and iteration is about to happen. 
diff --git a/tx/workers/expand.js b/tx/workers/expand.js index d5578ab..8ca3f14 100644 --- a/tx/workers/expand.js +++ b/tx/workers/expand.js @@ -812,7 +812,12 @@ class ValueSetExpander { } if (vsInfo.csDoExcludes) { for (let exc of compose.exclude || []) { - await cs.filterExclude(prep, this.excludeFilterList(exc)); + if (exc.filter) { + await cs.filterExcludeFilters(prep, this.excludeFilterList(exc)); + } + if (exc.concept) { + await cs.filterExcludeConcepts(prep, exc.concept.map(c => c.code)); + } } } @@ -1584,17 +1589,8 @@ class ValueSetExpander { excludeFilterList(exc) { const results = []; - for (const f of exc.filter || []) { - results.push({ prop: f.property, op: f.op, value: f.value }); - } - - - if (exc.concept && exc.concept.length > 0) { - results.push({ - prop: 'code', - op: 'in', - value: exc.concept.map(c => c.code).join(',') - }); + for (const f of exc.filter || []) { + results.push({ prop: f.property, op: f.op, value: f.value }); } return results; From fc82680ceebfc3cbc9abc6d5af54ab042fd9cfc4 Mon Sep 17 00:00:00 2001 From: Josh Mandel Date: Sat, 21 Feb 2026 18:19:15 -0600 Subject: [PATCH 5/5] SQLite v0 terminology providers with unified filter pipeline Add SQLite-backed code system providers for RxNorm, LOINC, and SNOMED CT that use a shared v0 schema with closure tables, FTS5 search indexes, and a unified SQL filter pipeline for both includes and excludes. 
Key features: - Single #buildV0FilterSql code path handles all filter types (concept hierarchy, property filters, code regex, value set membership) - Excludes reuse the same filter SQL wrapped in NOT EXISTS - Streaming pagination for large expansions (124K+ SNOMED codes) - Batch designation fetching for efficient display/property loading - SNOMED expression constraint language support via adapter - RxNorm archived concept import from RXNATOMARCHIVE - STY registered as filterable property for RxNorm - Opt-in perf counters (no-op when disabled) Integrates with Grahame's CS provider API (PR #133): - getPrepContext, filterExcludeFilters, filterExcludeConcepts - scanValueSet, handlesExcludes, handlesOffset - Unified intent path with includeConcepts + filter + exclude Also fixes method name bugs on legacy filter path (getExtensions -> extensions, getCodeStatus -> getStatus, getProperties -> properties). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/open-questions.md | 199 + scripts/perf-new-paths.js | 492 +++ scripts/perf-rxnorm-codes.json | 1 + scripts/replay-sampled-terminology.js | 381 ++ scripts/sqlite-exclude-bench.js | 151 + scripts/sqlite-microbench.js | 464 +++ scripts/test-expand-cross-system.js | 1307 +++++++ scripts/test-expand-for-valueset.js | 467 +++ scripts/test-loinc-expand.js | 508 +++ scripts/test-snomed-expand.js | 543 +++ server.js | 19 + tx/cs/cs-api.js | 16 +- tx/cs/cs-sqlite-expression-adapter.js | 365 ++ tx/cs/cs-sqlite-runtime-v0.js | 3404 +++++++++++++++++ tx/cs/cs-sqlite-snomed-v0.js | 276 ++ tx/cs/cs-sqlite-v0-specializers.js | 86 + tx/importers/import-loinc-sqlite-v0.module.js | 367 ++ .../import-rxnorm-sqlite-v0.module.js | 354 ++ tx/importers/import-rxnorm.module.js | 1 + tx/importers/import-sct-sqlite-v0.module.js | 415 ++ tx/importers/sqlite-v2/README.md | 90 + tx/importers/sqlite-v2/import-loinc-v0.js | 1512 ++++++++ tx/importers/sqlite-v2/import-rxnorm-v0.js | 1343 +++++++ 
tx/importers/sqlite-v2/import-snomed-v0.js | 1167 ++++++ tx/importers/sqlite-v2/schema-v0.sql | 183 + tx/library.js | 25 + tx/params.js | 11 + tx/perf-counters.js | 49 + tx/tx.all-v0.yml | 7 + tx/tx.js | 5 +- tx/tx.loinc-only.yml | 5 + tx/tx.rxnorm-loinc-matched.yml | 6 + tx/tx.rxnorm-loinc-v0.yml | 6 + tx/tx.rxnorm-loinc.yml | 6 + tx/tx.rxnorm-only.yml | 5 + tx/tx.rxnorm-v0-only.yml | 5 + tx/tx.snomed-legacy-only.yml | 5 + tx/tx.snomed-v0-only.yml | 5 + tx/tx.test-lite.yml | 28 + tx/tx.upstream-baseline.yml | 7 + tx/workers/expand.js | 308 +- tx/workers/validate.js | 2 +- tx/workers/worker.js | 14 + 43 files changed, 14495 insertions(+), 115 deletions(-) create mode 100644 docs/open-questions.md create mode 100644 scripts/perf-new-paths.js create mode 100644 scripts/perf-rxnorm-codes.json create mode 100644 scripts/replay-sampled-terminology.js create mode 100644 scripts/sqlite-exclude-bench.js create mode 100644 scripts/sqlite-microbench.js create mode 100644 scripts/test-expand-cross-system.js create mode 100644 scripts/test-expand-for-valueset.js create mode 100644 scripts/test-loinc-expand.js create mode 100644 scripts/test-snomed-expand.js create mode 100644 tx/cs/cs-sqlite-expression-adapter.js create mode 100644 tx/cs/cs-sqlite-runtime-v0.js create mode 100644 tx/cs/cs-sqlite-snomed-v0.js create mode 100644 tx/cs/cs-sqlite-v0-specializers.js create mode 100644 tx/importers/import-loinc-sqlite-v0.module.js create mode 100644 tx/importers/import-rxnorm-sqlite-v0.module.js create mode 100644 tx/importers/import-sct-sqlite-v0.module.js create mode 100644 tx/importers/sqlite-v2/README.md create mode 100644 tx/importers/sqlite-v2/import-loinc-v0.js create mode 100644 tx/importers/sqlite-v2/import-rxnorm-v0.js create mode 100644 tx/importers/sqlite-v2/import-snomed-v0.js create mode 100644 tx/importers/sqlite-v2/schema-v0.sql create mode 100644 tx/perf-counters.js create mode 100644 tx/tx.all-v0.yml create mode 100644 tx/tx.loinc-only.yml create mode 100644 
tx/tx.rxnorm-loinc-matched.yml create mode 100644 tx/tx.rxnorm-loinc-v0.yml create mode 100644 tx/tx.rxnorm-loinc.yml create mode 100644 tx/tx.rxnorm-only.yml create mode 100644 tx/tx.rxnorm-v0-only.yml create mode 100644 tx/tx.snomed-legacy-only.yml create mode 100644 tx/tx.snomed-v0-only.yml create mode 100644 tx/tx.test-lite.yml create mode 100644 tx/tx.upstream-baseline.yml diff --git a/docs/open-questions.md b/docs/open-questions.md new file mode 100644 index 0000000..029ba40 --- /dev/null +++ b/docs/open-questions.md @@ -0,0 +1,199 @@ +# Open Questions + +## Batch Designation Fetching + +**Status**: Implemented via `getPrepContext(iterate, params, ...)` — designation hints extracted from `params` +**Discovered**: 2026-02-21, during v0 query profiling +**Resolved**: 2026-02-21, batch pre-fetch in `executeFilters()` + +### Problem + +The worker iterates each included code individually, calling `designations(context)` per code, which runs a separate `SELECT FROM designation WHERE concept_id=?` query for each. For RxNorm SBD+SCD (27K codes), that's 27K individual queries; for SNOMED Clinical Findings (124K codes), 124K queries. This was the dominant cost in large expansions. + +The root cause: the worker's `listDisplaysFromProvider()` has a fast path (use `display` from context, no DB) and a full path (call `designations()`, per-code DB query). The fast path requires `workingLanguages` to be truthy. When no language header is sent (the default), every code takes the full path. + +### Solution + +Added designation hint extraction to `getPrepContext()` — when the worker passes `params` (TxParameters), the v0 provider reads `params.includeDesignations`, `params.workingLanguages()`, and `params.designations` to determine what designation data will be needed during iteration. The provider batch-fetches all designations in one query (batched in chunks of 500) and attaches them to the filter set. 
During iteration, `designations()` reads from the pre-fetched data instead of hitting the DB. + +### Results + +| Query | Before (per-code DB) | After (batch) | Speedup | +|-------|---------------------|---------------|---------| +| RxNorm SBD (9.7K codes) | ~2.2s | 0.24s | **9x** | +| RxNorm SBD+SCD (27K codes) | ~7s | 0.53s | **14x** | +| SNOMED Clinical Findings (124K codes) | ~7s | 2.4s | **3x** | + +## Worker-to-Provider Limit Passdown + +**Status**: Implemented +**Discovered**: 2026-02-21, during v0 query profiling +**Resolved**: 2026-02-21, expanded LIMIT passdown to all providers + +### Problem + +The worker's `getPrepContext(iterate, offset, count)` only passed offset/count when `vsInfo.csDoOffset` was true — which required the ValueSet to be "simple" (single code system). Cross-system ValueSets never got LIMIT, so providers fetched all matching rows even when the client asked for 10. + +### Solution + +Changed the gate from `vsInfo.csDoOffset` (only true for simple single-CS ValueSets) to `cs.handlesOffset()` (true for any provider that supports offset/count). Per-CS LIMIT is safe for cross-system ValueSets because excludes are system-scoped — an exclude on system B can't drain results from system A. The worker's overall count management handles cross-system totals. + +### Result + +Cross-system ValueSet expansion (e.g., RxNorm + LOINC include) dropped from ~1800ms to ~21ms (85x faster) with `count=10`. + +## Active vs Inactive Concepts in v0 Databases + +**Status**: Partially resolved — query-time filtering works; RxNorm re-import needed for STY + archived concepts +**Discovered**: 2026-02-21, during v0 vs upstream baseline comparison + +### What works now + +All three importers import inactive concepts with `active=0`: +- **RxNorm**: Suppressed atoms (SUPPRESS='O'|'E') → `active=0`. Non-suppressed → `active=1`. +- **SNOMED**: RF2 `active` column mapped directly. Both active and inactive concepts imported. 
+- **LOINC**: STATUS column mapped via `isActiveLoincStatus()`. Deprecated/Discouraged → `active=0`. + +The v0 provider combines `excludeInactive` (from `ValueSet.compose.inactive=false`) and `params.activeOnly` (from `$expand?activeOnly=true`) in `getPrepContext()`. "Exclude" wins — client can narrow but not widen. Applied as `AND c.active = 1` in SQL. + +### Remaining issues + +1. **RxNorm archived/merged concepts not imported**: Concepts that were merged (e.g., `197385→1668032`) only exist in `RXNATOMARCHIVE`. The v0 importer only reads `RXNCONSO`. An explicit code enumeration of a retired RXCUI won't find it. Fix: import from RXNATOMARCHIVE as `active=0` concepts. + +2. **RxNorm STY not registered as filterable**: The `runtime.filters.properties.byCode` only lists TTY. STY exists in `property_def` and has data in `concept_literal`, but the provider rejects it as a filter. Fix: add STY to `byCode` in the importer and re-import. + +3. **SNOMED skips inactive relationships** (import-snomed-v0.js line 556): `if (!active) continue` skips inactive RF2 relationships. This means hierarchy edges that are only present as inactive won't appear in the closure table. Whether this matters depends on whether FHIR `is-a` filters should traverse inactive edges. + +## SQLite Effort-Based Query Breaker + +**Status**: Open — needs upstream contribution or workaround +**Discovered**: 2026-02-21, during regex filter implementation + +### Problem + +A single malicious or pathological query (e.g., a catastrophic-backtracking regex, or a huge cross-join) can block the Node.js event loop indefinitely because better-sqlite3 executes synchronously. There's no way to interrupt a running query from JavaScript once it starts. + +### Root Cause + +SQLite's C API provides `sqlite3_progress_handler(db, N, callback, context)` which invokes a callback every N virtual machine instructions. If the callback returns non-zero, the statement is interrupted with `SQLITE_INTERRUPT`. 
This is the standard way to implement query timeouts or effort limits in SQLite. + +However, better-sqlite3 does not expose this API. The native C++ addon (`database.cpp`) never calls `sqlite3_progress_handler`. + +### Current Mitigation + +- REGEXP function has an effort counter (500k evaluation limit) that throws on exceeded +- Row iteration in `executeFilters()` could add a row-count cap + +### Proper Solution + +Contribute a PR to [better-sqlite3](https://github.com/WiseLibs/better-sqlite3) exposing `sqlite3_progress_handler` — e.g., `db.setProgressHandler(stepInterval, callback)`. This would allow: + +```js +const MAX_STEPS = 10_000_000; // ~10M VM instructions +let steps = 0; +syncDb.setProgressHandler(1000, () => { + if (++steps > MAX_STEPS) return 1; // interrupt + return 0; +}); +``` + +### Alternatives + +- Use a worker_thread for SQLite queries with a timeout on the main thread +- Fork better-sqlite3 and add `sqlite3_progress_handler` binding +- Switch to a different SQLite binding that exposes it (e.g., `sql.js` via Wasm, but that has its own perf tradeoffs) + +## CS Provider Interface Changes Beyond PR #133 + +**Status**: Documenting — these changes should be proposed upstream +**Discovered**: 2026-02-21, during v0 provider implementation + +### Context + +Grahame's PR #133 decomposed the CS provider API into a filter pipeline: `filter()` → `filterExcludeFilters()`/`filterExcludeConcepts()` → `executeFilters()` → `filterMore()`/`filterConcept()`. His updated branch also passes `params` (TxParameters) to `getPrepContext()`, giving the provider access to operation parameters. Our v0 provider builds on this but required additional interface methods. All changes are backward-compatible. + +### New Provider Methods + +#### 1. `includeConcepts(filterContext, codes: string[])` + +**Why**: PR #133 has no way for the provider to know about explicit concept lists from `compose.include[].concept`. 
The worker handles these in a separate per-code `locate()` loop, completely outside the filter pipeline. This means the provider can't batch-optimize concept lookups (one SQL query vs N). + +**What it does**: Records intent to include specific codes. No SQL execution — `executeFilters()` incorporates these as `WHERE code IN (...)` in the combined query. + +**Backward compatibility**: The worker checks `typeof cs.includeConcepts === 'function'` before calling. If absent, falls back to the original per-code `locate()` loop. + +### Worker Changes + +#### Unified concept + filter block + +**Why**: PR #133's worker has separate `if (cset.concept)` and `if (cset.filter)` blocks. The provider never sees the complete picture. This prevents the provider from building one optimal query. + +**What changed**: When the provider supports `includeConcepts`, the worker creates one prep context and registers all intent (concepts + filters + excludes) before calling `executeFilters()` once. It then iterates concept-list results first (preserving compose ordering + designation merging), then filter results (deduping codes already emitted from the concept list). + +**Backward compatibility**: When the provider doesn't support `includeConcepts`, the worker falls back to the original separate-block behavior. + +#### Skip `excludeCodes()` when `csDoExcludes` is true + +**Why**: The worker's `handleCompose()` iterated all excluded codes individually via `excludeCodes()` → `isExcluded()` per code — even when the provider's `handlesExcludes()` returned true and `filterExcludeFilters()`/`executeFilters()` handled excludes in SQL. For 14k+ excluded codes at ~0.05ms each, this added ~700ms of pure overhead. + +**What changed**: When `csDoExcludes` is true, `handleCompose()` skips the `excludeCodes()` iteration. If `filterExcludeFilters()` throws (unsupported exclude), the catch block runs `excludeCodes()` at that point as fallback. 
+ +#### LIMIT passdown for cross-system ValueSets + +**Why**: PR #133's worker only passes `offset`/`count` to `getPrepContext()` when `vsInfo.csDoOffset` is true — which requires the ValueSet to be "simple" (single code system). Cross-system ValueSets (multiple includes with different systems) never got LIMIT, so every provider fetched its full result set even when the client requested `count=10`. + +**What changed**: The gate was changed from `vsInfo.csDoOffset` to `cs.handlesOffset()`. Per-CS LIMIT is safe because excludes are system-scoped (an exclude on system B can't drain results from system A), and the worker's overall count management handles cross-system totals. + +**Result**: Cross-system ValueSet expansion dropped from ~1800ms to ~21ms (85x) with `count=10`. + +#### Designation batch pre-fetch via `getPrepContext()` + +**Why**: The worker's `listDisplaysFromProvider()` calls `designations(context)` per code. For v0 providers this means one DB query per code. The provider had no way to know upfront whether designations would be needed. + +**What changed**: Grahame's updated `getPrepContext(iterate, params, excludeInactive, offset, count)` passes the full `TxParameters`. The v0 provider reads `params.includeDesignations`, `params.workingLanguages()`, and `params.designations` to determine designation needs, then `executeFilters()` batch-fetches them. + +#### Active-only filtering + +The v0 provider combines two sources of "exclude inactive" logic in `getPrepContext()`: +- `excludeInactive` (from `ValueSet.compose.inactive = false`) +- `params.activeOnly` (from `$expand?activeOnly=true`) + +Per FHIR semantics, "exclude" wins — the client can narrow but can't widen beyond the ValueSet's rule. The combined flag is applied as `AND c.active = 1` in SQL. 
+ +### Summary Table + +| Method | PR #133 | Our addition | Why | +|--------|---------|--------------|-----| +| `getPrepContext()` | ✅ (updated signature) | Use `params` for designation hints + `excludeInactive` for active filtering | Subsumes `prepareDesignations()` | +| `filter()` | ✅ | Modified: pure intent, no SQL | Defer all execution to `executeFilters()` | +| `filterExcludeFilters()` | ✅ | Unchanged | — | +| `filterExcludeConcepts()` | ✅ | Unchanged | — | +| `executeFilters()` | ✅ | Modified: builds combined SQL from all intent + batch designation pre-fetch | One query instead of separate materialization | +| `includeConcepts()` | ❌ | New | Provider needs to see concept lists for batch SQL | + +## Expansion Size Limits (UPPER_LIMIT / INTERNAL_LIMIT) + +**Status**: Open — needs production tuning +**Changed**: 2026-02-21, raised from 100K to 1M for development/testing + +### Background + +The worker has three constants that cap expansion size: +- `UPPER_LIMIT_NO_TEXT` — max codes returned when no text filter is provided +- `UPPER_LIMIT_TEXT` — max codes returned with a text filter +- `INTERNAL_LIMIT` — hard internal cap passed to sub-expansions + +Upstream had these at 100,000. This blocks legitimate real-world ValueSets like FHIR R4 Clinical Findings (124K codes) and IPS Problems (132K codes) with a `too-costly` error. + +We raised all three to 1,000,000 for development to avoid failing on these cases. With our SQL-backed provider and batch designation pre-fetch, 124K codes expand in ~2.4s (down from ~7s before batch designations). + +### Questions + +1. **What should production limits be?** Options: + - **Keep 1M**: allows any realistic FHIR ValueSet. Memory concern: 124K codes × ~1KB each ≈ 120MB per response. + - **Make configurable**: server config or per-request parameter. The FHIR `count` parameter already handles client-side limiting. + - **Tiered**: higher limit for SQL-backed providers (fast), lower for legacy iterate-based providers (slow). + +2. 
**Should the limit apply to SQL row fetch or only to response size?** Currently the SQL uses `LIMIT` from the `count` parameter, but the worker's `limitCount` check applies to the accumulated `fullList` after iteration. With SQL providers, we could let SQL return all rows and only cap the response. + +3. **Memory pressure**: a 1M-code expansion would be ~1GB of JSON. Should we add a memory-based check instead of (or in addition to) a count-based one? diff --git a/scripts/perf-new-paths.js b/scripts/perf-new-paths.js new file mode 100644 index 0000000..a557711 --- /dev/null +++ b/scripts/perf-new-paths.js @@ -0,0 +1,492 @@ +#!/usr/bin/env node +/** + * Performance comparison script for incremental provider enhancements. + * + * Usage: + * node scripts/perf-new-paths.js [--branch=incremental|main|both] [--iterations=5] [--warmup=2] + * + * Measures HTTP latency for requests exercising the new code paths: + * - locateMany (inline concept[] lists in ValueSet) + * - filterPage (is-a filters with large result sets) + * - display fast path (workingLanguages only) + * - property skip (no property[] requested) + * - provider cache (repeated CodeSystem resolution) + * + * On the incremental branch, also reads /debug/perf-counters to verify path coverage. + * Runs serially (one server at a time) to avoid RAM contention. 
// --- Configuration ---
// Checkout locations default to the original author's layout; set
// PERF_INCREMENTAL_DIR / PERF_MAIN_DIR to run the script on another machine.
const INCREMENTAL_DIR = process.env.PERF_INCREMENTAL_DIR ?? '/home/jmandel/hobby/FHIRsmith-incremental';
const MAIN_DIR = process.env.PERF_MAIN_DIR ?? '/home/jmandel/hobby/FHIRsmith-main';
const CONFIG = 'tx/tx.test-lite.yml';
const PORT = 8099;
const BASE = `http://localhost:${PORT}/r4`;
const DEBUG_BASE = `http://localhost:${PORT}`;

/**
 * Parse a CLI count option.
 *
 * @param {*} raw value taken from the parsed argument map (string, `true` for a bare flag, or undefined)
 * @param {number} fallback value used when `raw` is missing, not an integer, or below `min`
 * @param {number} min smallest acceptable value (0 lets the caller disable a phase, e.g. `--warmup=0`)
 * @returns {number} the parsed integer, or `fallback`
 */
function parseCount(raw, fallback, min) {
  const parsed = Number.parseInt(raw, 10);
  return Number.isInteger(parsed) && parsed >= min ? parsed : fallback;
}

// `--key=value` pairs become { key: 'value' }; a bare `--key` (or an empty value) becomes { key: true }.
// Only the FIRST '=' separates key from value, so values may themselves contain '='.
const args = process.argv.slice(2).reduce((m, a) => {
  const stripped = a.replace(/^--/, '');
  const eq = stripped.indexOf('=');
  const k = eq === -1 ? stripped : stripped.slice(0, eq);
  const v = eq === -1 ? '' : stripped.slice(eq + 1);
  m[k] = v || true;
  return m;
}, {});
// A bare `--branch` carries no usable value; fall back to running both suites.
const BRANCH = typeof args.branch === 'string' ? args.branch : 'both';
// At least one measured iteration is required, otherwise there is nothing to summarise.
const ITERATIONS = parseCount(args.iterations, 5, 1);
// Zero warm-up iterations is a legitimate request and must not be replaced by the default.
const WARMUP = parseCount(args.warmup, 2, 0);
'36989005','118599009','128462008','44054006','267036007','68566005','13645005','22298006', + '233615004','195967001','43878008','271807003','56018004','40930008','22253000','70582006', + '64859006','10509002','73430006','414545008','82423001','3723001','29857009','86299006', + '59282003','197480006','65966004','409622000','126485001','301011002','62315008','39579001', + '41006004','26929004','50043002','60046008','75570004','118601006','95417003','73595000', + '47505003','78648007','309557009','399068003','84757009','271737000','161891005','3424008', + '13213009','44695005','36971009','111056004','2776000','87433001','23986001','302866003', + '33737001','59455009','239873007','26079004','55342001','127295002','185903001','312608009', + '225728007','386692008','44465007','43116000','254837009','372086001','128477000','23685000', + '128053003','74400008','36989005','118599009','84757009','271737000','161891005','3424008', + '13213009','44695005','36971009','111056004','2776000','87433001','23986001','302866003', + '33737001','59455009','239873007','26079004','55342001','127295002','185903001','312608009', + '225728007','386692008','44465007','43116000','254837009','372086001','128477000','23685000', + '128053003','74400008','36989005','118599009','404684003','38341003','73211009','386661006', + '84114007','233604007','49727002','25064002','128462008','44054006','267036007','68566005', +]; + +const TESTS = [ + { + name: 'locateMany-rxnorm-1k', + desc: 'RxNorm 1000 codes: 1 SQL IN(...) 
vs 1000 individual queries', + body: () => ({ + resourceType: 'Parameters', + parameter: [{ + name: 'valueSet', + resource: { + resourceType: 'ValueSet', + status: 'active', + compose: { + include: [{ + system: 'http://www.nlm.nih.gov/research/umls/rxnorm', + concept: RXNORM_CODES.map(c => ({ code: c })) + }] + } + } + }] + }) + }, + { + name: 'locateMany-snomed-200', + desc: 'SNOMED 200 codes: batch binary search', + body: () => ({ + resourceType: 'Parameters', + parameter: [{ + name: 'valueSet', + resource: { + resourceType: 'ValueSet', + status: 'active', + compose: { + include: [{ + system: 'http://snomed.info/sct', + concept: SNOMED_CODES.map(c => ({ code: c })) + }] + } + } + }] + }) + }, + { + name: 'filterPage-snomed-5k', + desc: 'SNOMED is-a filter, 5000 results: paged vs one-at-a-time', + body: () => ({ + resourceType: 'Parameters', + parameter: [ + { name: 'url', valueUri: 'http://hl7.org/fhir/ValueSet/device-kind' }, + { name: 'count', valueInteger: 5000 }, + ] + }) + }, + { + name: 'filterPage-snomed-uncapped', + desc: 'SNOMED is-a filter, uncapped: full iteration stress test', + body: () => ({ + resourceType: 'Parameters', + parameter: [ + { name: 'url', valueUri: 'http://hl7.org/fhir/ValueSet/device-kind' }, + ] + }) + }, + { + name: 'propSkip-large', + desc: 'Large expansion without property[]: skip N getProperties() calls', + body: () => ({ + resourceType: 'Parameters', + parameter: [ + { name: 'url', valueUri: 'http://hl7.org/fhir/ValueSet/device-kind' }, + { name: 'count', valueInteger: 2000 }, + ] + }) + }, + { + name: 'propLoad-large', + desc: 'Large expansion WITH property[]: load properties for comparison', + body: () => ({ + resourceType: 'Parameters', + parameter: [ + { name: 'url', valueUri: 'http://hl7.org/fhir/ValueSet/device-kind' }, + { name: 'count', valueInteger: 2000 }, + { name: 'property', valueString: 'inactive' }, + ] + }) + }, +]; + +// --- HTTP helpers --- +function httpPost(url, body) { + return new Promise((resolve, reject) 
/**
 * Issue a GET request and collect the whole response body.
 *
 * @param {string} url absolute http:// URL (path and query string are both sent)
 * @returns {Promise<{status: number, body: string}>} resolves once the response has ended
 * Rejects on a transport error, or with Error('timeout') after 10s of socket inactivity.
 */
function httpGet(url) {
  return new Promise((resolve, reject) => {
    const u = new URL(url);
    const req = http.get(
      { hostname: u.hostname, port: u.port, path: `${u.pathname}${u.search}`, timeout: 10000 },
      (res) => {
        let body = '';
        res.on('data', (d) => { body += d; });
        res.on('end', () => resolve({ status: res.statusCode, body }));
      },
    );
    req.on('error', reject);
    // The `timeout` option only emits an event; the request must be torn down explicitly,
    // otherwise the promise would never settle when the server stalls.
    req.on('timeout', () => { req.destroy(); reject(new Error('timeout')); });
  });
}

/**
 * Issue a body-less POST (used for the debug enable/reset endpoints).
 *
 * @param {string} url absolute http:// URL (path and query string are both sent)
 * @returns {Promise<{status: number, body: string}>} resolves once the response has ended
 * Rejects on a transport error, or with Error('timeout') after 10s of socket inactivity.
 */
function httpPostSimple(url) {
  return new Promise((resolve, reject) => {
    const u = new URL(url);
    const req = http.request({
      hostname: u.hostname, port: u.port, path: `${u.pathname}${u.search}`,
      method: 'POST',
      headers: { 'Content-Length': 0 },
      timeout: 10000,
    }, (res) => {
      let body = '';
      res.on('data', (d) => { body += d; });
      res.on('end', () => resolve({ status: res.statusCode, body }));
    });
    req.on('error', reject);
    // Same handling as httpPost: abort and reject instead of hanging forever.
    req.on('timeout', () => { req.destroy(); reject(new Error('timeout')); });
    req.write('');
    req.end();
  });
}
/**
 * Spawn `node server.js` in `dir` and wait until it is ready to serve.
 *
 * Readiness is detected either from a known banner in the child's output or from a
 * successful poll of the /health endpoint, whichever happens first.
 *
 * @param {string} dir working directory of the checkout to start
 * @returns {Promise<import('child_process').ChildProcess>} the running child process
 * Rejects if the process fails to spawn, exits before becoming ready, or is not ready
 * within 5 minutes (in which case the child is terminated).
 */
function startServer(dir) {
  return new Promise((resolve, reject) => {
    const child = spawn('node', ['server.js'], {
      cwd: dir,
      env: { ...process.env, PORT: String(PORT), NODE_ENV: 'test' },
      stdio: ['ignore', 'pipe', 'pipe'],
    });

    let output = '';
    let settled = false;
    let poll = null;
    let deadline = null;

    // Single exit point: whichever signal arrives first wins, and the poll/deadline
    // timers are always released so they cannot keep the script alive afterwards.
    const settle = (finish, value) => {
      if (settled) return;
      settled = true;
      clearInterval(poll);
      clearTimeout(deadline);
      child.stdout.removeListener('data', onData);
      child.stderr.removeListener('data', onData);
      finish(value);
    };

    function onData(chunk) {
      output += chunk.toString();
      // Wait for server ready
      if (output.includes('Server running on') || output.includes('Terminology module loaded')) {
        settle(resolve, child);
      }
    }

    child.stdout.on('data', onData);
    child.stderr.on('data', onData);
    child.on('error', (err) => settle(reject, err));
    child.on('exit', (code, signal) => {
      // Only relevant before readiness; after that `settle` is a no-op.
      const how = code != null ? `code ${code}` : `signal ${signal}`;
      settle(reject, new Error(`Server exited with ${how}\n${output}`));
    });

    // Fallback: poll health
    poll = setInterval(async () => {
      try {
        const r = await httpGet(`http://localhost:${PORT}/health`);
        if (r.status === 200) settle(resolve, child);
      } catch { /* not ready */ }
    }, 2000);

    // Timeout: do not leave a half-started server holding the port.
    deadline = setTimeout(() => {
      child.kill();
      settle(reject, new Error(`Server start timeout.\nOutput: ${output}`));
    }, 300000);
  });
}

/**
 * Stop a server started by startServer().
 *
 * Sends SIGTERM, and escalates to SIGKILL if the process has not exited within `graceMs`.
 *
 * @param {import('child_process').ChildProcess|null|undefined} child process to stop
 * @param {number} [graceMs=10000] how long to wait for a graceful exit before SIGKILL
 * @returns {Promise<void>} resolves when the process has exited or the grace period has elapsed
 */
function stopServer(child, graceMs = 10000) {
  return new Promise((resolve) => {
    // `child.killed` only says a signal was *sent*; exitCode/signalCode say the process is gone.
    const hasExited = () => child.exitCode != null || child.signalCode != null;
    if (!child || hasExited()) { resolve(); return; }

    const forceKill = setTimeout(() => {
      if (!hasExited()) child.kill('SIGKILL');
      resolve();
    }, graceMs);

    child.once('exit', () => {
      clearTimeout(forceKill); // otherwise the timer keeps the event loop alive
      resolve();
    });
    child.kill('SIGTERM');
  });
}
// --- Measurement ---
/**
 * Run one test case once: POST its request body to ValueSet/$expand and time the round trip.
 *
 * @param {{body: function(): Object}} test test case whose body() builds the Parameters resource
 * @returns {Promise<{elapsedMs: number, status: number}>} wall-clock latency and HTTP status
 */
async function runTest(test) {
  const payload = test.body();
  const target = `${BASE}/ValueSet/$expand`;
  const startedAt = performance.now();
  const { status } = await httpPost(target, payload);
  return { elapsedMs: performance.now() - startedAt, status };
}
/**
 * Summarise a list of timings.
 *
 * @param {number[]} arr samples in milliseconds (not modified)
 * @returns {{mean: number, median: number, p95: number, min: number, max: number}}
 *   each value rounded to one decimal place; every field is NaN when `arr` is empty
 */
function stats(arr) {
  const sorted = [...arr].sort((a, b) => a - b);
  const n = sorted.length;
  if (n === 0) {
    // No samples (e.g. zero measured iterations): report NaN rather than throwing.
    return { mean: NaN, median: NaN, p95: NaN, min: NaN, max: NaN };
  }

  const round1 = (x) => +x.toFixed(1);
  const sum = sorted.reduce((a, b) => a + b, 0);
  const mid = Math.floor(n / 2);
  // Even-sized samples have no single middle element; average the two central values.
  const median = n % 2 === 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;

  return {
    mean: round1(sum / n),
    median: round1(median),
    p95: round1(sorted[Math.floor(n * 0.95)]),
    min: round1(sorted[0]),
    max: round1(sorted[n - 1]),
  };
}
/**
 * Print the perf counters reported by the incremental server.
 *
 * @param {{counts?: Object, timings?: Object}|null} counters parsed /debug/perf-counters payload,
 *   or a falsy value when none could be read
 */
function printCounters(counters) {
  if (!counters) {
    console.log('\n(no perf counters available)');
    return;
  }

  console.log('\n--- Code Path Coverage (incremental) ---');

  console.log('Counts:');
  const countRows = Object.entries(counters.counts || {}).sort();
  countRows.forEach(([name, value]) => {
    console.log(` ${name}: ${value}`);
  });

  console.log('Timings:');
  const timingRows = Object.entries(counters.timings || {}).sort();
  timingRows.forEach(([name, timing]) => {
    const average = (timing.totalMs / timing.calls).toFixed(2);
    console.log(` ${name}: ${timing.calls} calls, ${timing.totalMs}ms total, ${average}ms avg`);
  });
}
// --- Main ---
/**
 * Entry point: run the selected suite(s) one after another, print per-suite results,
 * a side-by-side comparison when both ran, and finally check that the incremental run
 * actually exercised the code paths the test cases are meant to hit.
 */
async function main() {
  console.log('Performance comparison: incremental provider enhancements');
  console.log(`Iterations: ${ITERATIONS}, Warmup: ${WARMUP}`);

  const wantsMain = BRANCH === 'main' || BRANCH === 'both';
  const wantsIncremental = BRANCH === 'incremental' || BRANCH === 'both';

  let mainData = null;
  if (wantsMain) {
    mainData = await runSuite('BASELINE (upstream/main)', MAIN_DIR, false);
    printResults('Baseline (upstream/main)', mainData);
  }

  let incrData = null;
  if (wantsIncremental) {
    incrData = await runSuite('INCREMENTAL (enhanced)', INCREMENTAL_DIR, true);
    printResults('Incremental (enhanced)', incrData);
    printCounters(incrData.counters);
  }

  if (mainData && incrData) {
    printComparison(mainData, incrData);
  }

  // Verify code path coverage on incremental
  if (!incrData?.counters) return;

  console.log('\n--- Path Coverage Verification ---');
  const c = incrData.counters.counts || {};
  // display.fastPath requires workingLanguages to be English-only without
  // implicit wildcard, which fromAcceptLanguage always adds — so it won't
  // trigger via HTTP requests. Track it as informational, not required.
  const expected = ['locate.batched', 'filter.paged', 'props.skipped', 'cache.hit'];
  const informational = ['display.fastPath', 'display.fullPath'];

  const outcomes = expected.map((name) => {
    const seen = c[name] || 0;
    const hit = seen > 0;
    console.log(` ${hit ? '✓' : '✗'} ${name}: ${seen}`);
    return hit;
  });
  const allHit = outcomes.every(Boolean);

  console.log(allHit ? '\n✓ All required code paths were exercised.' : '\n✗ Some required code paths were NOT exercised — check test cases.');
  console.log('Informational:');
  informational.forEach((name) => {
    console.log(` ℹ ${name}: ${c[name] || 0}`);
  });
}
"1043", "1043181", "10432", "1043631", "10437", "10438", "1044269", "1044584", "104486", "1044975", "1044977", "10450", "10454", "1045453", "10463", "10464", "1047072", "10471", "10472", "10473", "10485", "10493", "10498", "10502", "10510", "10517", "10524", "10528", "1054", "10553", "1056", "10561", "10562", "10565", "105655", "105669", "105673", "105694", "105695", "10572", "10579", "10580", "10582", "10588", "10591", "10594", "10597", "10600", "10603", "10612", "106212", "10627", "10633", "10634", "10635", "10636", "10637", "10638", "10639", "10689", "10691", "107056", "10734", "10735", "10737", "10753", "10756", "10757", "10759", "10763", "10767", "10772", "107770", "107771", "107784", "10795", "10798", "10800", "10803", "10804", "10805", "108074", "108088", "108091", "10811", "108118", "10814", "10819", "10824", "10825", "10826", "10827", "10828", "10829", "10834", "10844", "10847", "10849", "1085787", "1085955", "10864", "10865", "1086769", "10869", "10878", "1088438", "1088591", "10890", "10898", "10908", "1091", "1091643", "10917", "1091836", "1091919", "1092437", "10938", "1094060", "1094116", "1094193", "1094833", "10958", "10960", "10962", "10968", "1098", "1098118", "1098130", "1098183", "1098189", "1098200", "1098232", "1098247", "1098257", "1098274", "1098353", "1098413", "1098455", "1098619", "10991", "10995", "10996", "1099660", "10999", "1099933", "1099937", "110", "1100072", "11002", "1100699", "11017", "1102129", "1102188", "1102261", "1102270", "11055", "1110783", "1111", "1111024", "1111103", "11115", "11121", "11124", "1112973", "1112990", "11131", "1113697", "1114112", "1114195", "1114326", "1114345", "1114883", "11149", "1116238", "1116632", "1116977", "1116979", "11170", "1117083", "11194", "11196", "11198", "1119986", "11201", "11202", "11204", "11232", "11246", "11248", "11251", "11253", "11256", "11258", "11274", "11289", "11291", "11295", "11327", "113373", "113374", "11359", "113608", "11363", "11371", "11377", "11378", "113831", 
"114052", "11413", "11416", "114176", "114200", "114202", "114203", "11422", "11423", "114289", "11431", "114464", "114477", "1144960", "11454", "1145945", "11469", "1147220", "11473", "1147320", "11476", "1148138", "114817", "1148495", "114934", "114970", "114979", "1150096", "1151", "11516", "115238", "115243", "115264", "115552", "1156", "115698", "1157", "11636", "11645", "1169", "117055", "117466", "117896", "1182", "118886", "1191", "1191668", "119246", "1192621", "1192729", "1192968", "1192987", "1192992", "1192997", "1193003", "1193326", "119565", "119771", "1202", "120608", "12062", "121047", "121069", "121070", "121191", "121243", "12166", "1218", "1223", "1227", "1232150", "1232311", "1232405", "1232604", "1232630", "1232637", "1236136", "124150", "1242127", "1242806", "1242987", "1242999", "1243041", "124323", "1244014", "124427", "124431", "1244607", "12449", "1246101", "12473", "124848", "1248798", "1250203", "1251", "1251576", "1256", "12574", "125918", "125921", "125929", "125933", "126291", "1266", "1272", "1291", "1291301", "1291425", "1291609", "1292", "1292422", "1293254", "1293719", "1294580", "1294628", "1294630", "1294632", "1294634", "1297525", "1297527", "1297529", "1297531", "1297533", "1297535", "1297537", "1298944", "1299884", "1299981", "1300701", "1300786", "1301854", "1302966", "1303098", "1304974", "1305515", "1305516", "1305517", "1305530", "1305532", "1305536", "1305545", "1305547", "1305548", "1305549", "1305550", "1305551", "1305552", "1305553", "1305554", "1305555", "1305556", "1305559", "1305560", "1305564", "1305567", "1305574", "1305576", "1305578", "1305579", "1305581", "1305582", "1305585", "1305600", "1305601", "1305608", "1305630", "1305631", "1305633", "1305634", "1305635", "1305636", "1305638", "1305639", "1305640", "1305641", "1305642", "1305643", "1305648", "1305649", "1305650", "1305651", "1305652", "1305653", "1305654", "1305655", "1305656", "1305657", "1305658", "1305659", "1305660", "1305661", "1305662", 
"1305663", "1305664", "1305665", "1305669", "1305670", "1305715", "1305716", "1305719", "1305720", "1305724", "1305725", "1305727", "1305728", "1305738", "1305739", "1305743", "1305744", "1305745", "1305748", "1305749", "1305751", "1305758", "1305761", "1305762", "1305763", "1305896", "1305899", "1306057", "1306058", "1306059", "1306060", "1306061", "1306063", "1306066", "1306068", "1306094", "1306095", "1306096", "1306097", "1306098", "1306100", "1306107", "1306108", "1306111", "1306112", "1306115", "1306117", "1306118", "1306119", "1306120", "1306122", "1306124", "1306125", "1306128", "1306130", "1306131", "1306132", "1306133", "1306134", "1306135", "1306136", "1306137", "1306138", "1306144", "1306145", "1306146", "1306148", "1306149", "1306150", "1306151", "1306152", "1306153", "1306154", "1306155", "1306156", "1306157", "1306158", "1306159", "1306160", "1306161", "1306162", "1306163", "1306164", "1306165", "1306166", "1306167", "1306168", "1306169", "1306170", "1306171", "1306172", "1306173", "1306174", "1306175", "1306178", "1306179", "1306180", "1306181", "1306182", "1306183", "1306184", "1306185", "1306188", "1306189", "1306190", "1306191", "1306192", "1306193", "1306194", "1306195", "1306197", "1306198", "1306199", "1306200", "1306201", "1306203", "1306204", "1306205", "1306206", "1306207", "1306208", "1306209", "1306210", "1306211", "1306212", "1306235", "1306284", "1306286", "1306351", "1306881", "1306882", "1306883", "1306885", "1306886", "1306933", "1306935", "1306937", "1306938", "1306940", "1306941", "1306942", "1306943", "1306944", "1306945", "1306946", "1306947", "1306948", "1307035", "1307037", "1307038", "1307039", "1307040", "1307041", "1307042", "1307043", "1307044", "1307045", "1307046", "1307048", "1307049", "1307050", "1307079", "1307080", "1307081", "1307083", "1307085", "1307086", "1307087", "1307088", "1307090", "1307091", "1307092", "1307093", "1307094", "1307096", "1307097", "1307098", "1307099", "1307100", "1307101", "1307102", 
"1307106", "1307141", "1307142", "1307143", "1307144", "1307145", "1307148", "1307149", "1307150", "1307151", "1307152", "1307153", "1307154", "1307155", "1307156", "1307157", "1307262", "1307263", "1307298", "1307310", "1307312", "1307313", "1307314", "1307404", "1307539", "1307540", "1307541", "1307542", "1307543", "1307544", "1307545", "1307547", "1307548", "1307549", "1307550", "1307553", "1307554", "1307555", "1307556", "1307557", "1307558", "1307560", "1307561", "1307562", "1307563", "1307564", "1307565", "1307566", "1307567", "1307568", "1307569", "1307570", "1307571", "1307572", "1307573", "1307574", "1307575", "1307576", "1307577", "1307578", "1307579", "1307580", "1307581", "1307582", "1307585", "1307586", "1307587", "1307589", "1307590", "1307591", "1307592", "1307593", "1307594", "1307595", "1307596", "1307597", "1307598", "1307599", "1307600", "1307601", "1307602", "1307603", "1307604", "1307605", "1307606", "1307607", "1307609", "1307610", "1307611", "1307612", "1307613", "1307614", "1307617", "1307618", "1307619", "1307631", "1307636", "1307637", "1307638", "1307639", "1307640", "1307641", "1307642", "1307643", "1307644", "1307645", "1307646", "1307647", "1307648", "1307649", "1307653", "1307655", "1307658", "1307659", "1307660", "1307661", "1307663", "1307664", "1307665", "1307667", "1307668", "1307669", "1307670", "1307671", "1307672", "1307673", "1307674", "1307675", "1307676", "1307677", "1307678", "1307679", "1307680", "1307681", "1307682", "1307683", "1307684", "1307685", "1307686", "1307687", "1307688", "1307689", "1307690", "1307691", "1307692", "1307693", "1307694", "1307695", "1307696", "1307697", "1307698", "1307699", "1307700", "1307701", "1307702", "1307703", "1307704", "1307705", "1307706", "1307707", "1307708", "1307709", "1307710", "1307711", "1307712", "1307713", "1307714", "1307715", "1307716", "1307717", "1307718", "1307719", "1307720", "1307721", "1307722", "1307723", "1307724", "1307726", "1307727", "1307729", "1307730", 
"1307731", "1307732", "1307733", "1307735", "1307736", "1307737", "1307738", "1307739", "1307740", "1307741", "1307742", "1307743", "1307744", "1307745", "1307746", "1307747", "1307748", "1307749", "1307750", "1307752", "1307753", "1307754", "1307755", "1307756", "1307757", "1307758", "1307759", "1307760", "1307761", "1307762", "1307763", "1307764", "1307766", "1307767", "1307768", "1307769", "1307770", "1307772", "1307773", "1307775", "1307777", "1307778", "1307779", "1307780", "1307781", "1307782", "1307783", "1307784", "1307787", "1307788", "1307789", "1307790", "1307791", "1307792", "1307794", "1307797", "1307800", "1307801", "1307802", "1307803", "1307804", "1307805", "1307806", "1307807", "1307808", "1307809", "1307810", "1307812", "1307814", "1307815", "1307816", "1307817", "1307818", "1307819", "1307820", "1307822", "1307824", "1307825", "1307826", "1307827", "1307828", "1307829", "1307830", "1307831", "1307832", "1307833"] \ No newline at end of file diff --git a/scripts/replay-sampled-terminology.js b/scripts/replay-sampled-terminology.js new file mode 100644 index 0000000..45879c5 --- /dev/null +++ b/scripts/replay-sampled-terminology.js @@ -0,0 +1,381 @@ +#!/usr/bin/env node +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const express = require('express'); +const TXModule = require('../tx/tx.js'); +const ServerStats = require('../stats'); + +function parseArgs(argv) { + const out = { + input: 'captured/snomed.ndjson', + out: 'captured/snomed-replay-intended-results.json', + port: 9400, + endpointPath: '/r4', + librarySource: 'tx/tx.snomed-v0.yml', + intendedSource: 'prod', + compare: null, + }; + + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + if (a === '--input' && argv[i + 1]) out.input = argv[++i]; + else if (a === '--out' && argv[i + 1]) out.out = argv[++i]; + else if (a === '--port' && argv[i + 1]) out.port = Number(argv[++i]); + else if (a === '--path' && argv[i + 1]) out.endpointPath = argv[++i]; + 
/**
 * Load a newline-delimited JSON file.
 *
 * Blank lines (including the '\r' left behind by CRLF line endings) are skipped.
 *
 * @param {string} filePath path to the NDJSON file, resolved against the current directory
 * @returns {Object[]} one parsed value per non-blank line, in file order
 * @throws {Error} if the file does not exist, or a line is not valid JSON; the message
 *   carries the absolute path and the 1-based line number as it appears in the file
 */
function readNdjson(filePath) {
  const abs = path.resolve(filePath);
  if (!fs.existsSync(abs)) {
    throw new Error(`Input NDJSON not found: ${abs}`);
  }

  const records = [];
  // Iterate over the unfiltered lines so `i` is the real position in the file;
  // filtering first would shift every reported line number past a blank line.
  fs.readFileSync(abs, 'utf8').split('\n').forEach((line, i) => {
    if (line.trim() === '') return;
    try {
      records.push(JSON.parse(line));
    } catch (error) {
      throw new Error(`Invalid JSON at ${abs}:${i + 1} (${error.message})`, { cause: error });
    }
  });
  return records;
}

/**
 * @param {*} body parsed request/response body
 * @returns {string|null} the body's `resourceType` when the body is a plain object and the
 *   field is a string, otherwise null
 */
function parseResourceType(body) {
  if (!body || typeof body !== 'object' || Array.isArray(body)) return null;
  return typeof body.resourceType === 'string' ? body.resourceType : null;
}

/**
 * @param {*} value any value
 * @returns {boolean} true for non-null, non-array objects
 */
function isJsonObject(value) {
  return !!value && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Pick the status the replay is expected to reproduce for a captured sample.
 *
 * @param {{devStatus: *, prodStatus: *}} sample captured sample
 * @param {string} intendedSource 'dev' selects devStatus; anything else selects prodStatus
 * @returns {*} the selected status value
 */
function intendedStatusFromSample(sample, intendedSource) {
  if (intendedSource === 'dev') return sample.devStatus;
  return sample.prodStatus;
}
'5.0' : '4.0', context: null }], + }; + + const stats = new ServerStats(); + const txModule = new TXModule(stats); + await txModule.initialize(config, app); + + const server = await new Promise((resolve, reject) => { + const s = app.listen(port, (err) => (err ? reject(err) : resolve(s))); + }); + + return { app, server, txModule, stats }; +} + +async function stopServer(ctx) { + if (ctx.txModule && typeof ctx.txModule.shutdown === 'function') { + await ctx.txModule.shutdown(); + } + if (ctx.stats && typeof ctx.stats.finishStats === 'function') { + ctx.stats.finishStats(); + } + await new Promise((resolve) => { + ctx.server.closeAllConnections?.(); + ctx.server.close(() => resolve()); + }); +} + +function summarizeResults(results) { + const byActual = {}; + const byIntendedPair = {}; + const topMismatches = {}; + + let intendedPass = 0; + let intendedFail = 0; + let prodMatch = 0; + let devMatch = 0; + let noActual = 0; + let postTotal = 0; + let postWithBody = 0; + let postMissingBody = 0; + let totalDuration = 0; + let maxDuration = 0; + + for (const r of results) { + const statusKey = r.actualStatus == null ? 
'ERR' : String(r.actualStatus); + byActual[statusKey] = (byActual[statusKey] || 0) + 1; + + const pair = `${r.intendedStatus}->${statusKey}`; + byIntendedPair[pair] = (byIntendedPair[pair] || 0) + 1; + + if (r.statusMatch.intended === true) intendedPass += 1; + else intendedFail += 1; + + if (r.statusMatch.prod === true) prodMatch += 1; + if (r.statusMatch.dev === true) devMatch += 1; + if (r.actualStatus == null) noActual += 1; + + if (r.method === 'POST') { + postTotal += 1; + if (r.hadBody) postWithBody += 1; + else postMissingBody += 1; + } + + totalDuration += r.durationMs; + if (r.durationMs > maxDuration) maxDuration = r.durationMs; + + if (r.statusMatch.intended !== true) { + const sig = r.signature || `${r.method} ${r.url}`; + topMismatches[sig] = (topMismatches[sig] || 0) + 1; + } + } + + const topPairs = Object.entries(byIntendedPair).sort((a, b) => b[1] - a[1]).slice(0, 12); + const topFailSigs = Object.entries(topMismatches).sort((a, b) => b[1] - a[1]).slice(0, 12); + + return { + total: results.length, + intendedPass, + intendedFail, + prodMatch, + devMatch, + noActual, + postTotal, + postWithBody, + postMissingBody, + avgDurationMs: results.length ? 
Math.round(totalDuration / results.length) : 0, + maxDurationMs: maxDuration, + byActual, + topPairs, + topFailSigs, + }; +} + +function actualStatusFromPrior(record) { + if (typeof record?.actualStatus === 'number') return record.actualStatus; + if (typeof record?.actual === 'number') return record.actual; + return null; +} + +function compareAgainstPrior(currentResults, priorResults, sampleById) { + const priorById = new Map(); + for (const r of priorResults || []) { + if (r && r.id) priorById.set(r.id, r); + } + + const classifications = []; + const summary = { + compared: 0, + noPrior: 0, + improved: 0, + regressed: 0, + unchangedPass: 0, + unchangedFail: 0, + changedStatus: 0, + }; + + for (const cur of currentResults) { + const sample = sampleById.get(cur.id); + const prior = priorById.get(cur.id); + if (!sample || !prior) { + summary.noPrior += 1; + continue; + } + + const priorActual = actualStatusFromPrior(prior); + if (priorActual == null) { + summary.noPrior += 1; + continue; + } + + const priorIntendedMatch = priorActual === cur.intendedStatus; + const currentIntendedMatch = cur.statusMatch.intended === true; + let classification = 'unchanged-fail'; + + if (!priorIntendedMatch && currentIntendedMatch) classification = 'improved'; + else if (priorIntendedMatch && !currentIntendedMatch) classification = 'regressed'; + else if (priorIntendedMatch && currentIntendedMatch) classification = 'unchanged-pass'; + + summary.compared += 1; + if (classification === 'improved') summary.improved += 1; + else if (classification === 'regressed') summary.regressed += 1; + else if (classification === 'unchanged-pass') summary.unchangedPass += 1; + else summary.unchangedFail += 1; + + if (priorActual !== cur.actualStatus) summary.changedStatus += 1; + + classifications.push({ + id: cur.id, + url: cur.url, + signature: cur.signature, + intendedStatus: cur.intendedStatus, + priorActualStatus: priorActual, + currentActualStatus: cur.actualStatus, + priorIntendedMatch, + 
currentIntendedMatch, + classification, + }); + } + + const topChanged = classifications + .filter((c) => c.priorActualStatus !== c.currentActualStatus) + .slice(0, 25); + const regressed = classifications.filter((c) => c.classification === 'regressed').slice(0, 25); + const improved = classifications.filter((c) => c.classification === 'improved').slice(0, 25); + + return { summary, improved, regressed, topChanged }; +} + +async function main() { + const args = parseArgs(process.argv.slice(2)); + const samples = readNdjson(args.input); + const sampleById = new Map(samples.map((s) => [s.id, s])); + + const outAbs = path.resolve(args.out); + fs.mkdirSync(path.dirname(outAbs), { recursive: true }); + + const serverCtx = await startServer(args.port, args.endpointPath, args.librarySource); + const results = []; + + try { + for (const sample of samples) { + const started = Date.now(); + const isPost = String(sample.method || '').toUpperCase() === 'POST'; + const body = isJsonObject(sample.requestBody) ? sample.requestBody : null; + const hadBody = !!body; + + let actualStatus = null; + let responseResourceType = null; + let responseBytes = 0; + let error = null; + + try { + const req = { + method: sample.method || 'GET', + headers: { accept: 'application/fhir+json, application/json' }, + }; + if (hadBody) { + req.headers['content-type'] = 'application/fhir+json'; + req.body = JSON.stringify(body); + } + + const resp = await fetch(`http://localhost:${args.port}${sample.url}`, req); + actualStatus = resp.status; + const text = await resp.text(); + responseBytes = text ? 
Buffer.byteLength(text, 'utf8') : 0; + if (text) { + try { + responseResourceType = parseResourceType(JSON.parse(text)); + } catch (_error) { + responseResourceType = null; + } + } + } catch (e) { + error = String(e?.message || e); + } + + const intendedStatus = intendedStatusFromSample(sample, args.intendedSource); + const prodStatus = sample.prodStatus; + const devStatus = sample.devStatus; + + results.push({ + id: sample.id, + ts: sample.ts, + method: sample.method, + url: sample.url, + signature: sample.signature, + prodStatus, + devStatus, + intendedSource: args.intendedSource, + intendedStatus, + actualStatus, + error, + statusMatch: { + intended: typeof actualStatus === 'number' && typeof intendedStatus === 'number' ? actualStatus === intendedStatus : false, + prod: typeof actualStatus === 'number' && typeof prodStatus === 'number' ? actualStatus === prodStatus : false, + dev: typeof actualStatus === 'number' && typeof devStatus === 'number' ? actualStatus === devStatus : false, + }, + hadBody, + requestBodyMissing: !!sample.requestBodyMissing, + requestBodyParseError: !!sample.requestBodyParseError, + durationMs: Date.now() - started, + responseBytes, + responseResourceType, + }); + } + } finally { + await stopServer(serverCtx); + } + + const overall = summarizeResults(results); + const r4 = summarizeResults(results.filter((r) => String(r.url).startsWith('/r4/'))); + const r5 = summarizeResults(results.filter((r) => String(r.url).startsWith('/r5/'))); + + let comparison = null; + if (args.compare) { + const compareAbs = path.resolve(args.compare); + const priorJson = JSON.parse(fs.readFileSync(compareAbs, 'utf8')); + const priorResults = Array.isArray(priorJson.results) ? 
priorJson.results : []; + comparison = { + against: compareAbs, + ...compareAgainstPrior(results, priorResults, sampleById), + }; + } + + const payload = { + generatedAt: new Date().toISOString(), + input: path.resolve(args.input), + port: args.port, + endpointPath: args.endpointPath, + librarySource: args.librarySource, + intendedSource: args.intendedSource, + overall, + r4, + r5, + comparison, + results, + }; + + fs.writeFileSync(outAbs, JSON.stringify(payload, null, 2)); + + const cliSummary = { + out: outAbs, + intendedSource: args.intendedSource, + overall: { + total: overall.total, + intendedPass: overall.intendedPass, + intendedFail: overall.intendedFail, + prodMatch: overall.prodMatch, + devMatch: overall.devMatch, + }, + comparison: comparison ? comparison.summary : null, + }; + console.log(JSON.stringify(cliSummary, null, 2)); +} + +main().catch((error) => { + console.error(error); + process.exit(1); +}); + diff --git a/scripts/sqlite-exclude-bench.js b/scripts/sqlite-exclude-bench.js new file mode 100644 index 0000000..54d8dde --- /dev/null +++ b/scripts/sqlite-exclude-bench.js @@ -0,0 +1,151 @@ +#!/usr/bin/env node +'use strict'; + +const path = require('path'); +const Database = require('better-sqlite3'); + +const DB_PATH = process.argv[2] || path.join(__dirname, '..', 'data', 'terminology-cache', 'rxnorm_02032025-a.db'); +const db = new Database(DB_PATH, { readonly: true }); + +// Get real exclude codes +const all50 = db.prepare("SELECT RXCUI FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' LIMIT 50").all().map(r => r.RXCUI); +const all500 = db.prepare("SELECT RXCUI FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' LIMIT 500").all().map(r => r.RXCUI); +const all2000 = db.prepare("SELECT RXCUI FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' LIMIT 2000").all().map(r => r.RXCUI); + +function bench(name, fn, n = 7) { + fn(); fn(); // warmup + const times = []; + let result; + for (let i = 0; i < n; i++) { const t = performance.now(); result = fn(); 
times.push(performance.now() - t); } + times.sort((a, b) => a - b); + const rows = typeof result === 'number' ? result : '?'; + console.log(` ${name}: ${times[3].toFixed(3)}ms median [${times[0].toFixed(3)}–${times[6].toFixed(3)}] (${rows} rows)`); +} + +const BASE = "SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' ORDER BY RXCUI"; + +// --- Query plans --- +console.log('=== Query plans ==='); +const plans = [ + ["Baseline", BASE], + ["NOT IN literal", "SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' AND RXCUI NOT IN ('12345') ORDER BY RXCUI"], + ["NOT IN subquery", "SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' AND RXCUI NOT IN (SELECT RXCUI FROM rxnconso WHERE SAB='RXNORM' AND TTY='IN') ORDER BY RXCUI"], + ["NOT EXISTS", "SELECT r.RXCUI, r.STR, r.SUPPRESS FROM rxnconso r WHERE r.SAB='RXNORM' AND r.TTY='SBD' AND NOT EXISTS (SELECT 1 FROM rxnconso x WHERE x.RXCUI=r.RXCUI AND x.SAB='RXNORM' AND x.TTY='IN') ORDER BY r.RXCUI"], + ["EXCEPT", "SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' EXCEPT SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='IN'"], +]; +for (const [label, sql] of plans) { + console.log(`\n ${label}:`); + const plan = db.prepare('EXPLAIN QUERY PLAN ' + sql).all(); + plan.forEach(r => console.log(` ${r.detail}`)); +} + +// --- 50 code excludes --- +console.log('\n\n=== Exclude: 50 codes ==='); +const lit50 = all50.map(c => `'${c}'`).join(','); +const set50 = new Set(all50); + +bench('NOT IN literal(50)', () => { + let c = 0; + for (const r of db.prepare(`SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' AND RXCUI NOT IN (${lit50}) ORDER BY RXCUI`).iterate()) c++; + return c; +}); + +bench('JS Set.has(50)', () => { + let c = 0; + for (const r of db.prepare(BASE).iterate()) { if (!set50.has(r.RXCUI)) c++; } + return c; +}); + +bench('NOT IN subquery(IN TTY ~14k)', () => { + let c = 0; + for (const r 
of db.prepare("SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' AND RXCUI NOT IN (SELECT RXCUI FROM rxnconso WHERE SAB='RXNORM' AND TTY='IN') ORDER BY RXCUI").iterate()) c++; + return c; +}); + +bench('NOT EXISTS(IN TTY ~14k)', () => { + let c = 0; + for (const r of db.prepare("SELECT r.RXCUI, r.STR, r.SUPPRESS FROM rxnconso r WHERE r.SAB='RXNORM' AND r.TTY='SBD' AND NOT EXISTS (SELECT 1 FROM rxnconso x WHERE x.RXCUI=r.RXCUI AND x.SAB='RXNORM' AND x.TTY='IN') ORDER BY r.RXCUI").iterate()) c++; + return c; +}); + +bench('EXCEPT(IN TTY)', () => { + let c = 0; + for (const r of db.prepare("SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' EXCEPT SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='IN'").iterate()) c++; + return c; +}); + +// --- 500 code excludes --- +console.log('\n=== Exclude: 500 codes ==='); +const lit500 = all500.map(c => `'${c}'`).join(','); +const set500 = new Set(all500); + +bench('NOT IN literal(500)', () => { + let c = 0; + for (const r of db.prepare(`SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' AND RXCUI NOT IN (${lit500}) ORDER BY RXCUI`).iterate()) c++; + return c; +}); + +bench('JS Set.has(500)', () => { + let c = 0; + for (const r of db.prepare(BASE).iterate()) { if (!set500.has(r.RXCUI)) c++; } + return c; +}); + +// --- 2000 code excludes --- +console.log('\n=== Exclude: 2000 codes ==='); +const lit2000 = all2000.map(c => `'${c}'`).join(','); +const set2000 = new Set(all2000); + +bench('NOT IN literal(2000)', () => { + let c = 0; + for (const r of db.prepare(`SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' AND RXCUI NOT IN (${lit2000}) ORDER BY RXCUI`).iterate()) c++; + return c; +}); + +bench('JS Set.has(2000)', () => { + let c = 0; + for (const r of db.prepare(BASE).iterate()) { if (!set2000.has(r.RXCUI)) c++; } + return c; +}); + +// --- Temp table approach --- +console.log('\n=== Temp table exclude (2000 
codes) ==='); + +// Need a writable connection for temp tables +const dbRW = new Database(DB_PATH); + +dbRW.exec('CREATE TEMP TABLE exclude_codes (rxcui TEXT PRIMARY KEY)'); + +bench('Temp table + NOT IN subquery', () => { + dbRW.exec('DELETE FROM exclude_codes'); + const ins = dbRW.prepare('INSERT INTO exclude_codes VALUES (?)'); + const tx = dbRW.transaction(() => { for (const c of all2000) ins.run(c); }); + tx(); + let c = 0; + for (const r of dbRW.prepare("SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' AND RXCUI NOT IN (SELECT rxcui FROM exclude_codes) ORDER BY RXCUI").iterate()) c++; + return c; +}); + +bench('Temp table + LEFT JOIN IS NULL', () => { + dbRW.exec('DELETE FROM exclude_codes'); + const ins = dbRW.prepare('INSERT INTO exclude_codes VALUES (?)'); + const tx = dbRW.transaction(() => { for (const c of all2000) ins.run(c); }); + tx(); + let c = 0; + for (const r of dbRW.prepare("SELECT r.RXCUI, r.STR, r.SUPPRESS FROM rxnconso r LEFT JOIN exclude_codes x ON r.RXCUI=x.rxcui WHERE r.SAB='RXNORM' AND r.TTY='SBD' AND x.rxcui IS NULL ORDER BY r.RXCUI").iterate()) c++; + return c; +}); + +bench('Temp table + NOT EXISTS', () => { + dbRW.exec('DELETE FROM exclude_codes'); + const ins = dbRW.prepare('INSERT INTO exclude_codes VALUES (?)'); + const tx = dbRW.transaction(() => { for (const c of all2000) ins.run(c); }); + tx(); + let c = 0; + for (const r of dbRW.prepare("SELECT r.RXCUI, r.STR, r.SUPPRESS FROM rxnconso r WHERE r.SAB='RXNORM' AND r.TTY='SBD' AND NOT EXISTS (SELECT 1 FROM exclude_codes x WHERE x.rxcui=r.RXCUI) ORDER BY r.RXCUI").iterate()) c++; + return c; +}); + +dbRW.close(); +db.close(); +console.log('\n=== Done ==='); diff --git a/scripts/sqlite-microbench.js b/scripts/sqlite-microbench.js new file mode 100644 index 0000000..720cc57 --- /dev/null +++ b/scripts/sqlite-microbench.js @@ -0,0 +1,464 @@ +#!/usr/bin/env node +'use strict'; + +/** + * SQLite microbenchmarks for RxNorm provider interface design. 
+ * Run: node scripts/sqlite-microbench.js [path-to-rxnorm.db] + * + * Tests both async `sqlite3` and sync `better-sqlite3` to inform + * whether lazy cursors, LIMIT/OFFSET, UNION, etc. are worth using + * in the expandForValueSet provider interface. + */ + +const path = require('path'); + +// --- Configuration --- +const DB_PATH = process.argv[2] || path.join(__dirname, '..', 'data', 'terminology-cache', 'rxnorm_02032025-a.db'); +const WARMUP = 1; +const ITERATIONS = 5; + +// --- Helpers --- +function median(arr) { + const s = [...arr].sort((a, b) => a - b); + const mid = Math.floor(s.length / 2); + return s.length % 2 ? s[mid] : (s[mid - 1] + s[mid]) / 2; +} + +function stats(arr) { + const med = median(arr); + const min = Math.min(...arr); + const max = Math.max(...arr); + return { median: med.toFixed(2), min: min.toFixed(2), max: max.toFixed(2) }; +} + +async function bench(name, fn, iterations = ITERATIONS, warmup = WARMUP) { + // Warmup + for (let i = 0; i < warmup; i++) await fn(); + + const times = []; + let lastResult; + for (let i = 0; i < iterations; i++) { + const t0 = performance.now(); + lastResult = await fn(); + times.push(performance.now() - t0); + } + const s = stats(times); + const extra = lastResult && typeof lastResult === 'object' && lastResult._rows != null + ? ` (${lastResult._rows} rows)` : ''; + console.log(` ${name}: ${s.median}ms median [${s.min}–${s.max}]${extra}`); + return { name, ...s, ...lastResult }; +} + +// --- Load both sqlite packages --- +let sqlite3Async, BetterSqlite3; +try { sqlite3Async = require('sqlite3').verbose(); } catch (e) { + console.error('sqlite3 package not available'); process.exit(1); +} +try { BetterSqlite3 = require('better-sqlite3'); } catch (e) { + console.warn('better-sqlite3 not available — skipping sync benchmarks'); +} + +// Async sqlite3 helper: promisified db.all +function dbAll(db, sql, params = []) { + return new Promise((resolve, reject) => { + db.all(sql, params, (err, rows) => err ? 
reject(err) : resolve(rows)); + }); +} + +// Async sqlite3 helper: db.each with optional early abort +function dbEach(db, sql, params, rowCb) { + return new Promise((resolve, reject) => { + let count = 0; + db.each(sql, params, (err, row) => { + if (err) { reject(err); return; } + count++; + rowCb(row, count); + }, (err, totalRows) => { + if (err) reject(err); + else resolve({ count, totalRows }); + }); + }); +} + +// Open async db +function openAsync(dbPath) { + return new Promise((resolve, reject) => { + const db = new sqlite3Async.Database(dbPath, sqlite3Async.OPEN_READONLY, (err) => { + if (err) reject(err); else resolve(db); + }); + }); +} + +function closeAsync(db) { + return new Promise((resolve) => db.close(resolve)); +} + +// Base query +const BASE_SQL = `SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY<>'SY'`; +const SBD_SQL = `SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD'`; + +// --- Benchmark functions --- + +async function runB1(db) { + console.log('\n=== B1: db.all() full materialization (SBD ~23k rows) ==='); + await bench('db.all(SBD)', async () => { + const rows = await dbAll(db, SBD_SQL); + return { _rows: rows.length }; + }); +} + +async function runB2(db) { + console.log('\n=== B2: db.all() with LIMIT/OFFSET ==='); + for (const offset of [0, 100, 1000, 10000]) { + await bench(`LIMIT 100 OFFSET ${offset}`, async () => { + const rows = await dbAll(db, SBD_SQL + ` ORDER BY RXCUI LIMIT 100 OFFSET ${offset}`); + return { _rows: rows.length }; + }); + } +} + +async function runB3(db) { + console.log('\n=== B3: db.each() row-at-a-time (SBD) ==='); + + await bench('db.each() all rows', async () => { + let count = 0; + await dbEach(db, SBD_SQL, [], () => { count++; }); + return { _rows: count }; + }); + + // db.each with processing only first N rows (can't truly abort in sqlite3) + for (const n of [100, 1100]) { + await bench(`db.each() process first ${n}`, async () => { + let processed = 0; + await 
dbEach(db, SBD_SQL, [], (row, count) => { + if (processed < n) { + // Simulate processing + const obj = { code: row.RXCUI, display: row.STR, suppress: row.SUPPRESS }; + processed++; + } + }); + return { _rows: processed }; + }); + } +} + +async function runB4(bdb) { + console.log('\n=== B4: better-sqlite3 stmt.iterate() — lazy cursor ==='); + const stmt = bdb.prepare(SBD_SQL); + + await bench('iterate() all rows', () => { + let count = 0; + for (const row of stmt.iterate()) { count++; } + return { _rows: count }; + }); + + for (const n of [100, 1100, 5000]) { + await bench(`iterate() break after ${n}`, () => { + let count = 0; + for (const row of stmt.iterate()) { + count++; + if (count >= n) break; + } + return { _rows: count }; + }); + } +} + +async function runB5(bdb) { + console.log('\n=== B5: better-sqlite3 all() vs iterate() — full results ==='); + const stmt = bdb.prepare(SBD_SQL); + + await bench('stmt.all()', () => { + const rows = stmt.all(); + return { _rows: rows.length }; + }); + + await bench('stmt.iterate() → array', () => { + const rows = []; + for (const row of stmt.iterate()) { rows.push(row); } + return { _rows: rows.length }; + }); + + await bench('stmt.iterate() count only', () => { + let count = 0; + for (const row of stmt.iterate()) { count++; } + return { _rows: count }; + }); +} + +async function runB6(bdb) { + console.log('\n=== B6: better-sqlite3 prepared statement reuse ==='); + const stmt = bdb.prepare( + `SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY=?` + ); + + // First call includes any implicit caching/compilation + const ttys = ['SBD', 'SCD', 'IN', 'SBDC', 'SCDC']; + + await bench('prepare+iterate SBD (first)', () => { + const s = bdb.prepare(`SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY=?`); + let count = 0; + for (const row of s.iterate('SBD')) count++; + return { _rows: count }; + }); + + await bench('reuse stmt across 5 TTYs', () => { + let total = 0; + for (const tty of ttys) { + 
for (const row of stmt.iterate(tty)) total++; + } + return { _rows: total }; + }); + + // Compare: iterate same TTY 5 times (reuse amortization) + await bench('reuse stmt, SBD x5', () => { + let total = 0; + for (let i = 0; i < 5; i++) { + for (const row of stmt.iterate('SBD')) total++; + } + return { _rows: total }; + }); +} + +async function runB7(bdb) { + console.log('\n=== B7: UNION vs separate queries vs IN() ==='); + + await bench('UNION ALL (SBD+SCD)', () => { + const stmt = bdb.prepare( + `SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' + UNION ALL + SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SCD'` + ); + let count = 0; + for (const row of stmt.iterate()) count++; + return { _rows: count }; + }); + + await bench('Two separate queries', () => { + const s1 = bdb.prepare(`SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD'`); + const s2 = bdb.prepare(`SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SCD'`); + let count = 0; + for (const row of s1.iterate()) count++; + for (const row of s2.iterate()) count++; + return { _rows: count }; + }); + + await bench('IN (SBD, SCD)', () => { + const stmt = bdb.prepare( + `SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY IN ('SBD','SCD')` + ); + let count = 0; + for (const row of stmt.iterate()) count++; + return { _rows: count }; + }); +} + +async function runB8(bdb) { + console.log('\n=== B8: NOT IN for excludes ==='); + + // Get 50 real RxCUIs to use as excludes + const sample = bdb.prepare( + `SELECT RXCUI FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' LIMIT 50` + ).all().map(r => r.RXCUI); + + const excludeList = sample.map(c => `'${c}'`).join(','); + const excludeSet = new Set(sample); + + await bench('SQL NOT IN (50 codes)', () => { + const stmt = bdb.prepare( + `SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' AND RXCUI NOT IN (${excludeList})` + ); + let count = 0; + for 
(const row of stmt.iterate()) count++; + return { _rows: count }; + }); + + await bench('JS filter (50 codes)', () => { + const stmt = bdb.prepare(SBD_SQL); + let count = 0; + for (const row of stmt.iterate()) { + if (!excludeSet.has(row.RXCUI)) count++; + } + return { _rows: count }; + }); + + // Larger exclude: 500 codes + const sample500 = bdb.prepare( + `SELECT RXCUI FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' LIMIT 500` + ).all().map(r => r.RXCUI); + const excludeList500 = sample500.map(c => `'${c}'`).join(','); + const excludeSet500 = new Set(sample500); + + await bench('SQL NOT IN (500 codes)', () => { + const stmt = bdb.prepare( + `SELECT RXCUI, STR, SUPPRESS FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD' AND RXCUI NOT IN (${excludeList500})` + ); + let count = 0; + for (const row of stmt.iterate()) count++; + return { _rows: count }; + }); + + await bench('JS filter (500 codes)', () => { + const stmt = bdb.prepare(SBD_SQL); + let count = 0; + for (const row of stmt.iterate()) { + if (!excludeSet500.has(row.RXCUI)) count++; + } + return { _rows: count }; + }); +} + +async function runB9(bdb) { + console.log('\n=== B9: Row construction cost ==='); + const stmt = bdb.prepare(SBD_SQL); + + await bench('Count only (no construction)', () => { + let count = 0; + for (const row of stmt.iterate()) count++; + return { _rows: count }; + }); + + await bench('Minimal object { code, display, suppress }', () => { + let count = 0; + for (const row of stmt.iterate()) { + const obj = { code: row.RXCUI, display: row.STR, suppress: row.SUPPRESS === '1' }; + count++; + } + return { _rows: count }; + }); + + await bench('Rich object (FHIR-like entry)', () => { + let count = 0; + for (const row of stmt.iterate()) { + const entry = { + system: 'http://www.nlm.nih.gov/research/umls/rxnorm', + code: row.RXCUI, + display: row.STR, + inactive: row.SUPPRESS === '1', + designation: [{ language: 'en', value: row.STR }], + property: row.SUPPRESS !== '1' + ? 
[] + : [{ code: 'status', valueCode: 'inactive' }], + }; + count++; + } + return { _rows: count }; + }); + + await bench('Rich object + Map dedup check', () => { + let count = 0; + const map = new Map(); + for (const row of stmt.iterate()) { + const key = `http://www.nlm.nih.gov/research/umls/rxnorm|${row.RXCUI}`; + if (!map.has(key)) { + const entry = { + system: 'http://www.nlm.nih.gov/research/umls/rxnorm', + code: row.RXCUI, + display: row.STR, + inactive: row.SUPPRESS === '1', + designation: [{ language: 'en', value: row.STR }], + property: row.SUPPRESS !== '1' + ? [] + : [{ code: 'status', valueCode: 'inactive' }], + }; + map.set(key, entry); + count++; + } + } + return { _rows: count }; + }); +} + +async function runB10(bdb) { + console.log('\n=== B10: OFFSET scan cost (all TTYs ~250k rows) ==='); + + for (const offset of [0, 100, 1000, 10000, 50000, 100000]) { + await bench(`LIMIT 100 OFFSET ${offset}`, () => { + const stmt = bdb.prepare(BASE_SQL + ` ORDER BY RXCUI LIMIT 100 OFFSET ${offset}`); + const rows = stmt.all(); + return { _rows: rows.length }; + }); + } + + console.log('\n (compare: no ORDER BY)'); + for (const offset of [0, 10000, 50000, 100000]) { + await bench(`no ORDER LIMIT 100 OFFSET ${offset}`, () => { + const stmt = bdb.prepare(BASE_SQL + ` LIMIT 100 OFFSET ${offset}`); + const rows = stmt.all(); + return { _rows: rows.length }; + }); + } +} + +// --- Bonus: async vs sync package comparison --- +async function runPackageComparison(asyncDb, syncDb) { + console.log('\n=== BONUS: async sqlite3 vs better-sqlite3 (SBD all rows) ==='); + + await bench('async sqlite3 db.all()', async () => { + const rows = await dbAll(asyncDb, SBD_SQL); + return { _rows: rows.length }; + }); + + await bench('sync better-sqlite3 stmt.all()', () => { + const rows = syncDb.prepare(SBD_SQL).all(); + return { _rows: rows.length }; + }); + + await bench('sync better-sqlite3 stmt.iterate()', () => { + let count = 0; + for (const row of syncDb.prepare(SBD_SQL).iterate()) 
count++; + return { _rows: count }; + }); +} + +// --- Main --- +async function main() { + console.log(`SQLite Microbenchmarks — RxNorm`); + console.log(`DB: ${DB_PATH}`); + console.log(`Iterations: ${ITERATIONS} (warmup: ${WARMUP})`); + console.log(`Node: ${process.version}`); + + // Open async db + const asyncDb = await openAsync(DB_PATH); + + // Get row count for reference + const countRow = await dbAll(asyncDb, `SELECT COUNT(*) as cnt FROM rxnconso WHERE SAB='RXNORM' AND TTY='SBD'`); + console.log(`SBD rows: ${countRow[0].cnt}`); + const allCount = await dbAll(asyncDb, `SELECT COUNT(*) as cnt FROM rxnconso WHERE SAB='RXNORM' AND TTY<>'SY'`); + console.log(`All non-SY rows: ${allCount[0].cnt}`); + + // --- Async sqlite3 benchmarks --- + await runB1(asyncDb); + await runB2(asyncDb); + await runB3(asyncDb); + + // --- Better-sqlite3 benchmarks --- + let syncDb; + if (BetterSqlite3) { + syncDb = new BetterSqlite3(DB_PATH, { readonly: true }); + + await runB4(syncDb); + await runB5(syncDb); + await runB6(syncDb); + await runB7(syncDb); + await runB8(syncDb); + await runB9(syncDb); + await runB10(syncDb); + + // Package comparison + await runPackageComparison(asyncDb, syncDb); + + syncDb.close(); + } else { + console.log('\n[SKIPPED] better-sqlite3 benchmarks (package not installed)'); + } + + await closeAsync(asyncDb); + + console.log('\n=== Done ==='); +} + +main().catch(err => { + console.error(err); + process.exit(1); +}); diff --git a/scripts/test-expand-cross-system.js b/scripts/test-expand-cross-system.js new file mode 100644 index 0000000..7caf66a --- /dev/null +++ b/scripts/test-expand-cross-system.js @@ -0,0 +1,1307 @@ +#!/usr/bin/env node +'use strict'; + +/** + * Comprehensive expandForValueSet tests: richer include/exclude combinations + * and cross-system (RxNorm + LOINC) ValueSets. 
+ * + * Tests exercise: + * - Filter-based excludes that fully cover, partially cover, or don't overlap includes + * - Multi-include with multi-exclude using filters on both sides + * - Cross-system ValueSets (RxNorm + LOINC includes, excludes across systems) + * - Edge cases: exclude superset of include, empty result sets, disjoint exclude + * + * Usage: node scripts/test-expand-cross-system.js [--full] + */ + +const http = require('http'); +const fs = require('fs'); +const { spawn } = require('child_process'); +const path = require('path'); + +const PORT = 3000; +const REF_PORT = 3001; +const BASE_URL = `http://localhost:${PORT}/r4`; +const REF_URL = `http://localhost:${REF_PORT}/r4`; +const SERVER_START_TIMEOUT = 300000; +const LIBRARY_CONFIG = process.env.TEST_LIBRARY_CONFIG || 'tx/tx.all-v0.yml'; +const BASELINE_CONFIG = process.env.TEST_BASELINE_CONFIG || 'tx/tx.upstream-baseline.yml'; +const HAS_BASELINE = LIBRARY_CONFIG !== BASELINE_CONFIG; + +const RXSYS = 'http://www.nlm.nih.gov/research/umls/rxnorm'; +const LNSYS = 'http://loinc.org'; + +// --- Test helpers --- +function makeVS(compose) { + return { + resourceType: 'Parameters', + parameter: [ + { name: 'valueSet', resource: { resourceType: 'ValueSet', compose } }, + ...(compose._params || []), + ], + }; +} + +// ============================================================ +// RxNorm-only: richer include/exclude patterns +// Includes STY-based excludes which stress-test query planning — +// if these hang, that's a real issue (blocks the event loop). 
// ============================================================
/**
 * RxNorm-only expansion scenarios that pair include clauses with exclude clauses.
 *
 * Every entry has a unique `name`, a human-readable `desc` and a request `body`
 * produced by makeVS(); most entries also carry a `drainCount`.
 * NOTE(review): `drainCount` is presumably read by the runner defined elsewhere
 * in this file — confirm its exact meaning there.
 */
const RXNORM_TESTS = (() => {
  // Clause builders local to this table. Each call returns fresh objects, so no
  // two tests share the same clause instance.
  const byFilter = (property, op, value) => ({ system: RXSYS, filter: [{ property, op, value }] });
  const byCodes = (...codes) => ({ system: RXSYS, concept: codes.map((code) => ({ code })) });
  const countParam = (n) => [{ name: 'count', valueInteger: n }];

  return [
    {
      name: 'rx-exclude-same-tty',
      desc: 'Include TTY=SBD, exclude TTY=SBD (full cover → 0 results)',
      body: makeVS({
        include: [byFilter('TTY', '=', 'SBD')],
        exclude: [byFilter('TTY', '=', 'SBD')],
        _params: countParam(10),
      }),
    },
    {
      name: 'rx-exclude-disjoint-tty',
      desc: 'Include TTY=SBD, exclude TTY=IN (disjoint → no effect)',
      drainCount: 25000,
      body: makeVS({
        include: [byFilter('TTY', '=', 'SBD')],
        exclude: [byFilter('TTY', '=', 'IN')],
        _params: countParam(10),
      }),
    },
    {
      name: 'rx-exclude-partial-tty',
      desc: 'Include TTY in SBD,SCD, exclude TTY=SBD (partial → only SCD left)',
      drainCount: 25000,
      body: makeVS({
        include: [byFilter('TTY', 'in', 'SBD,SCD')],
        exclude: [byFilter('TTY', '=', 'SBD')],
        _params: countParam(10),
      }),
    },
    {
      name: 'rx-sty-exclude-overlapping',
      desc: 'Include TTY=SBD, exclude STY=T200 (cross-property partial overlap)',
      drainCount: 25000,
      body: makeVS({
        include: [byFilter('TTY', '=', 'SBD')],
        exclude: [byFilter('STY', '=', 'T200')],
        _params: countParam(10),
      }),
    },
    {
      // No count parameter on this one: the include is already a short explicit list.
      name: 'rx-concepts-exclude-tty-filter',
      desc: '10 concepts include, exclude TTY=SBD (removes SBD members)',
      body: makeVS({
        include: [byCodes(
          '197381', '197382', '197383',
          '197384', '197385', '313782',
          '312961', '312962', '310798',
          '308056',
        )],
        exclude: [byFilter('TTY', '=', 'SBD')],
      }),
    },
    {
      name: 'rx-filter-exclude-20-concepts',
      desc: 'Include TTY=SCD, exclude 20 specific codes',
      drainCount: 20000,
      body: makeVS({
        include: [byFilter('TTY', '=', 'SCD')],
        exclude: [byCodes(
          '197381', '197382', '197383',
          '197384', '197385', '197386',
          '197387', '197388', '197389',
          '197390', '197391', '197392',
          '197393', '197394', '197395',
          '197396', '197397', '197398',
          '197399', '197400',
        )],
        _params: countParam(10),
      }),
    },
    {
      name: 'rx-multi-include-sty+concepts-exclude',
      desc: 'SBD+SCD includes, exclude STY=T200 + 5 concepts',
      drainCount: 40000,
      body: makeVS({
        include: [
          byFilter('TTY', '=', 'SBD'),
          byFilter('TTY', '=', 'SCD'),
        ],
        exclude: [
          byFilter('STY', '=', 'T200'),
          byCodes('197381', '197382', '197383', '197384', '197385'),
        ],
        _params: countParam(10),
      }),
    },
    {
      name: 'rx-3-tty-include-2-tty-exclude',
      desc: 'Include TTY in SBD,SCD,GPCK, exclude TTY in SBD,GPCK',
      drainCount: 25000,
      body: makeVS({
        include: [byFilter('TTY', 'in', 'SBD,SCD,GPCK')],
        exclude: [byFilter('TTY', 'in', 'SBD,GPCK')],
        _params: countParam(10),
      }),
    },
  ];
})();

// ============================================================
// LOINC-only: richer include/exclude patterns
//
// ============================================================
/**
 * LOINC-only expansion scenarios (property filters and explicit concept lists,
 * each combined with excludes). Entries have `name`, `desc`, an optional
 * `drainCount` and a request `body` produced by makeVS().
 */
const LOINC_TESTS = [
  {
    name: 'ln-exclude-filter-partial',
    desc: 'Include CLASS=LP7786-9, exclude COMPONENT=LP14635-4 (partial)',
    drainCount: 5000,
    body: makeVS({
      include: [{ system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] }],
      exclude: [{ system: LNSYS, filter: [{ property: 'COMPONENT', op: '=', value: 'LP14635-4' }] }],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'ln-exclude-same-filter',
    desc: 'Include CLASS=LP7786-9, exclude CLASS=LP7786-9 (full cover → 0)',
    body: makeVS({
      include: [{ system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] }],
      exclude: [{ system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] }],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'ln-exclude-disjoint',
    desc: 'Include CLASS=LP7786-9, exclude CLASS=LP7819-8 (disjoint)',
    drainCount: 5000,
    body: makeVS({
      include: [{ system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] }],
      exclude: [{ system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7819-8' }] }],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'ln-concepts-exclude-filter',
    desc: '5 LOINC codes include, exclude CLASS=LP7786-9 (removes CHEM)',
    body: makeVS({
      include: [{ system: LNSYS, concept: [
        { code: '2339-0' }, { code: '2345-7' }, { code: '718-7' },
        { code: '4548-4' }, { code: '14749-6' },
      ]}],
      exclude: [{ system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] }],
    }),
  },
  {
    name: 'ln-multi-include-multi-exclude',
    desc: 'CHEM + HEM/BC, exclude COMPONENT=Glucose + 3 concepts',
    drainCount: 8000,
    body: makeVS({
      include: [
        { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] },
        { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7803-2' }] },
      ],
      exclude: [
        { system: LNSYS, filter: [{ property: 'COMPONENT', op: '=', value: 'LP14635-4' }] },
        { system: LNSYS, concept: [{ code: '2339-0' }, { code: '2345-7' }, { code: '718-7' }] },
      ],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
];

// ============================================================
// Cross-system: RxNorm + LOINC in same ValueSet
// ============================================================
/**
 * Scenarios whose compose mixes RxNorm and LOINC clauses, followed by two
 * RxNorm-only cases that use a filter property named BOGUS_PROPERTY to
 * exercise the unsupported-filter fallback described in their `desc`.
 */
const CROSS_SYSTEM_TESTS = [
  {
    name: 'cross-rx-ln-include',
    desc: 'Include RxNorm TTY=SBD + LOINC CLASS=CHEM, count=10',
    drainCount: 30000,
    body: makeVS({
      include: [
        { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] },
        { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] },
      ],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'cross-rx-include-ln-exclude',
    desc: 'Include RxNorm TTY=SBD + LOINC CHEM, exclude LOINC COMPONENT=Glucose',
    drainCount: 30000,
    body: makeVS({
      include: [
        { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] },
        { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] },
      ],
      exclude: [
        { system: LNSYS, filter: [{ property: 'COMPONENT', op: '=', value: 'LP14635-4' }] },
      ],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'cross-ln-include-rx-exclude',
    desc: 'Include LOINC CHEM + RxNorm SBD, exclude RxNorm STY=T200',
    drainCount: 30000,
    body: makeVS({
      include: [
        { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] },
        { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] },
      ],
      exclude: [
        { system: RXSYS, filter: [{ property: 'STY', op: '=', value: 'T200' }] },
      ],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'cross-concepts-both-systems',
    desc: 'Include 3 RxNorm concepts + 3 LOINC concepts',
    body: makeVS({
      include: [
        { system: RXSYS, concept: [{ code: '197381' }, { code: '197382' }, { code: '197383' }] },
        { system: LNSYS, concept: [{ code: '2339-0' }, { code: '2345-7' }, { code: '718-7' }] },
      ],
    }),
  },
  {
    name: 'cross-concepts-exclude-concepts',
    desc: '3 RxNorm + 3 LOINC concepts, exclude 1 from each system',
    body: makeVS({
      include: [
        { system: RXSYS, concept: [{ code: '197381' }, { code: '197382' }, { code: '197383' }] },
        { system: LNSYS, concept: [{ code: '2339-0' }, { code: '2345-7' }, { code: '718-7' }] },
      ],
      exclude: [
        { system: RXSYS, concept: [{ code: '197381' }] },
        { system: LNSYS, concept: [{ code: '718-7' }] },
      ],
    }),
  },
  {
    name: 'cross-filter-exclude-cross',
    desc: 'Include RxNorm SBD + LOINC CHEM, exclude both RxNorm T200 + LOINC Glucose',
    drainCount: 30000,
    body: makeVS({
      include: [
        { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] },
        { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] },
      ],
      exclude: [
        { system: RXSYS, filter: [{ property: 'STY', op: '=', value: 'T200' }] },
        { system: LNSYS, filter: [{ property: 'COMPONENT', op: '=', value: 'LP14635-4' }] },
      ],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'cross-mixed-concepts-filters',
    desc: 'RxNorm concepts + LOINC filter, exclude LOINC concepts + RxNorm filter',
    body: makeVS({
      include: [
        { system: RXSYS, concept: [{ code: '197381' }, { code: '197382' }, { code: '313782' }] },
        { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7803-2' }] },
      ],
      exclude: [
        { system: LNSYS, concept: [{ code: '718-7' }] },
        { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] },
      ],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  // Unsupported filter property → forces fallback to baseline path (~1.0x)
  {
    name: 'rx-unsupported-filter-fallback',
    desc: 'Include with unsupported filter property → baseline fallback',
    body: makeVS({
      include: [
        { system: RXSYS, filter: [{ property: 'BOGUS_PROPERTY', op: '=', value: 'XYZ' }] },
      ],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'rx-unsupported-exclude-filter-fallback',
    desc: 'Supported include + unsupported exclude filter → exclude falls back',
    body: makeVS({
      include: [
        { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] },
      ],
      exclude: [
        { system: RXSYS, filter: [{ property: 'BOGUS_PROPERTY', op: '=', value: 'XYZ' }] },
      ],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
];

// Code system URI used for every SNOMED CT clause in this file.
const SCTSYS = 'http://snomed.info/sct';

// ============================================================
// Snippet library for fuzz-composing ValueSets
// Each snippet is a partial compose fragment (include or exclude clause).
// The generator mixes and matches these into Frankenstein ValueSets.
// ============================================================
const SNIPPETS = {
  rxIncludes: [
    { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] },
    { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SCD' }] },
    { system: RXSYS, filter: [{ property: 'TTY', op: 'in', value: 'SBD,SCD' }] },
    { system: RXSYS, filter: [{ property: 'STY', op: '=', value: 'T200' }] },
    // RxNorm codes are 2-7 digits; narrow prefixes keep results small
    { system: RXSYS, filter: [{ property: 'code', op: 'regex', value: '10[0-9]{2}' }] },     // 4-digit 10xx (~50 codes)
    { system: RXSYS, filter: [{ property: 'code', op: 'regex', value: '200[0-9]{3}' }] },    // 6-digit 200xxx (~100 codes)
    { system: RXSYS, filter: [
      { property: 'code', op: 'regex', value: '1[0-9]{5}' },
      { property: 'TTY', op: '=', value: 'SBD' },
    ]},
    { system: RXSYS, concept: [{ code: '197381' }, { code: '312961' }, { code: '1000000' }] },
  ],
  rxExcludes: [
    { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] },
    { system: RXSYS, filter: [{ property: 'STY', op: '=', value: 'T200' }] },
    { system: RXSYS, filter: [{ property: 'code', op: 'regex', value: '10[0-9]{4}' }] },     // 6-digit 10xxxx
    { system: RXSYS, concept: [{ code: '197381' }, { code: '197382' }] },
  ],
  lnIncludes: [
    { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] },
    { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7819-8' }] },
    // LOINC codes are NNNNN-N; constrain first 2+ digits
    { system: LNSYS, filter: [{ property: 'code', op: 'regex', value: '123[0-9]{2}-[0-9]' }] },  // 123xx-N (~10 codes)
    { system: LNSYS, filter: [{ property: 'code', op: 'regex', value: '45[0-9]{3}-[0-9]' }] },   // 45xxx-N (~100 codes)
    { system: LNSYS, filter: [{ property: 'COMPONENT', op: '=', value: 'LP14635-4' }] },
    { system: LNSYS, concept: [{ code: '2160-0' }, { code: '718-7' }, { code: '2345-7' }] },
  ],
  lnExcludes: [
    { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] },
    { system: LNSYS, filter: [{ property: 'COMPONENT', op: '=', value: 'LP14635-4' }] },
    { system: LNSYS, filter: [{ property: 'code', op: 'regex', value: '1[0-9]{3}-[0-9]' }] },    // 1xxx-N
    { system: LNSYS, concept: [{ code: '2160-0' }, { code: '718-7' }] },
  ],
  sctIncludes: [
    { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '73211009' }] },    // Diabetes (~200)
    { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '85562004' }] },    // Hand (~30)
    { system: SCTSYS, filter: [{ property: 'concept', op: 'descendent-of', value: '73211009' }] },
    // SNOMED codes are 6-18 digits; use specific 3-digit prefix + length to bound
    { system: SCTSYS, filter: [{ property: 'code', op: 'regex', value: '732[0-9]{5}' }] },   // 8-digit 732xxxxx (~70 codes)
    { system: SCTSYS, filter: [{ property: 'code', op: 'regex', value: '4405[0-9]{4}' }] },  // 8-digit 4405xxxx (~50 codes)
    { system: SCTSYS, concept: [{ code: '73211009' }, { code: '44054006' }, { code: '46635009' }] },
  ],
  sctExcludes: [
    { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '44054006' }] },    // Type 2 DM
    { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '7569003' }] },     // Finger
    { system: SCTSYS, filter: [{ property: 'code', op: 'regex', value: '732[0-9]{5}' }] },
    { system: SCTSYS, concept: [{ code: '44054006' }] },
  ],
};

/**
 * Returns one element of `arr` chosen with Math.random() (not reproducible).
 *
 * @param {Array} arr candidates
 * @returns {*} a random element, or undefined when `arr` is empty
 */
function pick(arr) { return arr[Math.floor(Math.random() * arr.length)]; }

/**
 * Returns up to `n` distinct elements of `arr` in random order, using
 * Math.random() (not reproducible). `arr` itself is not modified.
 *
 * @param {Array} arr candidates
 * @param {number} n maximum number of elements to return
 * @returns {Array} a new array of min(n, arr.length) elements
 */
function pickN(arr, n) {
  const pool = [...arr];
  const take = Math.max(0, Math.min(n, pool.length));
  // Partial Fisher–Yates shuffle: only the first `take` slots need to be drawn.
  // (A random sort comparator is inconsistent, so its result is biased.)
  for (let i = 0; i < take; i++) {
    const j = i + Math.floor(Math.random() * (pool.length - i));
    [pool[i], pool[j]] = [pool[j], pool[i]];
  }
  return pool.slice(0, take);
}

/**
 * Generates `count` fuzz tests by combining 1-3 include snippets and 0-2
 * exclude snippets drawn from SNIPPETS across all three code systems.
 *
 * The same `seed` always yields the same list of tests: all randomness comes
 * from a small seeded generator, never from Math.random().
 *
 * @param {number} count number of tests to generate
 * @param {number} seed initial state of the pseudo-random generator
 * @returns {Object[]} tests shaped like the hand-written ones:
 *   `{ name, desc, skipBaseline: true, body }` where `body` comes from makeVS()
 */
function generateFuzzTests(count, seed) {
  // Simple seedable PRNG for reproducibility (linear congruential step, 31-bit state)
  let state = seed;
  const rand = () => {
    state = (state * 1664525 + 1013904223) & 0x7fffffff;
    // Divide by 2^31 rather than 2^31 - 1 so the result is always in [0, 1);
    // a value of exactly 1 would index past the end of an array.
    return state / 0x80000000;
  };
  // Seeded partial Fisher–Yates shuffle. Sorting with a random comparator would
  // make the outcome depend on the engine's sort algorithm, which defeats the
  // purpose of seeding.
  const rpickN = (arr, n) => {
    const pool = [...arr];
    const take = Math.min(n, pool.length);
    for (let i = 0; i < take; i++) {
      const j = i + Math.floor(rand() * (pool.length - i));
      [pool[i], pool[j]] = [pool[j], pool[i]];
    }
    return pool.slice(0, take);
  };

  const allIncludes = [...SNIPPETS.rxIncludes, ...SNIPPETS.lnIncludes, ...SNIPPETS.sctIncludes];
  const allExcludes = [...SNIPPETS.rxExcludes, ...SNIPPETS.lnExcludes, ...SNIPPETS.sctExcludes];

  const tests = [];
  for (let i = 0; i < count; i++) {
    // 1-3 includes, 0-2 excludes
    const numInc = 1 + Math.floor(rand() * 3);
    const numExc = Math.floor(rand() * 3);
    const includes = rpickN(allIncludes, numInc);
    const excludes = rpickN(allExcludes, numExc);
    const clauses = [...includes, ...excludes];

    // Short label of the code systems involved, e.g. "ln+rx".
    const systems = new Set(clauses.map((clause) => clause.system));
    const sysLabel = [...systems].map((uri) =>
      uri.includes('rxnorm') ? 'rx' : uri.includes('loinc') ? 'ln' : 'sct'
    ).sort().join('+');

    const hasRegex = clauses.some((clause) =>
      (clause.filter || []).some((f) => f.op === 'regex')
    );
    const hasConcepts = clauses.some((clause) => clause.concept);
    const hasHierarchy = clauses.some((clause) =>
      (clause.filter || []).some((f) => f.op === 'is-a' || f.op === 'descendent-of')
    );

    const tags = [sysLabel];
    if (hasRegex) tags.push('regex');
    if (hasConcepts) tags.push('concepts');
    if (hasHierarchy) tags.push('hier');
    if (numExc > 0) tags.push(`exc${numExc}`);

    tests.push({
      name: `fuzz-${i + 1}-${tags.join('-')}`,
      desc: `Fuzz: ${numInc} includes, ${numExc} excludes [${tags.join(', ')}]`,
      skipBaseline: true,
      body: makeVS({
        include: includes,
        ...(excludes.length > 0 ? { exclude: excludes } : {}),
        _params: [{ name: 'count', valueInteger: 10 }],
      }),
    });
  }
  return tests;
}

// Hand-crafted regex + combination tests
const REGEX_TESTS = [
  {
    name: 'rx-code-regex-numeric-range',
    desc: 'RxNorm codes matching 4-digit pattern 10xx',
    drainCount: 500,
    skipBaseline: true,
    body: makeVS({
      include: [{ system: RXSYS, filter: [
        { property: 'code', op: 'regex', value: '10[0-9]{2}' },
      ] }],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'rx-regex-plus-tty-filter',
    desc: 'RxNorm code regex combined with TTY=SBD property filter',
    drainCount: 500,
    skipBaseline: true,
    body: makeVS({
      include: [{ system: RXSYS, filter: [
        { property: 'code', op: 'regex', value: '1[0-9]{5}' },
        { property: 'TTY', op: '=', value: 'SBD' },
      ] }],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'ln-code-regex',
    desc: 'LOINC codes matching pattern like 12345-*',
    drainCount: 500,
    skipBaseline: true,
    body: makeVS({
      include: [{ system: LNSYS, filter: [
        { property: 'code', op: 'regex', value: '1234[0-9]-[0-9]' },
      ] }],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'rx-regex-exclude-concepts',
    desc: 'RxNorm regex include, exclude specific concepts',
    skipBaseline: true,
    body: makeVS({
      include: [{ system: RXSYS, filter: [
        { property: 'code', op: 'regex', value: '1000[0-9]{2}' },
      ] }],
      exclude: [{ system: RXSYS, concept: [{ code: '100008' }, { code: '100009' }] }],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    // NOTE(review): despite the name and desc, the exclude below is a TTY '='
    // property filter, not a regex; only the include uses a regex.
    name: 'rx-regex-exclude-regex',
    desc: 'RxNorm regex include, regex-based exclude via TTY',
    skipBaseline: true,
    body: makeVS({
      include: [{ system: RXSYS, filter: [
        { property: 'code', op: 'regex', value: '10[0-9]{3}' },
      ] }],
      exclude: [{ system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'cross-rx-regex-ln-class',
    desc: 'Cross-system: RxNorm regex + LOINC CLASS include',
    skipBaseline: true,
    body: makeVS({
      include: [
        { system: RXSYS, filter: [{ property: 'code', op: 'regex', value: '200[0-9]{3}' }] },
        { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] },
      ],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'cross-regex-both-systems',
    desc: 'Cross-system: regex on both RxNorm and LOINC',
    skipBaseline: true,
    body: makeVS({
      include: [
        { system: RXSYS, filter: [{ property: 'code', op: 'regex', value: '200[0-9]{3}' }] },
        { system: LNSYS, filter: [{ property: 'code', op: 'regex', value: '123[0-9]{2}-[0-9]' }] },
      ],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'rx-concepts-plus-regex-exclude',
    desc: 'RxNorm concept list include, regex-pattern exclude',
    skipBaseline: true,
    body: makeVS({
      include: [{ system: RXSYS, concept: [
        { code: '197381' }, { code: '197382' }, { code: '312961' },
        { code: '1000000' }, { code: '1000005' },
      ] }],
      exclude: [{ system: RXSYS, filter: [{ property: 'code', op: 'regex', value: '10000[0-9]+' }] }],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'sct-hier-plus-rx-regex',
    desc: 'Cross: SNOMED hierarchy + RxNorm regex, exclude SNOMED subtree',
    skipBaseline: true,
    body: makeVS({
      include: [
        { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '73211009' }] },
        { system: RXSYS, filter: [{ property: 'code', op: 'regex', value: '200[0-9]{3}' }] },
      ],
      exclude: [
        { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '44054006' }] },
      ],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    name: 'ln-regex-exclude-class',
    desc: 'LOINC regex include, CLASS-based exclude',
    skipBaseline: true,
    body: makeVS({
      include: [{ system: LNSYS, filter: [
        { property: 'code', op: 'regex', value: '[0-9]{4}-[0-9]' },
      ] }],
      exclude: [{ system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] }],
      _params: [{ name: 'count', valueInteger: 10 }],
    }),
  },
  {
    // The two search tests build the Parameters resource by hand (no makeVS)
    // so that a text `filter` parameter can sit next to the ValueSet.
    name: 'search-filter-plus-property',
    desc: 'Text search "tylenol" intersected with TTY=SBD',
    skipBaseline: true,
    body: {
      resourceType: 'Parameters',
      parameter: [
        { name: 'valueSet', resource: { resourceType: 'ValueSet', compose: {
          include: [{ system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }],
        }}},
        { name: 'filter', valueString: 'tylenol' },
        { name: 'count', valueInteger: 20 },
      ],
    },
  },
  {
    name: 'search-filter-plus-class',
    desc: 'Text search "glucose" intersected with LOINC CLASS=CHEM',
    skipBaseline: true,
    body: {
      resourceType: 'Parameters',
      parameter: [
        { name: 'valueSet', resource: { resourceType: 'ValueSet', compose: {
          include: [{ system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] }],
        }}},
        { name: 'filter', valueString: 'glucose' },
        { name: 'count', valueInteger: 10 },
      ],
    },
  },
];

// ============================================================
// SNOMED: hierarchy-based include/exclude patterns
// Tests is-a expansions with exclude subtrees — exercises
// closure table joins and NOT EXISTS pushdown.
// ============================================================
/**
 * SNOMED CT scenarios: an `is-a` subtree (or explicit concept list) is
 * included and another `is-a` subtree is excluded. Every entry has `name`,
 * `desc`, `drainCount` and a request `body` produced by makeVS() with count=20.
 */
const SNOMED_TESTS = (() => {
  // Builders local to this table; each call yields fresh objects.
  const subtreeOf = (conceptId) => ({
    system: SCTSYS,
    filter: [{ property: 'concept', op: 'is-a', value: conceptId }],
  });
  const pageOf20 = () => [{ name: 'count', valueInteger: 20 }];
  // Most cases share one shape: a single included subtree minus a single excluded one.
  const subtreeMinusSubtree = (includeRoot, excludeRoot) => makeVS({
    include: [subtreeOf(includeRoot)],
    exclude: [subtreeOf(excludeRoot)],
    _params: pageOf20(),
  });

  return [
    {
      name: 'sct-hand-minus-fingers',
      desc: 'Include is-a Hand (85562004), exclude is-a Finger (7569003) → non-finger hand parts',
      drainCount: 1000,
      body: subtreeMinusSubtree('85562004', '7569003'),
    },
    {
      name: 'sct-diabetes-minus-type2',
      desc: 'Include is-a Diabetes mellitus (73211009), exclude is-a Type 2 (44054006)',
      drainCount: 5000,
      body: subtreeMinusSubtree('73211009', '44054006'),
    },
    {
      name: 'sct-procedure-minus-surgical',
      desc: 'Include is-a Procedure (71388002), exclude is-a Surgical procedure (387713003)',
      drainCount: 100000,
      body: subtreeMinusSubtree('71388002', '387713003'),
    },
    {
      name: 'sct-eye-minus-retina',
      desc: 'Include is-a Eye structure (81745001), exclude is-a Retina (5665001)',
      drainCount: 1000,
      body: subtreeMinusSubtree('81745001', '5665001'),
    },
    {
      name: 'sct-exclude-same-subtree',
      desc: 'Include is-a Fracture (125605004), exclude same → 0 results',
      drainCount: 5000,
      body: subtreeMinusSubtree('125605004', '125605004'),
    },
    {
      name: 'sct-concepts-exclude-subtree',
      desc: '5 diabetes concepts, exclude is-a Type 2 (44054006)',
      drainCount: 100,
      body: makeVS({
        include: [{
          system: SCTSYS,
          concept: [
            '73211009',   // Diabetes mellitus
            '44054006',   // Type 2 DM
            '46635009',   // Type 1 DM
            '11530004',   // Brittle DM
            '237599002',  // Insulin-treated Type 2 DM
          ].map((code) => ({ code })),
        }],
        exclude: [subtreeOf('44054006')],
        _params: pageOf20(),
      }),
    },
  ];
})();

// ============================================================
// Real-world IG ValueSets — tests modeled on actual FHIR IGs
// (IPS, US Core, FHIR R4 Core). These exercise patterns found
// in production use: massive hierarchies, multi-exclude,
// multi-property AND filters, cross-system concepts+filters,
// and union-of-many-roots.
+// ============================================================ +const REALWORLD_TESTS = [ + // --- Massive SNOMED hierarchies (FHIR R4 Core) --- + { + name: 'rw-sct-all-procedures', + desc: 'FHIR R4 Procedure Codes: is-a 71388002 (Procedure) — ~59K codes', + skipBaseline: true, + body: makeVS({ + include: [{ system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '71388002' }] }], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + { + name: 'rw-sct-clinical-findings', + desc: 'FHIR R4 Clinical Findings: is-a 404684003 — ~124K codes, largest hierarchy', + skipBaseline: true, + body: makeVS({ + include: [{ system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '404684003' }] }], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + { + name: 'rw-sct-body-structures', + desc: 'FHIR R4 Body Structures: is-a 442083009 — ~37K codes', + skipBaseline: true, + body: makeVS({ + include: [{ system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '442083009' }] }], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- SNOMED medication codes (3-root union, FHIR R4 Core) --- + { + name: 'rw-sct-medications-3root', + desc: 'FHIR R4 Medication Codes: 3 is-a roots (drug, pharma product, immunologic)', + skipBaseline: true, + body: makeVS({ + include: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '410942007' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '373873005' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '106181007' }] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- IPS Procedures: include + 8 hierarchy excludes (the gold standard) --- + { + name: 'rw-ips-procedures-8exc', + desc: 'IPS Procedures: all procedures minus 8 admin/bloodbank/community subtrees', + skipBaseline: true, + body: makeVS({ + include: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'descendent-of', 
value: '71388002' }] }, + ], + exclude: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '14734007' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '59524001' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '389067005' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '442006003' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '225288009' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '308335008' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '710135002' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '389084004' }] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- IPS Medications: medicinal products minus vaccines --- + { + name: 'rw-ips-meds-minus-vaccines', + desc: 'IPS Medications: medicinal products (763158003) minus vaccines (787859002)', + skipBaseline: true, + body: makeVS({ + include: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'descendent-of', value: '763158003' }] }, + ], + exclude: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '787859002' }] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- IPS Allergy Reaction: 19-root union of small hierarchies --- + { + name: 'rw-ips-allergy-reaction-19root', + desc: 'IPS Allergy Reactions: union of 19 SNOMED is-a roots (bronchospasm, seizure, etc)', + skipBaseline: true, + body: makeVS({ + include: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '4386001' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '9826008' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '39579001' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '41291007' }] }, + { system: SCTSYS, filter: [{ property: 
'concept', op: 'is-a', value: '49727002' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '62315008' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '91175000' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '126485001' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '195967001' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '267036007' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '271807003' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '410430005' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '418363000' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '422400008' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '422587007' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '698247007' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '702809001' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '768962006' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '781682005' }] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- IPS Microorganisms: 5 organism taxonomy roots --- + { + name: 'rw-ips-microorganisms', + desc: 'IPS Microorganisms: bacteria, fungi, viruses, cestoda/nematoda, prions', + skipBaseline: true, + body: makeVS({ + include: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'descendent-of', value: '409822003' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'descendent-of', value: '414561005' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'descendent-of', value: '49872002' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'descendent-of', value: '441649000' }] }, + { system: 
SCTSYS, filter: [{ property: 'concept', op: 'descendent-of', value: '84676004' }] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- Condition/Problem/Diagnosis: hierarchy + concept list (FHIR R4) --- + { + name: 'rw-condition-code', + desc: 'FHIR R4 Condition Codes: Clinical finding is-a + "No current problems" concept', + skipBaseline: true, + body: makeVS({ + include: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '404684003' }] }, + { system: SCTSYS, concept: [{ code: '160245001' }] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- LOINC Document Type Codes (SCALE_TYP=Doc) --- + { + name: 'rw-loinc-doc-types', + desc: 'FHIR R4 Document Type: LOINC SCALE_TYP=LP32888-7 (Doc) — ~12K document codes', + skipBaseline: true, + body: makeVS({ + include: [{ system: LNSYS, filter: [{ property: 'SCALE_TYP', op: '=', value: 'LP32888-7' }] }], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- IPS Lab Results: LOINC CLASSTYPE=1 + STATUS=ACTIVE, minus 4 CLASS excludes --- + { + name: 'rw-ips-lab-results', + desc: 'IPS Lab Results: LOINC CLASSTYPE=1 AND STATUS=ACTIVE, exclude 4 CLASS values', + skipBaseline: true, + body: makeVS({ + include: [{ system: LNSYS, filter: [ + { property: 'STATUS', op: '=', value: 'ACTIVE' }, + { property: 'CLASSTYPE', op: '=', value: '1' }, + ] }], + exclude: [ + { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP62148-9' }] }, + { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP175679-2' }] }, + { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP7785-1' }] }, + { system: LNSYS, filter: [{ property: 'CLASS', op: '=', value: 'LP94892-4' }] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- IPS Radiology Observations: LOINC STATUS=ACTIVE AND CLASS (Radiology) --- + { + name: 'rw-ips-radiology-obs', + desc: 'IPS Radiology: LOINC STATUS=ACTIVE AND CLASS=LP29684-5 
(multi-property AND)', + skipBaseline: true, + body: makeVS({ + include: [{ system: LNSYS, filter: [ + { property: 'STATUS', op: '=', value: 'ACTIVE' }, + { property: 'CLASS', op: '=', value: 'LP29684-5' }, + ] }], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- RxNorm clinical drugs + LOINC lab tests: realistic cross-system --- + { + name: 'rw-cross-rx-drugs-ln-labs', + desc: 'Cross-system: RxNorm SBD drugs + LOINC active lab codes (CLASSTYPE=1)', + skipBaseline: true, + body: makeVS({ + include: [ + { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }, + { system: LNSYS, filter: [ + { property: 'STATUS', op: '=', value: 'ACTIVE' }, + { property: 'CLASSTYPE', op: '=', value: '1' }, + ] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- Substance codes: 2-root union (FHIR R4 Core) --- + { + name: 'rw-sct-substances-2root', + desc: 'FHIR R4 Substance: Substance (105590001) + Pharma products (373873005)', + skipBaseline: true, + body: makeVS({ + include: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '105590001' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '373873005' }] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- IPS cross-system medication example: SNOMED + RxNorm concepts --- + { + name: 'rw-ips-meds-sct-rx-concepts', + desc: 'IPS Medication Example: 7 SNOMED + 3 RxNorm concept codes', + skipBaseline: true, + body: makeVS({ + include: [ + { system: SCTSYS, concept: [ + { code: '777067000' }, { code: '774587000' }, { code: '776556004' }, + { code: '774409003' }, { code: '780130002' }, { code: '778315007' }, + { code: '779725005' }, + ] }, + { system: RXSYS, concept: [ + { code: '331055' }, { code: '437158' }, { code: '332122' }, + ] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- Surgical procedures minus fracture management (realistic clinical) --- + { + name: 
'rw-surgical-minus-fracture', + desc: 'Surgical procedure (387713003) minus fracture repair (125605004)', + skipBaseline: true, + body: makeVS({ + include: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '387713003' }] }, + ], + exclude: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '125605004' }] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- IPS Problems: 3 descendent-of + 1 is-a (massive multi-root) --- + { + name: 'rw-ips-problems-4root', + desc: 'IPS Problems: Clinical finding + Context + Events + No current problems', + skipBaseline: true, + body: makeVS({ + include: [ + { system: SCTSYS, filter: [{ property: 'concept', op: 'descendent-of', value: '404684003' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'descendent-of', value: '243796009' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'descendent-of', value: '272379006' }] }, + { system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '160245001' }] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + + // --- Text search + hierarchy: "heart" conditions --- + { + name: 'rw-heart-conditions-search', + desc: 'Search "heart" within Clinical Findings hierarchy (search+filter combo)', + skipBaseline: true, + body: makeVS({ + include: [{ system: SCTSYS, filter: [{ property: 'concept', op: 'is-a', value: '404684003' }] }], + _params: [ + { name: 'count', valueInteger: 50 }, + { name: 'filter', valueString: 'heart' }, + ], + }), + }, + + // --- Text search + LOINC property: "glucose" lab tests --- + { + name: 'rw-glucose-lab-search', + desc: 'Search "glucose" within LOINC active lab codes', + skipBaseline: true, + body: makeVS({ + include: [{ system: LNSYS, filter: [ + { property: 'STATUS', op: '=', value: 'ACTIVE' }, + { property: 'CLASSTYPE', op: '=', value: '1' }, + ] }], + _params: [ + { name: 'count', valueInteger: 50 }, + { name: 'filter', valueString: 'glucose' }, 
+ ], + }), + }, + + // --- Text search + RxNorm TTY: "metformin" branded drugs --- + { + name: 'rw-metformin-sbd-search', + desc: 'Search "metformin" within RxNorm SBD (branded drugs)', + skipBaseline: true, + body: makeVS({ + include: [{ system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }], + _params: [ + { name: 'count', valueInteger: 50 }, + { name: 'filter', valueString: 'metformin' }, + ], + }), + }, +]; + +// ============================================================ +// HTTP helpers +// ============================================================ +function postJson(url, body, timeoutMs = 30000) { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const u = new URL(url); + const req = http.request({ + hostname: u.hostname, port: u.port, path: u.pathname, + method: 'POST', + headers: { 'Content-Type': 'application/fhir+json', 'Content-Length': Buffer.byteLength(data) }, + timeout: timeoutMs, + }, (res) => { + let buf = ''; + res.on('data', c => buf += c); + res.on('end', () => resolve({ status: res.statusCode, body: buf })); + }); + req.on('timeout', () => { req.destroy(); reject(new Error('Request timed out')); }); + req.on('error', reject); + req.write(data); + req.end(); + }); +} + +function httpPost(url) { + return new Promise((resolve, reject) => { + const u = new URL(url); + const req = http.request({ + hostname: u.hostname, port: u.port, path: u.pathname + (u.search || ''), + method: 'POST', headers: { 'Content-Length': 0 }, + }, (res) => { + let buf = ''; + res.on('data', c => buf += c); + res.on('end', () => resolve({ status: res.statusCode, body: buf })); + }); + req.on('error', reject); + req.end(); + }); +} + +function httpGet(url) { + return new Promise((resolve, reject) => { + http.get(url, (res) => { + let buf = ''; + res.on('data', c => buf += c); + res.on('end', () => resolve({ status: res.statusCode, body: buf })); + }).on('error', reject); + }); +} + +async function waitForServer(url, 
timeout) { + const start = Date.now(); + while (Date.now() - start < timeout) { + try { + const res = await httpGet(url); + if (res.status === 200) return true; + } catch (_) { /* not ready */ } + await new Promise(r => setTimeout(r, 2000)); + } + throw new Error('Server did not start within timeout'); +} + +function extractCodes(responseBody) { + try { + const json = JSON.parse(responseBody); + if (!json.expansion || !json.expansion.contains) return []; + return json.expansion.contains.map(c => ({ + code: c.code, display: c.display, system: c.system, inactive: c.inactive || false, + })); + } catch (_) { return null; } +} + +function codesEqual(a, b) { + if (a === null || b === null) return { match: false, reason: 'null' }; + if (a.length !== b.length) return { match: false, reason: `count ${a.length} vs ${b.length}` }; + let exact = true; + for (let i = 0; i < a.length; i++) { + if (a[i].code !== b[i].code || a[i].system !== b[i].system) { exact = false; break; } + } + if (exact) return { match: true, reason: 'exact' }; + const key = c => `${c.system}|${c.code}`; + const setA = new Set(a.map(key)); + const setB = new Set(b.map(key)); + const sameSet = setA.size === setB.size && [...setA].every(c => setB.has(c)); + if (sameSet) return { match: true, reason: 'order differs' }; + return { match: false, reason: 'different codes' }; +} + +// ============================================================ +// Main runner +// ============================================================ +function log(msg) { console.log(`[${new Date().toISOString().slice(11,19)}] ${msg}`); } + +async function main() { + const full = process.argv.includes('--full'); + const rxOnly = process.argv.includes('--rx'); + const lnOnly = process.argv.includes('--ln'); + const crossOnly = process.argv.includes('--cross'); + const sctOnly = process.argv.includes('--sct'); + const fuzzOnly = process.argv.includes('--fuzz'); + const rwOnly = process.argv.includes('--rw'); + const fuzzCount = 
parseInt(process.argv.find(a => a.startsWith('--fuzz-count='))?.split('=')[1] || '20'); + const fuzzSeed = parseInt(process.argv.find(a => a.startsWith('--fuzz-seed='))?.split('=')[1] || '42'); + + let testList; + if (rxOnly) testList = RXNORM_TESTS; + else if (lnOnly) testList = LOINC_TESTS; + else if (crossOnly) testList = CROSS_SYSTEM_TESTS; + else if (sctOnly) testList = SNOMED_TESTS; + else if (fuzzOnly) testList = generateFuzzTests(fuzzCount, fuzzSeed); + else if (rwOnly) testList = REALWORLD_TESTS; + else if (full) testList = [...RXNORM_TESTS, ...LOINC_TESTS, ...CROSS_SYSTEM_TESTS, ...REGEX_TESTS, ...SNOMED_TESTS, ...REALWORLD_TESTS, ...generateFuzzTests(fuzzCount, fuzzSeed)]; + else testList = [...RXNORM_TESTS, ...LOINC_TESTS, ...CROSS_SYSTEM_TESTS, ...REGEX_TESTS, ...SNOMED_TESTS]; + + const serverDir = path.resolve(__dirname, '..'); + + log(`Running ${testList.length} tests`); + log(`Using library: ${LIBRARY_CONFIG}`); + if (HAS_BASELINE) log(`Baseline: ${BASELINE_CONFIG} on port ${REF_PORT}`); + + let server, refServer; + try { + log(`Starting v0 server on port ${PORT}...`); + server = spawn('node', ['server.js'], { + cwd: serverDir, + stdio: ['ignore', 'pipe', 'pipe'], + env: { ...process.env, NODE_ENV: 'test', TX_LIBRARY_SOURCE: LIBRARY_CONFIG, PORT: String(PORT) }, + }); + server.stdout.on('data', () => {}); + server.stderr.on('data', () => {}); + + if (HAS_BASELINE) { + log(`Starting baseline server on port ${REF_PORT}...`); + refServer = spawn('node', ['server.js'], { + cwd: serverDir, + stdio: ['ignore', 'pipe', 'pipe'], + env: { ...process.env, NODE_ENV: 'test', TX_LIBRARY_SOURCE: BASELINE_CONFIG, PORT: String(REF_PORT) }, + }); + refServer.stdout.on('data', () => {}); + refServer.stderr.on('data', () => {}); + } + + await waitForServer(`http://localhost:${PORT}/r4/metadata`, SERVER_START_TIMEOUT); + log('v0 server ready.'); + if (HAS_BASELINE) { + await waitForServer(`http://localhost:${REF_PORT}/r4/metadata`, SERVER_START_TIMEOUT); + 
log('Baseline server ready.'); + } + log(''); + + try { await httpPost(`http://localhost:${PORT}/debug/perf-counters/enable`); } catch (_) {} + + const results = []; + + for (let ti = 0; ti < testList.length; ti++) { + const test = testList[ti]; + const body = JSON.parse(JSON.stringify(test.body)); + if (body.parameter[0].resource.compose._params) { + body.parameter.push(...body.parameter[0].resource.compose._params); + delete body.parameter[0].resource.compose._params; + } + + log(`[${ti+1}/${testList.length}] ${test.name}: ${test.desc}`); + + // V0 run + const t0 = performance.now(); + let optRes; + try { optRes = await postJson(BASE_URL + '/ValueSet/$expand', body); } + catch (e) { optRes = { status: 'ERROR', body: e.message }; } + const optMs = performance.now() - t0; + const optCodes = typeof optRes.body === 'string' ? extractCodes(optRes.body) : null; + + // BASELINE (upstream native providers) + let baseMs = 0, baseCodes = null, baseStatus = null, baseSkipped = false; + if (HAS_BASELINE && !test.skipBaseline) { + const t1 = performance.now(); + let baseRes; + try { baseRes = await postJson(REF_URL + '/ValueSet/$expand', body); } + catch (e) { baseRes = { status: 'TIMEOUT', body: '' }; } + baseMs = performance.now() - t1; + baseStatus = baseRes.status; + baseCodes = typeof baseRes.body === 'string' ? extractCodes(baseRes.body) : null; + } else { + baseSkipped = true; + } + + // Compare v0 vs baseline (paged) + let cmpBase = baseSkipped ? { match: null, reason: 'baseline skipped' } + : codesEqual(optCodes, baseCodes); + + const speedup = baseSkipped ? 
null : baseMs / optMs; + + // Drain: if paged comparison failed and drainCount is set, re-request full sets and compare sorted + let drainCmp = null; + if (cmpBase.match === false && test.drainCount) { + const drainBody = JSON.parse(JSON.stringify(body)); + // Replace count parameter with drainCount + const countIdx = drainBody.parameter.findIndex(p => p.name === 'count'); + if (countIdx >= 0) drainBody.parameter[countIdx].valueInteger = test.drainCount; + else drainBody.parameter.push({ name: 'count', valueInteger: test.drainCount }); + + let drainOpt, drainBase; + try { drainOpt = await postJson(BASE_URL + '/ValueSet/$expand', drainBody, 120000); } catch (_) {} + try { drainBase = await postJson(REF_URL + '/ValueSet/$expand', drainBody, 120000); } catch (_) {} + const drainOptCodes = drainOpt?.body ? extractCodes(drainOpt.body) : null; + const drainBaseCodes = drainBase?.body ? extractCodes(drainBase.body) : null; + drainCmp = codesEqual(drainOptCodes, drainBaseCodes); + if (drainCmp.match) { + drainCmp.optTotal = drainOptCodes?.length; + drainCmp.baseTotal = drainBaseCodes?.length; + } + } + + const baseIcon = cmpBase.match === true ? '✅' : cmpBase.match === false ? '❌' : '—'; + const drainIcon = drainCmp ? (drainCmp.match ? '✅' : '❌') : ''; + const drainNote = drainCmp ? ` drain(${test.drainCount}): ${drainIcon} ${drainCmp.reason}${drainCmp.optTotal ? ` (${drainCmp.optTotal}/${drainCmp.baseTotal})` : ''}` : ''; + + const baseLabel = baseSkipped ? 'N/A' : `${baseMs.toFixed(0)}ms`; + log(` v0: ${optMs.toFixed(0)}ms (${optCodes?.length ?? 
'?'} codes) Base: ${baseLabel} ${baseIcon} ${cmpBase.reason || ''}${drainNote}`); + + const SLOW_THRESHOLD_MS = parseInt(process.env.SLOW_THRESHOLD_MS || '3000'); + if (optMs > SLOW_THRESHOLD_MS || (baseMs > SLOW_THRESHOLD_MS && !baseSkipped)) { + const compose = body.parameter[0]?.resource?.compose; + if (compose) { + log(` ⚠️ SLOW (>${SLOW_THRESHOLD_MS}ms) — compose input:`); + log(' ' + JSON.stringify(compose, null, 2).split('\n').join('\n ')); + } + } + + log(''); + + results.push({ + name: test.name, optMs: optMs.toFixed(1), + baseMs: baseSkipped ? 'N/A' : baseMs.toFixed(1), + speedup: speedup != null ? speedup.toFixed(1) : null, + matchBase: cmpBase.match, reasonBase: cmpBase.reason, + drainMatch: drainCmp?.match ?? null, drainReason: drainCmp?.reason ?? null, + baseSkipped, + optCount: optCodes?.length ?? '?', + baseCount: baseCodes?.length ?? '?', + }); + } + + // Summary table + const lines = []; + lines.push('=== v0 vs upstream baseline test results ==='); + lines.push(`Date: ${new Date().toISOString()}`); + lines.push(`v0 config: ${LIBRARY_CONFIG}`); + lines.push(`Baseline config: ${BASELINE_CONFIG}`); + lines.push(`Tests: ${testList.length}`); + lines.push(''); + + lines.push('Test | v0 (ms) | Base (ms) | Speedup | Codes | Match | Drain'); + lines.push('-----------------------------------|---------|-----------|---------|-------|------------|------'); + for (const r of results) { + const icon = r.baseSkipped ? '—' : r.matchBase === true ? '✅' : '❌'; + const detail = r.baseSkipped ? '' : (r.reasonBase || ''); + const speedCol = r.speedup != null ? `${r.speedup.padStart(6)}x` : ' — '; + const drainCol = r.drainMatch === true ? '✅ ' + r.drainReason : r.drainMatch === false ? 
'❌ ' + r.drainReason : ''; + lines.push(`${r.name.padEnd(35)}| ${r.optMs.padStart(7)} | ${(r.baseMs || '').padStart(9)} | ${speedCol} | ${String(r.optCount).padStart(5)} | ${icon} ${detail.padEnd(10)}| ${drainCol}`); + } + + console.log('\n' + lines.join('\n')); + + const outPath = path.join(serverDir, 'test-cross-system-results.txt'); + fs.writeFileSync(outPath, lines.join('\n') + '\n'); + log(`Results written to ${outPath}`); + + } finally { + if (server) { + server.kill('SIGTERM'); + await new Promise(r => setTimeout(r, 500)); + } + if (refServer) { + refServer.kill('SIGTERM'); + await new Promise(r => setTimeout(r, 500)); + } + } +} + +main().catch(err => { + console.error(err); + process.exit(1); +}); diff --git a/scripts/test-expand-for-valueset.js b/scripts/test-expand-for-valueset.js new file mode 100644 index 0000000..c61eeea --- /dev/null +++ b/scripts/test-expand-for-valueset.js @@ -0,0 +1,467 @@ +#!/usr/bin/env node +'use strict'; + +/** + * Test expandForValueSet correctness and performance. + * + * Starts the server with lite config, runs RxNorm expansion requests with + * and without the expandForValueSet bypass flag, compares results. 
+ * + * Usage: node scripts/test-expand-for-valueset.js + */ + +const http = require('http'); +const fs = require('fs'); +const { spawn } = require('child_process'); +const path = require('path'); + +const PORT = 3000; +const BASE_URL = `http://localhost:${PORT}/r4`; +const SERVER_START_TIMEOUT = 300000; +const LITE_CONFIG = 'tx/tx.rxnorm-only.yml'; + +// --- Test cases --- +function makeVS(compose) { + return { + resourceType: 'Parameters', + parameter: [ + { name: 'valueSet', resource: { resourceType: 'ValueSet', compose } }, + ...(compose._params || []), + ], + }; +} + +const RXSYS = 'http://www.nlm.nih.gov/research/umls/rxnorm'; + +const TESTS = [ + { + name: 'filter-tty-sbd-10', + desc: 'TTY=SBD, count=10', + body: makeVS({ + include: [{ system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }], + _params: [{ name: 'count', valueInteger: 10 }], + }), + }, + { + name: 'concept-5', + desc: '5 explicit codes', + body: makeVS({ + include: [{ system: RXSYS, + concept: [{ code: '197381' }, { code: '197382' }, { code: '197383' }, { code: '197384' }, { code: '197385' }] + }], + }), + }, + { + name: 'exclude-concepts-3', + desc: 'TTY=SBD, 3 concept excludes, count=10', + body: makeVS({ + include: [{ system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }], + exclude: [{ system: RXSYS, concept: [{ code: '197381' }, { code: '197382' }, { code: '197383' }] }], + _params: [{ name: 'count', valueInteger: 10 }], + }), + }, + { + name: 'multi-include-2', + desc: 'TTY=SBD + TTY=SCD, count=10', + drainCount: 40000, // must exceed total SBD+SCD (~37k) for valid set comparison + body: makeVS({ + include: [ + { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }, + { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SCD' }] }, + ], + _params: [{ name: 'count', valueInteger: 10 }], + }), + }, + { + name: 'activeonly-sbd', + desc: 'TTY=SBD, activeOnly, count=10', + body: makeVS({ + include: [{ system: RXSYS, filter: [{ property: 
'TTY', op: '=', value: 'SBD' }] }], + _params: [{ name: 'count', valueInteger: 10 }, { name: 'activeOnly', valueBoolean: true }], + }), + }, +]; + +// --- EXTENDED tests (add with --full flag) --- +const EXTENDED_TESTS = [ + { + name: 'filter-tty-in-multi', + desc: 'TTY in SBD,SCD, count=10', + body: makeVS({ + include: [{ system: RXSYS, filter: [{ property: 'TTY', op: 'in', value: 'SBD,SCD' }] }], + _params: [{ name: 'count', valueInteger: 10 }], + }), + }, + { + name: 'filter-sty-t200', + desc: 'STY=T200, count=10', + body: makeVS({ + include: [{ system: RXSYS, filter: [{ property: 'STY', op: '=', value: 'T200' }] }], + _params: [{ name: 'count', valueInteger: 10 }], + }), + }, + { + name: 'paged-offset-100', + desc: 'TTY=SBD, offset=100, count=10', + body: makeVS({ + include: [{ system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }], + _params: [{ name: 'count', valueInteger: 10 }, { name: 'offset', valueInteger: 100 }], + }), + }, + { + name: 'text-aspirin', + desc: 'TTY=SBD, filter=aspirin, count=10', + body: makeVS({ + include: [{ system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }], + _params: [{ name: 'count', valueInteger: 10 }, { name: 'filter', valueString: 'aspirin' }], + }), + }, + { + name: 'exclude-filter', + desc: 'TTY=SBD, exclude TTY=SBDC, count=10', + body: makeVS({ + include: [{ system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }], + exclude: [{ system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBDC' }] }], + _params: [{ name: 'count', valueInteger: 10 }], + }), + }, + { + name: 'multi-include-concept+filter', + desc: 'Concepts + TTY=SBD filter, count=10', + drainCount: 25000, + body: makeVS({ + include: [ + { system: RXSYS, concept: [{ code: '197381' }, { code: '197382' }] }, + { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }, + ], + _params: [{ name: 'count', valueInteger: 10 }], + }), + }, + { + name: 'combo-active-text-paged', + desc: 'TTY=SBD, activeOnly, 
filter=tablet, offset=10, count=5', + body: makeVS({ + include: [{ system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }], + _params: [ + { name: 'count', valueInteger: 5 }, { name: 'offset', valueInteger: 10 }, + { name: 'activeOnly', valueBoolean: true }, { name: 'filter', valueString: 'tablet' }, + ], + }), + }, + { + name: 'multi-include-multi-exclude', + desc: 'SBD+SCD, exclude 3 concepts + SBDC, count=10', + drainCount: 40000, + body: makeVS({ + include: [ + { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBD' }] }, + { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SCD' }] }, + ], + exclude: [ + { system: RXSYS, concept: [{ code: '197381' }, { code: '197382' }, { code: '197383' }] }, + { system: RXSYS, filter: [{ property: 'TTY', op: '=', value: 'SBDC' }] }, + ], + _params: [{ name: 'count', valueInteger: 10 }], + }), + }, +]; + +// --- HTTP helpers --- +function postJson(url, body, timeoutMs = 5000) { + return new Promise((resolve, reject) => { + const data = JSON.stringify(body); + const u = new URL(url); + const req = http.request({ + hostname: u.hostname, port: u.port, path: u.pathname, + method: 'POST', + headers: { 'Content-Type': 'application/fhir+json', 'Content-Length': Buffer.byteLength(data) }, + timeout: timeoutMs, + }, (res) => { + let buf = ''; + res.on('data', c => buf += c); + res.on('end', () => resolve({ status: res.statusCode, body: buf })); + }); + req.on('timeout', () => { req.destroy(); reject(new Error('Request timed out')); }); + req.on('error', reject); + req.write(data); + req.end(); + }); +} + +function httpPost(url) { + return new Promise((resolve, reject) => { + const u = new URL(url); + const req = http.request({ + hostname: u.hostname, port: u.port, path: u.pathname + (u.search || ''), + method: 'POST', headers: { 'Content-Length': 0 }, + }, (res) => { + let buf = ''; + res.on('data', c => buf += c); + res.on('end', () => resolve({ status: res.statusCode, body: buf })); + }); + 
req.on('error', reject); + req.end(); + }); +} + +function httpGet(url) { + return new Promise((resolve, reject) => { + http.get(url, (res) => { + let buf = ''; + res.on('data', c => buf += c); + res.on('end', () => resolve({ status: res.statusCode, body: buf })); + }).on('error', reject); + }); +} + +async function waitForServer(url, timeout) { + const start = Date.now(); + while (Date.now() - start < timeout) { + try { + const res = await httpGet(url); + if (res.status === 200) return true; + } catch (e) { /* not ready */ } + await new Promise(r => setTimeout(r, 2000)); + } + throw new Error('Server did not start within timeout'); +} + +function extractCodes(responseBody) { + try { + const json = JSON.parse(responseBody); + if (!json.expansion || !json.expansion.contains) return []; + return json.expansion.contains.map(c => ({ + code: c.code, + display: c.display, + system: c.system, + inactive: c.inactive || false, + })); + } catch (e) { + return null; + } +} + +function codesEqual(a, b) { + if (a === null || b === null) return { match: false, reason: 'null' }; + if (a.length !== b.length) return { match: false, reason: `count ${a.length} vs ${b.length}` }; + // Check exact positional match + let exact = true; + for (let i = 0; i < a.length; i++) { + if (a[i].code !== b[i].code) { exact = false; break; } + } + if (exact) return { match: true, reason: 'exact' }; + // Check set match (same codes, different order) + const setA = new Set(a.map(c => c.code)); + const setB = new Set(b.map(c => c.code)); + const sameSet = setA.size === setB.size && [...setA].every(c => setB.has(c)); + if (sameSet) return { match: true, reason: 'order differs' }; + return { match: false, reason: 'different codes' }; +} + +// --- Main --- +function log(msg) { + console.log(`[${new Date().toISOString().slice(11,19)}] ${msg}`); +} + +async function main() { + const full = process.argv.includes('--full'); + const testList = full ? 
[...TESTS, ...EXTENDED_TESTS] : TESTS; + const serverDir = path.resolve(__dirname, '..'); + + log(`Running ${testList.length} tests (${full ? 'full' : 'core'} mode, pass --full for all)`); + log(`Using library: ${LITE_CONFIG}`); + + let server; + try { + log(`Starting server on port ${PORT}...`); + server = spawn('node', ['server.js'], { + cwd: serverDir, + stdio: ['ignore', 'pipe', 'pipe'], + env: { ...process.env, NODE_ENV: 'test', TX_LIBRARY_SOURCE: LITE_CONFIG }, + }); + + let serverOutput = ''; + server.stdout.on('data', d => { serverOutput += d.toString(); }); + server.stderr.on('data', d => { serverOutput += d.toString(); }); + + await waitForServer(`http://localhost:${PORT}/r4/metadata`, SERVER_START_TIMEOUT); + log('Server ready.\n'); + + // Enable perf counters + await httpPost(`http://localhost:${PORT}/debug/perf-counters/enable`); + + const results = []; + const ITERS = 2; + + for (let ti = 0; ti < testList.length; ti++) { + const test = testList[ti]; + // Fix up the body: move _params to top level + const body = JSON.parse(JSON.stringify(test.body)); + if (body.parameter[0].resource.compose._params) { + body.parameter.push(...body.parameter[0].resource.compose._params); + delete body.parameter[0].resource.compose._params; + } + + log(`[${ti+1}/${testList.length}] ${test.name}: ${test.desc}`); + + // Run OPTIMIZED (expandForValueSet enabled) + await httpPost(`http://localhost:${PORT}/debug/bypass-expand-for-valueset?bypass=false`); + await httpPost(`http://localhost:${PORT}/debug/perf-counters/reset`); + + const timesOpt = []; + let optCodes, optStatus; + for (let i = 0; i < ITERS; i++) { + log(` opt iter ${i+1}/${ITERS}...`); + const t0 = performance.now(); + const res = await postJson(BASE_URL + '/ValueSet/$expand', body); + const elapsed = performance.now() - t0; + timesOpt.push(elapsed); + log(` opt iter ${i+1}: ${elapsed.toFixed(0)}ms, HTTP ${res.status}`); + if (i === 0) { optCodes = extractCodes(res.body); optStatus = res.status; } + } + const 
cOpt = JSON.parse((await httpGet(`http://localhost:${PORT}/debug/perf-counters`)).body); + + // Run BASELINE (expandForValueSet bypassed) + await httpPost(`http://localhost:${PORT}/debug/bypass-expand-for-valueset?bypass=true`); + await httpPost(`http://localhost:${PORT}/debug/perf-counters/reset`); + + const timesBase = []; + let baseCodes, baseStatus; + let baseTimeout = false; + for (let i = 0; i < ITERS; i++) { + if (baseTimeout) break; // don't repeat after timeout + log(` base iter ${i+1}/${ITERS}...`); + const t0 = performance.now(); + try { + const res = await postJson(BASE_URL + '/ValueSet/$expand', body); + const elapsed = performance.now() - t0; + timesBase.push(elapsed); + log(` base iter ${i+1}: ${elapsed.toFixed(0)}ms, HTTP ${res.status}`); + if (i === 0) { baseCodes = extractCodes(res.body); baseStatus = res.status; } + } catch (e) { + const elapsed = performance.now() - t0; + if (e.message === 'Request timed out') { + timesBase.push(elapsed); + log(` base iter ${i+1}: TIMEOUT after ${elapsed.toFixed(0)}ms`); + baseTimeout = true; + if (i === 0) { baseCodes = null; baseStatus = 'TIMEOUT'; } + } else { + throw e; + } + } + } + const cBase = JSON.parse((await httpGet(`http://localhost:${PORT}/debug/perf-counters`)).body); + + // Compare + timesOpt.sort((a, b) => a - b); + timesBase.sort((a, b) => a - b); + const medOpt = timesOpt[Math.floor(timesOpt.length / 2)]; + const medBase = timesBase[Math.floor(timesBase.length / 2)]; + const cmp = baseTimeout + ? { match: null, reason: 'baseline timeout' } + : codesEqual(optCodes, baseCodes); + const speedup = medBase / medOpt; + + const statusNote = (optStatus !== 200 || (baseStatus !== 200 && baseStatus !== 'TIMEOUT')) + ? ` [HTTP opt:${optStatus} base:${baseStatus}]` : ''; + + const matchIcon = cmp.match === true ? '✅' : cmp.match === false ? '❌' : '⏱️'; + const baseLabel = baseTimeout ? 
'TIMEOUT' : `${medBase.toFixed(1)}ms`; + log(` Optimized: ${medOpt.toFixed(1)}ms | Baseline: ${baseLabel} | Speedup: ${baseTimeout ? '∞' : speedup.toFixed(1) + 'x'}${statusNote}`); + log(` Codes: ${matchIcon} ${cmp.reason} (opt: ${optCodes?.length ?? '?'}, base: ${baseCodes?.length ?? '?'})`); + + if (!cmp.match && optCodes && baseCodes) { + const maxShow = Math.min(5, Math.max(optCodes.length, baseCodes.length)); + for (let i = 0; i < maxShow; i++) { + const o = optCodes[i]; const b = baseCodes[i]; + if (!o || !b || o.code !== b.code || o.display !== b.display) { + log(` [${i}] opt: ${o?.code}/${o?.display?.substring(0,40)} | base: ${b?.code}/${b?.display?.substring(0,40)}`); + } + } + } + + const handled = cOpt.counters?.['expandForValueSet.handled'] || 0; + const fallback = cOpt.counters?.['expandForValueSet.fallback'] || 0; + log(` Counters: handled=${handled}, fallback=${fallback}`); + + // Full-drain set comparison: fetch ALL codes from both paths, compare as sets + let drainResult = null; + if (test.drainCount && !baseTimeout && (!cmp.match || cmp.reason === 'order differs')) { + log(` Draining up to ${test.drainCount} codes for full set comparison...`); + const drainBody = JSON.parse(JSON.stringify(body)); + // Replace count/offset/limit params for full drain + drainBody.parameter = drainBody.parameter.filter(p => + p.name !== 'count' && p.name !== 'offset' && p.name !== 'limit'); + drainBody.parameter.push({ name: 'count', valueInteger: test.drainCount }); + drainBody.parameter.push({ name: 'limit', valueInteger: test.drainCount }); + + await httpPost(`http://localhost:${PORT}/debug/bypass-expand-for-valueset?bypass=false`); + const drainOpt = await postJson(BASE_URL + '/ValueSet/$expand', drainBody, 120000); + await httpPost(`http://localhost:${PORT}/debug/bypass-expand-for-valueset?bypass=true`); + const drainBase = await postJson(BASE_URL + '/ValueSet/$expand', drainBody, 120000); + + if (drainOpt.status !== 200 || drainBase.status !== 200) { + 
drainResult = `HTTP error (opt:${drainOpt.status} base:${drainBase.status})`; + log(` Drain failed: ${drainResult}`); + } else { + const optCodes2 = extractCodes(drainOpt.body) || []; + const baseCodes2 = extractCodes(drainBase.body) || []; + const optSet = new Set(optCodes2.map(c => c.code)); + const baseSet = new Set(baseCodes2.map(c => c.code)); + const onlyOpt = [...optSet].filter(c => !baseSet.has(c)); + const onlyBase = [...baseSet].filter(c => !optSet.has(c)); + const setsEqual = onlyOpt.length === 0 && onlyBase.length === 0; + drainResult = setsEqual + ? `sets equal (${optSet.size} codes)` + : `sets differ (opt-only: ${onlyOpt.length}, base-only: ${onlyBase.length})`; + log(` Drain: opt=${optSet.size} base=${baseSet.size} → ${drainResult}`); + if (!setsEqual && onlyOpt.length > 0) log(` opt-only sample: ${onlyOpt.slice(0,5).join(', ')}`); + if (!setsEqual && onlyBase.length > 0) log(` base-only sample: ${onlyBase.slice(0,5).join(', ')}`); + } + } + log(''); + + results.push({ name: test.name, medOpt: medOpt.toFixed(1), + medBase: baseTimeout ? 'TIMEOUT' : medBase.toFixed(1), + speedup: baseTimeout ? '∞' : speedup.toFixed(1), + match: cmp.match, reason: cmp.reason, + drainResult, baseTimeout, + optCount: optCodes?.length ?? '?', baseCount: baseCodes?.length ?? '?' }); + } + + // Summary + const lines = []; + lines.push('=== expandForValueSet test results ==='); + lines.push(`Date: ${new Date().toISOString()}`); + lines.push(`Tests: ${testList.length} (${full ? 'full' : 'core'})`); + lines.push(''); + lines.push('Test | New (ms) | Old (ms) | Speedup | Codes | Result'); + lines.push('------------------------------|----------|----------|---------|-------|-------'); + for (const r of results) { + const drainOk = r.drainResult && r.drainResult.startsWith('sets equal'); + const pass = r.match === true || drainOk; + const icon = r.baseTimeout ? '⏱️' : (pass ? '✅' : '❌'); + const detail = r.baseTimeout ? 
`baseline timeout (opt OK: ${r.optCount} codes)` + : (drainOk ? `page order differs, ${r.drainResult}` : r.reason); + const speedCol = typeof r.speedup === 'string' && r.speedup === '∞' ? ' ∞ ' : `${r.speedup.padStart(5)}x `; + lines.push(`${r.name.padEnd(30)}| ${r.medOpt.padStart(8)} | ${r.medBase.padStart(8)} | ${speedCol} | ${String(r.optCount).padStart(5)} | ${icon} ${detail}`); + } + + console.log('\n' + lines.join('\n')); + + const outPath = path.join(serverDir, 'test-expand-results.txt'); + fs.writeFileSync(outPath, lines.join('\n') + '\n'); + log(`Results written to ${outPath}`); + + } finally { + if (server) { + server.kill('SIGTERM'); + await new Promise(r => setTimeout(r, 1000)); + } + } +} + +main().catch(err => { + console.error(err); + process.exit(1); +}); diff --git a/scripts/test-loinc-expand.js b/scripts/test-loinc-expand.js new file mode 100644 index 0000000..af5c395 --- /dev/null +++ b/scripts/test-loinc-expand.js @@ -0,0 +1,508 @@ +#!/usr/bin/env node +/** + * Benchmark test suite for LOINC expandForValueSet + * Mirrors the RxNorm test harness (scripts/test-expand-for-valueset.js) + * Tests various LOINC filter/concept patterns against baseline + */ + +const http = require('http'); + +const PORT = 3000; +const BASE = `http://localhost:${PORT}`; +const SYSTEM = 'http://loinc.org'; + +// Test cases exercising different LOINC query patterns +const TEST_CASES = [ + // --- Filter: relationship property (COMPONENT = part code) --- + { + name: 'filter-component-bacteria', + description: 'COMPONENT = LP14082-9 (Bacteria, 27 codes)', + compose: { + include: [{ + system: SYSTEM, + filter: [{ property: 'COMPONENT', op: '=', value: 'LP14082-9' }] + }] + }, + params: { count: 10 } + }, + + // --- Filter: relationship property (CLASS = class part) --- + { + name: 'filter-class-chem', + description: 'CLASS = LP7786-9 (CHEM, 10707 codes)', + compose: { + include: [{ + system: SYSTEM, + filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] + }] + }, + 
params: { count: 10 } + }, + + // --- Filter: SCALE_TYP (large: 42k codes) --- + { + name: 'filter-scale-qn', + description: 'SCALE_TYP = LP7753-9 (Qn, 42085 codes)', + compose: { + include: [{ + system: SYSTEM, + filter: [{ property: 'SCALE_TYP', op: '=', value: 'LP7753-9' }] + }] + }, + params: { count: 10 } + }, + + // --- Filter: SYSTEM (medium: 13k codes) --- + { + name: 'filter-system-ser', + description: 'SYSTEM = LP7567-3 (Ser, 13584 codes)', + compose: { + include: [{ + system: SYSTEM, + filter: [{ property: 'SYSTEM', op: '=', value: 'LP7567-3' }] + }] + }, + params: { count: 10 } + }, + + // --- Concept list (explicit codes) --- + { + name: 'concept-5', + description: '5 explicit LOINC codes', + compose: { + include: [{ + system: SYSTEM, + concept: [ + { code: '2160-0' }, // Creatinine + { code: '2345-7' }, // Glucose + { code: '718-7' }, // Hemoglobin + { code: '4548-4' }, // HbA1c + { code: '2951-2' } // Sodium + ] + }] + }, + params: {} + }, + + // --- Exclude: component filter minus specific codes --- + { + name: 'exclude-concepts', + description: 'COMPONENT=LP14082-9 minus 2 specific codes', + compose: { + include: [{ + system: SYSTEM, + filter: [{ property: 'COMPONENT', op: '=', value: 'LP14082-9' }] + }], + exclude: [{ + system: SYSTEM, + concept: [ + { code: '100906-7' }, + { code: '11101-3' } + ] + }] + }, + params: { count: 10 } + }, + + // --- ActiveOnly: CLASS=CHEM with activeOnly --- + { + name: 'activeonly-class', + description: 'CLASS=LP7786-9 activeOnly=true', + compose: { + include: [{ + system: SYSTEM, + filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] + }] + }, + params: { count: 10, activeOnly: true } + }, + + // --- LIST filter (answer list) --- + { + name: 'filter-list-ll150', + description: 'LIST = LL150-4 (255 answers)', + compose: { + include: [{ + system: SYSTEM, + filter: [{ property: 'LIST', op: '=', value: 'LL150-4' }] + }] + }, + params: { count: 10 }, + drainCount: 300 + }, + + // --- Property filter (CLASSTYPE = 
1 = Laboratory) --- + { + name: 'filter-classtype-lab', + description: 'CLASSTYPE = 1 (Laboratory, ~60k codes)', + compose: { + include: [{ + system: SYSTEM, + filter: [{ property: 'CLASSTYPE', op: '=', value: '1' }] + }] + }, + params: { count: 10 } + }, + + // --- Paged: offset into CLASS filter --- + { + name: 'paged-class-offset-100', + description: 'CLASS=LP7786-9 offset=100 count=10', + compose: { + include: [{ + system: SYSTEM, + filter: [{ property: 'CLASS', op: '=', value: 'LP7786-9' }] + }] + }, + params: { count: 10, offset: 100 } + }, + + // --- Multi-filter: COMPONENT + SCALE_TYP --- + { + name: 'multi-filter-component-scale', + description: 'COMPONENT=LP14082-9 AND SCALE_TYP=Qn', + compose: { + include: [{ + system: SYSTEM, + filter: [ + { property: 'COMPONENT', op: '=', value: 'LP14082-9' }, + { property: 'SCALE_TYP', op: '=', value: 'LP7753-9' } + ] + }] + }, + params: { count: 10 } + }, + + // --- STATUS filter --- + { + name: 'filter-status-active', + description: 'STATUS = ACTIVE', + compose: { + include: [{ + system: SYSTEM, + filter: [{ property: 'STATUS', op: '=', value: 'ACTIVE' }] + }] + }, + params: { count: 10 }, + drainCount: 200000 + }, + + // --- Text search + filter --- + { + name: 'text-glucose', + description: 'Text search "glucose" with count=10', + compose: { + include: [{ + system: SYSTEM, + filter: [{ property: 'SCALE_TYP', op: '=', value: 'LP7753-9' }] + }] + }, + params: { count: 10, filter: 'glucose' } + }, + + // --- Multi-include: two component filters --- + { + name: 'multi-include-2-components', + description: 'COMPONENT=LP14082-9 OR COMPONENT=LP33405-9', + compose: { + include: [ + { + system: SYSTEM, + filter: [{ property: 'COMPONENT', op: '=', value: 'LP14082-9' }] + }, + { + system: SYSTEM, + filter: [{ property: 'COMPONENT', op: '=', value: 'LP33405-9' }] + } + ] + }, + params: { count: 10 }, + drainCount: 1000 + }, +]; + +// --- HTTP helpers --- +function httpPost(url, body, timeout = 30000) { + return new 
/**
 * Builds the FHIR `Parameters` resource that is POSTed to `ValueSet/$expand`
 * for one test case.
 *
 * The value set is supplied inline as the `valueSet` parameter. Optional
 * expansion controls from `tc.params` follow it in the fixed order
 * count, offset, activeOnly, filter.
 *
 * @param {Object} tc test case; `tc.compose` becomes the ValueSet compose
 * @param {Object} [tc.params] optional controls: count, offset, activeOnly, filter
 * @param {boolean} [bypass=false] not used by this function; kept so existing
 *   call sites that pass it continue to work
 * @returns {Object} a `Parameters` resource ready to be JSON-serialised
 */
function buildExpandRequest(tc, bypass = false) {
  // Tolerate test cases that carry no params object at all.
  const { count, offset, activeOnly, filter } = tc.params ?? {};

  const parameter = [
    { name: 'valueSet', resource: { resourceType: 'ValueSet', compose: tc.compose } },
  ];

  // Presence check, not truthiness: an explicit count of 0 must still be sent.
  if (count != null) parameter.push({ name: 'count', valueInteger: count });
  // A zero or absent offset is omitted.
  // NOTE(review): assumes the server treats a missing offset as 0 - confirm.
  if (offset) parameter.push({ name: 'offset', valueInteger: offset });
  if (activeOnly) parameter.push({ name: 'activeOnly', valueBoolean: true });
  if (filter) parameter.push({ name: 'filter', valueString: filter });

  return { resourceType: 'Parameters', parameter };
}
/**
 * Switches the server's debug flag that bypasses `expandForValueSet`.
 *
 * Sends a body-less POST to `/debug/bypass-expand-for-valueset?bypass=<enabled>`
 * and resolves with whatever text the server replies with. The HTTP status is
 * not inspected, so a failing endpoint still resolves.
 *
 * @param {boolean} enabled true to bypass the optimised path, false to use it
 * @returns {Promise<string>} the raw response body
 */
async function setBypass(enabled) {
  const url = `${BASE}/debug/bypass-expand-for-valueset?bypass=${enabled}`;
  return new Promise((resolve, reject) => {
    const req = http.request(url, { method: 'POST' }, (res) => {
      // Decode as a stream so a multi-byte character split across chunks stays intact.
      res.setEncoding('utf8');
      let text = '';
      res.on('data', (chunk) => { text += chunk; });
      res.on('end', () => resolve(text));
    });
    req.on('error', reject);
    req.end();
  });
}
/**
 * Turns one test result into the single-line verdict shown in the report.
 *
 * Checks run in this order: recorded error, baseline timeout, page size
 * mismatch, identical page, same codes in a different order, and finally
 * differing codes. Whenever a drain comparison was recorded
 * (`result.drainEqual` is defined) it decides the outcome of the last two cases.
 *
 * @param {Object} result result object produced for one test case
 * @returns {string} verdict prefixed with a status emoji
 */
function compareResults(result) {
  if (result.error) {
    return `❌ ${result.error}`;
  }
  if (result.baseMs === -1) {
    return `⏱️ baseline timeout (opt OK: ${result.optCodes.length} codes)`;
  }

  const optimized = result.optCodes.map((entry) => entry.code);
  const baseline = result.baseCodes.map((entry) => entry.code);

  if (optimized.length !== baseline.length) {
    return `❌ count ${optimized.length} vs ${baseline.length}`;
  }

  // Same codes in the same order.
  if (JSON.stringify(optimized) === JSON.stringify(baseline)) {
    return '✅ exact';
  }

  const hasDrain = result.drainEqual !== undefined;
  const drainedEqualLabel = `✅ page order differs, sets equal (${result.drainOpt} codes)`;

  // Same codes on the page, only the order differs.
  const optimizedSet = new Set(optimized);
  const baselineSet = new Set(baseline);
  const sameMembers =
    optimizedSet.size === baselineSet.size &&
    [...optimizedSet].every((code) => baselineSet.has(code));

  if (sameMembers) {
    if (!hasDrain) {
      return '✅ page order differs, sets equal';
    }
    return result.drainEqual ? drainedEqualLabel : `❌ page order differs, full sets DIFFERENT`;
  }

  // The pages hold different codes; only a full drain can still vindicate the result.
  if (hasDrain) {
    return result.drainEqual
      ? drainedEqualLabel
      : `❌ different codes (drain: ${result.drainOpt} vs ${result.drainBase})`;
  }

  return '❌ different codes';
}
'TIMEOUT' : r.baseMs.toFixed(1) + 'ms'} | Speedup: ${speedup}`); + + // Show first few codes side by side if different + if (r.optCodes.length > 0 && r.baseCodes.length > 0) { + const match = JSON.stringify(r.optCodes.map(c=>c.code)) === JSON.stringify(r.baseCodes.map(c=>c.code)); + if (!match) { + for (let i = 0; i < Math.min(5, r.optCodes.length); i++) { + console.log(`[${ts()}] [${i}] opt: ${r.optCodes[i]?.code}/${r.optCodes[i]?.display} | base: ${r.baseCodes[i]?.code}/${r.baseCodes[i]?.display}`); + } + } + } + } + + // Summary table + console.log(''); + console.log(`=== LOINC expandForValueSet test results ===`); + console.log(`Date: ${new Date().toISOString()}`); + console.log(`Tests: ${results.length}`); + console.log(''); + console.log('Test | New (ms) | Old (ms) | Speedup | Codes | Result'); + console.log('------------------------------|----------|----------|---------|-------|-------'); + + for (const r of results) { + const name = r.name.padEnd(30); + const optMs = r.optMs.toFixed(1).padStart(8); + const baseMs = r.baseMs === -1 ? ' TIMEOUT' : r.baseMs.toFixed(1).padStart(8); + const speedup = r.baseMs === -1 ? ' ∞' : r.baseMs <= 0 ? ' N/A' : (r.baseMs / r.optMs).toFixed(1).padStart(5) + 'x'; + const codes = String(r.optCodes.length).padStart(5); + const comparison = compareResults(r); + console.log(`${name}|${optMs} |${baseMs} | ${speedup.padStart(7)} | ${codes} | ${comparison}`); + } + + // Write results + const outPath = 'test-loinc-expand-results.txt'; + const lines = results.map(r => { + const speedup = r.baseMs === -1 ? 'Inf' : (r.baseMs / r.optMs).toFixed(1); + return `${r.name}\t${r.optMs.toFixed(1)}\t${r.baseMs === -1 ? 
/**
 * Points the tx module at a different library source by rewriting the config
 * file at CONFIG_PATH.
 *
 * The value found on the first call is remembered in `origLibrarySource` so it
 * can be put back later; subsequent calls leave that remembered value alone.
 *
 * @param {string} librarySource library source path to write into the config
 */
function patchConfig(librarySource) {
  const rawConfig = fs.readFileSync(CONFIG_PATH, 'utf8');
  const config = JSON.parse(rawConfig);
  const txModule = config.modules.tx;

  // Capture the pre-existing setting exactly once.
  if (!origLibrarySource) {
    origLibrarySource = txModule.librarySource;
  }

  txModule.librarySource = librarySource;
  const serialized = JSON.stringify(config, null, 2);
  fs.writeFileSync(CONFIG_PATH, serialized);
}
/**
 * Wraps a compose definition in the FHIR `Parameters` body used for `$expand`.
 *
 * Test cases smuggle extra operation parameters (count, offset, activeOnly)
 * in a private `_params` array on the compose object. Those entries are moved
 * out of the compose and appended after the `valueSet` parameter, so the
 * ValueSet that is sent contains only the remaining compose properties.
 * Removing `_params` here also means that code which later re-appends
 * `compose._params` (as `prepBody` does) finds nothing to add and therefore
 * cannot send every parameter twice.
 *
 * The `compose` argument itself is not modified.
 *
 * @param {Object} compose value set compose, optionally carrying `_params`
 * @returns {Object} a `Parameters` resource
 */
function makeVS(compose) {
  const { _params: extraParams, ...fhirCompose } = compose;
  return {
    resourceType: 'Parameters',
    parameter: [
      { name: 'valueSet', resource: { resourceType: 'ValueSet', compose: fhirCompose } },
      ...(extraParams || []),
    ],
  };
}
(Procedure), count=100', + body: makeVS({ + include: [{ system: SCT, filter: [{ property: 'concept', op: 'is-a', value: '71388002' }] }], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + { + name: 'is-a-body-structure', + desc: 'is-a 123037004 (Body structure), count=100', + body: makeVS({ + include: [{ system: SCT, filter: [{ property: 'concept', op: 'is-a', value: '123037004' }] }], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + { + name: 'refset-laterality', + desc: 'concept in 723264001 (laterality refset), count=100', + body: makeVS({ + include: [{ system: SCT, filter: [{ property: 'concept', op: 'in', value: '723264001' }] }], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + { + name: 'exclude-is-a-filter', + desc: 'is-a 73211009, exclude is-a 44054006 (Type II subtree)', + body: makeVS({ + include: [{ system: SCT, filter: [{ property: 'concept', op: 'is-a', value: '73211009' }] }], + exclude: [{ system: SCT, filter: [{ property: 'concept', op: 'is-a', value: '44054006' }] }], + _params: [{ name: 'count', valueInteger: 200 }], + }), + }, + { + name: 'multi-include', + desc: 'is-a Diabetes + is-a Hypertension, count=100', + drainCount: 1500, + body: makeVS({ + include: [ + { system: SCT, filter: [{ property: 'concept', op: 'is-a', value: '73211009' }] }, + { system: SCT, filter: [{ property: 'concept', op: 'is-a', value: '38341003' }] }, + ], + _params: [{ name: 'count', valueInteger: 100 }], + }), + }, + { + name: 'activeonly', + desc: 'is-a 73211009, activeOnly', + body: makeVS({ + include: [{ system: SCT, filter: [{ property: 'concept', op: 'is-a', value: '73211009' }] }], + _params: [{ name: 'count', valueInteger: 200 }, { name: 'activeOnly', valueBoolean: true }], + }), + }, + { + name: 'concept-equals', + desc: 'concept = 73211009 (single code via filter)', + body: makeVS({ + include: [{ system: SCT, filter: [{ property: 'concept', op: '=', value: '73211009' }] }], + }), + }, + { + name: 
/**
 * POSTs a JSON body as `application/fhir+json` and resolves with the raw reply.
 *
 * The promise resolves for every HTTP status; callers inspect `status`
 * themselves. It rejects with `Error('Request timed out')` when the request
 * times out after `timeoutMs`, and with the underlying error on request or
 * response stream failures.
 *
 * @param {string} url absolute URL; its query string, if any, is sent too
 * @param {*} body value to serialise with JSON.stringify
 * @param {number} [timeoutMs=REQUEST_TIMEOUT] request timeout in milliseconds
 * @returns {Promise<{status: number, body: string}>} status code and response text
 */
function postJson(url, body, timeoutMs = REQUEST_TIMEOUT) {
  return new Promise((resolve, reject) => {
    const data = JSON.stringify(body);
    const u = new URL(url);
    const req = http.request({
      hostname: u.hostname,
      port: u.port,
      // Keep the query string so URLs with parameters are not silently truncated.
      path: u.pathname + u.search,
      method: 'POST',
      headers: { 'Content-Type': 'application/fhir+json', 'Content-Length': Buffer.byteLength(data) },
      timeout: timeoutMs,
    }, (res) => {
      // Let the stream decode UTF-8: concatenating raw Buffer chunks corrupts
      // any multi-byte character that straddles a chunk boundary.
      res.setEncoding('utf8');
      let buf = '';
      res.on('data', (chunk) => { buf += chunk; });
      res.on('end', () => resolve({ status: res.statusCode, body: buf }));
      res.on('error', reject);
    });
    // Reject with the timeout error right after destroying the request, so it
    // settles the promise before the error that destroy() triggers.
    req.on('timeout', () => { req.destroy(); reject(new Error('Request timed out')); });
    req.on('error', reject);
    req.write(data);
    req.end();
  });
}
/**
 * Stops a spawned server process: SIGTERM first, then SIGKILL if the process
 * is still running once the grace period has elapsed.
 *
 * Whether the process has ended is read from `exitCode` / `signalCode`.
 * `server.killed` cannot be used for this: it flips to true as soon as a
 * signal has been sent, regardless of whether the process exited, which would
 * make the SIGKILL fallback unreachable.
 *
 * @param {Object|null|undefined} server child process handle; nothing happens when falsy
 * @param {number} [graceMs=2000] time to wait after SIGTERM before escalating
 * @param {number} [settleMs=500] time to wait at the end before returning
 * @returns {Promise<void>}
 */
async function stopServer(server, graceMs = 2000, settleMs = 500) {
  if (!server) return;

  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const hasExited = () => server.exitCode != null || server.signalCode != null;

  server.kill('SIGTERM');
  await pause(graceMs);

  if (!hasExited()) server.kill('SIGKILL');
  await pause(settleMs);
}
/**
 * Runs the same `$expand` request up to `iters` times and summarises the timings.
 *
 * Codes and status are taken from the first iteration only. A request timeout
 * is recorded as a sample, marks the run as timed out and stops further
 * iterations; any other failure is rethrown.
 *
 * @param {Object} body request body to POST
 * @param {string} label tag used in the log lines
 * @param {number} iters maximum number of iterations
 * @returns {Promise<{med: number, codes: *, status: *, timedOut: boolean}>}
 *   `med` is the sample at index floor(n / 2) of the sorted timings
 */
async function runMode(body, label, iters) {
  const samples = [];
  const summary = { codes: undefined, status: undefined, timedOut: false };

  for (let iter = 0; iter < iters && !summary.timedOut; iter += 1) {
    const isFirst = iter === 0;
    const startedAt = performance.now();
    try {
      const response = await postJson(BASE_URL + '/ValueSet/$expand', body);
      const took = performance.now() - startedAt;
      samples.push(took);
      log(` ${label} iter ${iter + 1}: ${took.toFixed(0)}ms, HTTP ${response.status}`);
      if (isFirst) {
        summary.codes = extractCodes(response.body);
        summary.status = response.status;
      }
    } catch (err) {
      const took = performance.now() - startedAt;
      // Only timeouts are expected here; everything else is a real failure.
      if (err.message !== 'Request timed out') throw err;
      samples.push(took);
      log(` ${label} iter ${iter + 1}: TIMEOUT after ${took.toFixed(0)}ms`);
      summary.timedOut = true;
      if (isFirst) {
        summary.codes = null;
        summary.status = 'TIMEOUT';
      }
    }
  }

  samples.sort((lhs, rhs) => lhs - rhs);
  const middle = Math.floor(samples.length / 2);
  return {
    med: samples[middle],
    codes: summary.codes,
    status: summary.status,
    timedOut: summary.timedOut,
  };
}
'full' : 'core'} mode)`); + log('3-way comparison: v0+expandForValueSet vs v0-baseline vs legacy in-memory\n'); + + const results = []; + let server; + + try { + // === Phase 1: SQLite v0 (optimized + bypassed) === + log('=== Phase 1: SQLite v0 provider ==='); + patchConfig(V0_CONFIG); + server = await startServer(serverDir); + log('v0 server ready.\n'); + await httpPost(`http://localhost:${PORT}/debug/perf-counters/enable`); + + for (let ti = 0; ti < testList.length; ti++) { + const test = testList[ti]; + const body = prepBody(test); + log(`[${ti+1}/${testList.length}] ${test.name}: ${test.desc}`); + + // v0 optimized + await httpPost(`http://localhost:${PORT}/debug/bypass-expand-for-valueset?bypass=false`); + const opt = await runMode(body, 'v0-opt', ITERS); + + // v0 bypassed + await httpPost(`http://localhost:${PORT}/debug/bypass-expand-for-valueset?bypass=true`); + const v0base = await runMode(body, 'v0-base', ITERS); + + results.push({ name: test.name, test, body, opt, v0base, legacy: null }); + log(''); + } + + await stopServer(server); + server = null; + + // === Phase 2: Legacy in-memory provider === + log('\n=== Phase 2: Legacy in-memory provider ==='); + patchConfig(LEGACY_CONFIG); + server = await startServer(serverDir); + log('Legacy server ready.\n'); + + for (let ti = 0; ti < testList.length; ti++) { + const r = results[ti]; + const body = r.body; + log(`[${ti+1}/${testList.length}] ${r.name}`); + + const legacy = await runMode(body, 'legacy', ITERS); + r.legacy = legacy; + log(''); + } + + await stopServer(server); + server = null; + + // === Phase 3: Drain verification for mismatched cases === + // For tests where page order differs, drain ALL codes and compare sets + const needsDrain = results.filter(r => { + if (!r.opt.codes || !r.legacy.codes) return false; + const cmp = codesEqual(r.opt.codes, r.legacy.codes); + return !cmp.match; + }); + + if (needsDrain.length > 0) { + log(`\n=== Phase 3: Drain verification (${needsDrain.length} tests) ===`); 
+ + // Drain from v0 optimized + patchConfig(V0_CONFIG); + server = await startServer(serverDir); + log('v0 server ready for drain.\n'); + await httpPost(`http://localhost:${PORT}/debug/bypass-expand-for-valueset?bypass=false`); + + for (const r of needsDrain) { + const drainCount = r.test.drainCount || 200000; + log(` Draining ${r.name} (up to ${drainCount})...`); + const drainBody = JSON.parse(JSON.stringify(r.body)); + drainBody.parameter = drainBody.parameter.filter(p => + p.name !== 'count' && p.name !== 'offset'); + drainBody.parameter.push({ name: 'count', valueInteger: drainCount }); + try { + const res = await postJson(BASE_URL + '/ValueSet/$expand', drainBody, 120000); + r.drainOptCodes = extractCodes(res.body) || []; + log(` v0-opt: ${r.drainOptCodes.length} codes (HTTP ${res.status})`); + } catch (e) { + log(` v0-opt: ERROR ${e.message}`); + r.drainOptCodes = null; + } + } + + await stopServer(server); + server = null; + + // Drain from legacy + patchConfig(LEGACY_CONFIG); + server = await startServer(serverDir); + log('Legacy server ready for drain.\n'); + + for (const r of needsDrain) { + const drainCount = r.test.drainCount || 200000; + log(` Draining ${r.name} (up to ${drainCount})...`); + const drainBody = JSON.parse(JSON.stringify(r.body)); + drainBody.parameter = drainBody.parameter.filter(p => + p.name !== 'count' && p.name !== 'offset'); + drainBody.parameter.push({ name: 'count', valueInteger: drainCount }); + try { + const res = await postJson(BASE_URL + '/ValueSet/$expand', drainBody, 120000); + r.drainLegacyCodes = extractCodes(res.body) || []; + log(` legacy: ${r.drainLegacyCodes.length} codes (HTTP ${res.status})`); + } catch (e) { + log(` legacy: ERROR ${e.message}`); + r.drainLegacyCodes = null; + } + } + + await stopServer(server); + server = null; + + // Compare drained sets + for (const r of needsDrain) { + if (!r.drainOptCodes || !r.drainLegacyCodes) { + r.drainResult = 'drain error'; + continue; + } + const optSet = new 
Set(r.drainOptCodes.map(c => c.code)); + const legSet = new Set(r.drainLegacyCodes.map(c => c.code)); + const onlyOpt = [...optSet].filter(c => !legSet.has(c)); + const onlyLeg = [...legSet].filter(c => !optSet.has(c)); + if (onlyOpt.length === 0 && onlyLeg.length === 0) { + r.drainResult = `sets equal (${optSet.size} codes)`; + log(` ${r.name}: ✅ ${r.drainResult}`); + } else { + r.drainResult = `sets differ (v0-only: ${onlyOpt.length}, legacy-only: ${onlyLeg.length})`; + log(` ${r.name}: ❌ ${r.drainResult}`); + if (onlyOpt.length > 0) log(` v0-only sample: ${onlyOpt.slice(0,5).join(', ')}`); + if (onlyLeg.length > 0) log(` legacy-only sample: ${onlyLeg.slice(0,5).join(', ')}`); + } + } + } + + // === Summary === + const lines = []; + lines.push('=== SNOMED CT 3-way expansion benchmark ==='); + lines.push(`Date: ${new Date().toISOString()}`); + lines.push(`Tests: ${testList.length} (${full ? 'full' : 'core'})`); + lines.push(''); + lines.push('Modes: v0-opt = SQLite v0 + expandForValueSet'); + lines.push(' v0-base = SQLite v0, expandForValueSet bypassed'); + lines.push(' legacy = upstream in-memory binary provider'); + lines.push(''); + lines.push('Test | v0-opt | v0-base | legacy | v0-opt vs legacy | Codes | Match'); + lines.push('------------------------------|----------|----------|----------|------------------|-------|------'); + + for (const r of results) { + const fmtMs = (v) => v.timedOut ? ' TIMEOUT' : `${v.med.toFixed(1)}`.padStart(8); + const optMs = fmtMs(r.opt); + const v0bMs = fmtMs(r.v0base); + const legMs = fmtMs(r.legacy); + + let vsLegacy = ''; + if (!r.opt.timedOut && !r.legacy.timedOut) { + const ratio = r.legacy.med / r.opt.med; + vsLegacy = ratio >= 1 + ? `${ratio.toFixed(1)}x faster`.padStart(16) + : `${(1/ratio).toFixed(1)}x slower`.padStart(16); + } else { + vsLegacy = ' N/A'; + } + + // Compare opt codes vs legacy codes + const cmp = (!r.opt.codes || !r.legacy.codes) + ? 
{ match: null, reason: 'N/A' } + : codesEqual(r.opt.codes, r.legacy.codes); + + let matchLabel; + if (cmp.match === true) { + matchLabel = `✅ ${cmp.reason}`; + } else if (r.drainResult && r.drainResult.startsWith('sets equal')) { + matchLabel = `✅ order differs, ${r.drainResult}`; + } else if (r.drainResult) { + matchLabel = `❌ ${r.drainResult}`; + } else if (cmp.match === false) { + matchLabel = `❌ ${cmp.reason}`; + } else { + matchLabel = `⚠️ ${cmp.reason}`; + } + const codeCount = r.opt.codes?.length ?? '?'; + + lines.push( + `${r.name.padEnd(30)}| ${optMs} | ${v0bMs} | ${legMs} | ${vsLegacy} | ${String(codeCount).padStart(5)} | ${matchLabel}` + ); + } + + console.log('\n' + lines.join('\n')); + + const outPath = path.join(serverDir, 'test-snomed-expand-results.txt'); + fs.writeFileSync(outPath, lines.join('\n') + '\n'); + log(`\nResults written to ${outPath}`); + + } finally { + restoreConfig(); + await stopServer(server); + } +} + +main().catch(err => { + console.error(err); + restoreConfig(); + process.exit(1); +}); diff --git a/server.js b/server.js index 14cb176..180c894 100644 --- a/server.js +++ b/server.js @@ -502,6 +502,25 @@ app.get('/health', async (req, res) => { res.json(healthStatus); }); +// Debug/diagnostics endpoints — counters are no-op by default (must POST /enable first). +// Safe to expose: no sensitive data, useful for production perf diagnosis. 
+app.get('/debug/perf-counters', (req, res) => { + const perfCounters = require('./tx/perf-counters'); + res.json(perfCounters.snapshot()); +}); + +app.post('/debug/perf-counters/reset', (req, res) => { + const perfCounters = require('./tx/perf-counters'); + perfCounters.reset(); + res.json({ ok: true }); +}); + +app.post('/debug/perf-counters/enable', (req, res) => { + const perfCounters = require('./tx/perf-counters'); + perfCounters.enable(); + res.json({ ok: true }); +}); + /** * Get log directory statistics: file count, total size, and disk space info * @returns {string} HTML table row(s) with log stats diff --git a/tx/cs/cs-api.js b/tx/cs/cs-api.js index 2448bad..833f663 100644 --- a/tx/cs/cs-api.js +++ b/tx/cs/cs-api.js @@ -506,8 +506,12 @@ class CodeSystemProvider { return false; } + /** + * @return true if the cs provider handles offset/count (paging) within its filter pipeline. + * If true, offset and count are passed to getPrepContext. + */ handlesOffset() { - + return false; } /** * gets a single context in which filters will be evaluated. The server doesn't doesn't make use of this context; @@ -597,6 +601,16 @@ class CodeSystemProvider { */ async filterExcludeConcepts(filterContext, code) { throw new Error("Must override"); } // well, only if any filters are actually supported + /** + * Inform the CS provider about explicitly included concept codes from the value set compose. + * This allows the provider to include them in the same SQL query as filters, enabling + * correct offset/count handling and batch designation pre-fetch. + * + * @param {FilterExecutionContext} filterContext filtering context + * @param {String[]} codes list of codes to include + */ + async includeConcepts(filterContext, codes) { /* no-op by default */ } + /** * called once all the filters have been handled, and iteration is about to happen. * this function returns one more filters. 
If there were multiple filters, but only diff --git a/tx/cs/cs-sqlite-expression-adapter.js b/tx/cs/cs-sqlite-expression-adapter.js new file mode 100644 index 0000000..98ced25 --- /dev/null +++ b/tx/cs/cs-sqlite-expression-adapter.js @@ -0,0 +1,365 @@ +'use strict'; + +/** + * Thin adapter that implements the binary SNOMED structures interface + * backed by a v0 SQLite database, enabling SnomedExpressionParser and + * SnomedExpressionServices to work without loading the .cache file. + * + * Uses concept_id (integer PK) as the "reference" in place of the + * byte-offset indexes used by the binary format. + */ + +const { + SnomedExpressionParser, + SnomedExpressionServices, + SnomedExpressionContext, + SnomedExpression, + SnomedConcept, + NO_REFERENCE, + SnomedServicesRenderOption +} = require('../sct/expressions'); + +// ── Concepts adapter ──────────────────────────────────────────────── +class SqliteConceptsAdapter { + constructor(syncDb, csId) { + this.db = syncDb; + this.csId = csId; + this._stmts = {}; + } + + _stmt(key, sql) { + if (!this._stmts[key]) this._stmts[key] = this.db.prepare(sql); + return this._stmts[key]; + } + + /** Find concept by SCTID (string or BigInt). Returns { found, index: concept_id } */ + findConcept(identity) { + const code = typeof identity === 'bigint' ? identity.toString() : String(identity); + const row = this._stmt('find', `SELECT concept_id FROM concept WHERE cs_id = ? AND code = ?`).get(this.csId, code); + return row ? { found: true, index: row.concept_id } : { found: false, index: 0 }; + } + + /** Get concept object by concept_id (our "reference"). 
*/ + getConcept(conceptId) { + const row = this._stmt('get', `SELECT concept_id, code, display, active FROM concept WHERE concept_id = ?`).get(conceptId); + if (!row) throw new Error(`Concept reference ${conceptId} not found`); + return { + identity: BigInt(row.code), + // flags bit 0 = primitive (assume primitive for all — expression services only + // use this for normalisation which we don't need for basic validation/rendering) + flags: 1, + outbounds: conceptId, // pass-through; refs adapter intercepts + inbounds: conceptId, + parents: conceptId, + descriptions: conceptId + }; + } + + /** Get closure descendants ref — returns the negated conceptId as the key for RefsAdapter. */ + getAllDesc(conceptId) { + return -conceptId; // negative sentinel so RefsAdapter knows to query closure + } + + /** Normal form not available from SQLite — expression services can skip. */ + getNormalForm(_conceptId) { + return 0; + } + + count() { + const row = this._stmt('count', `SELECT COUNT(*) as cnt FROM concept WHERE cs_id = ?`).get(this.csId); + return row ? row.cnt : 0; + } +} + +// ── Relationships adapter ─────────────────────────────────────────── +class SqliteRelationshipsAdapter { + constructor(syncDb, csId) { + this.db = syncDb; + this.csId = csId; + this._stmts = {}; + } + + _stmt(key, sql) { + if (!this._stmts[key]) this._stmts[key] = this.db.prepare(sql); + return this._stmts[key]; + } + + /** Get relationship by edge_id. */ + getRelationship(edgeId) { + const row = this._stmt('get', ` + SELECT cl.source_concept_id, cl.target_concept_id, cl.group_id, cl.active, + pd.property_code, + (SELECT concept_id FROM concept WHERE cs_id = ? AND code = pd.property_code) as rel_type_concept_id + FROM concept_link cl + JOIN property_def pd ON cl.property_id = pd.property_id + WHERE cl.edge_id = ? 
+ `).get(this.csId, edgeId); + if (!row) throw new Error(`Relationship ${edgeId} not found`); + return { + source: row.source_concept_id, + target: row.target_concept_id, + relType: row.rel_type_concept_id, + group: row.group_id, + active: row.active === 1, + defining: true // v0 schema only stores defining relationships + }; + } +} + +// ── References adapter ────────────────────────────────────────────── +class SqliteRefsAdapter { + constructor(syncDb, csId, isAConceptId) { + this.db = syncDb; + this.csId = csId; + this.isAConceptId = isAConceptId; + this._stmts = {}; + } + + _stmt(key, sql) { + if (!this._stmts[key]) this._stmts[key] = this.db.prepare(sql); + return this._stmts[key]; + } + + /** + * getReferences is called with different "index" values: + * - concept.outbounds (conceptId) → return edge_ids for outbound relationships + * - concept.inbounds (conceptId) → return edge_ids for inbound relationships + * - concept.parents (conceptId) → return parent concept_ids + * - concept.descriptions (conceptId) → return designation pseudo-indexes + * - getAllDesc result (negative conceptId) → return descendant concept_ids from closure + * + * Since we use the same conceptId for all, we need the calling context. + * The expression services call patterns are predictable enough that we + * handle this via the _contextHint field set by the caller before the call. + */ + getReferences(index) { + // Negative sentinel = closure descendants query + if (index < 0) { + const ancestorId = -index; + const rows = this._stmt('closure', `SELECT descendant_id FROM closure WHERE ancestor_id = ?`).all(ancestorId); + return rows.map(r => r.descendant_id); + } + + // For positive indexes, we need to figure out what's being asked. 
+ // The expression services code always calls in this pattern: + // concept.outbounds → getReferences → iterate → getRelationship + // concept.parents → getReferences → iterate (concept_ids) + // concept.descriptions → getReferences → getDescription + // concept.inbounds → getReferences → iterate → getRelationship + // Since we set all to conceptId, we use the _contextHint set by callers. + const conceptId = index; + switch (this._contextHint) { + case 'outbounds': + return this._getOutboundEdgeIds(conceptId); + case 'inbounds': + return this._getInboundEdgeIds(conceptId); + case 'parents': + return this._getParentConceptIds(conceptId); + case 'descriptions': + return this._getDescriptionIndexes(conceptId); + default: + // Default: try outbounds (most common usage in expression services) + return this._getOutboundEdgeIds(conceptId); + } + } + + _getOutboundEdgeIds(conceptId) { + const rows = this._stmt('outEdges', ` + SELECT edge_id FROM concept_link WHERE source_concept_id = ? AND active = 1 + `).all(conceptId); + return rows.map(r => r.edge_id); + } + + _getInboundEdgeIds(conceptId) { + const rows = this._stmt('inEdges', ` + SELECT edge_id FROM concept_link WHERE target_concept_id = ? AND active = 1 + `).all(conceptId); + return rows.map(r => r.edge_id); + } + + _getParentConceptIds(conceptId) { + const isAPropId = this._getIsAPropId(); + if (!isAPropId) return []; + const rows = this._stmt('parents', ` + SELECT target_concept_id FROM concept_link + WHERE source_concept_id = ? AND property_id = ? AND active = 1 + `).all(conceptId, isAPropId); + return rows.map(r => r.target_concept_id); + } + + _getDescriptionIndexes(conceptId) { + const rows = this._stmt('desigIds', ` + SELECT designation_id FROM designation WHERE concept_id = ? 
AND active = 1 + ORDER BY preferred DESC + `).all(conceptId); + return rows.map(r => r.designation_id); + } + + _getIsAPropId() { + if (this._isAPropId !== undefined) return this._isAPropId; + const row = this._stmt('isAProp', ` + SELECT property_id FROM property_def WHERE cs_id = ? AND is_hierarchy = 1 + `).get(this.csId); + this._isAPropId = row ? row.property_id : null; + return this._isAPropId; + } +} + +// ── Descriptions adapter ──────────────────────────────────────────── +class SqliteDescriptionsAdapter { + constructor(syncDb) { + this.db = syncDb; + this._stmts = {}; + } + + _stmt(key, sql) { + if (!this._stmts[key]) this._stmts[key] = this.db.prepare(sql); + return this._stmts[key]; + } + + /** Get designation by designation_id. */ + getDescription(designationId) { + const row = this._stmt('get', ` + SELECT designation_id, active, language_code, term, preferred FROM designation WHERE designation_id = ? + `).get(designationId); + if (!row) return { active: false, lang: 0, iDesc: 0 }; + return { + active: row.active === 1, + lang: row.language_code === 'en' ? 1 : 0, + iDesc: designationId, // pass-through for strings adapter + _term: row.term, // direct access shortcut + _preferred: row.preferred + }; + } + + count() { + const row = this.db.prepare(`SELECT COUNT(*) as cnt FROM designation`).get(); + return row ? row.cnt : 0; + } +} + +// ── Strings adapter ───────────────────────────────────────────────── +class SqliteStringsAdapter { + constructor(syncDb) { + this.db = syncDb; + this._stmts = {}; + } + + _stmt(key, sql) { + if (!this._stmts[key]) this._stmts[key] = this.db.prepare(sql); + return this._stmts[key]; + } + + /** Get term text by designation_id (since iDesc = designation_id). */ + getEntry(designationId) { + if (!designationId) return ''; + const row = this._stmt('get', `SELECT term FROM designation WHERE designation_id = ?`).get(designationId); + return row ? 
row.term : ''; + } + + get length() { + return 1; // Non-zero to pass existence checks + } +} + +// ── Patched ExpressionServices that uses context hints ─────────────── + +/** + * Subclass of SnomedExpressionServices that sets _contextHint on the refs + * adapter before calling methods that read concept.outbounds/parents/etc. + */ +class SqliteExpressionServices extends SnomedExpressionServices { + constructor(structures, isAConceptId) { + super(structures, isAConceptId); + } + + getDefiningRelationships(conceptIndex) { + this.refs._contextHint = 'outbounds'; + try { return super.getDefiningRelationships(conceptIndex); } + finally { this.refs._contextHint = null; } + } + + getConceptParents(reference) { + this.refs._contextHint = 'parents'; + try { return super.getConceptParents(reference); } + finally { this.refs._contextHint = null; } + } + + getConceptChildren(reference) { + this.refs._contextHint = 'inbounds'; + try { return super.getConceptChildren(reference); } + finally { this.refs._contextHint = null; } + } + + listDisplayNames(conceptIndex, languageFilter = 0) { + this.refs._contextHint = 'descriptions'; + try { return super.listDisplayNames(conceptIndex, languageFilter); } + finally { this.refs._contextHint = null; } + } + + /** Override subsumes to use closure table directly. */ + subsumes(a, b) { + if (a === b) return true; + const closureRef = this.concepts.getAllDesc(a); + const descendants = this.refs.getReferences(closureRef); + return descendants.includes(b); + } +} + +// ── Factory function ──────────────────────────────────────────────── + +/** + * Create expression services backed by a v0 SQLite database. 
+ * @param {object} syncDb - better-sqlite3 database connection + * @param {number} csId - code_system cs_id in the v0 database + * @returns {{ expressionServices: SqliteExpressionServices, parser: SnomedExpressionParser }} + */ +function createSqliteExpressionServices(syncDb, csId) { + // Find the is-a property concept_id + const isAPropRow = syncDb.prepare( + `SELECT property_code FROM property_def WHERE cs_id = ? AND is_hierarchy = 1` + ).get(csId); + + let isAConceptId = NO_REFERENCE; + if (isAPropRow) { + const conceptRow = syncDb.prepare( + `SELECT concept_id FROM concept WHERE cs_id = ? AND code = ?` + ).get(csId, isAPropRow.property_code); + if (conceptRow) isAConceptId = conceptRow.concept_id; + } + + const concepts = new SqliteConceptsAdapter(syncDb, csId); + const relationships = new SqliteRelationshipsAdapter(syncDb, csId); + const refs = new SqliteRefsAdapter(syncDb, csId, isAConceptId); + const descriptions = new SqliteDescriptionsAdapter(syncDb); + const strings = new SqliteStringsAdapter(syncDb); + + const structures = { + concepts, + relationships, + refs, + descriptions, + strings, + // Unused by expression services but required by constructor: + words: null, + stems: null, + descriptionIndex: null, + refSetMembers: null, + refSetIndex: null + }; + + const expressionServices = new SqliteExpressionServices(structures, isAConceptId); + const parser = new SnomedExpressionParser(concepts); + + return { expressionServices, parser }; +} + +module.exports = { + createSqliteExpressionServices, + SqliteExpressionServices, + SqliteConceptsAdapter, + SqliteRelationshipsAdapter, + SqliteRefsAdapter, + SqliteDescriptionsAdapter, + SqliteStringsAdapter +}; diff --git a/tx/cs/cs-sqlite-runtime-v0.js b/tx/cs/cs-sqlite-runtime-v0.js new file mode 100644 index 0000000..b903c54 --- /dev/null +++ b/tx/cs/cs-sqlite-runtime-v0.js @@ -0,0 +1,3404 @@ +'use strict'; + +/** + * Generic SQLite v0 terminology provider. 
+ * + * This module provides a generic code system provider backed by a normalized + * SQLite schema (concept / concept_property / concept_relationship / etc.). + * Any terminology can be loaded through the v0 schema — RxNorm, LOINC, SNOMED, + * or custom code systems — using the corresponding v0 importer. + * + * ## Architecture: Factory and Provider + * + * FHIRsmith separates long-lived factories from per-request providers: + * + * - **SqliteRuntimeV0FactoryProvider** (extends CodeSystemFactoryProvider): + * Loaded once at startup per database file. Stays in memory to answer + * runtime queries like `buildKnownValueSet()` (synthesizing ValueSet + * resources from URLs like `http://loinc.org/vs/LP1234`). Creates + * per-request provider instances via `build()`. + * + * - **SqliteRuntimeV0Provider** (extends CodeSystemProvider): + * Created fresh for each request. Handles terminology operations: + * locate, filter, executeFilters, subsumesTest, etc. + * + * ## Specialization system + * + * Some terminologies need behavior beyond what the generic provider offers. + * For example, SNOMED needs post-coordinated expression support and LOINC + * needs implicit value set generation from URL patterns. Rather than + * hardcoding these in the base, specializations register themselves. + * + * **How it works:** + * + * 1. Specialization modules (e.g. `cs-sqlite-snomed-v0.js`) define a factory + * subclass and call `registerSpecialization()` at require-time: + * + * SqliteRuntimeV0FactoryProvider.registerSpecialization({ + * id: 'snomed-expressions', + * systemPrefix: 'http://snomed.info/sct', + * tags: ['snomed'], + * FactoryClass: SnomedSqliteV0Factory + * }); + * + * 2. At startup, `createFromMetadata()` probes the database's metadata + * (canonical URI and behaviorFlags.tags) and matches against the registry. + * The first matching entry (by priority) determines the factory class. + * + * 3. 
The specialized factory controls everything — it can override + * `buildKnownValueSet()` for factory-level behavior, and/or override + * `build()` to return a specialized provider class. + * + * **Matching rules:** + * + * - `systemPrefix`: matched against the db's canonical URI (prefix match) + * - `tags`: all listed tags must be present in the db's `behaviorFlags.tags` + * - Both conditions must be satisfied (when specified) + * - Higher `priority` wins when multiple entries match + * + * **Discovery:** Specialization modules are auto-discovered at require-time + * by scanning for `cs-sqlite-*.js` files in this directory. Each file is + * required, and if it calls `registerSpecialization()`, the entry is added + * to the registry. No explicit enumeration of subclass files is needed. + * + * ## YAML configuration + * + * Databases are loaded via the `sqlite-v0:` source type in YAML config: + * + * sources: + * - sqlite-v0:rxnorm.v0.db # auto-match (default) + * - sqlite-v0:loinc.v0.db?specialization=none # force generic base + * - sqlite-v0:snomed.v0.db?specialization=snomed-expressions # force specific + * + * The `specialization` query parameter overrides automatic matching: + * + * - Omitted: automatic tag/URL matching from the registry (the default) + * - `none`: skip the registry entirely, use the generic base factory + * - `<id>`: select a specific registered specialization by its id + * + * This lets operators control behavior without rebuilding databases. For + * example, to debug whether a specialization is causing issues, temporarily + * add `?specialization=none` to isolate the generic behavior. 
+ */ + +const sqlite3 = require('sqlite3').verbose(); +let BetterSqlite3; +try { BetterSqlite3 = require('better-sqlite3'); } catch (_) { BetterSqlite3 = null; } +const { CodeSystem } = require('../library/codesystem'); +const { CodeSystemProvider, CodeSystemFactoryProvider, FilterExecutionContext } = require('./cs-api'); + +// Specialization registry — populated by subclass modules at require-time. +const V0_SPECIALIZATION_REGISTRY = []; + +class SqliteRuntimeV0Context { + constructor(conceptId, code, display, definition, active) { + this.conceptId = conceptId; + this.code = code; + this.display = display; + this.definition = definition; + this.active = active; + } +} + +class SqliteRuntimeV0Iterator { + constructor(codes) { + this.codes = codes || []; + this.cursor = 0; + } +} + +class SqliteRuntimeV0QueryIterator { + constructor(mode, options = {}) { + this.mode = mode; + this.pageSize = Number(options.pageSize) > 0 ? Number(options.pageSize) : 512; + this.targetConceptId = options.targetConceptId || null; + this.rows = []; + this.cursor = 0; + this.lastCode = null; + this.done = false; + } +} + +class SqliteRuntimeV0FilterSet { + constructor(name, codes, closed = true) { + this.name = name; + this.summary = name; + this.codes = codes || []; + this.cursor = -1; + this.closed = closed; + this._set = null; + } + + has(code) { + if (!this._set) { + this._set = new Set(this.codes); + } + return this._set.has(code); + } +} + +class SqliteRuntimeV0PredicateFilter { + constructor(name, kind, details = {}, closed = true) { + this.name = name; + this.summary = name; + this.kind = kind; + this.closed = closed; + this.cursor = -1; + Object.assign(this, details || {}); + } +} + +class SqliteRuntimeV0PagedDescendantFilter { + constructor(name, ancestorId, includeSelf, pageSize = 512) { + this.name = name; + this.summary = name; + this.ancestorId = ancestorId; + this.includeSelf = includeSelf; + this.pageSize = pageSize; + this.closed = true; + this.cursor = -1; + this.rows = 
[]; + this.done = false; + this.lastCode = null; + this.strategy = null; + this.descendantCount = null; + } +} + +class SqliteRuntimeV0Provider extends CodeSystemProvider { + constructor(opContext, supplements, db, metadata, runtime, options = {}) { + super(opContext, supplements); + this.db = db; + this.meta = metadata; + this.runtime = runtime || {}; + this.propertyDefs = new Map(); + this.sharedState = options.sharedState || null; + this.statusCache = null; + this.ownsDb = options.ownsDb === true; + this.dbPath = options.dbPath || null; + this._syncDb = null; + this.defaultIterationRegex = null; + const regexSource = this.runtime?.iteration?.defaultCodeRegex; + if (regexSource) { + try { + this.defaultIterationRegex = new RegExp(String(regexSource)); + } catch (_error) { + this.defaultIterationRegex = null; + } + } + } + + close() { + if (!this.db || !this.ownsDb) return; + this.statusCache = null; + this.db.close(); + this.db = null; + } + + handlesExcludes() { + return !!this.#getSyncDb(); + } + + handlesOffset() { + return !!this.#getSyncDb(); + } + + // --- SQL query building for filter pipeline --- + + #getSyncDb() { + if (this._syncDb) return this._syncDb; + if (!BetterSqlite3 || !this.dbPath) return null; + this._syncDb = new BetterSqlite3(this.dbPath, { readonly: true }); + // REGEXP with effort-based breaker: cache compiled regex, limit evaluations + const regexpCache = new Map(); + const REGEXP_EFFORT_LIMIT = 500000; + let regexpCalls = 0; + this._syncDb.function('regexp', (pattern, value) => { + if (++regexpCalls > REGEXP_EFFORT_LIMIT) { + throw new Error(`REGEXP effort limit exceeded (${REGEXP_EFFORT_LIMIT} evaluations)`); + } + let re = regexpCache.get(pattern); + if (!re) { + re = new RegExp(pattern); + regexpCache.set(pattern, re); + } + return re.test(value) ? 
1 : 0; + }); + // Reset counter before each statement execution via a helper + this._syncDb._resetRegexpEffort = () => { regexpCalls = 0; regexpCache.clear(); }; + return this._syncDb; + } + + /** Protected accessor for subclasses that need the sync db. */ + _getOrCreateSyncDb() { + return this.#getSyncDb(); + } + + /** + * Build a SQL condition for a single filter {property, op, value}. + * Returns { sql, params, joins } or null if unsupported. + * @param {string} alias - concept table alias (e.g. 'c' or 'c2') + */ + #buildV0FilterSql(filter, paramPrefix, alias = 'c') { + const { property, op, value } = filter; + const csId = this.meta.csId; + const params = {}; + + if (property === 'concept') { + if (op === '=') { + params[`${paramPrefix}_code`] = value; + return { + sql: ` AND ${alias}.code = @${paramPrefix}_code`, + params, + joins: '', + }; + } + + if (op === 'is-a' || op === 'descendent-of') { + const includeSelf = op === 'is-a' + ? (this.runtime?.filters?.concept?.isAIncludesSelf !== false) + : false; + // Use closure table for hierarchy + params[`${paramPrefix}_anc_code`] = value; + params[`${paramPrefix}_cs`] = csId; + const selfClause = includeSelf ? 
'' : ` AND cl_${paramPrefix}.descendant_id != cl_${paramPrefix}.ancestor_id`; + return { + sql: selfClause, + params, + joins: ` JOIN closure cl_${paramPrefix} ON cl_${paramPrefix}.descendant_id = ${alias}.concept_id` + + ` AND cl_${paramPrefix}.ancestor_id = (SELECT concept_id FROM concept WHERE code = @${paramPrefix}_anc_code AND cs_id = @${paramPrefix}_cs)`, + }; + } + + if (op === 'in') { + const url = resolveInValueSetUrl(this.system(), value, this.runtime); + params[`${paramPrefix}_vs_url`] = url; + params[`${paramPrefix}_cs`] = csId; + return { + sql: '', + params, + joins: ` JOIN value_set_member vsm_${paramPrefix} ON vsm_${paramPrefix}.concept_id = ${alias}.concept_id AND vsm_${paramPrefix}.active = 1` + + ` JOIN value_set vs_${paramPrefix} ON vs_${paramPrefix}.vs_id = vsm_${paramPrefix}.vs_id AND vs_${paramPrefix}.cs_id = @${paramPrefix}_cs AND vs_${paramPrefix}.url = @${paramPrefix}_vs_url`, + }; + } + + return null; // Unsupported concept operator + } + + if (property === 'code' && op === 'regex') { + params[`${paramPrefix}_re`] = value; + return { + sql: ` AND ${alias}.code REGEXP @${paramPrefix}_re`, + params, + joins: '', + }; + } + + // Property filter: resolve via property_def → concept_link or concept_literal + const syncDb = this.#getSyncDb(); + if (!syncDb) return null; + + const propDef = syncDb.prepare( + 'SELECT property_id, value_kind FROM property_def WHERE cs_id = ? AND property_code = ? LIMIT 1' + ).get(csId, property); + if (!propDef) return null; + + if (propDef.value_kind === 'concept') { + if (op === '=' || op === 'in') { + const values = op === 'in' ? 
splitFilterValueList(value) : [value]; + params[`${paramPrefix}_prop`] = propDef.property_id; + params[`${paramPrefix}_val_cs`] = csId; + params[`${paramPrefix}_eset`] = this.meta.hierarchyEdgeSetId || 1; + const placeholders = values.map((v, j) => { + params[`${paramPrefix}_vc${j}`] = v; + return `@${paramPrefix}_vc${j}`; + }).join(','); + return { + sql: '', + params, + joins: ` JOIN concept_link lnk_${paramPrefix}` + + ` ON lnk_${paramPrefix}.source_concept_id = ${alias}.concept_id` + + ` AND lnk_${paramPrefix}.property_id = @${paramPrefix}_prop` + + ` AND lnk_${paramPrefix}.edge_set_id = @${paramPrefix}_eset` + + ` AND lnk_${paramPrefix}.active = 1` + + ` AND lnk_${paramPrefix}.target_concept_id IN (SELECT concept_id FROM concept WHERE code IN (${placeholders}) AND cs_id = @${paramPrefix}_val_cs)`, + }; + } + return null; + } + + if (propDef.value_kind === 'string' || propDef.value_kind === 'literal') { + if (op === '=' || op === 'in') { + const values = op === 'in' ? splitFilterValueList(value) : [value]; + params[`${paramPrefix}_prop`] = propDef.property_id; + const placeholders = values.map((v, j) => { + params[`${paramPrefix}_vl${j}`] = v; + return `@${paramPrefix}_vl${j}`; + }).join(','); + return { + sql: '', + params, + joins: ` JOIN concept_literal lit_${paramPrefix}` + + ` ON lit_${paramPrefix}.source_concept_id = ${alias}.concept_id` + + ` AND lit_${paramPrefix}.property_id = @${paramPrefix}_prop` + + ` AND lit_${paramPrefix}.active = 1` + + ` AND lit_${paramPrefix}.value_text COLLATE NOCASE IN (${placeholders})`, + }; + } + return null; + } + + return null; // Unsupported property type + } + + /** + * Build a NOT EXISTS clause for exclude filters by reusing #buildV0FilterSql. + * Each filter is resolved via the same path as includes (concept hierarchy, + * property_def lookup, etc.) — no duplicated logic. + * Returns SQL string or null if any filter is unsupported. 
+ */ + #buildExcludeClause(filters, paramPrefix, allParams) { + const alias = `c_${paramPrefix}`; + const parts = []; + for (let fi = 0; fi < filters.length; fi++) { + const result = this.#buildV0FilterSql(filters[fi], `${paramPrefix}f${fi}`, alias); + if (!result) return null; + parts.push(result); + } + if (parts.length === 0) return null; + const joins = parts.map(p => p.joins).join(''); + const where = parts.map(p => p.sql).join(''); + for (const p of parts) Object.assign(allParams, p.params); + allParams[`${paramPrefix}_csId`] = this.meta.csId; + return `NOT EXISTS (SELECT 1 FROM concept ${alias}${joins}` + + ` WHERE ${alias}.cs_id = @${paramPrefix}_csId` + + ` AND ${alias}.concept_id = t.concept_id${where})`; + } + + system() { + return this.meta.baseUri || this.meta.canonicalUri || ''; + } + + version() { + const outputMode = this.runtime?.versioning?.output || 'canonical'; + if (outputMode === 'version') { + return this.meta.version || this.meta.canonicalUri || null; + } + return this.meta.canonicalUri || this.meta.version || null; + } + + name() { + return this.meta.name || this.system(); + } + + description() { + return `${this.name()} (${this.meta.version || 'unknown version'})`; + } + + async totalCount() { + return this.meta.totalConcepts || 0; + } + + hasParents() { + return !!this.meta.hierarchyPropertyId; + } + + defLang() { + return this.runtime.languages?.default || this.meta.defaultLanguage || 'en'; + } + + versionAlgorithm() { + return this.runtime.versioning?.algorithm || 'string'; + } + + versionIsMoreDetailed(checkVersion, actualVersion) { + if (!checkVersion || !actualVersion) return false; + + const partialMatch = this.runtime.versioning?.partialMatch !== false; + if (!partialMatch) { + return checkVersion === actualVersion; + } + + return actualVersion.startsWith(checkVersion); + } + + async code(context) { + const ctxt = await this._ensureContext(context); + return ctxt ? 
ctxt.code : null; + } + + async display(context) { + const ctxt = await this._ensureContext(context); + if (!ctxt) return null; + + const supplementDisplay = this._displayFromSupplements(ctxt.code); + if (supplementDisplay) { + return supplementDisplay; + } + + return ctxt.display || ctxt.code; + } + + async definition(context) { + const ctxt = await this._ensureContext(context); + return ctxt ? ctxt.definition : null; + } + + async isAbstract(context) { + await this._ensureContext(context); + const abstractCfg = this.runtime.status?.abstract; + if (abstractCfg?.source === 'constant') { + return !!abstractCfg.value; + } + return false; + } + + async isInactive(context) { + const ctxt = await this._ensureContext(context); + if (!ctxt) return false; + const inactiveCfg = this.runtime.status?.inactive; + if (inactiveCfg?.source === 'concept.active') { + return inactiveCfg.invert === true ? !ctxt.active : !!ctxt.active; + } + return !ctxt.active; + } + + async isDeprecated(context) { + await this._ensureContext(context); + const depCfg = this.runtime.status?.deprecated; + if (depCfg?.source === 'constant') { + return !!depCfg.value; + } + return false; + } + + async getStatus(context) { + const ctxt = await this._ensureContext(context); + if (!ctxt) return null; + + const statusValue = await this.#statusValueForConcept(ctxt.conceptId); + if (statusValue) { + return statusValue; + } + + return ctxt.active ? 
'active' : 'inactive'; + } + + async #statusValueForConcept(conceptId) { + const statusPropertyCode = this.runtime?.status?.statusProperty; + if (!statusPropertyCode) { + return null; + } + + const propDef = await this.#resolvePropertyDef(statusPropertyCode); + if (!propDef?.property_id) { + return null; + } + + await this.#ensureStatusCache(propDef.property_id); + return this.statusCache?.values?.get(conceptId) || null; + } + + async #ensureStatusCache(propertyId) { + if (this.statusCache?.propertyId === propertyId && this.statusCache.values instanceof Map) { + return; + } + + if (this.sharedState && this.sharedState.statusByPropertyId instanceof Map) { + const existing = this.sharedState.statusByPropertyId.get(propertyId); + if (existing instanceof Map) { + this.statusCache = { propertyId, values: existing }; + return; + } + + if (!(this.sharedState.statusLoadPromises instanceof Map)) { + this.sharedState.statusLoadPromises = new Map(); + } + + let promise = this.sharedState.statusLoadPromises.get(propertyId); + if (!promise) { + promise = this.#loadStatusMap(propertyId) + .then((values) => { + this.sharedState.statusByPropertyId.set(propertyId, values); + this.sharedState.statusLoadPromises.delete(propertyId); + return values; + }) + .catch((error) => { + this.sharedState.statusLoadPromises.delete(propertyId); + throw error; + }); + this.sharedState.statusLoadPromises.set(propertyId, promise); + } + + const values = await promise; + this.statusCache = { propertyId, values }; + return; + } + + const values = await this.#loadStatusMap(propertyId); + this.statusCache = { propertyId, values }; + } + + async #loadStatusMap(propertyId) { + const rows = await all( + this.db, + `SELECT source_concept_id, + COALESCE(value_text, value_raw) AS value + FROM concept_literal + WHERE property_id = ? 
+ AND active = 1 + AND COALESCE(value_text, value_raw) IS NOT NULL + ORDER BY source_concept_id, literal_id`, + [propertyId] + ); + + const values = new Map(); + for (const row of rows) { + if (!values.has(row.source_concept_id) && row.value != null) { + values.set(row.source_concept_id, row.value); + } + } + return values; + } + + async designations(context, displays) { + const ctxt = await this._ensureContext(context); + if (!ctxt) return; + + // Keep legacy behavior where a primary display is always available as a designation. + displays.addDesignation( + true, + ctxt.active ? 'active' : 'inactive', + this.defLang(), + CodeSystem.makeUseForDisplay(), + ctxt.display || ctxt.code + ); + + // Use batch-prefetched designations when available (from executeFilters), + // otherwise fall back to per-code DB query. + let rows; + if (ctxt._v0Designations) { + rows = ctxt._v0Designations; + } else { + const designationTableRef = this.meta?.designationOrderIndex + ? 'designation INDEXED BY idx_designation_concept_pref_term' + : 'designation'; + rows = await all( + this.db, + `SELECT language_code, use_code, term, preferred, active + FROM ${designationTableRef} + WHERE concept_id = ? + ORDER BY preferred DESC, term`, + [ctxt.conceptId] + ); + } + + for (const row of rows) { + displays.addDesignation( + row.preferred === 1, + row.active === 1 ? 'active' : 'inactive', + row.language_code || this.defLang(), + useFromDesignation(row, this.runtime, this.system()), + row.term + ); + } + + this._listSupplementDesignations(ctxt.code, displays); + } + + async properties(context) { + const ctxt = await this._ensureContext(context); + if (!ctxt) return []; + + const props = []; + props.push({ code: 'inactive', valueBoolean: !ctxt.active }); + + if (!this.meta.hierarchyPropertyId) { + return props; + } + + const parentRows = await all( + this.db, + `SELECT p.code AS target_code + FROM concept_link l + JOIN concept p ON p.concept_id = l.target_concept_id + WHERE l.source_concept_id = ? 
+ AND l.property_id = ? + AND l.edge_set_id = ? + AND l.active = 1`, + [ctxt.conceptId, this.meta.hierarchyPropertyId, this.meta.hierarchyEdgeSetId] + ); + + const parentPropCode = this.runtime.hierarchy?.parentPropertyCode || 'parent'; + for (const row of parentRows) { + props.push({ code: parentPropCode, valueCode: row.target_code }); + } + + return props; + } + + async locate(code) { + if (!code) { + return { context: null, message: 'Empty code' }; + } + + const row = await get( + this.db, + `SELECT concept_id, code, display, definition, active + FROM concept + WHERE cs_id = ? AND code = ?`, + [this.meta.csId, code] + ); + + if (!row) { + return { context: null, message: undefined }; + } + + return { + context: new SqliteRuntimeV0Context(row.concept_id, row.code, row.display, row.definition, row.active === 1), + message: null + }; + } + + async locateMany(codes, allAltCodes = false) { + void allAltCodes; + const out = new Map(); + if (!Array.isArray(codes) || codes.length === 0) { + return out; + } + + const normalized = []; + const seen = new Set(); + for (const raw of codes) { + const code = String(raw || ''); + if (!code || seen.has(code)) { + continue; + } + seen.add(code); + normalized.push(code); + } + if (normalized.length === 0) { + return out; + } + + const batchSize = 500; + for (let i = 0; i < normalized.length; i += batchSize) { + const batch = normalized.slice(i, i + batchSize); + const placeholders = batch.map(() => '?').join(', '); + const rows = await all( + this.db, + `SELECT concept_id, code, display, definition, active + FROM concept + WHERE cs_id = ? + AND code IN (${placeholders})`, + [this.meta.csId, ...batch] + ); + + for (const row of rows) { + out.set(row.code, { + context: new SqliteRuntimeV0Context( + row.concept_id, + row.code, + row.display, + row.definition, + row.active === 1 + ), + message: null + }); + } + } + + return out; + } + + // Backward-compat alias while workers migrate to locateMany. 
+  async locateBatch(codes, allAltCodes = false) {
+    const located = await this.locateMany(codes, allAltCodes);
+    return located;
+  }
+
+  /**
+   * Locates `code` and confirms it sits under `parent` in the hierarchy.
+   *
+   * @param {string} code code to locate
+   * @param {string} parent code of the required ancestor
+   * @param {boolean} disallowParent when true, `code` may not be the parent itself
+   * @returns {Promise<{context: (SqliteRuntimeV0Context|null), message: *}>} the locate() result for
+   *   `code`, or a null context with an explanatory message when the parent is unknown or the
+   *   hierarchy test fails
+   */
+  async locateIsA(code, parent, disallowParent = false) {
+    const child = await this.locate(code);
+    if (child.context) {
+      const ancestor = await this.locate(parent);
+      if (!ancestor.context) {
+        return { context: null, message: `Parent concept '${parent}' not found` };
+      }
+
+      const allowSelf = !disallowParent;
+      const inHierarchy = await this.#isA(ancestor.context.conceptId, child.context.conceptId, allowSelf);
+      if (!inHierarchy) {
+        return { context: null, message: `Code '${code}' is not in hierarchy of '${parent}'` };
+      }
+    }
+    return child;
+  }
+
+  /**
+   * Creates an iterator over concepts.
+   *
+   * Without a hierarchy property every concept is iterated. With no `code`, the roots are iterated
+   * (or everything when runtime.iteration.rootMode is 'all'). With a `code`, its children are
+   * iterated; the iterator is empty when child iteration is disabled or the concept has no children.
+   *
+   * @param {*} code a code or context accepted by _ensureContext, or a falsy value for the top level
+   * @returns {Promise<Object|null>} an iterator object, or null when `code` cannot be resolved
+   */
+  async iterator(code) {
+    if (!this.meta.hierarchyPropertyId) {
+      return this.iteratorAll();
+    }
+
+    if (!code) {
+      return this.runtime?.iteration?.rootMode === 'all'
+        ? this.iteratorAll()
+        : new SqliteRuntimeV0QueryIterator('roots');
+    }
+
+    const parentCtx = await this._ensureContext(code);
+    if (!parentCtx) {
+      return null;
+    }
+
+    const childrenEnabled = this.runtime?.iteration?.children !== false;
+    if (childrenEnabled && await this.#conceptHasHierarchyChildren(parentCtx.conceptId)) {
+      return new SqliteRuntimeV0QueryIterator('children', { targetConceptId: parentCtx.conceptId });
+    }
+    return new SqliteRuntimeV0Iterator([]);
+  }
+
+  /**
+   * Tells whether the concept appears as the target of at least one active hierarchy link.
+   *
+   * @param {*} conceptId concept id to test
+   * @returns {Promise<boolean>} false when there is no id or no hierarchy property
+   */
+  async #conceptHasHierarchyChildren(conceptId) {
+    const applicable = Boolean(conceptId) && Boolean(this.meta.hierarchyPropertyId);
+    if (!applicable) {
+      return false;
+    }
+    const linkTargets = await this.#getHierarchyChildTargetSet();
+    return linkTargets.has(conceptId);
+  }
+
+  /**
+   * Returns the set of concept ids that are targets of hierarchy links.
+   *
+   * When this.sharedState.childTargetSets is a Map, the set is cached there under
+   * `${propertyId}:${edgeSetId}`, and concurrent callers share a single in-flight load through
+   * this.sharedState.childTargetLoadPromises. Otherwise the set is loaded on every call.
+   *
+   * @returns {Promise<Set>} empty when there is no hierarchy property
+   */
+  async #getHierarchyChildTargetSet() {
+    const { hierarchyPropertyId: propertyId, hierarchyEdgeSetId: edgeSetId } = this.meta;
+    if (!propertyId) {
+      return new Set();
+    }
+
+    const cacheKey = `${propertyId}:${edgeSetId}`;
+    const shared = this.sharedState;
+    const cacheable = Boolean(shared) && shared.childTargetSets instanceof Map;
+    if (!cacheable) {
+      return this.#loadHierarchyChildTargetSet(propertyId, edgeSetId);
+    }
+
+    const cached = shared.childTargetSets.get(cacheKey);
+    if (cached instanceof Set) {
+      return cached;
+    }
+
+    if (!(shared.childTargetLoadPromises instanceof Map)) {
+      shared.childTargetLoadPromises = new Map();
+    }
+    const inFlight = shared.childTargetLoadPromises.get(cacheKey);
+    if (inFlight) {
+      return inFlight;
+    }
+
+    // Start the load once; publish the result and always clear the in-flight marker.
+    const pending = (async () => {
+      try {
+        const loaded = await this.#loadHierarchyChildTargetSet(propertyId, edgeSetId);
+        this.sharedState.childTargetSets.set(cacheKey, loaded);
+        return loaded;
+      } finally {
+        this.sharedState.childTargetLoadPromises.delete(cacheKey);
+      }
+    })();
+    shared.childTargetLoadPromises.set(cacheKey, pending);
+    return pending;
+  }
+
+  /**
+   * Reads the distinct target concept ids of all active links for a property / edge set.
+   *
+   * @param {*} propertyId value matched against concept_link.property_id
+   * @param {*} edgeSetId value matched against concept_link.edge_set_id
+   * @returns {Promise<Set>} set of target_concept_id values
+   */
+  async #loadHierarchyChildTargetSet(propertyId, edgeSetId) {
+    const linkRows = await all(
+      this.db,
+      `SELECT DISTINCT target_concept_id
+       FROM concept_link
+       WHERE property_id = ?
+         AND edge_set_id = ?
+         AND active = 1`,
+      [propertyId, edgeSetId]
+    );
+    const targets = new Set();
+    for (const { target_concept_id: targetId } of linkRows) {
+      targets.add(targetId);
+    }
+    return targets;
+  }
+
+  /**
+   * Creates an iterator over every concept.
+   *
+   * @returns {Promise<SqliteRuntimeV0QueryIterator>} a query iterator in 'all' mode
+   */
+  async iteratorAll() {
+    return new SqliteRuntimeV0QueryIterator('all');
+  }
+
+  /**
+   * Advances an iterator and returns the next concept context.
+   *
+   * Query iterators are paged: further pages are loaded until a row is available or the iterator
+   * reports done. List iterators hold contexts or raw codes; raw codes are resolved with locate().
+   *
+   * @param {Object} iteratorContext iterator previously returned by iterator() / iteratorAll()
+   * @returns {Promise<SqliteRuntimeV0Context|null>} null at the end or for an unknown iterator type
+   */
+  async nextContext(iteratorContext) {
+    if (!iteratorContext) {
+      return null;
+    }
+
+    if (this.#isQueryIterator(iteratorContext)) {
+      const it = iteratorContext;
+      while (it.cursor >= it.rows.length) {
+        if (it.done) {
+          return null;
+        }
+        await this.#loadNextIteratorPage(it);
+      }
+      const current = it.rows[it.cursor];
+      it.cursor += 1;
+      return current || null;
+    }
+
+    const isListIterator = iteratorContext instanceof SqliteRuntimeV0Iterator;
+    if (!isListIterator || iteratorContext.cursor >= iteratorContext.codes.length) {
+      return null;
+    }
+
+    const item = iteratorContext.codes[iteratorContext.cursor];
+    iteratorContext.cursor += 1;
+    if (item instanceof SqliteRuntimeV0Context) {
+      return item;
+    }
+    const { context } = await this.locate(item);
+    return context;
+  }
+
+  /**
+   * Compares two concepts hierarchically.
+   *
+   * @param {*} codeA code or context accepted by _ensureContext
+   * @param {*} codeB code or context accepted by _ensureContext
+   * @returns {Promise<string>} 'equivalent', 'subsumes' (A is an ancestor of B), 'subsumed-by', or
+   *   'not-subsumed' (also returned when either concept cannot be resolved)
+   */
+  async subsumesTest(codeA, codeB) {
+    const a = await this._ensureContext(codeA);
+    const b = await this._ensureContext(codeB);
+    if (!a || !b) return 'not-subsumed';
+
+    if (a.code === b.code) return 'equivalent';
+
+    if (await this.#isA(a.conceptId, b.conceptId, true)) return 'subsumes';
+    if (await this.#isA(b.conceptId, a.conceptId, true)) return 'subsumed-by';
+    return 'not-subsumed';
+  }
+
+  /**
+   * Reports whether a property / operator combination can be filtered.
+   *
+   * Precedence: an explicit operator list under runtime.filters[prop]; then the built-in concept
+   * operators and code regex; then the resolved per-property filter configuration.
+   *
+   * @param {string} prop filter property
+   * @param {string} op filter operator
+   * @param {*} _value unused
+   * @returns {Promise<boolean>}
+   */
+  async doesFilter(prop, op, _value) {
+    void _value;
+    if (!prop || !op) return false;
+
+    const propCfg = this.runtime.filters?.[prop];
+    if (propCfg?.operators && Array.isArray(propCfg.operators)) {
+      return propCfg.operators.includes(op);
+    }
+
+    if (prop === 'concept') {
+      return ['=', 'is-a', 'descendent-of', 'in'].includes(op);
+    }
+    if (prop === 'code' && op === 'regex') {
+      return true;
+    }
+
+    const propertyCfg = await this.#resolvePropertyFilterConfig(prop);
+    if (propertyCfg?.operators && Array.isArray(propertyCfg.operators)) {
+      return propertyCfg.operators.includes(op);
+    }
+
+    return false;
+  }
+
+  /**
+   * Creates the context that accumulates filter intent until executeFilters() runs.
+   *
+   * @param {boolean} iterate passed to the FilterExecutionContext constructor
+   * @param {Object} [params] request parameters; activeOnly, includeDesignations, designations and
+   *   workingLanguages() are read when present
+   * @param {boolean} [excludeInactive] compose-level request to drop inactive concepts
+   * @param {number} [offset=-1] paging offset; values <= 0 mean none
+   * @param {number} [count=-1] paging size; values <= 0 mean none
+   * @returns {Promise<FilterExecutionContext>}
+   */
+  async getPrepContext(iterate, params, excludeInactive, offset = -1, count = -1) {
+    const ctx = new FilterExecutionContext(iterate);
+    ctx._v0Excludes = [];
+    ctx._v0Offset = offset;
+    ctx._v0Count = count;
+    // Combine compose-level and request-level active filtering:
+    // "exclude" wins — client can narrow but can't widen beyond the ValueSet's rule
+    ctx._v0ExcludeInactive = excludeInactive || !!(params && params.activeOnly);
+    // Extract designation hints from params (replaces separate prepareDesignations call)
+    if (params) {
+      const langs = typeof params.workingLanguages === 'function' ? params.workingLanguages() : null;
+      ctx._v0DesignationHints = {
+        includeDesignations: !!params.includeDesignations,
+        languages: langs || null,
+        designations: params.designations || null,
+      };
+    }
+    return ctx;
+  }
+
+  /**
+   * Records a text-search filter on the context.
+   *
+   * @param {FilterExecutionContext} filterContext filtering context
+   * @param {string|{filter: string}} filter the search text, or an object carrying it in `filter`
+   * @param {*} _sort unused
+   * @throws {Error} when no non-blank search text is supplied
+   */
+  async searchFilter(filterContext, filter, _sort) {
+    void _sort;
+    const searchText = typeof filter === 'string'
+      ? filter
+      : (filter && typeof filter.filter === 'string' ? filter.filter : null);
+
+    if (!searchText || !searchText.trim()) {
+      throw new Error('Invalid search filter');
+    }
+
+    // Record intent only — executeFilters() incorporates text search into combined SQL.
+    filterContext._v0SearchText = searchText.trim();
+  }
+
+  /**
+   * Registers one include filter on the context.
+   *
+   * The filter is validated first and only then recorded in _v0IncludeFilters, so a rejected
+   * filter never lingers on the context. Most filters are deferred to executeFilters(); three
+   * cases are materialized immediately into filterContext.filters: code regex, properties with a
+   * special handler, and any filter while the context is in membership-predicate mode.
+   *
+   * @param {FilterExecutionContext} filterContext filtering context
+   * @param {string} prop filter property ('concept', 'code', or a property code)
+   * @param {string} op filter operator
+   * @param {string} value filter value
+   * @throws {Error} when the property or operator is not supported
+   */
+  async filter(filterContext, prop, op, value) {
+    // Called only after validation has succeeded.
+    const recordInclude = () => {
+      if (!filterContext._v0IncludeFilters) filterContext._v0IncludeFilters = [];
+      filterContext._v0IncludeFilters.push({ property: prop, op, value });
+    };
+
+    if (prop === 'code' && op === 'regex') {
+      // Regex can't be expressed in SQL builder — must eagerly materialize.
+      // Compile first: an invalid pattern throws before anything is recorded.
+      const re = new RegExp(`^${value}$`);
+      recordInclude();
+      const rows = await all(
+        this.db,
+        `SELECT code
+         FROM concept
+         WHERE cs_id = ?
+         ORDER BY code`,
+        [this.meta.csId]
+      );
+      const codes = rows.map(r => r.code).filter(c => re.test(c));
+      filterContext.filters.push(new SqliteRuntimeV0FilterSet(`code-regex:${value}`, codes, true));
+      return;
+    }
+
+    if (prop !== 'concept') {
+      const propertyCfg = await this.#resolvePropertyFilterConfig(prop);
+      if (!propertyCfg) {
+        throw new Error(`Unsupported sqlite runtime filter property '${prop}'`);
+      }
+      if (!propertyCfg.operators.includes(op)) {
+        throw new Error(`Unsupported sqlite runtime filter operator '${op}' for property '${prop}'`);
+      }
+      // Eagerly materialize only for special handlers and membership predicates;
+      // everything else defers to executeFilters.
+      if (propertyCfg.specialHandler || this.#useMembershipPredicate(filterContext)) {
+        await this.#filterByProperty(filterContext, propertyCfg, op, value);
+      } else {
+        if (!filterContext._v0Deferred) filterContext._v0Deferred = [];
+        filterContext._v0Deferred.push({ propertyCfg, op, value, filterName: `property-${propertyCfg.propertyCode}-${op}:${value}` });
+      }
+      recordInclude();
+      return;
+    }
+
+    // concept filter: reject operators that are neither advertised by doesFilter() nor expressible
+    // by the SQL builder (the same probe filterExcludeFilters uses). Previously such an operator
+    // was accepted in iteration mode without any check.
+    const advertised = await this.doesFilter(prop, op, value);
+    if (!advertised && !this.#buildV0FilterSql({ property: prop, op, value }, '_chk')) {
+      throw new Error(`Unsupported sqlite runtime filter operator '${op}' for concept`);
+    }
+
+    // concept filter — defer to executeFilters unless membership predicate needed
+    if (this.#useMembershipPredicate(filterContext)) {
+      // Membership predicates are used for filterCheck, not iteration — keep eager
+      let predicate;
+      if (op === '=') {
+        predicate = new SqliteRuntimeV0PredicateFilter(
+          `concept=${value}`, 'concept-equals', { code: value }, true
+        );
+      } else if (op === 'is-a' || op === 'descendent-of') {
+        const includeSelf = op === 'is-a'
+          ? (this.runtime?.filters?.concept?.isAIncludesSelf !== false)
+          : false;
+        const parent = await this.locate(value);
+        predicate = new SqliteRuntimeV0PredicateFilter(
+          `concept-${op}:${value}`, 'concept-hierarchy',
+          { parentCode: value, ancestorId: parent.context ? parent.context.conceptId : null,
+            includeSelf, missingMessage: parent.context ? null : `Parent concept '${value}' not found` },
+          true
+        );
+      } else if (op === 'in') {
+        const url = resolveInValueSetUrl(this.system(), value, this.runtime);
+        predicate = new SqliteRuntimeV0PredicateFilter(
+          `concept-in:${value}`, 'concept-in', { valueSetUrl: url, rawValue: value }, true
+        );
+      } else {
+        throw new Error(`Unsupported sqlite runtime filter operator '${op}' for concept`);
+      }
+      recordInclude();
+      filterContext.filters.push(predicate);
+      return;
+    }
+
+    // For iteration mode: defer to executeFilters which will build combined SQL,
+    // or fall back to per-filter materialization (no syncDb, unsupported filter).
+    recordInclude();
+    if (!filterContext._v0Deferred) filterContext._v0Deferred = [];
+    filterContext._v0Deferred.push({ property: prop, op, value, isConcept: true });
+  }
+
+  /**
+   * Accumulate exclude filter groups for later SQL generation in executeFilters.
+   * Each call represents one exclude clause; all filters in the array are conjunctive.
+   * Throws if any filter is unsupported.
+   * @param {FilterExecutionContext} filterContext
+   * @param {Object[]} filters - array of {prop, op, value}
+   */
+  async filterExcludeFilters(filterContext, filters) {
+    if (!filterContext._v0Excludes) filterContext._v0Excludes = [];
+    // Validate all filters are supported by trying to build SQL for each
+    for (let i = 0; i < filters.length; i++) {
+      const f = filters[i];
+      if (f.prop === 'code' && f.op === 'in') continue; // concept-code excludes always supported
+      const result = this.#buildV0FilterSql(
+        { property: f.prop, op: f.op, value: f.value }, `_val${i}`
+      );
+      if (!result) {
+        throw new Error(`Unsupported exclude filter: property '${f.prop}' op '${f.op}'`);
+      }
+    }
+    filterContext._v0Excludes.push(filters);
+  }
+
+  /**
+   * Record intent to include specific concept codes.
+   * No SQL — executeFilters() incorporates these into combined SQL.
+   */
+  async includeConcepts(filterContext, codes) {
+    if (!filterContext._v0IncludeConcepts) filterContext._v0IncludeConcepts = [];
+    filterContext._v0IncludeConcepts.push(...codes);
+  }
+
+  /**
+   * Record intent to exclude specific concept codes.
+   * No SQL — executeFilters() incorporates these into combined SQL as NOT IN.
+   */
+  async filterExcludeConcepts(filterContext, codes) {
+    if (!filterContext._v0ExcludeConcepts) filterContext._v0ExcludeConcepts = [];
+    filterContext._v0ExcludeConcepts.push(...codes);
+  }
+
+  /**
+   * Turns everything recorded on the context into filter sets.
+   *
+   * With a synchronous DB handle and at least one include filter or include concept list, a
+   * single SQL statement is built (includes, active flag, excludes, text search, paging) and one
+   * combined set is returned. If there is no sync DB, nothing to include, or an include filter
+   * that the SQL builder cannot express, each recorded include filter is materialized
+   * individually and filterContext.filters is returned.
+   *
+   * @param {FilterExecutionContext} filterContext context populated by filter(), searchFilter(),
+   *   includeConcepts(), filterExcludeFilters() and filterExcludeConcepts()
+   * @returns {Promise<Array>} the filter sets to iterate or check
+   */
+  async executeFilters(filterContext) {
+    const hasIncludeFilters = filterContext._v0IncludeFilters && filterContext._v0IncludeFilters.length > 0;
+    const hasIncludeConcepts = filterContext._v0IncludeConcepts && filterContext._v0IncludeConcepts.length > 0;
+    const hasExcludeConcepts = filterContext._v0ExcludeConcepts && filterContext._v0ExcludeConcepts.length > 0;
+    const syncDb = this.#getSyncDb();
+
+    // Build combined SQL when we have sync DB and something to query.
+    if (syncDb && (hasIncludeFilters || hasIncludeConcepts)) {
+      const csId = this.meta.csId;
+      const allParams = { _csId: csId };
+
+      // Build include filter SQL
+      let joins = '';
+      let where = '';
+      let unsupported = false;
+      for (let fi = 0; fi < (filterContext._v0IncludeFilters || []).length; fi++) {
+        const result = this.#buildV0FilterSql(filterContext._v0IncludeFilters[fi], `_i0f${fi}`);
+        if (!result) { unsupported = true; break; }
+        joins += result.joins;
+        where += result.sql;
+        Object.assign(allParams, result.params);
+      }
+
+      // Include concept list → WHERE code IN (...)
+      if (!unsupported && hasIncludeConcepts) {
+        const placeholders = filterContext._v0IncludeConcepts.map((c, j) => {
+          allParams[`_ic${j}`] = c;
+          return `@_ic${j}`;
+        }).join(',');
+        where += ` AND c.code IN (${placeholders})`;
+      }
+
+      if (!unsupported) {
+        // Exclude inactive codes in SQL when compose or request says to
+        const activeClause = filterContext._v0ExcludeInactive ? ' AND c.active = 1' : '';
+        const innerSql = `SELECT c.concept_id, c.code, c.display, c.definition, c.active`
+          + ` FROM concept c${joins}`
+          + ` WHERE c.cs_id = @_csId${where}${activeClause}`;
+
+        // Build exclude SQL
+        let excludeSql = '';
+        for (let i = 0; i < (filterContext._v0Excludes || []).length; i++) {
+          const excFilters = filterContext._v0Excludes[i];
+          // Separate concept-code excludes from property/hierarchy excludes
+          const codeExclude = excFilters.find(f => f.prop === 'code' && f.op === 'in');
+          const propExcludes = excFilters.filter(f => !(f.prop === 'code' && f.op === 'in'));
+
+          if (codeExclude) {
+            const codes = codeExclude.value.split(',');
+            const placeholders = codes.map((c, j) => {
+              allParams[`_ec${i}_${j}`] = c;
+              return `@_ec${i}_${j}`;
+            }).join(',');
+            excludeSql += ` AND t.code NOT IN (${placeholders})`;
+          }
+          if (propExcludes.length > 0) {
+            const mapped = propExcludes.map(f => ({ property: f.prop, op: f.op, value: f.value }));
+            const clause = this.#buildExcludeClause(mapped, `_e${i}`, allParams);
+            if (clause) {
+              excludeSql += ` AND ${clause}`;
+            }
+          }
+        }
+
+        // Exclude concept list → AND code NOT IN (...)
+        if (hasExcludeConcepts) {
+          const placeholders = filterContext._v0ExcludeConcepts.map((c, j) => {
+            allParams[`_xc${j}`] = c;
+            return `@_xc${j}`;
+          }).join(',');
+          excludeSql += ` AND t.code NOT IN (${placeholders})`;
+        }
+
+        // Intersect with text search filter if present
+        if (filterContext._v0SearchText) {
+          const searchText = filterContext._v0SearchText;
+          const searchCfg = normalizedSearchConfig(this.runtime.search);
+          if (this.#canUseFtsSearch(searchCfg)) {
+            const matchText = toFtsMatchText(searchText);
+            allParams['_searchMatch'] = matchText;
+            allParams['_searchCsId'] = csId;
+            // Build UNION of FTS sources → concept_id set
+            const ftsParts = [];
+            for (const source of searchCfg.sources) {
+              if (source === 'display') {
+                const table = sqlIdentifier(searchCfg.ftsTables.display, 'search_fts_display');
+                ftsParts.push(
+                  `SELECT c2.concept_id FROM ${table} f2 JOIN concept c2 ON c2.concept_id = f2.rowid WHERE c2.cs_id = @_searchCsId AND f2.term MATCH @_searchMatch`
+                );
+              } else if (source === 'designation') {
+                const table = sqlIdentifier(searchCfg.ftsTables.designation, 'search_fts_designation');
+                ftsParts.push(
+                  `SELECT d2.concept_id FROM ${table} f2 JOIN designation d2 ON d2.designation_id = f2.rowid WHERE f2.term MATCH @_searchMatch`
+                );
+              } else if (source === 'literal') {
+                const table = sqlIdentifier(searchCfg.ftsTables.literal, 'search_fts_literal');
+                ftsParts.push(
+                  `SELECT cl2.source_concept_id FROM ${table} f2 JOIN concept_literal cl2 ON cl2.literal_id = f2.rowid WHERE f2.term MATCH @_searchMatch`
+                );
+              }
+            }
+            if (ftsParts.length > 0) {
+              excludeSql += ` AND t.concept_id IN (${ftsParts.join(' UNION ')})`;
+            }
+          } else {
+            // LIKE fallback on display
+            allParams['_searchLike'] = `%${searchText}%`;
+            excludeSql += ` AND t.display LIKE @_searchLike`;
+          }
+        }
+
+        // Return full context rows so filterConcept can use them directly
+        let sql = `SELECT DISTINCT t.concept_id, t.code, t.display, t.definition, t.active FROM (${innerSql}) AS t`
+          + ` WHERE 1=1${excludeSql}`
+          + ` ORDER BY t.code`;
+
+        // Paging. SQLite only accepts OFFSET as part of a LIMIT clause, so an offset without a
+        // count is written as "LIMIT -1 OFFSET n" (a negative LIMIT means no upper bound).
+        // Both values are reduced to plain integers before being placed in the SQL text.
+        const rawCount = Number(filterContext._v0Count);
+        const rawOffset = Number(filterContext._v0Offset);
+        const limit = Number.isFinite(rawCount) && rawCount >= 1 ? Math.trunc(rawCount) : -1;
+        const offset = Number.isFinite(rawOffset) && rawOffset >= 1 ? Math.trunc(rawOffset) : 0;
+        if (limit > 0 || offset > 0) {
+          sql += ` LIMIT ${limit} OFFSET ${offset}`;
+        }
+
+        if (syncDb._resetRegexpEffort) syncDb._resetRegexpEffort();
+        const stmt = syncDb.prepare(sql);
+        const rows = [];
+        for (const row of stmt.iterate(allParams)) {
+          rows.push(row);
+        }
+
+        // Batch-fetch designations if iteration will need them.
+        // The worker takes the full path (per-code designations() call) when:
+        //   includeDesignations is true, OR workingLanguages is null/undefined.
+        // We batch-fetch here to avoid N individual DB queries during iteration.
+        const hints = filterContext._v0DesignationHints;
+        const needDesignations = hints && (hints.includeDesignations || !hints.languages);
+        const designationMap = new Map();
+        if (needDesignations && rows.length > 0) {
+          const conceptIds = rows.map(r => r.concept_id);
+          const BATCH = 500;
+          const designationTableRef = this.meta?.designationOrderIndex
+            ? 'designation INDEXED BY idx_designation_concept_pref_term'
+            : 'designation';
+          for (let i = 0; i < conceptIds.length; i += BATCH) {
+            const batch = conceptIds.slice(i, i + BATCH);
+            const dPlaceholders = batch.map(() => '?').join(',');
+            const dStmt = syncDb.prepare(
+              `SELECT concept_id, language_code, use_code, term, preferred, active
+               FROM ${designationTableRef}
+               WHERE concept_id IN (${dPlaceholders})
+               ORDER BY concept_id, preferred DESC, term`
+            );
+            for (const dRow of dStmt.iterate(batch)) {
+              let arr = designationMap.get(dRow.concept_id);
+              if (!arr) { arr = []; designationMap.set(dRow.concept_id, arr); }
+              arr.push(dRow);
+            }
+          }
+        }
+
+        const combinedSet = new SqliteRuntimeV0FilterSet('v0-combined-sql', rows.map(r => r.code), true);
+        // Attach pre-fetched context rows so filterConcept avoids per-code locate()
+        combinedSet._v0Rows = rows;
+        combinedSet._v0RowIndex = new Map(rows.map((r, i) => [r.code, i]));
+        if (needDesignations) {
+          combinedSet._v0Designations = designationMap;
+        }
+        return [combinedSet];
+      }
+    }
+
+    // Fallback: combined SQL couldn't be built (no syncDb or unsupported filter).
+    // Materialize each recorded include filter the old-fashioned way.
+    // NOTE(review): filters that filter() already materialized eagerly (code regex, special
+    // handlers, membership predicates) are also recorded in _v0IncludeFilters, so they appear to
+    // be materialized a second time here - confirm whether the duplicate sets are intended.
+    if (hasIncludeFilters) {
+      for (const f of filterContext._v0IncludeFilters) {
+        await this.#materializeFilter(filterContext, f.property, f.op, f.value);
+      }
+    }
+
+    return filterContext.filters || [];
+  }
+
+  /**
+   * Eagerly materialize a single include filter into filterContext.filters (fallback path).
+   *
+   * @param {FilterExecutionContext} filterContext filtering context
+   * @param {string} prop filter property
+   * @param {string} op filter operator
+   * @param {string} value filter value
+   * @throws {Error} when the concept operator or the property is not supported; dropping such a
+   *   filter silently would widen the result
+   */
+  async #materializeFilter(filterContext, prop, op, value) {
+    if (prop === 'code' && op === 'regex') {
+      const re = new RegExp(`^${value}$`);
+      const rows = await all(this.db,
+        'SELECT code FROM concept WHERE cs_id = ? ORDER BY code', [this.meta.csId]);
+      const codes = rows.map(r => r.code).filter(c => re.test(c));
+      filterContext.filters.push(new SqliteRuntimeV0FilterSet(`code-regex:${value}`, codes, true));
+      return;
+    }
+    if (prop === 'concept') {
+      if (op === '=') {
+        const located = await this.locate(value);
+        filterContext.filters.push(new SqliteRuntimeV0FilterSet(
+          `concept=${value}`, located.context ? [value] : [], true));
+      } else if (op === 'is-a' || op === 'descendent-of') {
+        const includeSelf = op === 'is-a'
+          ? (this.runtime?.filters?.concept?.isAIncludesSelf !== false) : false;
+        const parent = await this.locate(value);
+        if (parent.context && this.meta.closureRows > 0 && this.meta.useClosure) {
+          filterContext.filters.push(new SqliteRuntimeV0PagedDescendantFilter(
+            `concept-${op}:${value}`, parent.context.conceptId, includeSelf));
+        } else {
+          const codes = await this.#descendantCodes(value, includeSelf);
+          filterContext.filters.push(new SqliteRuntimeV0FilterSet(`concept-${op}:${value}`, codes, true));
+        }
+      } else if (op === 'in') {
+        const url = resolveInValueSetUrl(this.system(), value, this.runtime);
+        const rows = await all(this.db,
+          `SELECT c.code FROM value_set v
+           JOIN value_set_member m ON m.vs_id = v.vs_id
+           JOIN concept c ON c.concept_id = m.concept_id
+           WHERE v.cs_id = ? AND v.url = ? AND m.active = 1 ORDER BY code`,
+          [this.meta.csId, url]);
+        filterContext.filters.push(new SqliteRuntimeV0FilterSet(`concept-in:${value}`, rows.map(r => r.code), true));
+      } else {
+        throw new Error(`Unsupported sqlite runtime filter operator '${op}' for concept`);
+      }
+      return;
+    }
+    // Property filter
+    const propertyCfg = await this.#resolvePropertyFilterConfig(prop);
+    if (!propertyCfg) {
+      throw new Error(`Unsupported sqlite runtime filter property '${prop}'`);
+    }
+    await this.#filterByProperty(filterContext, propertyCfg, op, value);
+  }
+
+  /**
+   * Advertises optional provider features.
+   *
+   * @returns {{filterPage: boolean}} filterPage is true: filterPage() is implemented
+   */
+  capabilities() {
+    return {
+      filterPage: true
+    };
+  }
+
+  /**
+   * Returns the next page of concepts from a filter set and advances its cursor.
+   *
+   * @param {FilterExecutionContext} filterContext unused
+   * @param {Object} set a filter set produced by this provider
+   * @param {number} [count=256] page size; non-finite values fall back to 256, minimum 1
+   * @returns {Promise<Array|null>} contexts for the page; [] for predicate filters or when
+   *   exhausted; null for an unrecognised set type
+   */
+  async filterPage(filterContext, set, count = 256) {
+    void filterContext;
+    const pageSize = Math.max(1, Number.isFinite(count) ? Math.floor(count) : 256);
+
+    if (this.#isPredicateFilter(set)) {
+      return [];
+    }
+
+    if (this.#isPagedDescendantFilter(set)) {
+      // Keep loading descendant pages until a full page is buffered or the source is drained.
+      let start = set.cursor + 1;
+      while (!set.done && (set.rows.length - start) < pageSize) {
+        await this.#loadNextDescendantPage(set);
+        start = set.cursor + 1;
+      }
+      if (start >= set.rows.length) {
+        return [];
+      }
+      const end = Math.min(set.rows.length, start + pageSize);
+      const page = set.rows.slice(start, end);
+      set.cursor = end - 1;
+      return page;
+    }
+
+    if (set instanceof SqliteRuntimeV0FilterSet) {
+      const start = set.cursor + 1;
+      if (start >= set.codes.length) {
+        return [];
+      }
+      const end = Math.min(set.codes.length, start + pageSize);
+      const codes = set.codes.slice(start, end);
+      set.cursor = end - 1;
+      return this.#batchLoadContextsByCodes(codes);
+    }
+
+    return null;
+  }
+
+  /**
+   * Reports the number of concepts in a filter set.
+   *
+   * @param {FilterExecutionContext} _filterContext unused
+   * @param {Object} set a filter set produced by this provider
+   * @returns {Promise<number>} 0 for predicate filters and for paged descendant filters that are
+   *   not fully loaded yet; otherwise the number of rows / codes held
+   */
+  async filterSize(_filterContext, set) {
+    if (this.#isPredicateFilter(set)) {
+      return 0;
+    }
+    if (this.#isPagedDescendantFilter(set)) {
+      return set.done ? set.rows.length : 0;
+    }
+    return set.codes.length;
+  }
+
+  /**
+   * @param {FilterExecutionContext} filterContext filtering context
+   * @returns {Promise<boolean>} true when any filter on the context has a falsy `closed` flag
+   */
+  async filtersNotClosed(filterContext) {
+    return (filterContext.filters || []).some(f => !f.closed);
+  }
+
+  /**
+   * Moves a filter set's cursor to the next concept.
+   *
+   * @param {FilterExecutionContext} _filterContext unused
+   * @param {Object} set a filter set produced by this provider
+   * @returns {Promise<boolean>} true when the cursor now points at a concept
+   */
+  async filterMore(_filterContext, set) {
+    if (this.#isPredicateFilter(set)) {
+      return false;
+    }
+    if (this.#isPagedDescendantFilter(set)) {
+      set.cursor += 1;
+      while (set.cursor >= set.rows.length) {
+        if (set.done) {
+          return false;
+        }
+        await this.#loadNextDescendantPage(set);
+      }
+      return true;
+    }
+    set.cursor += 1;
+    return set.cursor < set.codes.length;
+  }
+
+  /**
+   * Returns the concept at the filter set's current cursor position.
+   *
+   * @param {FilterExecutionContext} _filterContext unused
+   * @param {Object} set a filter set produced by this provider
+   * @returns {Promise<SqliteRuntimeV0Context|null>} null for predicate filters or when the cursor
+   *   is out of range
+   */
+  async filterConcept(_filterContext, set) {
+    if (this.#isPredicateFilter(set)) {
+      return null;
+    }
+    if (this.#isPagedDescendantFilter(set)) {
+      if (set.cursor < 0 || set.cursor >= set.rows.length) {
+        return null;
+      }
+      return set.rows[set.cursor];
+    }
+    if (set.cursor < 0 || set.cursor >= set.codes.length) {
+      return null;
+    }
+    // Use pre-fetched row from executeFilters if available (avoids per-code locate)
+    if (set._v0Rows && set._v0RowIndex) {
+      const idx = set._v0RowIndex.get(set.codes[set.cursor]);
+      if (idx !== undefined) {
+        const row = set._v0Rows[idx];
+        // Pass a boolean active flag, as locate() and locateMany() do; Number() also covers a
+        // driver that returns integers as BigInt.
+        const ctx = new SqliteRuntimeV0Context(
+          row.concept_id, row.code, row.display, row.definition, Number(row.active) === 1
+        );
+        if (set._v0Designations) {
+          ctx._v0Designations = set._v0Designations.get(row.concept_id) || [];
+        }
+        return ctx;
+      }
+    }
+    const located = await this.locate(set.codes[set.cursor]);
+    return located.context;
+  }
+
+  /**
+   * Finds `code` within a filter set.
+   *
+   * @param {FilterExecutionContext} _filterContext unused
+   * @param {Object} set a filter set produced by this provider
+   * @param {string} code code to look for
+   * @returns {Promise<SqliteRuntimeV0Context|string>} the context when the code is in the set,
+   *   otherwise a message string (predicate filters return whatever #filterLocatePredicate yields)
+   */
+  async filterLocate(_filterContext, set, code) {
+    if (this.#isPredicateFilter(set)) {
+      return this.#filterLocatePredicate(set, code);
+    }
+    if (this.#isPagedDescendantFilter(set)) {
+      const located = await this.locate(code);
+      if (!located.context) {
+        return `Code '${code}' not found in filter set`;
+      }
+      const ok = await this.#isA(set.ancestorId, located.context.conceptId, !!set.includeSelf);
+      return ok ? located.context : `Code '${code}' not found in filter set`;
+    }
+    if (!set.has(code)) {
+      return `Code '${code}' not found in filter set`;
+    }
+    const located = await this.locate(code);
+    return located.context || `Code '${code}' not found`;
+  }
+
+  /**
+   * Tests whether a concept is a member of a filter set.
+   *
+   * @param {FilterExecutionContext} _filterContext unused
+   * @param {Object} set a filter set produced by this provider
+   * @param {*} concept code or context accepted by _ensureContext
+   * @returns {Promise<*>} false when the concept cannot be resolved; otherwise the result of the
+   *   predicate, hierarchy or set-membership test
+   */
+  async filterCheck(_filterContext, set, concept) {
+    const ctxt = await this._ensureContext(concept);
+    if (!ctxt) return false;
+    if (this.#isPredicateFilter(set)) {
+      return this.#predicateMatchesContext(set, ctxt);
+    }
+    if (this.#isPagedDescendantFilter(set)) {
+      return this.#isA(set.ancestorId, ctxt.conceptId, !!set.includeSelf);
+    }
+    return set.has(ctxt.code);
+  }
+
+  /**
+   * This implementation does not build known value sets.
+   *
+   * @returns {Promise<null>} always null
+   */
+  async buildKnownValueSet(_url, _version) {
+    void _url;
+    void _version;
+    return null;
+  }
+
+  /**
+   * Loads (and memoises in this.propertyDefs) the property_def row for a property code.
+   *
+   * @param {string} propertyCode property code to resolve
+   * @returns {Promise<Object|null>} row with property_id, property_code and value_kind, or null;
+   *   a miss is cached as null too
+   */
+  async #resolvePropertyDef(propertyCode) {
+    if (!propertyCode) return null;
+    if (this.propertyDefs.has(propertyCode)) {
+      return this.propertyDefs.get(propertyCode);
+    }
+
+    const row = await get(
+      this.db,
+      `SELECT property_id, property_code, value_kind
+       FROM property_def
+       WHERE cs_id = ?
+         AND property_code = ?
+       LIMIT 1`,
+      [this.meta.csId, propertyCode]
+    );
+    const result = row || null;
+    this.propertyDefs.set(propertyCode, result);
+    return result;
+  }
+
+  /**
+   * Builds the effective filter configuration for a property.
+   *
+   * Without runtime.filters.properties every defined property is filterable with '=' and 'in'.
+   * With it, aliases are resolved, and a property is filterable only when it has a byCode entry
+   * or allPropertiesFilterable is true; operators, sources, linkMatch and value options come from
+   * the specific entry, then the configured defaults, then built-in fallbacks.
+   *
+   * @param {string} propertyCode property code (or alias) named in the filter
+   * @returns {Promise<Object|null>} {propertyId, propertyCode, operators, sources, linkMatch,
+   *   value, specialHandler}, or null when the property is unknown or not filterable
+   */
+  async #resolvePropertyFilterConfig(propertyCode) {
+    if (!propertyCode) return null;
+
+    const filtersCfg = this.runtime.filters?.properties;
+    if (!filtersCfg) {
+      const propertyDef = await this.#resolvePropertyDef(propertyCode);
+      if (!propertyDef) {
+        return null;
+      }
+      return {
+        propertyId: propertyDef.property_id,
+        propertyCode: propertyDef.property_code,
+        operators: ['=', 'in'],
+        sources: inferSourcesFromValueKind(propertyDef.value_kind),
+        linkMatch: 'code-only',
+        value: {},
+        specialHandler: null
+      };
+    }
+
+    const aliases = filtersCfg.aliases || {};
+    const rawCode = String(propertyCode);
+    const aliasTarget = aliases[rawCode] ?? aliases[rawCode.toLowerCase()];
+    const resolvedCode = aliasTarget || rawCode;
+
+    const byCode = filtersCfg.byCode || {};
+    const specific = byCode[resolvedCode] || byCode[rawCode] || null;
+    if (!specific && filtersCfg.allPropertiesFilterable !== true) {
+      return null;
+    }
+
+    const propertyDef = await this.#resolvePropertyDef(resolvedCode);
+    if (!propertyDef) {
+      return null;
+    }
+
+    const operators = Array.isArray(specific?.operators) && specific.operators.length > 0
+      ? specific.operators
+      : (Array.isArray(filtersCfg.defaultOperators) && filtersCfg.defaultOperators.length > 0
+        ? filtersCfg.defaultOperators
+        : ['=']);
+
+    const defaultSources = Array.isArray(filtersCfg.defaultSources)
+      ? filtersCfg.defaultSources
+      : inferSourcesFromValueKind(propertyDef.value_kind);
+    const sources = Array.isArray(specific?.sources) && specific.sources.length > 0
+      ? specific.sources
+      : defaultSources;
+
+    const cleanedSources = dedupSources(sources, propertyDef.value_kind);
+    const linkMatch = specific?.linkMatch || filtersCfg.defaultLinkMatch || 'code-only';
+    const value = {
+      ...(filtersCfg.defaultValue || {}),
+      ...(specific?.value || {})
+    };
+
+    return {
+      propertyId: propertyDef.property_id,
+      propertyCode: resolvedCode,
+      operators,
+      sources: cleanedSources,
+      linkMatch,
+      value,
+      specialHandler: specific?.specialHandler || null
+    };
+  }
+
+  /**
+   * Materializes a property filter and pushes the resulting set onto filterContext.filters.
+   *
+   * Order of handling: special handler, then (in membership mode) a predicate filter, then
+   * per-operator materialization for '=', 'in', 'exists' and 'regex'.
+   *
+   * @param {FilterExecutionContext} filterContext filtering context
+   * @param {Object} propertyCfg configuration from #resolvePropertyFilterConfig
+   * @param {string} op filter operator
+   * @param {string} value filter value
+   * @throws {Error} for any other operator
+   */
+  async #filterByProperty(filterContext, propertyCfg, op, value) {
+    const filterName = `property-${propertyCfg.propertyCode}-${op}:${value}`;
+
+    if (propertyCfg.specialHandler) {
+      const codes = await this.#runSpecialPropertyHandler(propertyCfg, op, value);
+      filterContext.filters.push(new SqliteRuntimeV0FilterSet(filterName, codes, true));
+      return;
+    }
+
+    if (this.#useMembershipPredicate(filterContext)) {
+      const predicate = await this.#buildPropertyPredicateFilter(filterName, propertyCfg, op, value);
+      if (predicate) {
+        filterContext.filters.push(predicate);
+        return;
+      }
+    }
+
+    if (op === '=') {
+      const candidates = normalizedFilterCandidates(value, propertyCfg.value);
+      if (candidates.length === 0) {
+        filterContext.filters.push(new SqliteRuntimeV0FilterSet(filterName, [], true));
+        return;
+      }
+      const codes = await this.#propertyEqualsCodes(propertyCfg, candidates);
+      filterContext.filters.push(new SqliteRuntimeV0FilterSet(filterName, codes, true));
+      return;
+    }
+
+    if (op === 'in') {
+      // Union of the '=' result for every member of the value list.
+      const members = splitFilterValueList(value);
+      const aggregate = new Set();
+      for (const member of members) {
+        const candidates = normalizedFilterCandidates(member, propertyCfg.value);
+        if (candidates.length === 0) continue;
+        const codes = await this.#propertyEqualsCodes(propertyCfg, candidates);
+        for (const code of codes) {
+          aggregate.add(code);
+        }
+      }
+      filterContext.filters.push(new SqliteRuntimeV0FilterSet(filterName, Array.from(aggregate).sort(), true));
+      return;
+    }
+
+    if (op === 'exists') {
+      const codes = await this.#propertyExistsCodes(propertyCfg, value);
+      filterContext.filters.push(new SqliteRuntimeV0FilterSet(filterName, codes, true));
+      return;
+    }
+
+    if (op === 'regex') {
+      const codes = await this.#propertyRegexCodes(propertyCfg, value);
+      filterContext.filters.push(new SqliteRuntimeV0FilterSet(filterName, codes, true));
+      return;
+    }
+
+    throw new Error(`Unsupported sqlite runtime property operator '${op}'`);
+  }
+
+  /**
+   * Builds a predicate filter for a property filter instead of a materialized code list.
+   *
+   * @param {string} filterName name given to the resulting filter
+   * @param {Object} propertyCfg configuration from #resolvePropertyFilterConfig
+   * @param {string} op filter operator
+   * @param {string} value filter value
+   * @returns {Promise<Object|null>} a predicate filter; an empty filter set when '=' or 'in' has
+   *   no usable candidate values; null for operators other than '=', 'in', 'exists', 'regex'
+   * @throws {Error} when a regex pattern does not compile
+   */
+  async #buildPropertyPredicateFilter(filterName, propertyCfg, op, value) {
+    const caseSensitive = propertyCfg.value?.caseSensitive === true;
+
+    if (op === '=') {
+      const candidates = normalizedFilterCandidates(value, propertyCfg.value);
+      if (candidates.length === 0) {
+        return new SqliteRuntimeV0FilterSet(filterName, [], true);
+      }
+      return new SqliteRuntimeV0PredicateFilter(
+        filterName,
+        'property-filter',
+        { propertyCfg, op, candidates, caseSensitive },
+        true
+      );
+    }
+
+    if (op === 'in') {
+      const members = splitFilterValueList(value);
+      const aggregate = new Set();
+      for (const member of members) {
+        const candidates = normalizedFilterCandidates(member, propertyCfg.value);
+        for (const candidate of candidates) {
+          aggregate.add(candidate);
+        }
+      }
+      const values = Array.from(aggregate);
+      if (values.length === 0) {
+        return new SqliteRuntimeV0FilterSet(filterName, [], true);
+      }
+      return new SqliteRuntimeV0PredicateFilter(
+        filterName,
+        'property-filter',
+        { propertyCfg, op, candidates: values, caseSensitive },
+        true
+      );
+    }
+
+    if (op === 'exists') {
+      // Anything other than the string 'false' (case-insensitive) means "must exist".
+      const expectExists = String(value ?? 'true').toLowerCase() !== 'false';
+      return new SqliteRuntimeV0PredicateFilter(
+        filterName,
+        'property-filter',
+        { propertyCfg, op, expectExists },
+        true
+      );
+    }
+
+    if (op === 'regex') {
+      // Compile once up front purely to validate the pattern.
+      try {
+        new RegExp(String(value || ''));
+      } catch (error) {
+        throw new Error(`Invalid regex '${value}': ${error.message}`);
+      }
+      return new SqliteRuntimeV0PredicateFilter(
+        filterName,
+        'property-filter',
+        { propertyCfg, op, pattern: String(value || '') },
+        true
+      );
+    }
+
+    return null;
+  }
+
+  /**
+   * Finds the codes whose property equals any candidate value, across the configured sources.
+   *
+   * @param {Object} propertyCfg configuration from #resolvePropertyFilterConfig
+   * @param {Array} candidates values to match
+   * @returns {Promise<string[]>} sorted, de-duplicated codes
+   */
+  async #propertyEqualsCodes(propertyCfg, candidates) {
+    const codeSet = new Set();
+    const caseSensitive = propertyCfg.value?.caseSensitive === true;
+
+    if (propertyCfg.sources.includes('literal')) {
+      const rows = await this.#propertyLiteralEqualsRows(propertyCfg.propertyId, candidates, caseSensitive);
+      for (const row of rows) {
+        codeSet.add(row.code);
+      }
+    }
+
+    if (propertyCfg.sources.includes('link')) {
+      const rows = await this.#propertyLinkEqualsRows(propertyCfg, candidates, caseSensitive);
+      for (const row of rows) {
+        codeSet.add(row.code);
+      }
+    }
+
+    return Array.from(codeSet).sort();
+  }
+
+  /**
+   * Finds the codes whose property value matches a regular expression.
+   *
+   * The pattern is used as given (not anchored) and tested in JavaScript against literal values
+   * and, for links, the target code (plus the target display when linkMatch is 'code-or-display').
+   *
+   * @param {Object} propertyCfg configuration from #resolvePropertyFilterConfig
+   * @param {string} pattern regular expression source
+   * @returns {Promise<string[]>} sorted, de-duplicated codes
+   * @throws {Error} when the pattern does not compile
+   */
+  async #propertyRegexCodes(propertyCfg, pattern) {
+    let regex;
+    try {
+      regex = new RegExp(String(pattern || ''));
+    } catch (error) {
+      throw new Error(`Invalid regex '${pattern}': ${error.message}`);
+    }
+
+    const codeSet = new Set();
+
+    if (propertyCfg.sources.includes('literal')) {
+      const rows = await all(
+        this.db,
+        `SELECT c.code AS code,
+                COALESCE(cl.value_text, cl.value_raw) AS value
+         FROM concept_literal cl
+         JOIN concept c ON c.concept_id = cl.source_concept_id
+         WHERE cl.property_id = ?
+           AND cl.active = 1
+           AND c.cs_id = ?
+           AND COALESCE(cl.value_text, cl.value_raw) IS NOT NULL`,
+        [propertyCfg.propertyId, this.meta.csId]
+      );
+      for (const row of rows) {
+        if (regex.test(row.value)) {
+          codeSet.add(row.code);
+        }
+      }
+    }
+
+    if (propertyCfg.sources.includes('link')) {
+      const rows = await all(
+        this.db,
+        `SELECT src.code AS code,
+                tgt.code AS target_code,
+                tgt.display AS target_display
+         FROM concept_link l
+         JOIN concept src ON src.concept_id = l.source_concept_id
+         JOIN concept tgt ON tgt.concept_id = l.target_concept_id
+         WHERE l.property_id = ?
+           AND l.edge_set_id = ?
+           AND l.active = 1
+           AND src.cs_id = ?`,
+        [propertyCfg.propertyId, this.meta.hierarchyEdgeSetId, this.meta.csId]
+      );
+      for (const row of rows) {
+        const codeMatch = row.target_code && regex.test(row.target_code);
+        const displayMatch = propertyCfg.linkMatch === 'code-or-display' &&
+          row.target_display && regex.test(row.target_display);
+        if (codeMatch || displayMatch) {
+          codeSet.add(row.code);
+        }
+      }
+    }
+
+    return Array.from(codeSet).sort();
+  }
+
+  /**
+   * Queries the concepts having an active literal for the property equal to any candidate.
+   * value_text is compared when present; value_raw is used only when value_text is NULL.
+   *
+   * @param {*} propertyId property_def id
+   * @param {Array} candidates values to match (stringified before binding)
+   * @param {boolean} caseSensitive false compares with COLLATE NOCASE
+   * @returns {Promise<Array<{code: string}>>} distinct rows ordered by code; [] for empty input
+   */
+  async #propertyLiteralEqualsRows(propertyId, candidates, caseSensitive) {
+    if (!propertyId || !Array.isArray(candidates) || candidates.length === 0) {
+      return [];
+    }
+
+    const normalized = candidates.map(v => String(v));
+    const placeholders = normalized.map(() => '?').join(', ');
+    const textPredicate = caseSensitive
+      ? `cl.value_text IN (${placeholders})`
+      : `cl.value_text COLLATE NOCASE IN (${placeholders})`;
+    const rawPredicate = caseSensitive
+      ? `cl.value_raw IN (${placeholders})`
+      : `cl.value_raw COLLATE NOCASE IN (${placeholders})`;
+
+    return all(
+      this.db,
+      `WITH matched_concepts AS (
+         SELECT cl.source_concept_id AS concept_id
+         FROM concept_literal cl
+         WHERE cl.property_id = ?
+           AND cl.active = 1
+           AND cl.value_text IS NOT NULL
+           AND ${textPredicate}
+         UNION
+         SELECT cl.source_concept_id AS concept_id
+         FROM concept_literal cl
+         WHERE cl.property_id = ?
+           AND cl.active = 1
+           AND cl.value_text IS NULL
+           AND cl.value_raw IS NOT NULL
+           AND ${rawPredicate}
+       )
+       SELECT DISTINCT c.code AS code
+       FROM matched_concepts m
+       JOIN concept c ON c.concept_id = m.concept_id
+       WHERE c.cs_id = ?
+       ORDER BY c.code`,
+      // Parameter order mirrors the two UNION branches, then the outer cs_id.
+      [propertyId, ...normalized, propertyId, ...normalized, this.meta.csId]
+    );
+  }
+
+  /**
+   * Queries the concepts having an active link for the property whose target concept matches any
+   * candidate by code (and by display when linkMatch is 'code-or-display').
+   *
+   * @param {Object} propertyCfg configuration from #resolvePropertyFilterConfig
+   * @param {Array} candidates values to match (stringified before binding)
+   * @param {boolean} caseSensitive false compares with COLLATE NOCASE
+   * @returns {Promise<Array<{code: string}>>} distinct rows ordered by code; [] for empty input
+   */
+  async #propertyLinkEqualsRows(propertyCfg, candidates, caseSensitive) {
+    if (!propertyCfg?.propertyId || !Array.isArray(candidates) || candidates.length === 0) {
+      return [];
+    }
+
+    const normalized = candidates.map(v => String(v));
+    const placeholders = normalized.map(() => '?').join(', ');
+    const codeExpr = caseSensitive
+      ? `code IN (${placeholders})`
+      : `code COLLATE NOCASE IN (${placeholders})`;
+    const displayExpr = caseSensitive
+      ? `display IN (${placeholders})`
+      : `display COLLATE NOCASE IN (${placeholders})`;
+
+    const targetSql = [
+      `SELECT concept_id
+       FROM concept
+       WHERE cs_id = ?
+         AND ${codeExpr}`
+    ];
+    const params = [this.meta.csId, ...normalized];
+
+    if (propertyCfg.linkMatch === 'code-or-display') {
+      targetSql.push(
+        `SELECT concept_id
+         FROM concept
+         WHERE cs_id = ?
+           AND ${displayExpr}`
+      );
+      params.push(this.meta.csId, ...normalized);
+    }
+
+    return all(
+      this.db,
+      `WITH matched_targets AS (
+         ${targetSql.join('\nUNION\n')}
+       )
+       SELECT DISTINCT src.code AS code
+       FROM concept_link l
+       JOIN matched_targets mt ON mt.concept_id = l.target_concept_id
+       JOIN concept src ON src.concept_id = l.source_concept_id
+       WHERE src.cs_id = ?
+         AND l.property_id = ?
+         AND l.edge_set_id = ?
+         AND l.active = 1
+       ORDER BY src.code`,
+      [...params, this.meta.csId, propertyCfg.propertyId, this.meta.hierarchyEdgeSetId]
+    );
+  }
+
+  /**
+   * Finds the codes that have (or, when value is 'false', do not have) an active value for the
+   * property in any configured source.
+   *
+   * @param {Object} propertyCfg configuration from #resolvePropertyFilterConfig
+   * @param {*} value 'false' (case-insensitive) selects concepts without the property; anything
+   *   else, including null/undefined, selects concepts with it
+   * @returns {Promise<string[]>} sorted codes
+   */
+  async #propertyExistsCodes(propertyCfg, value) {
+    const expectExists = String(value ?? 'true').toLowerCase() !== 'false';
+    const codeSet = new Set();
+
+    if (propertyCfg.sources.includes('literal')) {
+      const rows = await all(
+        this.db,
+        `SELECT DISTINCT c.code AS code
+         FROM concept_literal cl
+         JOIN concept c ON c.concept_id = cl.source_concept_id
+         WHERE cl.property_id = ?
+           AND cl.active = 1
+           AND c.cs_id = ?`,
+        [propertyCfg.propertyId, this.meta.csId]
+      );
+      for (const row of rows) {
+        codeSet.add(row.code);
+      }
+    }
+
+    if (propertyCfg.sources.includes('link')) {
+      const rows = await all(
+        this.db,
+        `SELECT DISTINCT src.code AS code
+         FROM concept_link l
+         JOIN concept src ON src.concept_id = l.source_concept_id
+         WHERE l.property_id = ?
+           AND l.edge_set_id = ?
+           AND l.active = 1
+           AND src.cs_id = ?`,
+        [propertyCfg.propertyId, this.meta.hierarchyEdgeSetId, this.meta.csId]
+      );
+      for (const row of rows) {
+        codeSet.add(row.code);
+      }
+    }
+
+    if (expectExists) {
+      return Array.from(codeSet).sort();
+    }
+
+    // exists=false: every code of the code system minus those that have the property.
+    const allRows = await all(
+      this.db,
+      `SELECT code
+       FROM concept
+       WHERE cs_id = ?`,
+      [this.meta.csId]
+    );
+    return allRows.map(r => r.code).filter(code => !codeSet.has(code)).sort();
+  }
+
+  /**
+   * Dispatches a property filter to its configured special handler.
+   *
+   * @param {Object} propertyCfg configuration carrying specialHandler
+   * @param {string} op filter operator
+   * @param {string} value filter value
+   * @returns {Promise<*>} the handler's result (used by the caller as a list of codes)
+   * @throws {Error} when the handler is not an object of kind 'derived-link-filter'
+   */
+  async #runSpecialPropertyHandler(propertyCfg, op, value) {
+    const handler = propertyCfg.specialHandler;
+    if (handler && typeof handler === 'object' && handler.kind === 'derived-link-filter') {
+      return this.#runDerivedLinkPropertyHandler(propertyCfg, handler, op, value);
+    }
+    throw new Error(`Unsupported sqlite runtime special handler '${JSON.stringify(handler)}'`);
+  }
+
+  async #runDerivedLinkPropertyHandler(propertyCfg, handler, op, value) {
+    if (!propertyCfg?.propertyId || !handler) {
+      return [];
+    }
+
+    if (!['=', 'in'].includes(op)) {
+      throw new Error(`Unsupported sqlite runtime property operator '${op}' for derived-link-filter`);
+    }
+
+    const values = this.#normalizedFilterValuesForSpecialHandler(op, value, propertyCfg.value);
+    if (values.length === 0) {
+      return [];
+    }
+
+    const seedCfg = handler.seed || {};
+    const seedCodes =
new Set(); + const directPrefixes = Array.isArray(seedCfg.directCodePrefixes) + ? seedCfg.directCodePrefixes.map(v => String(v || '')).filter(Boolean) + : []; + const allowAnyDirect = seedCfg.allowAnyDirect === true; + + for (const raw of values) { + if (!raw) continue; + if (allowAnyDirect || directPrefixes.some(prefix => raw.startsWith(prefix))) { + seedCodes.add(raw); + } + } + + let inversePropertyId = null; + if (seedCfg.useCurrentPropertyAsInverse === true) { + inversePropertyId = propertyCfg.propertyId; + } else if (seedCfg.inversePropertyCode) { + const inversePropertyDef = await this.#resolvePropertyDef(seedCfg.inversePropertyCode); + inversePropertyId = inversePropertyDef?.property_id || null; + } + + if (inversePropertyId) { + const reverseMatches = await this.#sourceCodesForTargetCodes(inversePropertyId, values); + for (const code of reverseMatches) { + seedCodes.add(code); + } + } + + if (seedCodes.size === 0) { + return []; + } + + const projectionCfg = handler.projection || {}; + const projectionPropertyCode = projectionCfg.propertyCode; + if (!projectionPropertyCode) { + throw new Error('derived-link-filter handler requires projection.propertyCode'); + } + const projectionPropertyDef = await this.#resolvePropertyDef(projectionPropertyCode); + if (!projectionPropertyDef) { + return []; + } + + const side = projectionCfg.side === 'source' ? 'source' : 'target'; + return this.#codesFromSourceCodesViaProperty( + projectionPropertyDef.property_id, + Array.from(seedCodes), + side + ); + } + + #normalizedFilterValuesForSpecialHandler(op, value, valueCfg) { + const rawValues = op === 'in' + ? splitFilterValueList(value) + : [String(value ?? 
'').trim()]; + + const out = new Set(); + for (const raw of rawValues) { + const normalized = normalizedFilterCandidates(raw, valueCfg); + for (const entry of normalized) { + if (entry) out.add(entry); + } + } + return Array.from(out); + } + + async #sourceCodesForTargetCodes(propertyId, targetCodes) { + if (!propertyId || !Array.isArray(targetCodes) || targetCodes.length === 0) { + return []; + } + + const placeholders = targetCodes.map(() => '?').join(', '); + const rows = await all( + this.db, + `SELECT DISTINCT src.code AS source_code + FROM concept_link l + JOIN concept src ON src.concept_id = l.source_concept_id + JOIN concept tgt ON tgt.concept_id = l.target_concept_id + WHERE src.cs_id = ? + AND l.property_id = ? + AND l.edge_set_id = ? + AND l.active = 1 + AND tgt.code IN (${placeholders})`, + [this.meta.csId, propertyId, this.meta.hierarchyEdgeSetId, ...targetCodes] + ); + return rows.map(row => row.source_code).filter(Boolean); + } + + async #codesFromSourceCodesViaProperty(propertyId, sourceCodes, resultSide) { + if (!propertyId || !Array.isArray(sourceCodes) || sourceCodes.length === 0) { + return []; + } + + const placeholders = sourceCodes.map(() => '?').join(', '); + const rows = await all( + this.db, + `SELECT DISTINCT src.code AS source_code, + tgt.code AS target_code + FROM concept_link l + JOIN concept src ON src.concept_id = l.source_concept_id + JOIN concept tgt ON tgt.concept_id = l.target_concept_id + WHERE src.cs_id = ? + AND l.property_id = ? + AND l.edge_set_id = ? + AND l.active = 1 + AND src.code IN (${placeholders})`, + [this.meta.csId, propertyId, this.meta.hierarchyEdgeSetId, ...sourceCodes] + ); + + const picked = rows.map((row) => (resultSide === 'source' ? 
row.source_code : row.target_code)); + return Array.from(new Set(picked.filter(Boolean))).sort(); + } + + #canUseFtsSearch(searchCfg) { + if (searchCfg.mode !== 'fts-broad') { + return false; + } + const tables = searchCfg.ftsTables || {}; + const available = this.meta.searchFtsTables || {}; + + for (const source of searchCfg.sources) { + const table = tables[source]; + if (!table) { + return false; + } + if (available[table] !== true) { + return false; + } + } + return true; + } + + async #searchCodesWithFts(searchText, searchCfg) { + const sqlParts = []; + const params = []; + const activeClause = searchCfg.activeOnly ? ' AND c.active = 1' : ''; + const matchText = toFtsMatchText(searchText); + + for (const source of searchCfg.sources) { + if (source === 'display') { + const table = sqlIdentifier(searchCfg.ftsTables.display, 'search_fts_display'); + sqlParts.push( + `SELECT c.code AS code + FROM ${table} f + JOIN concept c ON c.concept_id = f.rowid + WHERE c.cs_id = ?${activeClause} + AND f.term MATCH ?` + ); + params.push(this.meta.csId, matchText); + continue; + } + + if (source === 'designation') { + const table = sqlIdentifier(searchCfg.ftsTables.designation, 'search_fts_designation'); + const designationClause = searchCfg.designationActiveOnly ? ' AND d.active = 1' : ''; + sqlParts.push( + `SELECT c.code AS code + FROM ${table} f + JOIN designation d ON d.designation_id = f.rowid + JOIN concept c ON c.concept_id = d.concept_id + WHERE c.cs_id = ?${activeClause}${designationClause} + AND f.term MATCH ?` + ); + params.push(this.meta.csId, matchText); + continue; + } + + if (source === 'literal') { + const table = sqlIdentifier(searchCfg.ftsTables.literal, 'search_fts_literal'); + const literalClause = searchCfg.literalActiveOnly ? 
' AND cl.active = 1' : ''; + sqlParts.push( + `SELECT c.code AS code + FROM ${table} f + JOIN concept_literal cl ON cl.literal_id = f.rowid + JOIN concept c ON c.concept_id = cl.source_concept_id + WHERE c.cs_id = ?${activeClause}${literalClause} + AND f.term MATCH ?` + ); + params.push(this.meta.csId, matchText); + } + } + + if (sqlParts.length === 0) { + return []; + } + + const rows = await all( + this.db, + `SELECT DISTINCT code + FROM ( + ${sqlParts.join('\nUNION\n')} + ) + ORDER BY code`, + params + ); + return rows.map(r => r.code); + } + + async #searchCodesWithLike(searchText, searchCfg) { + const sqlParts = []; + const params = []; + const likeText = `%${searchText}%`; + const activeClause = searchCfg.activeOnly ? ' AND c.active = 1' : ''; + const likeExpr = searchCfg.likeFallback?.caseInsensitive === false + ? { display: 'c.display LIKE ?', designation: 'd.term LIKE ?', literal: 'COALESCE(cl.value_text, cl.value_raw) LIKE ?' } + : { + display: 'LOWER(c.display) LIKE LOWER(?)', + designation: 'LOWER(d.term) LIKE LOWER(?)', + literal: 'LOWER(COALESCE(cl.value_text, cl.value_raw)) LIKE LOWER(?)' + }; + + for (const source of searchCfg.sources) { + if (source === 'display') { + sqlParts.push( + `SELECT c.code AS code + FROM concept c + WHERE c.cs_id = ?${activeClause} + AND c.display IS NOT NULL + AND ${likeExpr.display}` + ); + params.push(this.meta.csId, likeText); + continue; + } + + if (source === 'designation') { + const designationClause = searchCfg.designationActiveOnly ? ' AND d.active = 1' : ''; + sqlParts.push( + `SELECT c.code AS code + FROM designation d + JOIN concept c ON c.concept_id = d.concept_id + WHERE c.cs_id = ?${activeClause}${designationClause} + AND d.term IS NOT NULL + AND ${likeExpr.designation}` + ); + params.push(this.meta.csId, likeText); + continue; + } + + if (source === 'literal') { + const literalClause = searchCfg.literalActiveOnly ? 
' AND cl.active = 1' : ''; + sqlParts.push( + `SELECT c.code AS code + FROM concept_literal cl + JOIN concept c ON c.concept_id = cl.source_concept_id + WHERE c.cs_id = ?${activeClause}${literalClause} + AND COALESCE(cl.value_text, cl.value_raw) IS NOT NULL + AND ${likeExpr.literal}` + ); + params.push(this.meta.csId, likeText); + } + } + + if (sqlParts.length === 0) { + return []; + } + + const rows = await all( + this.db, + `SELECT DISTINCT code + FROM ( + ${sqlParts.join('\nUNION\n')} + ) + ORDER BY code`, + params + ); + return rows.map(r => r.code); + } + + async _ensureContext(code) { + if (!code) { + return null; + } + if (typeof code === 'string') { + const located = await this.locate(code); + return located.context; + } + if (code instanceof SqliteRuntimeV0Context) { + return code; + } + throw new Error(`Unknown context type: ${typeof code}`); + } + + #useMembershipPredicate(filterContext) { + return !!filterContext && filterContext.forIterate === false; + } + + #isPredicateFilter(set) { + return set instanceof SqliteRuntimeV0PredicateFilter; + } + + #isPagedDescendantFilter(set) { + return set instanceof SqliteRuntimeV0PagedDescendantFilter; + } + + #isQueryIterator(iteratorContext) { + return iteratorContext instanceof SqliteRuntimeV0QueryIterator; + } + + async #loadNextIteratorPage(iteratorContext) { + if (!this.#isQueryIterator(iteratorContext) || iteratorContext.done) { + return; + } + + const sql = []; + const params = []; + + if (iteratorContext.mode === 'all') { + sql.push( + `SELECT c.concept_id, c.code, c.display, c.definition, c.active`, + `FROM concept c`, + `WHERE c.cs_id = ?`, + ` AND c.active = 1` + ); + params.push(this.meta.csId); + } else if (iteratorContext.mode === 'roots') { + sql.push( + `SELECT c.concept_id, c.code, c.display, c.definition, c.active`, + `FROM concept c`, + `LEFT JOIN concept_link l`, + ` ON l.source_concept_id = c.concept_id`, + ` AND l.property_id = ?`, + ` AND l.edge_set_id = ?`, + ` AND l.active = 1`, + `WHERE 
c.cs_id = ?`, + ` AND c.active = 1`, + ` AND l.edge_id IS NULL` + ); + params.push(this.meta.hierarchyPropertyId, this.meta.hierarchyEdgeSetId, this.meta.csId); + } else if (iteratorContext.mode === 'children') { + if (!iteratorContext.targetConceptId) { + iteratorContext.done = true; + return; + } + sql.push( + `SELECT c.concept_id, c.code, c.display, c.definition, c.active`, + `FROM concept_link l`, + `JOIN concept c ON c.concept_id = l.source_concept_id`, + `WHERE l.target_concept_id = ?`, + ` AND l.property_id = ?`, + ` AND l.edge_set_id = ?`, + ` AND l.active = 1`, + ` AND c.cs_id = ?` + ); + params.push( + iteratorContext.targetConceptId, + this.meta.hierarchyPropertyId, + this.meta.hierarchyEdgeSetId, + this.meta.csId + ); + } else { + iteratorContext.done = true; + return; + } + + if (iteratorContext.lastCode !== null) { + sql.push(`AND c.code > ?`); + params.push(iteratorContext.lastCode); + } + + sql.push(`ORDER BY c.code`); + sql.push(`LIMIT ?`); + params.push(iteratorContext.pageSize); + + const rows = await all(this.db, sql.join('\n'), params); + iteratorContext.rows = []; + iteratorContext.cursor = 0; + + if (!rows.length) { + iteratorContext.done = true; + return; + } + + for (const row of rows) { + if (!this.#allowDefaultIterationCode(row.code)) { + continue; + } + iteratorContext.rows.push( + new SqliteRuntimeV0Context( + row.concept_id, + row.code, + row.display, + row.definition, + row.active === 1 + ) + ); + } + + iteratorContext.lastCode = rows[rows.length - 1].code; + if (rows.length < iteratorContext.pageSize) { + iteratorContext.done = true; + } + } + + async #loadNextDescendantPage(set) { + if (!this.#isPagedDescendantFilter(set) || set.done) { + return; + } + + if (!set.strategy) { + const countRow = await get( + this.db, + `SELECT COUNT(*) AS n + FROM closure + WHERE ancestor_id = ?`, + [set.ancestorId] + ); + const rawCount = Math.max(0, countRow?.n || 0); + set.descendantCount = set.includeSelf ? 
rawCount : Math.max(0, rawCount - 1); + const threshold = Number(this.runtime?.hierarchy?.closure?.conceptScanThreshold || 25000); + set.strategy = set.descendantCount >= threshold ? 'concept-scan' : 'closure-join'; + } + + let rows; + if (set.strategy === 'concept-scan') { + const sql = [ + `SELECT c.concept_id, c.code, c.display, c.definition, c.active`, + `FROM concept c`, + `WHERE c.cs_id = ?`, + ` AND EXISTS (`, + ` SELECT 1`, + ` FROM closure cl`, + ` WHERE cl.ancestor_id = ?`, + ` AND cl.descendant_id = c.concept_id`, + ` )` + ]; + const params = [this.meta.csId, set.ancestorId]; + + if (!set.includeSelf) { + sql.push(`AND c.concept_id <> ?`); + params.push(set.ancestorId); + } + if (set.lastCode !== null) { + sql.push(`AND c.code > ?`); + params.push(set.lastCode); + } + + sql.push(`ORDER BY c.code`); + sql.push(`LIMIT ?`); + params.push(set.pageSize); + rows = await all(this.db, sql.join('\n'), params); + } else { + const sql = [ + `SELECT c.concept_id, c.code, c.display, c.definition, c.active`, + `FROM closure cl`, + `JOIN concept c ON c.concept_id = cl.descendant_id`, + `WHERE cl.ancestor_id = ?` + ]; + const params = [set.ancestorId]; + + if (!set.includeSelf) { + sql.push(`AND cl.descendant_id <> ?`); + params.push(set.ancestorId); + } + if (set.lastCode !== null) { + sql.push(`AND c.code > ?`); + params.push(set.lastCode); + } + + sql.push(`ORDER BY c.code`); + sql.push(`LIMIT ?`); + params.push(set.pageSize); + rows = await all(this.db, sql.join('\n'), params); + } + + if (!rows.length) { + set.done = true; + return; + } + + for (const row of rows) { + set.rows.push( + new SqliteRuntimeV0Context( + row.concept_id, + row.code, + row.display, + row.definition, + row.active === 1 + ) + ); + } + + set.lastCode = rows[rows.length - 1].code; + if (rows.length < set.pageSize) { + set.done = true; + } + } + + async #filterLocatePredicate(set, code) { + if (!code) { + return `Code '${code}' not found in filter set`; + } + const located = await 
this.locate(code); + if (!located.context) { + return `Code '${code}' not found in filter set`; + } + const ok = await this.#predicateMatchesContext(set, located.context); + if (ok) { + return located.context; + } + return this.#predicateNotFoundMessage(set, code); + } + + async #predicateMatchesContext(set, context) { + const ctxt = await this._ensureContext(context); + if (!ctxt) return false; + + if (set.kind === 'concept-equals') { + return ctxt.code === set.code; + } + + if (set.kind === 'concept-hierarchy') { + if (!set.ancestorId) return false; + return this.#isA(set.ancestorId, ctxt.conceptId, !!set.includeSelf); + } + + if (set.kind === 'concept-in') { + return this.#isConceptInValueSet(ctxt.conceptId, set.valueSetUrl); + } + + if (set.kind === 'property-filter') { + return this.#matchesPropertyPredicate(set, ctxt); + } + + throw new Error(`Unknown predicate filter kind '${set.kind}'`); + } + + #predicateNotFoundMessage(set, code) { + if (set.kind === 'concept-hierarchy') { + if (set.missingMessage) { + return set.missingMessage; + } + return `Code '${code}' is not in hierarchy of '${set.parentCode}'`; + } + + if (set.kind === 'concept-in') { + return `Code '${code}' not found in value set '${set.valueSetUrl}'`; + } + + if (set.kind === 'concept-equals') { + return `Code '${code}' does not equal '${set.code}'`; + } + + return `Code '${code}' not found in filter set`; + } + + async #matchesPropertyPredicate(set, context) { + const propertyCfg = set.propertyCfg; + if (!propertyCfg?.propertyId || !context?.conceptId) { + return false; + } + + if (set.op === '=') { + return this.#conceptMatchesPropertyEquals( + context.conceptId, + propertyCfg, + set.candidates || [], + set.caseSensitive === true + ); + } + + if (set.op === 'in') { + return this.#conceptMatchesPropertyEquals( + context.conceptId, + propertyCfg, + set.candidates || [], + set.caseSensitive === true + ); + } + + if (set.op === 'exists') { + return this.#conceptMatchesPropertyExists( + 
context.conceptId, + propertyCfg, + set.expectExists !== false + ); + } + + if (set.op === 'regex') { + return this.#conceptMatchesPropertyRegex( + context.conceptId, + propertyCfg, + set.pattern || '' + ); + } + + throw new Error(`Unsupported property predicate operator '${set.op}'`); + } + + async #conceptMatchesPropertyEquals(conceptId, propertyCfg, candidates, caseSensitive) { + if (!Array.isArray(candidates) || candidates.length === 0) { + return false; + } + + const values = candidates.map(v => String(v)); + if (propertyCfg.sources.includes('literal')) { + const literalHit = await this.#conceptHasLiteralEquals(conceptId, propertyCfg.propertyId, values, caseSensitive); + if (literalHit) { + return true; + } + } + + if (propertyCfg.sources.includes('link')) { + const linkHit = await this.#conceptHasLinkEquals(conceptId, propertyCfg, values, caseSensitive); + if (linkHit) { + return true; + } + } + + return false; + } + + async #conceptHasLiteralEquals(conceptId, propertyId, values, caseSensitive) { + if (!conceptId || !propertyId || !Array.isArray(values) || values.length === 0) { + return false; + } + + const placeholders = values.map(() => '?').join(', '); + const textPredicate = caseSensitive + ? `cl.value_text IN (${placeholders})` + : `cl.value_text COLLATE NOCASE IN (${placeholders})`; + const rawPredicate = caseSensitive + ? `cl.value_raw IN (${placeholders})` + : `cl.value_raw COLLATE NOCASE IN (${placeholders})`; + + const row = await get( + this.db, + `SELECT 1 AS found + FROM concept_literal cl + WHERE cl.source_concept_id = ? + AND cl.property_id = ? 
+ AND cl.active = 1 + AND ( + (cl.value_text IS NOT NULL AND ${textPredicate}) + OR + (cl.value_text IS NULL AND cl.value_raw IS NOT NULL AND ${rawPredicate}) + ) + LIMIT 1`, + [conceptId, propertyId, ...values, ...values] + ); + return !!row; + } + + async #conceptHasLinkEquals(conceptId, propertyCfg, values, caseSensitive) { + if (!conceptId || !propertyCfg?.propertyId || !Array.isArray(values) || values.length === 0) { + return false; + } + + const placeholders = values.map(() => '?').join(', '); + const codePredicate = caseSensitive + ? `tgt.code IN (${placeholders})` + : `tgt.code COLLATE NOCASE IN (${placeholders})`; + const displayPredicate = caseSensitive + ? `tgt.display IN (${placeholders})` + : `tgt.display COLLATE NOCASE IN (${placeholders})`; + const where = propertyCfg.linkMatch === 'code-or-display' + ? `(${codePredicate} OR ${displayPredicate})` + : `(${codePredicate})`; + + const params = [ + conceptId, + propertyCfg.propertyId, + this.meta.hierarchyEdgeSetId, + this.meta.csId, + ...values + ]; + if (propertyCfg.linkMatch === 'code-or-display') { + params.push(...values); + } + + const row = await get( + this.db, + `SELECT 1 AS found + FROM concept_link l + JOIN concept tgt ON tgt.concept_id = l.target_concept_id + WHERE l.source_concept_id = ? + AND l.property_id = ? + AND l.edge_set_id = ? + AND l.active = 1 + AND tgt.cs_id = ? + AND ${where} + LIMIT 1`, + params + ); + return !!row; + } + + async #conceptMatchesPropertyExists(conceptId, propertyCfg, expectExists) { + let found = false; + if (propertyCfg.sources.includes('literal')) { + found = found || await this.#conceptHasLiteralAny(conceptId, propertyCfg.propertyId); + } + if (propertyCfg.sources.includes('link')) { + found = found || await this.#conceptHasLinkAny(conceptId, propertyCfg.propertyId); + } + return expectExists ? 
found : !found; + } + + async #conceptHasLiteralAny(conceptId, propertyId) { + const row = await get( + this.db, + `SELECT 1 AS found + FROM concept_literal + WHERE source_concept_id = ? + AND property_id = ? + AND active = 1 + LIMIT 1`, + [conceptId, propertyId] + ); + return !!row; + } + + async #conceptHasLinkAny(conceptId, propertyId) { + const row = await get( + this.db, + `SELECT 1 AS found + FROM concept_link + WHERE source_concept_id = ? + AND property_id = ? + AND edge_set_id = ? + AND active = 1 + LIMIT 1`, + [conceptId, propertyId, this.meta.hierarchyEdgeSetId] + ); + return !!row; + } + + async #conceptMatchesPropertyRegex(conceptId, propertyCfg, pattern) { + let regex; + try { + regex = new RegExp(String(pattern || '')); + } catch (error) { + throw new Error(`Invalid regex '${pattern}': ${error.message}`); + } + + if (propertyCfg.sources.includes('literal')) { + const rows = await all( + this.db, + `SELECT COALESCE(value_text, value_raw) AS value + FROM concept_literal + WHERE source_concept_id = ? + AND property_id = ? + AND active = 1 + AND COALESCE(value_text, value_raw) IS NOT NULL`, + [conceptId, propertyCfg.propertyId] + ); + for (const row of rows) { + if (row.value && regex.test(row.value)) { + return true; + } + } + } + + if (propertyCfg.sources.includes('link')) { + const rows = await all( + this.db, + `SELECT tgt.code AS target_code, + tgt.display AS target_display + FROM concept_link l + JOIN concept tgt ON tgt.concept_id = l.target_concept_id + WHERE l.source_concept_id = ? + AND l.property_id = ? + AND l.edge_set_id = ? 
+ AND l.active = 1`, + [conceptId, propertyCfg.propertyId, this.meta.hierarchyEdgeSetId] + ); + for (const row of rows) { + const codeMatch = row.target_code && regex.test(row.target_code); + const displayMatch = propertyCfg.linkMatch === 'code-or-display' && + row.target_display && regex.test(row.target_display); + if (codeMatch || displayMatch) { + return true; + } + } + } + + return false; + } + + async #isConceptInValueSet(conceptId, valueSetUrl) { + if (!conceptId || !valueSetUrl) return false; + const row = await get( + this.db, + `SELECT 1 AS found + FROM value_set v + JOIN value_set_member m ON m.vs_id = v.vs_id + WHERE v.cs_id = ? + AND v.url = ? + AND m.active = 1 + AND m.concept_id = ? + LIMIT 1`, + [this.meta.csId, valueSetUrl, conceptId] + ); + return !!row; + } + + async #isA(ancestorId, descendantId, includeSelf) { + if (!this.meta.hierarchyPropertyId) return false; + if (!ancestorId || !descendantId) return false; + if (ancestorId === descendantId && includeSelf) return true; + + if (this.meta.closureRows > 0 && this.meta.useClosure) { + const row = await get( + this.db, + `SELECT 1 AS found + FROM closure + WHERE ancestor_id = ? + AND descendant_id = ? + LIMIT 1`, + [ancestorId, descendantId] + ); + if (!row) return false; + if (!includeSelf && ancestorId === descendantId) return false; + return true; + } + + if (!this.#allowRecursiveHierarchyFallback()) { + return false; + } + + const row = await get( + this.db, + `WITH RECURSIVE descendants(concept_id) AS ( + SELECT ? + UNION + SELECT l.source_concept_id + FROM concept_link l + JOIN descendants d ON d.concept_id = l.target_concept_id + WHERE l.property_id = ? + AND l.edge_set_id = ? + AND l.active = 1 + ) + SELECT 1 AS found + FROM descendants + WHERE concept_id = ? 
+ LIMIT 1`, + [ancestorId, this.meta.hierarchyPropertyId, this.meta.hierarchyEdgeSetId, descendantId] + ); + + if (!row) return false; + if (!includeSelf && ancestorId === descendantId) return false; + return true; + } + + async #descendantCodes(ancestorCode, includeSelf) { + if (!ancestorCode) return []; + const ancestorContext = await this._ensureContext(ancestorCode); + if (!ancestorContext) return []; + const ancestorId = ancestorContext.conceptId; + + if (this.meta.closureRows > 0 && this.meta.useClosure) { + const rows = await all( + this.db, + `SELECT c.code, cl.descendant_id + FROM closure cl + JOIN concept c ON c.concept_id = cl.descendant_id + WHERE cl.ancestor_id = ? + ORDER BY c.code`, + [ancestorId] + ); + return rows + .filter(r => includeSelf || r.descendant_id !== ancestorId) + .map(r => r.code); + } + + if (!this.#allowRecursiveHierarchyFallback()) { + return []; + } + + const rows = await all( + this.db, + `WITH RECURSIVE descendants(concept_id, depth) AS ( + SELECT ?, 0 + UNION + SELECT l.source_concept_id, descendants.depth + 1 + FROM concept_link l + JOIN descendants ON descendants.concept_id = l.target_concept_id + WHERE l.property_id = ? + AND l.edge_set_id = ? + AND l.active = 1 + ) + SELECT c.code, descendants.concept_id + FROM descendants + JOIN concept c ON c.concept_id = descendants.concept_id + ORDER BY c.code`, + [ancestorId, this.meta.hierarchyPropertyId, this.meta.hierarchyEdgeSetId] + ); + + return rows + .filter(r => includeSelf || r.concept_id !== ancestorId) + .map(r => r.code); + } + + async #batchLoadContextsByCodes(codes) { + if (!Array.isArray(codes) || codes.length === 0) { + return []; + } + + const placeholders = codes.map(() => '?').join(', '); + const rows = await all( + this.db, + `SELECT concept_id, code, display, definition, active + FROM concept + WHERE cs_id = ? 
+ AND code IN (${placeholders})`, + [this.meta.csId, ...codes] + ); + + const byCode = new Map(); + for (const row of rows) { + byCode.set(row.code, row); + } + + const contexts = []; + for (const code of codes) { + const row = byCode.get(code); + if (!row) { + continue; + } + contexts.push( + new SqliteRuntimeV0Context( + row.concept_id, + row.code, + row.display, + row.definition, + row.active === 1 + ) + ); + } + + return contexts; + } + + #allowDefaultIterationCode(code) { + if (!this.defaultIterationRegex) { + return true; + } + return this.defaultIterationRegex.test(String(code || '')); + } + + #allowRecursiveHierarchyFallback() { + return this.runtime?.hierarchy?.closure?.fallbackRecursive !== false; + } +} + +class SqliteRuntimeV0FactoryProvider extends CodeSystemFactoryProvider { + + /** + * Register a v0 specialization. Subclass modules call this at require-time + * to declare interest in specific terminologies. + * + * @param {Object} definition + * @param {string} definition.id - Unique identifier (e.g. 'snomed-expressions') + * @param {Function} definition.FactoryClass - Subclass of SqliteRuntimeV0FactoryProvider + * @param {string} [definition.systemPrefix] - URL prefix to match against + * the code system's canonical URI (e.g. 
'http://snomed.info/sct') + * @param {string[]} [definition.tags] - All listed tags must be present in + * the db's behaviorFlags.tags for a match + * @param {number} [definition.priority=0] - Higher wins on conflict + */ + static registerSpecialization(definition) { + if (!definition || typeof definition !== 'object') { + throw new Error('registerSpecialization requires an object definition'); + } + if (typeof definition.FactoryClass !== 'function') { + throw new Error('registerSpecialization requires a FactoryClass'); + } + if (!definition.systemPrefix && (!definition.tags || definition.tags.length === 0)) { + throw new Error('registerSpecialization requires systemPrefix and/or tags'); + } + V0_SPECIALIZATION_REGISTRY.push({ + id: String(definition.id || `v0-spec-${V0_SPECIALIZATION_REGISTRY.length + 1}`), + priority: Number.isFinite(definition.priority) ? definition.priority : 0, + systemPrefix: definition.systemPrefix || null, + tags: Array.isArray(definition.tags) ? definition.tags : [], + FactoryClass: definition.FactoryClass + }); + V0_SPECIALIZATION_REGISTRY.sort((a, b) => b.priority - a.priority); + } + + static listSpecializations() { + return V0_SPECIALIZATION_REGISTRY.map(e => ({ + id: e.id, priority: e.priority, + systemPrefix: e.systemPrefix, tags: [...e.tags] + })); + } + + /** + * Load a v0 database file and return the appropriate factory. Probes the + * db metadata (canonical URI + tags), checks the specialization registry, + * and returns a specialized factory if one matches — otherwise the generic. + * + * @param {Object} options + * @param {string} [options.specialization] - Override automatic matching: + * 'none' forces the generic base factory; a specific id (e.g. + * 'snomed-expressions') selects that entry from the registry. + * Omit to use automatic tag/URL matching (the default). 
+ */ + static async createFromMetadata(i18n, dbPath, options = {}) { + const probe = new SqliteRuntimeV0FactoryProvider(i18n, dbPath, options); + await probe.load(); + + const { specialization } = options; + + // 'none' = force generic base, skip registry + if (specialization === 'none') { + return probe; + } + + const system = probe.system() || ''; + const flags = probe._runtime?.behaviorFlags || {}; + const dbTags = new Set(Array.isArray(flags.tags) ? flags.tags : []); + + for (const entry of V0_SPECIALIZATION_REGISTRY) { + // If a specific specialization was requested, only match that id + if (specialization && entry.id !== specialization) continue; + + const urlMatch = !entry.systemPrefix || system.startsWith(entry.systemPrefix); + const tagMatch = entry.tags.length === 0 || entry.tags.every(t => dbTags.has(t)); + if (urlMatch && tagMatch) { + probe.close(); + const resolved = new entry.FactoryClass(i18n, dbPath, options); + await resolved.load(); + return resolved; + } + } + + return probe; + } + + constructor(i18n, dbPath, options = {}) { + super(i18n); + this.dbPath = dbPath; + this.idPrefix = options.idPrefix || 'sqlite-runtime-v0'; + this._loaded = false; + this._loadPromise = null; + this._db = null; + this._meta = null; + this._runtime = null; + this._sharedState = { + statusByPropertyId: new Map(), + statusLoadPromises: new Map(), + childTargetSets: new Map(), + childTargetLoadPromises: new Map() + }; + } + + system() { + return this._meta?.baseUri || null; + } + + version() { + return this._meta?.canonicalUri || this._meta?.version || null; + } + + getPartialVersion() { + const v = this.version(); + if (!v) return null; + const idx = v.indexOf('/version/'); + if (idx === -1) return null; + return v.substring(0, idx); + } + + name() { + return this._meta?.name || this.system() || 'SQLite Runtime'; + } + + defaultVersion() { + return this._meta?.version || 'unknown'; + } + + async load() { + if (this._loaded) { + return; + } + if (!this._loadPromise) { + 
this._loadPromise = (async () => { + this._db = await openDb(this.dbPath, true); + const db = this._db; + + const codeSystem = await get( + db, + `SELECT cs_id, base_uri, canonical_uri, edition_code, version, name + FROM code_system + ORDER BY cs_id DESC + LIMIT 1`, + [] + ); + + if (!codeSystem) { + throw new Error(`No code_system rows found in ${this.dbPath}`); + } + + const totalRow = await get( + db, + 'SELECT COUNT(*) AS n FROM concept WHERE cs_id = ?', + [codeSystem.cs_id] + ); + + const cfgRows = await all( + db, + `SELECT key, value + FROM cs_config + WHERE cs_id = ?`, + [codeSystem.cs_id] + ); + const cfg = {}; + for (const row of cfgRows) { + cfg[row.key] = parseConfigValue(row.value); + } + + const runtime = buildRuntimeConfig(cfg, codeSystem.base_uri); + const searchCfg = normalizedSearchConfig(runtime.search); + + let hierarchyPropertyCode = runtime.hierarchy?.propertyCode || null; + if (!hierarchyPropertyCode) { + const hierarchyRow = await get( + db, + `SELECT property_code + FROM property_def + WHERE cs_id = ? AND is_hierarchy = 1 + ORDER BY property_id + LIMIT 1`, + [codeSystem.cs_id] + ); + hierarchyPropertyCode = hierarchyRow?.property_code || null; + } + + let hierarchyPropertyId = null; + if (hierarchyPropertyCode) { + const hierarchyPropRow = await get( + db, + `SELECT property_id + FROM property_def + WHERE cs_id = ? AND property_code = ?`, + [codeSystem.cs_id, hierarchyPropertyCode] + ); + hierarchyPropertyId = hierarchyPropRow?.property_id || null; + } + + const closureCountRow = await get(db, `SELECT COUNT(*) AS n FROM closure`, []); + const searchFtsTables = {}; + for (const source of searchCfg.sources) { + const configured = searchCfg.ftsTables?.[source]; + if (!configured) continue; + const table = sqlIdentifier(configured, configured); + if (!table) continue; + const exists = await get( + db, + `SELECT 1 AS found + FROM sqlite_master + WHERE type = 'table' AND name = ? 
+ LIMIT 1`, + [table] + ); + searchFtsTables[table] = !!exists; + } + const designationOrderIndex = await get( + db, + `SELECT 1 AS found + FROM sqlite_master + WHERE type = 'index' AND name = 'idx_designation_concept_pref_term' + LIMIT 1`, + [] + ); + + this._meta = { + csId: codeSystem.cs_id, + baseUri: codeSystem.base_uri, + canonicalUri: codeSystem.canonical_uri, + editionCode: codeSystem.edition_code, + version: codeSystem.version, + name: codeSystem.name || codeSystem.base_uri, + totalConcepts: totalRow ? totalRow.n : 0, + defaultLanguage: runtime.languages?.default || 'en', + closureRows: closureCountRow ? closureCountRow.n : 0, + hierarchyPropertyId, + hierarchyEdgeSetId: runtime.hierarchy?.edgeSetId || 1, + useClosure: runtime.hierarchy?.closure?.enabled !== false, + searchFtsTables, + designationOrderIndex: !!designationOrderIndex + }; + this._runtime = runtime; + this._loaded = true; + })(); + } + await this._loadPromise; + } + + async build(opContext, supplements) { + if (!this._loaded) { + await this.load(); + } + + this.recordUse(); + + return new SqliteRuntimeV0Provider(opContext, supplements, this._db, this._meta, this._runtime, { + ownsDb: false, + sharedState: this._sharedState, + dbPath: this.dbPath + }); + } + + async buildKnownValueSet(url, version) { + if (!this._loaded) { + await this.load(); + } + + if (!url || !this.system() || !url.startsWith(this.system())) { + return null; + } + + if (version && this._meta.canonicalUri && !this._meta.canonicalUri.startsWith(version)) { + return null; + } + + const qIndex = url.indexOf('?'); + if (qIndex < 0) { + return null; + } + + const query = url.substring(qIndex + 1); + const implicit = this._runtime.implicitValueSets || {}; + + if (Array.isArray(implicit.all?.queries) && implicit.all.queries.includes(query)) { + return { + resourceType: 'ValueSet', + url, + version: this._meta.version, + status: 'active', + name: `${sanitizeName(this.system())}All`, + title: `${this.name()} All Concepts`, + 
description: `All concepts from ${this.name()}`, + compose: { include: [{ system: this.system() }] } + }; + } + + for (const [name, cfg] of Object.entries(implicit)) { + if (!cfg || !cfg.queryPrefix || !cfg.filter) continue; + if (!query.startsWith(cfg.queryPrefix)) continue; + + const suffix = query.substring(cfg.queryPrefix.length); + const filterValue = cfg.filter.valueFromSuffix ? suffix : cfg.filter.value; + return { + resourceType: 'ValueSet', + url, + version: this._meta.version, + status: 'active', + name: `${sanitizeName(this.system())}${name}${suffix}`, + compose: { + include: [{ + system: this.system(), + filter: [{ + property: cfg.filter.property, + op: cfg.filter.op, + value: filterValue + }] + }] + } + }; + } + + return null; + } + + id() { + return `${this.idPrefix}:${this._meta?.version || 'unknown'}`; + } + + close() { + if (!this._db) { + return; + } + this._db.close(); + this._db = null; + this._loaded = false; + this._loadPromise = null; + } +} + +function buildRuntimeConfig(cfg, system) { + const searchCfg = normalizedSearchConfig(cfg['runtime.search']); + + const runtime = { + versioning: cfg['runtime.versioning'] || { algorithm: 'string', partialMatch: true }, + languages: cfg['runtime.languages'] || { default: 'en' }, + designations: cfg['runtime.designations'] || {}, + hierarchy: cfg['runtime.hierarchy'] || { + propertyCode: null, + edgeSetId: 1, + closure: { enabled: true, fallbackRecursive: false } + }, + filters: cfg['runtime.filters'] || { + concept: { operators: ['=', 'is-a', 'descendent-of', 'in'] }, + code: { operators: ['regex'] } + }, + implicitValueSets: cfg['runtime.implicitValueSets'] || defaultImplicitValueSets(system), + status: cfg['runtime.status'] || { + inactive: { source: 'concept.active', invert: true }, + deprecated: { source: 'constant', value: false }, + abstract: { source: 'constant', value: false } + }, + iteration: cfg['runtime.iteration'] || {}, + search: searchCfg, + behaviorFlags: cfg['runtime.behaviorFlags'] || 
/**
 * Normalises the raw `runtime.search` configuration into a fully defaulted
 * settings object.
 *
 * Only 'display', 'designation' and 'literal' are recognised as sources.
 * When nothing is configured, or none of the configured entries is
 * recognised (for example a misspelt source name), the sources default to
 * ['designation'], so the result never carries an empty source list.
 *
 * The function is idempotent: feeding its own output back in yields an
 * equal result.
 *
 * @param {Object} [raw] raw search configuration; may be null or undefined
 * @returns {Object} settings with mode, activeOnly, designationActiveOnly,
 *   literalActiveOnly, sources, ftsTables and likeFallback all populated
 */
function normalizedSearchConfig(raw) {
  const value = raw || {};

  const recognised = ['display', 'designation', 'literal'];
  const configured = Array.isArray(value.sources)
    ? value.sources.filter(s => recognised.includes(s))
    : [];
  // Fall back after filtering, not before: a list made up solely of unknown
  // names must not silently turn text search into a no-op.
  const sources = configured.length > 0 ? configured : ['designation'];

  return {
    mode: value.mode || 'like',
    activeOnly: value.activeOnly !== false,
    designationActiveOnly: value.designationActiveOnly !== false,
    literalActiveOnly: value.literalActiveOnly !== false,
    sources,
    ftsTables: {
      display: value.ftsTables?.display || 'search_fts_display',
      designation: value.ftsTables?.designation || 'search_fts_designation',
      literal: value.ftsTables?.literal || 'search_fts_literal'
    },
    likeFallback: {
      enabled: value.likeFallback?.enabled !== false,
      caseInsensitive: value.likeFallback?.caseInsensitive !== false
    }
  };
}
/**
 * Builds the list of candidate strings to match for a filter value: the
 * trimmed value itself plus, when configured, its alias.
 *
 * The alias is looked up under the exact value first and then, unless
 * `normalizeCase` is explicitly false, under the lower-cased value. Only the
 * alias table's own properties are consulted, so values that happen to be
 * named like Object.prototype members ('constructor', 'toString', ...) do
 * not pick up inherited functions as bogus aliases.
 *
 * @param {*} value filter value; null/undefined/blank yields no candidates
 * @param {Object} [valueCfg] optional `{ normalizeCase, aliases }` settings
 * @returns {string[]} unique candidates, the raw value first
 */
function normalizedFilterCandidates(value, valueCfg) {
  const raw = String(value ?? '').trim();
  if (!raw) return [];

  const cfg = valueCfg || {};
  const normalizeCase = cfg.normalizeCase !== false;
  const aliases = cfg.aliases || {};
  const ownAlias = key =>
    (Object.prototype.hasOwnProperty.call(aliases, key) ? aliases[key] : undefined);

  const out = new Set();
  out.add(raw);

  const rawKey = normalizeCase ? raw.toLowerCase() : raw;
  let alias = ownAlias(raw);
  if (alias === undefined) {
    alias = ownAlias(rawKey);
  }
  if (alias !== undefined && alias !== null && String(alias).trim() !== '') {
    out.add(String(alias).trim());
  }

  return Array.from(out);
}
/**
 * Resolves the value of an `in` filter to a value set URL.
 *
 * A value that already is an http:// or https:// URL is returned unchanged.
 * Otherwise the refset id is extracted from the value and substituted into
 * `runtime.implicitValueSets.refset.urlTemplate` (placeholders `{system}`
 * and `{value}`) when a template is configured, or appended to
 * `<system>?fhir_vs=refset/` when it is not.
 *
 * Substitution uses replacer functions so that `$&`, `$1` and similar
 * sequences in the system or value are inserted literally rather than being
 * interpreted as replacement patterns.
 *
 * @param {string} system code system URI
 * @param {*} value filter value: a URL, a refset id, or text containing 'refset/<id>'
 * @param {Object} runtime runtime configuration
 * @returns {string} the resolved value set URL
 */
function resolveInValueSetUrl(system, value, runtime) {
  if (typeof value === 'string' && (value.startsWith('http://') || value.startsWith('https://'))) {
    return value;
  }

  const refsetId = extractRefsetId(value);
  const template = runtime.implicitValueSets?.refset?.urlTemplate;
  if (template) {
    return template
      .replace('{system}', () => String(system))
      .replace('{value}', () => String(refsetId));
  }

  return `${system}?fhir_vs=refset/${refsetId}`;
}

/**
 * Returns the part of a value that follows the first 'refset/' marker, or
 * the whole value (as a string) when the marker is absent.
 *
 * Falsy input is returned as-is. Any other non-string input, such as a
 * numeric refset id, is converted to a string instead of raising a TypeError.
 *
 * @param {*} value raw filter value
 * @returns {*} the extracted id, or the falsy input unchanged
 */
function extractRefsetId(value) {
  if (!value) return value;
  const text = String(value);
  const marker = 'refset/';
  const idx = text.indexOf(marker);
  if (idx === -1) return text;
  return text.substring(idx + marker.length);
}
name : null; + if (primary && /^[A-Za-z_][A-Za-z0-9_]*$/.test(primary)) { + return primary; + } + if (typeof fallback === 'string' && /^[A-Za-z_][A-Za-z0-9_]*$/.test(fallback)) { + return fallback; + } + return null; +} + +function parseConfigValue(value) { + if (value === null || value === undefined) return null; + if (typeof value !== 'string') return value; + + try { + return JSON.parse(value); + } catch (_error) { + return value; + } +} + +function openDb(dbPath, readOnly) { + return new Promise((resolve, reject) => { + const flags = readOnly ? sqlite3.OPEN_READONLY : sqlite3.OPEN_READWRITE; + const db = new sqlite3.Database(dbPath, flags, (err) => { + if (err) reject(err); + else resolve(db); + }); + }); +} + +function closeDb(db) { + return new Promise((resolve, reject) => { + db.close((err) => { + if (err) reject(err); + else resolve(); + }); + }); +} + +function get(db, sql, params = []) { + return new Promise((resolve, reject) => { + db.get(sql, params, (err, row) => { + if (err) reject(err); + else resolve(row); + }); + }); +} + +function all(db, sql, params = []) { + return new Promise((resolve, reject) => { + db.all(sql, params, (err, rows) => { + if (err) reject(err); + else resolve(rows || []); + }); + }); +} + +module.exports = { + SqliteRuntimeV0FactoryProvider, + SqliteRuntimeV0Provider, + SqliteRuntimeV0Context, + SqliteRuntimeV0FilterSet +}; + +// Auto-discover v0 specializations in this directory. +// Any cs-sqlite-*.js file that calls registerSpecialization() will be loaded. 
// Discovery is best-effort: a specialization that fails to load must not take
// the generic runtime down with it. The failure is reported, though, so that
// a broken or incomplete specialization is not skipped without any trace.
const { readdirSync } = require('fs');
const { join } = require('path');
const _self = 'cs-sqlite-runtime-v0.js';
for (const f of readdirSync(__dirname)) {
  if (f !== _self && f.startsWith('cs-sqlite-') && f.endsWith('.js')) {
    try {
      require(join(__dirname, f));
    } catch (error) {
      console.warn(`sqlite-runtime-v0: could not load specialization ${f}: ${error.message}`);
    }
  }
}
*/ + async _ensureContext(code) { + if (code instanceof SnomedExpressionContext) return code; + return super._ensureContext(code); + } + + /** Extract code string from a SnomedExpressionContext. */ + #exprCode(ctx) { + if (ctx.source) return ctx.source; + if (ctx.expression?.concepts?.[0]?.code) return ctx.expression.concepts[0].code; + return ''; + } + + // ── locate: try expression parse when code not found in DB ──────── + + async locate(code) { + // First try the standard DB lookup + const result = await super.locate(code); + if (result.context) return result; + + // Not found as a simple code — try parsing as SNOMED expression + return this.#tryLocateExpression(code); + } + + #tryLocateExpression(code) { + if (!code || typeof code !== 'string') { + return { context: null, message: undefined }; + } + + // Quick check: expressions contain ':' or '+' — skip pure numeric codes + if (/^\d+$/.test(code)) { + return { context: null, message: undefined }; + } + + const exprSvc = this.#getExpressionServices(); + if (!exprSvc) { + return { context: null, message: undefined }; + } + + try { + const expression = this._exprParser.parse(code); + exprSvc.checkExpression(expression); + return { + context: new SnomedExpressionContext(code, expression), + message: null + }; + } catch (error) { + return { + context: null, + message: `Not a valid expression: ${error.message}` + }; + } + } + + // ── code / display / designations overrides for expressions ──────── + + async code(context) { + if (context instanceof SnomedExpressionContext) { + if (context.isComplex()) { + const exprSvc = this.#getExpressionServices(); + return exprSvc + ? 
exprSvc.renderExpression(context.expression, SnomedServicesRenderOption.Minimal) + : this.#exprCode(context); + } + return this.#exprCode(context); + } + return super.code(context); + } + + async display(context) { + if (context instanceof SnomedExpressionContext) { + if (context.isComplex()) { + const exprSvc = this.#getExpressionServices(); + return exprSvc + ? exprSvc.renderExpression(context.expression, SnomedServicesRenderOption.FillMissing) + : this.#exprCode(context); + } + const code = this.#exprCode(context); + const dbResult = await super.locate(code); + if (dbResult.context) return dbResult.context.display || code; + return code; + } + return super.display(context); + } + + async designations(context, displays) { + if (context instanceof SnomedExpressionContext && context.isComplex()) { + const disp = await this.display(context); + const { CodeSystem } = require('../library/codesystem'); + displays.addDesignation(true, 'active', this.defLang(), CodeSystem.makeUseForDisplay(), disp); + return; + } + // Simple SnomedExpressionContext — resolve to DB context + if (context instanceof SnomedExpressionContext) { + const code = this.#exprCode(context); + const dbResult = await super.locate(code); + if (dbResult.context) return super.designations(dbResult.context, displays); + return; + } + return super.designations(context, displays); + } + + async properties(context) { + if (context instanceof SnomedExpressionContext && context.isComplex()) { + return [{ code: 'inactive', valueBoolean: false }]; + } + if (context instanceof SnomedExpressionContext) { + const code = this.#exprCode(context); + const dbResult = await super.locate(code); + if (dbResult.context) return super.properties(dbResult.context); + return []; + } + return super.properties(context); + } + + // ── incompleteValidationMessage for expressions ─────────────────── + + async incompleteValidationMessage(context) { + if (context instanceof SnomedExpressionContext && context.isComplex()) { + return 
'The expression is grammatically correct and the concepts are valid, but the expression has not been checked against the SNOMED CT concept model (MRCM)'; + } + return null; + } + + // ── subsumesTest: handle expression subsumption ─────────────────── + + async subsumesTest(codeA, codeB) { + // If both are simple codes, use parent's fast path + const isExprA = typeof codeA === 'string' && /[:{+]/.test(codeA); + const isExprB = typeof codeB === 'string' && /[:{+]/.test(codeB); + + if (!isExprA && !isExprB) { + return super.subsumesTest(codeA, codeB); + } + + const exprSvc = this.#getExpressionServices(); + if (!exprSvc) return super.subsumesTest(codeA, codeB); + + try { + const exprA = this._exprParser.parse(typeof codeA === 'string' ? codeA : String(codeA)); + exprSvc.checkExpression(exprA); + const exprB = this._exprParser.parse(typeof codeB === 'string' ? codeB : String(codeB)); + exprSvc.checkExpression(exprB); + + if (exprSvc.expressionsEquivalent(exprA, exprB)) return 'equivalent'; + + const aSubsumesB = exprSvc.expressionSubsumes(exprA, exprB); + if (aSubsumesB) return 'subsumes'; + + const bSubsumesA = exprSvc.expressionSubsumes(exprB, exprA); + if (bSubsumesA) return 'subsumed-by'; + + return 'not-subsumed'; + } catch (_error) { + return 'not-subsumed'; + } + } + + // ── doesFilter: support expressions=true|false ──────────────────── + + async doesFilter(prop, op, value) { + if (prop === 'expressions' && op === '=' && ['true', 'false'].includes(value)) { + return true; + } + return super.doesFilter(prop, op, value); + } + + // ── filterLocate / filterCheck: handle expression contexts ──────── + + async filterLocate(filterContext, set, code) { + // Try parent first — works for simple codes + const result = await super.filterLocate(filterContext, set, code); + if (result && typeof result !== 'string') return result; + + // If parent returned an error string, try as expression + const exprResult = this.#tryLocateExpression(code); + if (exprResult.context) { + 
/**
 * Factory that hands out SnomedSqliteV0Provider instances instead of the
 * generic provider. Loading and metadata handling are inherited unchanged.
 */
class SnomedSqliteV0Factory extends SqliteRuntimeV0FactoryProvider {
  /**
   * Creates a SNOMED provider for one operation, loading the factory first
   * if that has not happened yet.
   *
   * @param {Object} opContext operation context passed through to the provider
   * @param {Array} supplements supplements passed through to the provider
   * @returns {Promise<SnomedSqliteV0Provider>} provider using this factory's
   *   database handle, metadata, runtime config and shared state
   */
  async build(opContext, supplements) {
    if (!this._loaded) {
      await this.load();
    }

    this.recordUse();

    const providerOptions = {
      ownsDb: false,
      sharedState: this._sharedState,
      dbPath: this.dbPath
    };
    return new SnomedSqliteV0Provider(
      opContext,
      supplements,
      this._db,
      this._meta,
      this._runtime,
      providerOptions
    );
  }
}
/**
 * Factory specialization that understands LOINC's path-style implicit value
 * set URLs (`<system>/vs` and `<system>/vs/<token>`) in addition to whatever
 * the generic factory resolves.
 */
class LoincImplicitValueSetFactory extends SqliteRuntimeV0FactoryProvider {
  /**
   * Builds the ValueSet resource for a known implicit value set URL.
   *
   * - `<system>/vs` or `<system>/vs/`  -> all concepts of the code system
   * - `<system>/vs/LL...`              -> filter LIST = token
   * - `<system>/vs/LP...`              -> filter concept is-a token
   *
   * Anything else, including a token with malformed percent-encoding, is
   * delegated to the generic implementation.
   *
   * @param {string} url value set URL to resolve
   * @param {string} [version] requested version; when given, the loaded
   *   canonical URI must start with it
   * @returns {Promise<Object|null>} a ValueSet resource, or null when the
   *   version does not match (otherwise whatever the generic factory returns)
   */
  async buildKnownValueSet(url, version) {
    if (!this._loaded) {
      await this.load();
    }

    const system = this.system();
    if (!url || !system || !url.startsWith(`${system}/vs`)) {
      return super.buildKnownValueSet(url, version);
    }

    if (version && this._meta.canonicalUri && !this._meta.canonicalUri.startsWith(version)) {
      return null;
    }

    const vsBase = `${system}/vs`;
    if (url === vsBase || url === `${vsBase}/`) {
      return {
        resourceType: 'ValueSet',
        url,
        version: this._meta.version,
        status: 'active',
        name: `${sanitizeName(this.name())}All`,
        description: `All concepts from ${this.name()}`,
        compose: { include: [{ system }] }
      };
    }

    if (!url.startsWith(`${vsBase}/`)) {
      return super.buildKnownValueSet(url, version);
    }

    let token;
    try {
      token = decodeURIComponent(url.substring(vsBase.length + 1));
    } catch (_error) {
      // The URL comes from the caller; malformed percent-encoding makes
      // decodeURIComponent throw. Treat it like any other unrecognised
      // token rather than failing the whole lookup.
      return super.buildKnownValueSet(url, version);
    }

    if (token.startsWith('LL')) {
      return {
        resourceType: 'ValueSet',
        url,
        version: this._meta.version,
        status: 'active',
        name: `LOINCAnswerList${sanitizeName(token)}`,
        compose: {
          include: [{
            system,
            filter: [{ property: 'LIST', op: '=', value: token }]
          }]
        }
      };
    }

    if (token.startsWith('LP')) {
      return {
        resourceType: 'ValueSet',
        url,
        version: this._meta.version,
        status: 'active',
        name: `LOINCPart${sanitizeName(token)}`,
        compose: {
          include: [{
            system,
            filter: [{ property: 'concept', op: 'is-a', value: token }]
          }]
        }
      };
    }

    return super.buildKnownValueSet(url, version);
  }
}
'implicit-vs-path'], + priority: 100, + FactoryClass: LoincImplicitValueSetFactory +}); + +module.exports = { LoincImplicitValueSetFactory }; diff --git a/tx/importers/import-loinc-sqlite-v0.module.js b/tx/importers/import-loinc-sqlite-v0.module.js new file mode 100644 index 0000000..a35015e --- /dev/null +++ b/tx/importers/import-loinc-sqlite-v0.module.js @@ -0,0 +1,367 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const inquirer = require('inquirer'); +const sqlite3 = require('sqlite3').verbose(); + +const { BaseTerminologyModule } = require('./tx-import-base'); +const { LoincSqliteV0Importer } = require('./sqlite-v2/import-loinc-v0'); + +class LoincSqliteV0Module extends BaseTerminologyModule { + getName() { + return 'loinc-sqlite-v0'; + } + + getDescription() { + return 'LOINC CSV -> SQLite (clean v0 schema)'; + } + + getSupportedFormats() { + return ['csv', 'directory', 'zip']; + } + + getDefaultConfig() { + return { + verbose: true, + overwrite: false, + skipClosure: false, + dest: './data/loinc-v0.db' + }; + } + + getEstimatedDuration() { + return '10-90 minutes (depends on release size and closure)'; + } + + registerCommands(terminologyCommand, globalOptions) { + terminologyCommand + .command('import') + .description('Import LOINC CSV distribution into SQLite v0 schema') + .option('-s, --source ', 'Source directory or LOINC .zip release') + .option('-d, --dest ', 'Destination SQLite file') + .option('-v, --loinc-version ', 'LOINC version (e.g., 2.81)') + .option('-u, --uri ', 'Canonical URI; overrides default base|version') + .option('--skip-closure', 'Skip closure table generation') + .option('--overwrite', 'Overwrite destination database if it exists') + .option('-y, --yes', 'Skip confirmations') + .action(async (options) => { + await this.handleImportCommand({ ...globalOptions, ...options }); + }); + + terminologyCommand + .command('validate') + .description('Validate source path and discover LOINC CSV files') + 
.option('-s, --source ', 'Source directory or zip file') + .action(async (options) => { + await this.handleValidateCommand({ ...globalOptions, ...options }); + }); + + terminologyCommand + .command('status') + .description('Show status of a generated SQLite v0 LOINC database') + .option('-d, --dest ', 'Database file path', './data/loinc-v0.db') + .action(async (options) => { + await this.handleStatusCommand({ ...globalOptions, ...options }); + }); + } + + async handleImportCommand(options) { + try { + const config = options.yes + ? this.buildNonInteractiveConfig(options) + : await this.gatherConfig(options); + + if (!options.yes) { + const confirmed = await this.confirmImport(config); + if (!confirmed) { + this.logInfo('Import cancelled'); + return; + } + } + + this.rememberSuccessfulConfig(config); + await this.runImportWithoutConfigSaving(config); + } catch (error) { + this.logError(`Import command failed: ${error.message}`); + if (options.verbose) { + console.error(error.stack); + } + throw error; + } + } + + async gatherConfig(options) { + const baseConfig = await this.gatherCommonConfig(options); + + const config = { + ...baseConfig, + version: options.loincVersion || options.version || baseConfig.version, + uri: options.uri || baseConfig.uri, + skipClosure: !!options.skipClosure + }; + + if (!config.version && !config.uri) { + const answers = await inquirer.prompt([ + { + type: 'input', + name: 'version', + message: 'LOINC version (e.g., 2.81):', + validate: validateVersion + } + ]); + config.version = answers.version; + } + + return config; + } + + buildNonInteractiveConfig(options) { + const config = { + ...this.getDefaultConfig(), + ...options, + source: options.source, + dest: options.dest || this.getDefaultConfig().dest, + version: options.loincVersion || options.version, + uri: options.uri, + skipClosure: !!options.skipClosure, + overwrite: !!options.overwrite, + verbose: !!options.verbose + }; + + if (!config.source) { + throw new Error('source is 
required when using --yes'); + } + if (!config.version && !config.uri) { + throw new Error('Provide --loinc-version or --uri when using --yes'); + } + if (config.version) { + const valid = validateVersion(config.version); + if (valid !== true) { + throw new Error(valid); + } + } + return config; + } + + async confirmImport(config) { + console.log('\nLOINC SQLite v0 Import Configuration:'); + console.log(` Source: ${config.source}`); + console.log(` Destination: ${config.dest}`); + console.log(` Version: ${config.version || '(auto/none)'}`); + console.log(` URI: ${config.uri || '(auto)'}`); + console.log(` Skip Closure: ${config.skipClosure ? 'Yes' : 'No'}`); + console.log(` Overwrite: ${config.overwrite ? 'Yes' : 'No'}`); + + const answer = await inquirer.prompt([ + { + type: 'confirm', + name: 'confirmed', + message: 'Proceed with import?', + default: true + } + ]); + return answer.confirmed; + } + + async runImportWithoutConfigSaving(config) { + try { + const importer = new LoincSqliteV0Importer(config); + const result = await importer.run(); + this.logSuccess(`LOINC SQLite v0 import complete: ${result.uri}`); + this.logSuccess( + `Concepts: ${result.stats.concepts.toLocaleString()}, ` + + `Designations: ${result.stats.designations.toLocaleString()}, ` + + `Relationships: ${result.stats.relationships.toLocaleString()}, ` + + `ValueSets: ${result.stats.valueSets.toLocaleString()}` + ); + } catch (error) { + this.logError(`LOINC SQLite v0 import failed: ${error.message}`); + if (config.verbose) { + console.error(error.stack); + } + process.exit(1); + } + } + + async handleValidateCommand(options) { + const source = options.source || (await promptForSource()); + if (!fs.existsSync(source)) { + this.logError(`Source does not exist: ${source}`); + return; + } + + const sourceStat = fs.statSync(source); + if (sourceStat.isFile() && source.toLowerCase().endsWith('.zip')) { + console.log('\nZip source provided. 
Validation confirms path exists; CSV listing occurs at import-time extraction.'); + this.logSuccess('Validation passed'); + return; + } + + const files = LoincSqliteV0Importer.discoverCsvFiles(path.resolve(source)); + + console.log('\nDiscovered CSV files:'); + console.log(` Loinc.csv: ${files.loinc ? files.loinc : '(missing)'}`); + console.log(` Part.csv: ${files.part ? files.part : '(missing)'}`); + console.log(` LoincPartLink_Primary.csv: ${files.partLink ? files.partLink : '(missing)'}`); + console.log(` ComponentHierarchyBySystem: ${files.hierarchy ? files.hierarchy : '(missing)'}`); + console.log(` ConsumerName.csv: ${files.consumerName ? files.consumerName : '(missing)'}`); + console.log(` LinguisticVariants: ${files.linguisticVariants.length}`); + + if (!files.loinc) { + this.logError('Validation failed: Loinc.csv is required'); + return; + } + this.logSuccess('Validation passed'); + } + + async handleStatusCommand(options) { + const dbPath = path.resolve(options.dest || './data/loinc-v0.db'); + if (!fs.existsSync(dbPath)) { + this.logError(`Database not found: ${dbPath}`); + return; + } + + const db = new sqlite3.Database(dbPath, sqlite3.OPEN_READONLY); + try { + const codeSystem = await getRow( + db, + `SELECT cs_id, canonical_uri, version, name, loaded_at + FROM code_system + ORDER BY cs_id DESC + LIMIT 1`, + [] + ); + if (!codeSystem) { + this.logWarning('No code_system rows found'); + return; + } + + const [concepts, designations, relationships, literals, valueSets, valueSetMembers, closure] = await Promise.all([ + getRow(db, 'SELECT COUNT(*) AS n FROM concept WHERE cs_id = ?', [codeSystem.cs_id]), + getRow( + db, + `SELECT COUNT(*) AS n + FROM designation d + JOIN concept c ON c.concept_id = d.concept_id + WHERE c.cs_id = ?`, + [codeSystem.cs_id] + ), + getRow( + db, + `SELECT COUNT(*) AS n + FROM concept_link l + JOIN concept c ON c.concept_id = l.source_concept_id + WHERE c.cs_id = ?`, + [codeSystem.cs_id] + ), + getRow( + db, + `SELECT COUNT(*) AS 
n + FROM concept_literal cl + JOIN concept c ON c.concept_id = cl.source_concept_id + WHERE c.cs_id = ?`, + [codeSystem.cs_id] + ), + getRow(db, 'SELECT COUNT(*) AS n FROM value_set WHERE cs_id = ?', [codeSystem.cs_id]), + getRow( + db, + `SELECT COUNT(*) AS n + FROM value_set_member m + JOIN value_set v ON v.vs_id = m.vs_id + WHERE v.cs_id = ?`, + [codeSystem.cs_id] + ), + getRow( + db, + `SELECT COUNT(*) AS n + FROM closure cl + JOIN concept c ON c.concept_id = cl.ancestor_id + WHERE c.cs_id = ?`, + [codeSystem.cs_id] + ) + ]); + + const [ftsDisplay, ftsDesignation, ftsLiteral] = await Promise.all([ + getCountIfTableExists(db, 'search_fts_display'), + getCountIfTableExists(db, 'search_fts_designation'), + getCountIfTableExists(db, 'search_fts_literal') + ]); + + console.log('\nLOINC SQLite v0 Status:'); + console.log(` DB: ${dbPath}`); + console.log(` Canonical URI: ${codeSystem.canonical_uri}`); + console.log(` Version: ${codeSystem.version || '(none)'}`); + console.log(` Name: ${codeSystem.name || 'LOINC'}`); + console.log(` Loaded At: ${codeSystem.loaded_at}`); + console.log(` Concepts: ${(concepts?.n || 0).toLocaleString()}`); + console.log(` Designations: ${(designations?.n || 0).toLocaleString()}`); + console.log(` Relationships: ${(relationships?.n || 0).toLocaleString()}`); + console.log(` Literals: ${(literals?.n || 0).toLocaleString()}`); + console.log(` ValueSets: ${(valueSets?.n || 0).toLocaleString()}`); + console.log(` VS Members: ${(valueSetMembers?.n || 0).toLocaleString()}`); + console.log(` Closure rows: ${(closure?.n || 0).toLocaleString()}`); + console.log(` FTS display: ${(ftsDisplay?.n || 0).toLocaleString()}`); + console.log(` FTS desig.: ${(ftsDesignation?.n || 0).toLocaleString()}`); + console.log(` FTS literal: ${(ftsLiteral?.n || 0).toLocaleString()}`); + + this.logSuccess('Status read complete'); + } finally { + await closeDb(db); + } + } +} + +function validateVersion(input) { + if (!input) return 'Version is required'; + if 
/**
 * Promise wrapper around `db.get`.
 *
 * @param {Object} db database handle exposing `get(sql, params, callback)`
 * @param {string} sql statement to run
 * @param {Array} params bound parameters
 * @returns {Promise<Object|undefined>} the row passed to the callback
 */
function getRow(db, sql, params) {
  return new Promise((resolve, reject) => {
    db.get(sql, params, (err, row) => {
      if (err) reject(err);
      else resolve(row);
    });
  });
}

/**
 * Counts the rows of a table, reporting `{ n: 0 }` when the table is not
 * listed in sqlite_master.
 *
 * A table name cannot be bound as a parameter, so it is embedded in the
 * statement as a double-quoted identifier with embedded quotes doubled. That
 * keeps the query valid for any name that exists in sqlite_master.
 *
 * @param {Object} db database handle exposing `get(sql, params, callback)`
 * @param {string} tableName table to count
 * @returns {Promise<{n: number}>} row count holder
 */
async function getCountIfTableExists(db, tableName) {
  const exists = await getRow(
    db,
    `SELECT 1 AS found FROM sqlite_master WHERE type = 'table' AND name = ? LIMIT 1`,
    [tableName]
  );
  if (!exists) {
    return { n: 0 };
  }
  const quotedName = `"${String(tableName).replace(/"/g, '""')}"`;
  return getRow(db, `SELECT COUNT(*) AS n FROM ${quotedName}`, []);
}
getEstimatedDuration() { + return '5-45 minutes (depends on source size and closure)'; + } + + registerCommands(terminologyCommand, globalOptions) { + terminologyCommand + .command('import') + .description('Import RxNorm RRF into SQLite v0 schema') + .option('-s, --source ', 'Source directory or RxNorm .zip release') + .option('-d, --dest ', 'Destination SQLite file') + .option('-v, --rxnorm-version ', 'RxNorm version date (e.g., 02022026)') + .option('-u, --uri ', 'Canonical URI; overrides default base|version') + .option('--skip-closure', 'Skip closure table generation') + .option('--overwrite', 'Overwrite destination database if it exists') + .option('-y, --yes', 'Skip confirmations') + .action(async (options) => { + await this.handleImportCommand({ ...globalOptions, ...options }); + }); + + terminologyCommand + .command('validate') + .description('Validate source path and discover RxNorm RRF files') + .option('-s, --source ', 'Source directory or zip file') + .action(async (options) => { + await this.handleValidateCommand({ ...globalOptions, ...options }); + }); + + terminologyCommand + .command('status') + .description('Show status of a generated SQLite v0 RxNorm database') + .option('-d, --dest ', 'Database file path', './data/rxnorm-v0.db') + .action(async (options) => { + await this.handleStatusCommand({ ...globalOptions, ...options }); + }); + } + + async handleImportCommand(options) { + try { + const config = options.yes + ? 
this.buildNonInteractiveConfig(options) + : await this.gatherConfig(options); + + if (!options.yes) { + const confirmed = await this.confirmImport(config); + if (!confirmed) { + this.logInfo('Import cancelled'); + return; + } + } + + this.rememberSuccessfulConfig(config); + await this.runImportWithoutConfigSaving(config); + } catch (error) { + this.logError(`Import command failed: ${error.message}`); + if (options.verbose) { + console.error(error.stack); + } + throw error; + } + } + + async gatherConfig(options) { + const baseConfig = await this.gatherCommonConfig(options); + + const config = { + ...baseConfig, + version: options.rxnormVersion || options.version || baseConfig.version, + uri: options.uri || baseConfig.uri, + skipClosure: !!options.skipClosure + }; + + if (!config.version && !config.uri) { + const answers = await inquirer.prompt([ + { + type: 'input', + name: 'version', + message: 'RxNorm version (MMDDYYYY):', + validate: validateVersion + } + ]); + config.version = answers.version; + } + + return config; + } + + buildNonInteractiveConfig(options) { + const config = { + ...this.getDefaultConfig(), + ...options, + source: options.source, + dest: options.dest || this.getDefaultConfig().dest, + version: options.rxnormVersion || options.version, + uri: options.uri, + skipClosure: !!options.skipClosure, + overwrite: !!options.overwrite, + verbose: !!options.verbose + }; + + if (!config.source) { + throw new Error('source is required when using --yes'); + } + if (!config.version && !config.uri) { + throw new Error('Provide --rxnorm-version or --uri when using --yes'); + } + if (config.version) { + const valid = validateVersion(config.version); + if (valid !== true) { + throw new Error(valid); + } + } + + return config; + } + + async confirmImport(config) { + console.log('\nRxNorm SQLite v0 Import Configuration:'); + console.log(` Source: ${config.source}`); + console.log(` Destination: ${config.dest}`); + console.log(` Version: ${config.version || 
'(auto/none)'}`); + console.log(` URI: ${config.uri || '(auto)'}`); + console.log(` Skip Closure: ${config.skipClosure ? 'Yes' : 'No'}`); + console.log(` Overwrite: ${config.overwrite ? 'Yes' : 'No'}`); + + const answer = await inquirer.prompt([ + { + type: 'confirm', + name: 'confirmed', + message: 'Proceed with import?', + default: true + } + ]); + return answer.confirmed; + } + + async runImportWithoutConfigSaving(config) { + try { + const importer = new RxNormSqliteV0Importer(config); + const result = await importer.run(); + this.logSuccess(`RxNorm SQLite v0 import complete: ${result.uri}`); + this.logSuccess( + `Concepts: ${result.stats.concepts.toLocaleString()}, ` + + `Designations: ${result.stats.designations.toLocaleString()}, ` + + `Relationships: ${result.stats.relationships.toLocaleString()}` + ); + } catch (error) { + this.logError(`RxNorm SQLite v0 import failed: ${error.message}`); + if (config.verbose) { + console.error(error.stack); + } + process.exit(1); + } + } + + async handleValidateCommand(options) { + const source = options.source || (await promptForSource()); + if (!fs.existsSync(source)) { + this.logError(`Source does not exist: ${source}`); + return; + } + + let discoverySource = source; + if (fs.statSync(source).isFile() && source.toLowerCase().endsWith('.zip')) { + console.log('\nZip source provided. Validation confirms path exists; RRF listing occurs at import-time extraction.'); + this.logSuccess('Validation passed'); + return; + } + + discoverySource = path.resolve(discoverySource); + const files = RxNormSqliteV0Importer.discoverRrfFiles(discoverySource); + + console.log('\nDiscovered RRF files:'); + console.log(` RXNCONSO: ${files.rxnconso ? files.rxnconso : '(missing)'}`); + console.log(` RXNREL: ${files.rxnrel ? files.rxnrel : '(missing)'}`); + console.log(` RXNSAT: ${files.rxnsat ? files.rxnsat : '(missing)'}`); + console.log(` RXNSAB: ${files.rxnsab ? 
files.rxnsab : '(missing)'}`); + + if (!files.rxnconso) { + this.logError('Validation failed: RXNCONSO.RRF is required'); + return; + } + this.logSuccess('Validation passed'); + } + + async handleStatusCommand(options) { + const dbPath = path.resolve(options.dest || './data/rxnorm-v0.db'); + if (!fs.existsSync(dbPath)) { + this.logError(`Database not found: ${dbPath}`); + return; + } + + const db = new sqlite3.Database(dbPath, sqlite3.OPEN_READONLY); + try { + const codeSystem = await getRow( + db, + `SELECT cs_id, canonical_uri, version, name, loaded_at + FROM code_system + ORDER BY cs_id DESC + LIMIT 1`, + [] + ); + if (!codeSystem) { + this.logWarning('No code_system rows found'); + return; + } + + const [concepts, designations, relationships, literals, closure] = await Promise.all([ + getRow(db, 'SELECT COUNT(*) AS n FROM concept WHERE cs_id = ?', [codeSystem.cs_id]), + getRow( + db, + `SELECT COUNT(*) AS n + FROM designation d + JOIN concept c ON c.concept_id = d.concept_id + WHERE c.cs_id = ?`, + [codeSystem.cs_id] + ), + getRow( + db, + `SELECT COUNT(*) AS n + FROM concept_link l + JOIN concept c ON c.concept_id = l.source_concept_id + WHERE c.cs_id = ?`, + [codeSystem.cs_id] + ), + getRow( + db, + `SELECT COUNT(*) AS n + FROM concept_literal cl + JOIN concept c ON c.concept_id = cl.source_concept_id + WHERE c.cs_id = ?`, + [codeSystem.cs_id] + ), + getRow( + db, + `SELECT COUNT(*) AS n + FROM closure cl + JOIN concept c ON c.concept_id = cl.ancestor_id + WHERE c.cs_id = ?`, + [codeSystem.cs_id] + ) + ]); + + const [ftsDisplay, ftsDesignation, ftsLiteral] = await Promise.all([ + getCountIfTableExists(db, 'search_fts_display'), + getCountIfTableExists(db, 'search_fts_designation'), + getCountIfTableExists(db, 'search_fts_literal') + ]); + + console.log('\nRxNorm SQLite v0 Status:'); + console.log(` DB: ${dbPath}`); + console.log(` Canonical URI: ${codeSystem.canonical_uri}`); + console.log(` Version: ${codeSystem.version || '(none)'}`); + console.log(` Name: 
${codeSystem.name || 'RxNorm'}`); + console.log(` Loaded At: ${codeSystem.loaded_at}`); + console.log(` Concepts: ${(concepts?.n || 0).toLocaleString()}`); + console.log(` Designations: ${(designations?.n || 0).toLocaleString()}`); + console.log(` Relationships: ${(relationships?.n || 0).toLocaleString()}`); + console.log(` Literals: ${(literals?.n || 0).toLocaleString()}`); + console.log(` Closure rows: ${(closure?.n || 0).toLocaleString()}`); + console.log(` FTS display: ${(ftsDisplay?.n || 0).toLocaleString()}`); + console.log(` FTS desig.: ${(ftsDesignation?.n || 0).toLocaleString()}`); + console.log(` FTS literal: ${(ftsLiteral?.n || 0).toLocaleString()}`); + + this.logSuccess('Status read complete'); + } finally { + await closeDb(db); + } + } +} + +function validateVersion(input) { + if (!input) return 'Version is required'; + if (!/^\d{8}$/.test(input)) return 'Version must be MMDDYYYY'; + return true; +} + +async function promptForSource() { + const answer = await inquirer.prompt([ + { + type: 'input', + name: 'source', + message: 'Source directory or zip:', + validate: (input) => input ? true : 'Source is required' + } + ]); + + return answer.source; +} + +function getRow(db, sql, params) { + return new Promise((resolve, reject) => { + db.get(sql, params, (err, row) => { + if (err) reject(err); + else resolve(row); + }); + }); +} + +async function getCountIfTableExists(db, tableName) { + const exists = await getRow( + db, + `SELECT 1 AS found FROM sqlite_master WHERE type = 'table' AND name = ? 
LIMIT 1`, + [tableName] + ); + if (!exists) { + return { n: 0 }; + } + return getRow(db, `SELECT COUNT(*) AS n FROM ${tableName}`, []); +} + +function closeDb(db) { + return new Promise((resolve, reject) => { + db.close((err) => { + if (err) reject(err); + else resolve(); + }); + }); +} + +module.exports = { + RxNormSqliteV0Module +}; diff --git a/tx/importers/import-rxnorm.module.js b/tx/importers/import-rxnorm.module.js index 6610914..c82aa19 100644 --- a/tx/importers/import-rxnorm.module.js +++ b/tx/importers/import-rxnorm.module.js @@ -428,6 +428,7 @@ class RxNormModule extends BaseTerminologyModule { 'CREATE INDEX IF NOT EXISTS idx_rxnrel_rel ON RXNREL(REL)', 'CREATE INDEX IF NOT EXISTS idx_rxnrel_rela ON RXNREL(RELA)', 'CREATE INDEX IF NOT EXISTS X_RXNSTY_2 ON RXNSTY(TUI)', + 'CREATE INDEX IF NOT EXISTS idx_rxnsty_rxcui ON RXNSTY(RXCUI)', 'CREATE INDEX IF NOT EXISTS idx_rxnstems_stem_cui ON RXNSTEMS(stem, CUI)' ]; diff --git a/tx/importers/import-sct-sqlite-v0.module.js b/tx/importers/import-sct-sqlite-v0.module.js new file mode 100644 index 0000000..71a88ae --- /dev/null +++ b/tx/importers/import-sct-sqlite-v0.module.js @@ -0,0 +1,415 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const inquirer = require('inquirer'); +const sqlite3 = require('sqlite3').verbose(); + +const { BaseTerminologyModule } = require('./tx-import-base'); +const { SnomedSqliteV0Importer } = require('./sqlite-v2/import-snomed-v0'); + +class SnomedSqliteV0Module extends BaseTerminologyModule { + getName() { + return 'snomed-sqlite-v0'; + } + + getDescription() { + return 'SNOMED CT RF2 Snapshot -> SQLite (clean v0 schema)'; + } + + getSupportedFormats() { + return ['rf2', 'directory']; + } + + getDefaultConfig() { + return { + verbose: true, + overwrite: false, + snapshotOnly: true, + skipRefsets: false, + skipClosure: false, + edition: '900000000000207008', + dest: './data/snomed-v0.db' + }; + } + + getEstimatedDuration() { + return '30-180 minutes 
(depends on edition size and closure)'; + } + + registerCommands(terminologyCommand, globalOptions) { + terminologyCommand + .command('import') + .description('Import SNOMED CT RF2 into SQLite v0 schema') + .option('-s, --source ', 'Source directory containing RF2 files (ideally Snapshot root)') + .option('-d, --dest ', 'Destination SQLite file') + .option('-e, --edition ', 'Edition code (e.g., 900000000000207008 or 731000124108)') + .option('--snomed-version ', 'Version date in YYYYMMDD format') + .option('-u, --uri ', 'Canonical SNOMED URI; overrides edition+version') + .option('--skip-refsets', 'Skip refset/value-set membership import') + .option('--skip-closure', 'Deprecated/ignored: closure is always built') + .option('--include-non-snapshot', 'Include non-Snapshot RF2 files in discovery') + .option('--overwrite', 'Overwrite destination database if it exists') + .option('-y, --yes', 'Skip confirmations') + .action(async (options) => { + await this.handleImportCommand({ ...globalOptions, ...options }); + }); + + terminologyCommand + .command('validate') + .description('Validate source path and discover RF2 file classes') + .option('-s, --source ', 'Source directory') + .option('--include-non-snapshot', 'Include non-Snapshot RF2 files in discovery') + .action(async (options) => { + await this.handleValidateCommand({ ...globalOptions, ...options }); + }); + + terminologyCommand + .command('status') + .description('Show status of a generated SQLite v0 SNOMED database') + .option('-d, --dest ', 'Database file path', './data/snomed-v0.db') + .action(async (options) => { + await this.handleStatusCommand({ ...globalOptions, ...options }); + }); + } + + async handleImportCommand(options) { + try { + const config = options.yes + ? 
this.buildNonInteractiveConfig(options) + : await this.gatherConfig(options); + + if (!options.yes) { + const confirmed = await this.confirmImport(config); + if (!confirmed) { + this.logInfo('Import cancelled'); + return; + } + } + + this.rememberSuccessfulConfig(config); + await this.runImportWithoutConfigSaving(config); + } catch (error) { + this.logError(`Import command failed: ${error.message}`); + if (options.verbose) { + console.error(error.stack); + } + throw error; + } + } + + async gatherConfig(options) { + const baseConfig = await this.gatherCommonConfig(options); + + const config = { + ...baseConfig, + edition: options.edition || baseConfig.edition || '900000000000207008', + version: options.snomedVersion || options.version || baseConfig.version, + uri: options.uri || baseConfig.uri, + skipRefsets: !!options.skipRefsets, + skipClosure: false, + snapshotOnly: !options.includeNonSnapshot + }; + + if (!config.uri && !config.version) { + const answers = await inquirer.prompt([ + { + type: 'input', + name: 'version', + message: 'SNOMED version (YYYYMMDD):', + validate: validateVersion + } + ]); + config.version = answers.version; + } + + const parsed = parseSnomedUri(config.uri); + if (!config.version && parsed.version) { + config.version = parsed.version; + } + if (!config.edition && parsed.edition) { + config.edition = parsed.edition; + } + + if (!config.uri) { + config.uri = `http://snomed.info/sct/${config.edition}/version/${config.version}`; + } + + if (!options.dest && shouldAutoAssignDest(config.dest) && config.version) { + config.dest = buildDefaultDest(config.edition, config.version); + } + if (options.skipClosure) { + this.logWarning('--skip-closure ignored: closure is always built'); + } + + return config; + } + + buildNonInteractiveConfig(options) { + const parsed = parseSnomedUri(options.uri); + const edition = options.edition || this.getDefaultConfig().edition || parsed.edition; + const version = options.snomedVersion || options.version || 
parsed.version; + const config = { + ...this.getDefaultConfig(), + ...options, + source: options.source, + dest: options.dest || buildDefaultDest(edition, version), + edition, + version, + uri: options.uri, + skipRefsets: !!options.skipRefsets, + skipClosure: false, + snapshotOnly: !options.includeNonSnapshot, + overwrite: !!options.overwrite, + verbose: !!options.verbose + }; + + if (!config.uri && config.edition && config.version) { + config.uri = `http://snomed.info/sct/${config.edition}/version/${config.version}`; + } + + if (!config.source) { + throw new Error('source is required when using --yes'); + } + if (!config.uri && !config.version) { + throw new Error('Provide --uri or --snomed-version with --edition when using --yes'); + } + if (options.skipClosure) { + this.logWarning('--skip-closure ignored: closure is always built'); + } + + return config; + } + + async confirmImport(config) { + console.log('\nSNOMED SQLite v0 Import Configuration:'); + console.log(` Source: ${config.source}`); + console.log(` Destination: ${config.dest}`); + console.log(` URI: ${config.uri}`); + console.log(` SnapshotOnly: ${config.snapshotOnly ? 'Yes' : 'No'}`); + console.log(` Skip Refsets: ${config.skipRefsets ? 'Yes' : 'No'}`); + console.log(` Skip Closure: ${config.skipClosure ? 'Yes' : 'No'}`); + console.log(` Overwrite: ${config.overwrite ? 
'Yes' : 'No'}`); + + const answer = await inquirer.prompt([ + { + type: 'confirm', + name: 'confirmed', + message: 'Proceed with import?', + default: true + } + ]); + + return answer.confirmed; + } + + async runImportWithoutConfigSaving(config) { + try { + const importer = new SnomedSqliteV0Importer(config); + const result = await importer.run(); + this.logSuccess(`SNOMED SQLite v0 import complete: ${result.uri}`); + this.logSuccess(`Concepts: ${result.stats.concepts.toLocaleString()}, Descriptions: ${result.stats.descriptions.toLocaleString()}, Relationships: ${result.stats.relationships.toLocaleString()}`); + } catch (error) { + this.logError(`SNOMED SQLite v0 import failed: ${error.message}`); + if (config.verbose) { + console.error(error.stack); + } + process.exit(1); + } + } + + async handleValidateCommand(options) { + const source = options.source || (await promptForSource()); + + if (!fs.existsSync(source)) { + this.logError(`Source does not exist: ${source}`); + return; + } + + const snapshotOnly = !options.includeNonSnapshot; + const files = SnomedSqliteV0Importer.discoverRf2Files(source, { snapshotOnly }); + + console.log('\nDiscovered RF2 file classes:'); + console.log(` Concepts: ${files.concepts.length}`); + console.log(` Descriptions: ${files.descriptions.length}`); + console.log(` Relationships: ${files.relationships.length}`); + console.log(` Concrete Values: ${files.concreteValues.length}`); + console.log(` Language Refsets: ${files.languageRefsets.length}`); + console.log(` Any Refsets: ${files.refsets.length}`); + + const ok = files.concepts.length > 0 && files.descriptions.length > 0; + if (ok) { + this.logSuccess('Validation passed'); + } else { + this.logError('Validation failed: concepts and descriptions are required'); + } + } + + async handleStatusCommand(options) { + const dbPath = path.resolve(options.dest || './data/snomed-v0.db'); + + if (!fs.existsSync(dbPath)) { + this.logError(`Database not found: ${dbPath}`); + return; + } + + const 
db = new sqlite3.Database(dbPath, sqlite3.OPEN_READONLY); + + try { + const codeSystem = await getRow(db, 'SELECT cs_id, canonical_uri, edition_code, version, loaded_at FROM code_system ORDER BY cs_id DESC LIMIT 1', []); + if (!codeSystem) { + this.logWarning('No code_system rows found'); + return; + } + + const [concepts, descriptions, relationships, refsets, closure] = await Promise.all([ + getRow(db, 'SELECT COUNT(*) AS n FROM concept WHERE cs_id = ?', [codeSystem.cs_id]), + getRow( + db, + `SELECT COUNT(*) AS n + FROM designation d + JOIN concept c ON c.concept_id = d.concept_id + WHERE c.cs_id = ?`, + [codeSystem.cs_id] + ), + getRow( + db, + `SELECT COUNT(*) AS n + FROM concept_link l + JOIN concept c ON c.concept_id = l.source_concept_id + WHERE c.cs_id = ?`, + [codeSystem.cs_id] + ), + getRow(db, 'SELECT COUNT(*) AS n FROM value_set WHERE cs_id = ?', [codeSystem.cs_id]), + getRow( + db, + `SELECT COUNT(*) AS n + FROM closure cl + JOIN concept c ON c.concept_id = cl.ancestor_id + WHERE c.cs_id = ?`, + [codeSystem.cs_id] + ) + ]); + + const [ftsDisplay, ftsDesignation, ftsLiteral] = await Promise.all([ + getCountIfTableExists(db, 'search_fts_display'), + getCountIfTableExists(db, 'search_fts_designation'), + getCountIfTableExists(db, 'search_fts_literal') + ]); + + console.log('\nSNOMED SQLite v0 Status:'); + console.log(` DB: ${dbPath}`); + console.log(` Canonical URI: ${codeSystem.canonical_uri}`); + console.log(` Edition: ${codeSystem.edition_code || '(none)'}`); + console.log(` Version: ${codeSystem.version || '(none)'}`); + console.log(` Loaded At: ${codeSystem.loaded_at}`); + console.log(` Concepts: ${(concepts?.n || 0).toLocaleString()}`); + console.log(` Descriptions: ${(descriptions?.n || 0).toLocaleString()}`); + console.log(` Relationships: ${(relationships?.n || 0).toLocaleString()}`); + console.log(` Refsets: ${(refsets?.n || 0).toLocaleString()}`); + console.log(` Closure rows: ${(closure?.n || 0).toLocaleString()}`); + console.log(` FTS 
display: ${(ftsDisplay?.n || 0).toLocaleString()}`); + console.log(` FTS desig.: ${(ftsDesignation?.n || 0).toLocaleString()}`); + console.log(` FTS literal: ${(ftsLiteral?.n || 0).toLocaleString()}`); + + this.logSuccess('Status read complete'); + } finally { + await closeDb(db); + } + } +} + +function validateVersion(input) { + if (!input) return 'Version is required'; + if (!/^\d{8}$/.test(input)) return 'Version must be YYYYMMDD'; + + const year = Number(input.slice(0, 4)); + const month = Number(input.slice(4, 6)); + const day = Number(input.slice(6, 8)); + + if (year < 1900 || year > 2100) return 'Invalid year'; + if (month < 1 || month > 12) return 'Invalid month'; + if (day < 1 || day > 31) return 'Invalid day'; + + return true; +} + +async function promptForSource() { + const answer = await inquirer.prompt([ + { + type: 'input', + name: 'source', + message: 'Source directory:', + validate: (input) => input ? true : 'Source is required' + } + ]); + + return answer.source; +} + +function getRow(db, sql, params) { + return new Promise((resolve, reject) => { + db.get(sql, params, (err, row) => { + if (err) reject(err); + else resolve(row); + }); + }); +} + +async function getCountIfTableExists(db, tableName) { + const exists = await getRow( + db, + `SELECT 1 AS found FROM sqlite_master WHERE type = 'table' AND name = ? 
LIMIT 1`, + [tableName] + ); + if (!exists) { + return { n: 0 }; + } + return getRow(db, `SELECT COUNT(*) AS n FROM ${tableName}`, []); +} + +function closeDb(db) { + return new Promise((resolve, reject) => { + db.close((err) => { + if (err) reject(err); + else resolve(); + }); + }); +} + +function parseSnomedUri(uri) { + if (!uri || typeof uri !== 'string') { + return { edition: null, version: null }; + } + const m = uri.match(/^https?:\/\/snomed\.info\/sct\/([^/]+)\/version\/(\d{8})$/i); + if (!m) { + return { edition: null, version: null }; + } + return { edition: m[1], version: m[2] }; +} + +function buildDefaultDest(edition, version) { + if (!version) { + return path.resolve('./data/snomed-v0.db'); + } + const label = + edition === '900000000000207008' + ? 'intl' + : edition === '731000124108' + ? 'us' + : String(edition || 'edition'); + return path.resolve(`./data/sct_${label}_${version}.v0.db`); +} + +function shouldAutoAssignDest(dest) { + if (!dest) return true; + const resolved = path.resolve(dest); + return ( + resolved === path.resolve('./data/snomed-v0.db') || + resolved === path.resolve('./data/snomed-sqlite-v0.db') + ); +} + +module.exports = { + SnomedSqliteV0Module +}; diff --git a/tx/importers/sqlite-v2/README.md b/tx/importers/sqlite-v2/README.md new file mode 100644 index 0000000..664ab6a --- /dev/null +++ b/tx/importers/sqlite-v2/README.md @@ -0,0 +1,90 @@ +# SQLite v0 Importers + +This folder contains clean-start terminology import pipelines targeting the shared SQLite v0 schema. + +Naming note: +- `v0i` was used for some earlier local artifacts during indexing/closure experiments. +- Schema version is still SQLite `v0`; there is no separate `v0i` schema. +- Keep one canonical full DB per terminology/version (closure + FTS) and avoid keeping experimental side files in active cache paths. 
+ +Developer docs: +- `docs/SQLITE_RUNTIME_CONFIG_CONTRACT.md` (contract-level key reference) +- `docs/SQLITE_METADATA_DEVELOPER_GUIDE.md` (annotated SNOMED/LOINC/RxNorm examples) + +Metadata policy: +- Importers now emit runtime-driving metadata only (`runtime.*` keys). +- Legacy duplicate keys (`schemaVersion`, `sourceKind`, `display`, etc.) are intentionally not emitted. + +## SNOMED import command + +Use `tx-import`: + +```bash +tx-import snomed-sqlite-v0 import \ + --yes \ + --source /path/to/Snapshot \ + --dest /path/to/sct_intl_20250201.v0.db \ + --edition 900000000000207008 \ + --snomed-version 20250201 \ + --overwrite +``` + +`--skip-closure` is deprecated and ignored by `snomed-sqlite-v0 import`: closure is always built, so production builds always include full closure. +Recursive fallback is available but now opt-in (`runtime.hierarchy.closure.fallbackRecursive=true`); default is fail-closed. + +Importer now also builds broad trigram FTS tables used by runtime text filtering: +- `search_fts_display` +- `search_fts_designation` +- `search_fts_literal` + +Runtime is configured FTS-first with LIKE fallback via `runtime.search` in `cs_config`. + +## RxNorm import command + +Use `tx-import`: + +```bash +tx-import rxnorm-sqlite-v0 import \ + --yes \ + --source /path/to/RxNorm_full_02022026.zip \ + --dest /path/to/rxnorm_02022026.v0.db \ + --rxnorm-version 02022026 \ + --overwrite +``` + +Use `--skip-closure` for faster iteration imports. + +## LOINC import command + +Use `tx-import`: + +```bash +tx-import loinc-sqlite-v0 import \ + --yes \ + --source /path/to/Loinc_2.81.zip \ + --dest /path/to/loinc_2.81.v0.db \ + --loinc-version 2.81 \ + --overwrite +``` + +Use `--skip-closure` for faster iteration imports. + +## Runtime source type + +`Library` now accepts: + +- `sqlite-v0:` (preferred generic source type) +- `snomed-sqlite-v0:` (alias to `sqlite-v0`) +- `loinc-sqlite-v0:` (alias to `sqlite-v0`) +- `rxnorm-sqlite-v0:` (alias to `sqlite-v0`) + +Loader behavior is generic. 
If specialized factory behavior is needed, metadata tags +(`runtime.behaviorFlags.tags`) are matched against factories registered through +`SqliteRuntimeV0FactoryProvider.registerSpecializedFactory(...)`. + +Use `!` after the type to mark the default for a code system when multiple versions are loaded: + +- `sqlite-v0!:sct_intl_20250201.v0.db` (default) +- `sqlite-v0:sct_us_20250301.v0.db` (additional version) + +Example config: `tx/tx.snomed-v0.yml`. diff --git a/tx/importers/sqlite-v2/import-loinc-v0.js b/tx/importers/sqlite-v2/import-loinc-v0.js new file mode 100644 index 0000000..c7996aa --- /dev/null +++ b/tx/importers/sqlite-v2/import-loinc-v0.js @@ -0,0 +1,1512 @@ +'use strict'; + +const fs = require('fs'); +const os = require('os'); +const path = require('path'); +const readline = require('readline'); +const { execFileSync } = require('child_process'); +const sqlite3 = require('sqlite3').verbose(); + +const BASE_URI = 'http://loinc.org'; +const PARENT_PROPERTY_CODE = 'parent'; +const EDGE_SET_PRIMARY = 1; + +const MAX_SQL_PARAMS = 900; +const FLUSH_ROW_TARGET = 5000; + +const PART_TYPE_PROPERTIES = [ + 'COMPONENT', + 'PROPERTY', + 'TIME_ASPCT', + 'SYSTEM', + 'SCALE_TYP', + 'METHOD_TYP', + 'CLASS', + 'DOCUMENT.TYPEOFSETTING', + 'DOCUMENT.TYPEOFSERVICE', + 'DOCUMENT.ROLE', + 'DOCUMENT.SUBJECT', + 'DOCUMENT.KIND', + 'SUPER.SYSTEM', + 'RAD.ANATOMIC.LOCATION', + 'RAD.ANATOMIC.LOCATION.LATERALITY', + 'RAD.ANATOMIC.LOCATION.REGION.IMAGED', + 'RAD.GUIDANCE.FOR.ACTION', + 'RAD.GUIDANCE.FOR.APPROACH', + 'RAD.MANEUVER.MANEUVER.TYPE', + 'RAD.MODALITY.MODALITY.SUBTYPE', + 'RAD.MODALITY.MODALITY.TYPE', + 'RAD.PHARMACEUTICAL.ROUTE', + 'RAD.PHARMACEUTICAL.SUBSTANCE.GIVEN', + 'RAD.REASON.FOR.EXAM', + 'RAD.TIMING', + 'RAD.VIEW.AGGREGATION', + 'RAD.VIEW.VIEW.TYPE', + 'CHALLENGE', + 'ADJUSTMENT', + 'COUNT', + 'DIVISOR', + 'TIME.MODIFIER', + 'SUFFIX' +]; + +const PART_TYPE_NORMALIZATION = { + TIME: 'TIME_ASPCT', + SCALE: 'SCALE_TYP', + METHOD: 'METHOD_TYP' +}; + +const 
LITERAL_COLUMN_MAP = [ + { property: 'CLASS', column: 'CLASS' }, + { property: 'COMPONENT', column: 'COMPONENT' }, + { property: 'PROPERTY', column: 'PROPERTY' }, + { property: 'TIME_ASPCT', column: 'TIME_ASPCT' }, + { property: 'SYSTEM', column: 'SYSTEM' }, + { property: 'SCALE_TYP', column: 'SCALE_TYP' }, + { property: 'METHOD_TYP', column: 'METHOD_TYP' }, + { property: 'ORDER_OBS', column: 'ORDER_OBS' }, + { property: 'CLASSTYPE', column: 'CLASSTYPE' }, + { property: 'STATUS', column: 'STATUS' }, + { property: 'EXAMPLE_UNITS', column: 'EXAMPLE_UNITS' }, + { property: 'EXAMPLE_UCUM_UNITS', column: 'EXAMPLE_UCUM_UNITS' }, + { property: 'UNITSREQUIRED', column: 'UNITSREQUIRED' }, + { property: 'FORMULA', column: 'FORMULA' }, + { property: 'SURVEY_QUEST_TEXT', column: 'SURVEY_QUEST_TEXT' }, + { property: 'DefinitionDescription', column: 'DefinitionDescription' }, + { property: 'EXTERNAL_COPYRIGHT_NOTICE', column: 'EXTERNAL_COPYRIGHT_NOTICE' }, + { property: 'RELATEDNAMES2', column: 'RELATEDNAMES2' } +]; + +class LoincSqliteV0Importer { + constructor(config = {}) { + const detectedVersion = detectVersionFromPath(config.source); + this.config = { + source: config.source, + dest: config.dest, + version: config.version || detectedVersion || null, + uri: config.uri, + skipClosure: !!config.skipClosure, + verbose: !!config.verbose, + overwrite: !!config.overwrite + }; + + if (!this.config.uri) { + this.config.uri = this.config.version ? 
`${BASE_URI}|${this.config.version}` : BASE_URI; + } + + this.db = null; + this.csId = null; + this.auditRunId = null; + + this.sourceRoot = null; + this.extractedTempDir = null; + + this.propertyIdByCode = new Map(); + this.conceptIdByCode = new Map(); + this.classPartByName = new Map(); + this.loincClassByCode = new Map(); + + this.nextConceptId = 1; + this.hierarchyPropertyId = null; + + this.stats = { + concepts: 0, + designations: 0, + relationships: 0, + literals: 0, + valueSets: 0, + valueSetMembers: 0, + closureRows: 0, + ftsDisplayRows: 0, + ftsDesignationRows: 0, + ftsLiteralRows: 0 + }; + } + + static discoverCsvFiles(source) { + const files = { + loinc: null, + part: null, + partLink: null, + hierarchy: null, + consumerName: null, + answerList: null, + answerListLink: null, + linguisticVariants: [] + }; + scanDirectoryForLoincFiles(source, files); + return files; + } + + async run() { + if (!this.config.source || !this.config.dest) { + throw new Error('source and dest are required'); + } + + await this.prepareSource(); + const files = LoincSqliteV0Importer.discoverCsvFiles(this.sourceRoot); + if (!files.loinc) { + throw new Error('Loinc.csv was not found'); + } + + await this.openDatabase(); + await this.createSchema(); + + try { + await this.startAudit(); + await this.createCodeSystem(); + await this.ensurePropertyDefinitions(); + + this.log( + `Discovered files: Loinc=${bool(files.loinc)}, Part=${bool(files.part)}, ` + + `PartLink=${bool(files.partLink)}, Hierarchy=${bool(files.hierarchy)}, ` + + `ConsumerName=${bool(files.consumerName)}, AnswerList=${bool(files.answerList)}, ` + + `AnswerLink=${bool(files.answerListLink)}, LingVariants=${files.linguisticVariants.length}` + ); + + await this.importConcepts(files); + await this.importDesignations(files); + await this.importRelationships(files); + await this.importLiterals(files); + await this.buildSearchIndexes(); + + if (!this.config.skipClosure) { + await this.buildClosure(files); + } + + await 
this.writeCsConfig(); + await this.finalizeDatabase(); + await this.completeAudit('success', null); + } catch (error) { + await this.completeAudit('failed', error); + throw error; + } finally { + await this.closeDatabase(); + await this.cleanupSource(); + } + + return { + csId: this.csId, + uri: this.config.uri, + stats: this.stats + }; + } + + async prepareSource() { + const src = path.resolve(this.config.source); + if (!fs.existsSync(src)) { + throw new Error(`Source does not exist: ${src}`); + } + + const stat = fs.statSync(src); + if (stat.isDirectory()) { + this.sourceRoot = src; + return; + } + + if (!stat.isFile()) { + throw new Error(`Unsupported source type: ${src}`); + } + if (!src.toLowerCase().endsWith('.zip')) { + throw new Error('Source must be a LOINC directory or a .zip file'); + } + + this.extractedTempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'loinc-sqlite-v0-')); + this.log(`Extracting ${src} to ${this.extractedTempDir} ...`); + try { + execFileSync('unzip', ['-q', src, '-d', this.extractedTempDir], { + stdio: 'pipe' + }); + } catch (error) { + throw new Error(`Failed to extract zip '${src}': ${error.message}`); + } + this.sourceRoot = this.extractedTempDir; + } + + async cleanupSource() { + if (this.extractedTempDir && fs.existsSync(this.extractedTempDir)) { + fs.rmSync(this.extractedTempDir, { recursive: true, force: true }); + this.log(`Removed temporary extraction directory: ${this.extractedTempDir}`); + } + this.extractedTempDir = null; + this.sourceRoot = null; + } + + async openDatabase() { + const dir = path.dirname(this.config.dest); + fs.mkdirSync(dir, { recursive: true }); + + if (fs.existsSync(this.config.dest)) { + if (!this.config.overwrite) { + throw new Error(`Destination exists: ${this.config.dest} (use --overwrite)`); + } + fs.unlinkSync(this.config.dest); + } + + this.db = await openSqlite(this.config.dest); + await this.exec('PRAGMA foreign_keys = OFF'); + await this.exec('PRAGMA journal_mode = WAL'); + await 
this.exec('PRAGMA synchronous = OFF'); + await this.exec('PRAGMA cache_size = -64000'); + await this.exec('PRAGMA temp_store = MEMORY'); + } + + async closeDatabase() { + if (!this.db) return; + await closeSqlite(this.db); + this.db = null; + } + + async createSchema() { + const schemaPath = path.join(__dirname, 'schema-v0.sql'); + const ddl = fs.readFileSync(schemaPath, 'utf8'); + await this.exec(ddl); + } + + async startAudit() { + const result = await this.runSql( + `INSERT INTO load_audit (started_at, source_path, target_db, terminology, edition_code, version, status) + VALUES (CURRENT_TIMESTAMP, ?, ?, 'loinc', NULL, ?, 'running')`, + [this.config.source, this.config.dest, this.config.version || null] + ); + this.auditRunId = result.lastID; + } + + async completeAudit(status, error) { + if (!this.auditRunId) return; + + const payload = { + uri: this.config.uri, + version: this.config.version || null, + stats: this.stats + }; + if (error) { + payload.error = { + message: error.message, + stack: this.config.verbose ? error.stack : undefined + }; + } + + await this.runSql( + `UPDATE load_audit + SET completed_at = CURRENT_TIMESTAMP, + status = ?, + stats_json = ? 
+ WHERE run_id = ?`, + [status, JSON.stringify(payload), this.auditRunId] + ); + } + + async createCodeSystem() { + const result = await this.runSql( + `INSERT INTO code_system (base_uri, edition_code, version, canonical_uri, name, source_kind) + VALUES (?, NULL, ?, ?, 'LOINC', 'loinc-csv')`, + [BASE_URI, this.config.version || null, this.config.uri] + ); + this.csId = result.lastID; + } + + async ensurePropertyDefinitions() { + this.hierarchyPropertyId = await this.ensureProperty(PARENT_PROPERTY_CODE, 'concept', 1, 'parent'); + for (const partType of PART_TYPE_PROPERTIES) { + await this.ensureProperty(partType, 'concept', 0, partType); + } + for (const item of LITERAL_COLUMN_MAP) { + await this.ensureProperty(item.property, 'literal', 0, item.property); + } + await this.ensureProperty('LIST', 'literal', 0, 'LIST'); + await this.ensureProperty('Answer', 'concept', 0, 'Answer'); + await this.ensureProperty('answers-for', 'concept', 0, 'answers-for'); + await this.ensureProperty('AnswerList', 'concept', 0, 'AnswerList'); + } + + async importConcepts(files) { + this.log('Importing concepts...'); + + const rows = []; + let loincCount = 0; + let partCount = 0; + let hierarchyNodeCount = 0; + let answerListCount = 0; + let answerCodeCount = 0; + + for await (const row of readCsv(files.loinc)) { + const code = trim(row.LOINC_NUM); + if (!code) continue; + + const display = trim(row.LONG_COMMON_NAME) || trim(row.DisplayName) || trim(row.SHORTNAME) || code; + const definition = trim(row.DefinitionDescription) || null; + const active = isActiveLoincStatus(row.STATUS) ? 
1 : 0; + const conceptId = this.nextConceptId++; + + rows.push([conceptId, this.csId, code, active, display, definition]); + this.conceptIdByCode.set(code, conceptId); + + const className = trim(row.CLASS); + if (className) { + this.loincClassByCode.set(code, className.toUpperCase()); + } + + loincCount += 1; + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept (concept_id, cs_id, code, active, display, definition)`, + 6, + rows + ); + rows.length = 0; + } + } + + if (files.part) { + for await (const row of readCsv(files.part)) { + const code = trim(row.PartNumber); + if (!code || this.conceptIdByCode.has(code)) continue; + + // Keep parity with legacy LOINC provider: + // PartName is the primary display; PartDisplayName is a designation. + const display = trim(row.PartName) || trim(row.PartDisplayName) || code; + const active = String(row.Status || '').toUpperCase() === 'ACTIVE' ? 1 : 0; + const conceptId = this.nextConceptId++; + + rows.push([conceptId, this.csId, code, active, display, null]); + this.conceptIdByCode.set(code, conceptId); + partCount += 1; + + const partTypeName = trim(row.PartTypeName); + const partName = trim(row.PartName); + if (partTypeName === 'CLASS' && partName) { + this.classPartByName.set(partName.toUpperCase(), code); + } + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept (concept_id, cs_id, code, active, display, definition)`, + 6, + rows + ); + rows.length = 0; + } + } + } + + if (files.hierarchy) { + for await (const row of readCsv(files.hierarchy)) { + const code = trim(row.CODE); + const parent = trim(row.IMMEDIATE_PARENT); + + if (code && !this.conceptIdByCode.has(code)) { + const conceptId = this.nextConceptId++; + rows.push([conceptId, this.csId, code, 1, trim(row.CODE_TEXT) || code, null]); + this.conceptIdByCode.set(code, conceptId); + hierarchyNodeCount += 1; + } + + if (parent && !this.conceptIdByCode.has(parent)) { + const parentId = 
this.nextConceptId++; + rows.push([parentId, this.csId, parent, 1, parent, null]); + this.conceptIdByCode.set(parent, parentId); + hierarchyNodeCount += 1; + } + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept (concept_id, cs_id, code, active, display, definition)`, + 6, + rows + ); + rows.length = 0; + } + } + } + + if (files.answerList) { + for await (const row of readCsv(files.answerList)) { + const listCode = trim(row.AnswerListId); + if (listCode && !this.conceptIdByCode.has(listCode)) { + const conceptId = this.nextConceptId++; + const display = trim(row.AnswerListName) || listCode; + rows.push([conceptId, this.csId, listCode, 1, display, trim(row.Description) || null]); + this.conceptIdByCode.set(listCode, conceptId); + answerListCount += 1; + } + + const answerCode = trim(row.AnswerStringId); + if (answerCode && !this.conceptIdByCode.has(answerCode)) { + const conceptId = this.nextConceptId++; + const display = trim(row.DisplayText) || answerCode; + rows.push([conceptId, this.csId, answerCode, 1, display, trim(row.Description) || null]); + this.conceptIdByCode.set(answerCode, conceptId); + answerCodeCount += 1; + } + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept (concept_id, cs_id, code, active, display, definition)`, + 6, + rows + ); + rows.length = 0; + } + } + } + + if (rows.length > 0) { + await this.bulkInsert( + `INSERT INTO concept (concept_id, cs_id, code, active, display, definition)`, + 6, + rows + ); + } + + this.stats.concepts = this.conceptIdByCode.size; + this.log( + `Concept import complete: loinc=${loincCount.toLocaleString()}, parts=${partCount.toLocaleString()}, ` + + `extraHierarchy=${hierarchyNodeCount.toLocaleString()}, ` + + `answerLists=${answerListCount.toLocaleString()}, answers=${answerCodeCount.toLocaleString()}, ` + + `total=${this.stats.concepts.toLocaleString()}` + ); + } + + async importDesignations(files) { + this.log('Importing 
designations...'); + + const rows = []; + let imported = 0; + + for await (const row of readCsv(files.loinc)) { + const code = trim(row.LOINC_NUM); + const conceptId = this.conceptIdByCode.get(code); + if (!conceptId) continue; + + const active = isActiveLoincStatus(row.STATUS) ? 1 : 0; + + const longName = trim(row.LONG_COMMON_NAME); + if (longName) { + rows.push([conceptId, active, 'en-US', 'LONG_COMMON_NAME', longName, 1]); + imported += 1; + } + + const shortName = trim(row.SHORTNAME); + if (shortName) { + rows.push([conceptId, active, 'en-US', 'SHORTNAME', shortName, 0]); + imported += 1; + } + + const displayName = trim(row.DisplayName); + if (displayName && displayName !== longName) { + rows.push([conceptId, active, 'en-US', 'DisplayName', displayName, 0]); + imported += 1; + } + + const consumerName = trim(row.CONSUMER_NAME); + if (consumerName) { + rows.push([conceptId, active, 'en-US', 'ConsumerName', consumerName, 0]); + imported += 1; + } + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO designation (concept_id, active, language_code, use_code, term, preferred)`, + 6, + rows + ); + rows.length = 0; + } + } + + if (files.consumerName) { + for await (const row of readCsv(files.consumerName)) { + const code = trim(row.LoincNumber); + const conceptId = this.conceptIdByCode.get(code); + const consumer = trim(row.ConsumerName); + if (!conceptId || !consumer) continue; + + rows.push([conceptId, 1, 'en-US', 'ConsumerName', consumer, 0]); + imported += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO designation (concept_id, active, language_code, use_code, term, preferred)`, + 6, + rows + ); + rows.length = 0; + } + } + } + + for (const variantFile of files.linguisticVariants) { + const lang = languageFromVariantFilename(path.basename(variantFile)); + if (!lang) continue; + + for await (const row of readCsv(variantFile)) { + const code = trim(row.LOINC_NUM); + const conceptId = 
this.conceptIdByCode.get(code); + if (!conceptId) continue; + + const longName = trim(row.LONG_COMMON_NAME); + const shortName = trim(row.SHORTNAME); + const variantDisplay = trim(row.LinguisticVariantDisplayName); + if (longName) { + rows.push([conceptId, 1, lang, 'LONG_COMMON_NAME', longName, 0]); + imported += 1; + } + if (shortName) { + rows.push([conceptId, 1, lang, 'SHORTNAME', shortName, 0]); + imported += 1; + } + if (variantDisplay) { + rows.push([conceptId, 1, lang, 'LinguisticVariantDisplayName', variantDisplay, 0]); + imported += 1; + } + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO designation (concept_id, active, language_code, use_code, term, preferred)`, + 6, + rows + ); + rows.length = 0; + } + } + } + + if (files.part) { + for await (const row of readCsv(files.part)) { + const code = trim(row.PartNumber); + const conceptId = this.conceptIdByCode.get(code); + if (!conceptId) continue; + + const display = trim(row.PartDisplayName) || trim(row.PartName); + if (!display) continue; + + rows.push([conceptId, 1, 'en-US', 'DisplayName', display, 0]); + imported += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO designation (concept_id, active, language_code, use_code, term, preferred)`, + 6, + rows + ); + rows.length = 0; + } + } + } + + if (rows.length > 0) { + await this.bulkInsert( + `INSERT INTO designation (concept_id, active, language_code, use_code, term, preferred)`, + 6, + rows + ); + } + + this.stats.designations = imported; + this.log(`Designation import complete: ${imported.toLocaleString()} rows`); + } + + async importRelationships(files) { + this.log('Importing relationships...'); + + const rows = []; + let imported = 0; + let classLinks = 0; + let hierarchyLinks = 0; + let partLinks = 0; + let answerLinks = 0; + let answerForLinks = 0; + + if (files.partLink) { + for await (const row of readCsv(files.partLink)) { + const sourceCode = trim(row.LoincNumber); + const 
targetCode = trim(row.PartNumber); + const partTypeRaw = trim(row.PartTypeName); + const partType = PART_TYPE_NORMALIZATION[partTypeRaw] || partTypeRaw; + if (!sourceCode || !targetCode || !partType) continue; + + const sourceConceptId = this.conceptIdByCode.get(sourceCode); + const targetConceptId = this.conceptIdByCode.get(targetCode); + const propertyId = this.propertyIdByCode.get(partType); + if (!sourceConceptId || !targetConceptId || !propertyId) continue; + + rows.push([EDGE_SET_PRIMARY, sourceConceptId, propertyId, targetConceptId, 0, 1]); + imported += 1; + partLinks += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT OR IGNORE INTO concept_link (edge_set_id, source_concept_id, property_id, target_concept_id, group_id, active)`, + 6, + rows + ); + rows.length = 0; + } + } + } + + if (files.hierarchy) { + for await (const row of readCsv(files.hierarchy)) { + const childCode = trim(row.CODE); + const parentCode = trim(row.IMMEDIATE_PARENT); + if (!childCode || !parentCode || childCode === parentCode) continue; + + const childConceptId = this.conceptIdByCode.get(childCode); + const parentConceptId = this.conceptIdByCode.get(parentCode); + if (!childConceptId || !parentConceptId) continue; + + rows.push([EDGE_SET_PRIMARY, childConceptId, this.hierarchyPropertyId, parentConceptId, 0, 1]); + imported += 1; + hierarchyLinks += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT OR IGNORE INTO concept_link (edge_set_id, source_concept_id, property_id, target_concept_id, group_id, active)`, + 6, + rows + ); + rows.length = 0; + } + } + } + + const classPropertyId = this.propertyIdByCode.get('CLASS'); + if (classPropertyId) { + for (const [loincCode, className] of this.loincClassByCode.entries()) { + const classPartCode = this.classPartByName.get(className); + if (!classPartCode) continue; + + const loincConceptId = this.conceptIdByCode.get(loincCode); + const classConceptId = 
this.conceptIdByCode.get(classPartCode); + if (!loincConceptId || !classConceptId) continue; + + rows.push([EDGE_SET_PRIMARY, loincConceptId, classPropertyId, classConceptId, 0, 1]); + imported += 1; + classLinks += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT OR IGNORE INTO concept_link (edge_set_id, source_concept_id, property_id, target_concept_id, group_id, active)`, + 6, + rows + ); + rows.length = 0; + } + } + } + + const answerPropertyId = this.propertyIdByCode.get('Answer'); + if (answerPropertyId && files.answerList) { + for await (const row of readCsv(files.answerList)) { + const listCode = trim(row.AnswerListId); + const answerCode = trim(row.AnswerStringId); + if (!listCode || !answerCode) continue; + + const listConceptId = this.conceptIdByCode.get(listCode); + const answerConceptId = this.conceptIdByCode.get(answerCode); + if (!listConceptId || !answerConceptId) continue; + + rows.push([EDGE_SET_PRIMARY, listConceptId, answerPropertyId, answerConceptId, 0, 1]); + imported += 1; + answerLinks += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT OR IGNORE INTO concept_link (edge_set_id, source_concept_id, property_id, target_concept_id, group_id, active)`, + 6, + rows + ); + rows.length = 0; + } + } + } + + const answersForPropertyId = this.propertyIdByCode.get('answers-for'); + if (answersForPropertyId && files.answerListLink) { + for await (const row of readCsv(files.answerListLink)) { + const loincCode = trim(row.LoincNumber); + const listCode = trim(row.AnswerListId); + if (!loincCode || !listCode) continue; + + const loincConceptId = this.conceptIdByCode.get(loincCode); + const listConceptId = this.conceptIdByCode.get(listCode); + if (!loincConceptId || !listConceptId) continue; + + rows.push([EDGE_SET_PRIMARY, listConceptId, answersForPropertyId, loincConceptId, 0, 1]); + imported += 1; + answerForLinks += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + 
`INSERT OR IGNORE INTO concept_link (edge_set_id, source_concept_id, property_id, target_concept_id, group_id, active)`, + 6, + rows + ); + rows.length = 0; + } + } + } + + if (rows.length > 0) { + await this.bulkInsert( + `INSERT OR IGNORE INTO concept_link (edge_set_id, source_concept_id, property_id, target_concept_id, group_id, active)`, + 6, + rows + ); + } + + this.stats.relationships = imported; + this.log( + `Relationship import complete: total=${imported.toLocaleString()} ` + + `(part=${partLinks.toLocaleString()}, hierarchy=${hierarchyLinks.toLocaleString()}, ` + + `class=${classLinks.toLocaleString()}, answer=${answerLinks.toLocaleString()}, ` + + `answers-for=${answerForLinks.toLocaleString()})` + ); + } + + async importLiterals(files) { + this.log('Importing literal properties...'); + + const rows = []; + let imported = 0; + for await (const row of readCsv(files.loinc)) { + const code = trim(row.LOINC_NUM); + const conceptId = this.conceptIdByCode.get(code); + if (!conceptId) continue; + + for (const spec of LITERAL_COLUMN_MAP) { + const raw = trim(row[spec.column]); + if (!raw) continue; + const propertyId = this.propertyIdByCode.get(spec.property); + if (!propertyId) continue; + + const parsed = parseLiteralValue(raw); + rows.push([ + EDGE_SET_PRIMARY, + conceptId, + propertyId, + 0, + 1, + raw, + parsed.valueText, + parsed.valueNum, + parsed.valueBool + ]); + imported += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, + rows + ); + rows.length = 0; + } + } + } + + const listPropertyId = this.propertyIdByCode.get('LIST'); + if (listPropertyId && files.answerList) { + for await (const row of readCsv(files.answerList)) { + const answerCode = trim(row.AnswerStringId); + const listCode = trim(row.AnswerListId); + const conceptId = this.conceptIdByCode.get(answerCode); + if (!conceptId 
|| !listCode) continue; + + const parsed = parseLiteralValue(listCode); + rows.push([ + EDGE_SET_PRIMARY, + conceptId, + listPropertyId, + 0, + 1, + listCode, + parsed.valueText, + parsed.valueNum, + parsed.valueBool + ]); + imported += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, + rows + ); + rows.length = 0; + } + } + } + + if (rows.length > 0) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, + rows + ); + } + + this.stats.literals = imported; + this.log(`Literal import complete: ${imported.toLocaleString()} rows`); + } + + async buildClosure(files) { + this.log('Building transitive closure...'); + await this.exec('BEGIN TRANSACTION'); + try { + await this.exec('DELETE FROM closure'); + + await this.runSql( + `INSERT OR IGNORE INTO closure (ancestor_id, descendant_id) + SELECT concept_id, concept_id + FROM concept + WHERE cs_id = ?`, + [this.csId] + ); + + let insertedEdges = 0; + if (files.hierarchy) { + const rows = []; + for await (const row of readCsv(files.hierarchy)) { + const code = trim(row.CODE); + const pathToRoot = trim(row.PATH_TO_ROOT); + const descendantId = this.conceptIdByCode.get(code); + if (!descendantId || !pathToRoot) continue; + + const ancestorCodes = pathToRoot.split('.').map(v => v.trim()).filter(Boolean); + for (const ancestorCode of ancestorCodes) { + const ancestorId = this.conceptIdByCode.get(ancestorCode); + if (!ancestorId || ancestorId === descendantId) continue; + rows.push([ancestorId, descendantId]); + insertedEdges += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT OR IGNORE INTO closure (ancestor_id, descendant_id)`, + 2, + rows, + { wrapTransaction: false } + ); + rows.length = 0; + } + } + } + + 
if (rows.length > 0) { + await this.bulkInsert( + `INSERT OR IGNORE INTO closure (ancestor_id, descendant_id)`, + 2, + rows, + { wrapTransaction: false } + ); + } + } else { + await this.runSql( + `INSERT OR IGNORE INTO closure (ancestor_id, descendant_id) + SELECT l.target_concept_id, l.source_concept_id + FROM concept_link l + JOIN concept c ON c.concept_id = l.source_concept_id + WHERE c.cs_id = ? + AND l.active = 1 + AND l.property_id = ? + AND l.edge_set_id = ?`, + [this.csId, this.hierarchyPropertyId, EDGE_SET_PRIMARY] + ); + + await this.exec(` + CREATE TEMP TABLE IF NOT EXISTS _closure_frontier ( + ancestor_id INTEGER NOT NULL, + descendant_id INTEGER NOT NULL, + depth INTEGER NOT NULL, + PRIMARY KEY (ancestor_id, descendant_id) + ) WITHOUT ROWID; + + CREATE TEMP TABLE IF NOT EXISTS _closure_next ( + ancestor_id INTEGER NOT NULL, + descendant_id INTEGER NOT NULL, + depth INTEGER NOT NULL, + PRIMARY KEY (ancestor_id, descendant_id) + ) WITHOUT ROWID; + `); + + await this.exec('DELETE FROM _closure_frontier'); + await this.exec('DELETE FROM _closure_next'); + await this.runSql( + `INSERT OR IGNORE INTO _closure_frontier (ancestor_id, descendant_id, depth) + SELECT l.target_concept_id, l.source_concept_id, 1 + FROM concept_link l + JOIN concept c ON c.concept_id = l.source_concept_id + WHERE c.cs_id = ? + AND l.active = 1 + AND l.property_id = ? + AND l.edge_set_id = ?`, + [this.csId, this.hierarchyPropertyId, EDGE_SET_PRIMARY] + ); + + let keepGoing = true; + while (keepGoing) { + await this.exec('DELETE FROM _closure_next'); + await this.runSql( + `INSERT OR IGNORE INTO _closure_next (ancestor_id, descendant_id, depth) + SELECT f.ancestor_id, l.source_concept_id, f.depth + 1 + FROM _closure_frontier f + JOIN concept_link l + ON l.property_id = ? + AND l.edge_set_id = ? 
+ AND l.active = 1 + AND l.target_concept_id = f.descendant_id + WHERE NOT EXISTS ( + SELECT 1 + FROM closure c + WHERE c.ancestor_id = f.ancestor_id + AND c.descendant_id = l.source_concept_id + )`, + [this.hierarchyPropertyId, EDGE_SET_PRIMARY] + ); + + const nextRow = await this.get('SELECT COUNT(*) AS n FROM _closure_next', []); + const n = nextRow ? nextRow.n : 0; + if (n === 0) { + keepGoing = false; + break; + } + + await this.runSql( + `INSERT OR IGNORE INTO closure (ancestor_id, descendant_id) + SELECT ancestor_id, descendant_id + FROM _closure_next`, + [] + ); + await this.exec('DELETE FROM _closure_frontier'); + await this.exec( + `INSERT OR IGNORE INTO _closure_frontier (ancestor_id, descendant_id, depth) + SELECT ancestor_id, descendant_id, depth + FROM _closure_next` + ); + } + + await this.exec('DELETE FROM _closure_frontier'); + await this.exec('DELETE FROM _closure_next'); + } + + await this.exec('COMMIT'); + if (this.config.verbose) { + this.log(` Closure path edges considered: ${insertedEdges.toLocaleString()}`); + } + } catch (error) { + await this.exec('ROLLBACK'); + throw error; + } + + const row = await this.get('SELECT COUNT(*) AS n FROM closure', []); + this.stats.closureRows = row ? row.n : 0; + this.log(`Closure complete: ${this.stats.closureRows.toLocaleString()} rows`); + } + + async buildSearchIndexes() { + this.log('Building broad text search indexes (display/designation/literal)...'); + await this.exec('BEGIN TRANSACTION'); + try { + await this.exec('DELETE FROM search_fts_display'); + await this.exec('DELETE FROM search_fts_designation'); + await this.exec('DELETE FROM search_fts_literal'); + + const display = await this.runSql( + `INSERT INTO search_fts_display(rowid, term) + SELECT concept_id, trim(display) + FROM concept + WHERE cs_id = ? 
+ AND display IS NOT NULL + AND trim(display) <> ''`, + [this.csId] + ); + + const designation = await this.runSql( + `INSERT INTO search_fts_designation(rowid, term) + SELECT d.designation_id, trim(d.term) + FROM designation d + JOIN concept c ON c.concept_id = d.concept_id + WHERE c.cs_id = ? + AND d.term IS NOT NULL + AND trim(d.term) <> ''`, + [this.csId] + ); + + const literal = await this.runSql( + `INSERT INTO search_fts_literal(rowid, term) + SELECT literal_id, txt + FROM ( + SELECT cl.literal_id AS literal_id, + trim(COALESCE(NULLIF(cl.value_text, ''), NULLIF(cl.value_raw, ''))) AS txt + FROM concept_literal cl + JOIN concept c ON c.concept_id = cl.source_concept_id + WHERE c.cs_id = ? + ) x + WHERE txt IS NOT NULL + AND txt <> ''`, + [this.csId] + ); + + await this.exec(`INSERT INTO search_fts_display(search_fts_display) VALUES ('optimize')`); + await this.exec(`INSERT INTO search_fts_designation(search_fts_designation) VALUES ('optimize')`); + await this.exec(`INSERT INTO search_fts_literal(search_fts_literal) VALUES ('optimize')`); + + await this.exec('COMMIT'); + + this.stats.ftsDisplayRows = display.changes || 0; + this.stats.ftsDesignationRows = designation.changes || 0; + this.stats.ftsLiteralRows = literal.changes || 0; + + this.log( + `Search index complete: display=${this.stats.ftsDisplayRows.toLocaleString()}, ` + + `designation=${this.stats.ftsDesignationRows.toLocaleString()}, ` + + `literal=${this.stats.ftsLiteralRows.toLocaleString()}` + ); + } catch (error) { + await this.exec('ROLLBACK'); + throw error; + } + } + + async writeCsConfig() { + const runtimeSearch = { + mode: 'fts-broad', + activeOnly: true, + designationActiveOnly: true, + literalActiveOnly: true, + sources: ['display', 'designation', 'literal'], + ftsTables: { + display: 'search_fts_display', + designation: 'search_fts_designation', + literal: 'search_fts_literal' + }, + likeFallback: { enabled: true, caseInsensitive: true } + }; + + const runtimeFilters = { + concept: { 
operators: ['=', 'is-a', 'descendent-of', 'in'], isAIncludesSelf: false }, + code: { operators: ['regex'] }, + properties: { + allPropertiesFilterable: true, + defaultOperators: ['='], + defaultSources: ['literal', 'link'], + defaultLinkMatch: 'code-or-display', + defaultValue: { normalizeCase: true }, + aliases: { + 'document-kind': 'DOCUMENT.KIND' + }, + byCode: { + CLASS: { + operators: ['=', 'regex'], + sources: ['literal', 'link'], + linkMatch: 'code-or-display' + }, + COMPONENT: { + operators: ['=', 'regex'], + sources: ['literal', 'link'], + linkMatch: 'code-or-display' + }, + PROPERTY: { + operators: ['=', 'regex'], + sources: ['literal', 'link'], + linkMatch: 'code-or-display' + }, + TIME_ASPCT: { + operators: ['=', 'regex'], + sources: ['literal', 'link'], + linkMatch: 'code-or-display' + }, + SYSTEM: { + operators: ['=', 'regex'], + sources: ['literal', 'link'], + linkMatch: 'code-or-display' + }, + SCALE_TYP: { + operators: ['='], + sources: ['literal', 'link'], + linkMatch: 'code-or-display', + value: { + normalizeCase: true, + aliases: { + doc: 'Doc', + 'lp32888-7': 'Doc' + } + } + }, + METHOD_TYP: { + operators: ['=', 'regex'], + sources: ['literal', 'link'], + linkMatch: 'code-or-display' + }, + ORDER_OBS: { + operators: ['='], + sources: ['literal'], + value: { + normalizeCase: true, + aliases: { + order: 'Order', + observation: 'Observation', + both: 'Both' + } + } + }, + CLASSTYPE: { + operators: ['='], + sources: ['literal'], + value: { + normalizeCase: true, + aliases: { + 'laboratory class': '1', + 'clinical class': '2', + 'claims attachments': '3', + surveys: '4' + } + } + }, + STATUS: { + operators: ['='], + sources: ['literal'], + value: { + normalizeCase: true, + aliases: { + active: 'ACTIVE', + inactive: 'INACTIVE' + } + } + }, + LIST: { + operators: ['='], + sources: ['literal'] + }, + 'DOCUMENT.KIND': { + operators: ['=', 'exists'], + sources: ['link'], + linkMatch: 'code-or-display' + }, + 'answers-for': { + operators: ['=', 'in'], + 
sources: ['link'], + specialHandler: { + kind: 'derived-link-filter', + seed: { + // Raw LL* values are already answer-list concept codes. + directCodePrefixes: ['LL'], + // Non-LL inputs can be resolved to answer-list codes through inverse links. + inversePropertyCode: 'answers-for' + }, + projection: { + // Then project list -> answer links to produce the final candidate code set. + propertyCode: 'Answer', + side: 'target' + } + } + } + } + } + }; + + const runtimeImplicitValueSets = { + all: { queries: ['fhir_vs', 'fhir_vs=all'] }, + isa: { queryPrefix: 'fhir_vs=isa/', filter: { property: 'concept', op: 'is-a', valueFromSuffix: true } } + }; + + const runtimeDesignations = { + defaultSystem: BASE_URI, + useMapping: { + LONG_COMMON_NAME: { system: BASE_URI, code: 'LONG_COMMON_NAME', display: 'Long common name' }, + SHORTNAME: { system: BASE_URI, code: 'SHORTNAME', display: 'Short name' }, + DisplayName: { system: BASE_URI, code: 'DisplayName', display: 'Display name' }, + ConsumerName: { system: BASE_URI, code: 'ConsumerName', display: 'Consumer name' } + } + }; + + const configRows = [ + ['runtime.versioning', JSON.stringify({ algorithm: 'string', partialMatch: false, output: 'version' })], + ['runtime.languages', JSON.stringify({ default: 'en-US' })], + ['runtime.designations', JSON.stringify(runtimeDesignations)], + ['runtime.hierarchy', JSON.stringify({ + propertyCode: PARENT_PROPERTY_CODE, + edgeSetId: EDGE_SET_PRIMARY, + closure: { enabled: true, fallbackRecursive: false } + })], + ['runtime.filters', JSON.stringify(runtimeFilters)], + ['runtime.implicitValueSets', JSON.stringify(runtimeImplicitValueSets)], + ['runtime.status', JSON.stringify({ + inactive: { source: 'concept.active', invert: true }, + statusProperty: 'STATUS', + deprecated: { source: 'constant', value: false }, + abstract: { source: 'constant', value: false } + })], + ['runtime.iteration', JSON.stringify({ + defaultCodeRegex: '^[0-9]{3,}.*', + rootMode: 'all', + children: false + })], + 
['runtime.search', JSON.stringify(runtimeSearch)], + ['runtime.behaviorFlags', JSON.stringify({ + tags: ['loinc', 'implicit-vs-path'] + })] + ]; + + for (const [key, value] of configRows) { + await this.runSql( + `INSERT OR REPLACE INTO cs_config (cs_id, key, value) + VALUES (?, ?, ?)`, + [this.csId, key, typeof value === 'string' ? value : JSON.stringify(value)] + ); + } + } + + async finalizeDatabase() { + this.log('Finalizing SQLite database...'); + await this.exec('ANALYZE'); + await this.exec('PRAGMA journal_mode = DELETE'); + await this.exec('PRAGMA synchronous = NORMAL'); + await this.exec('VACUUM'); + } + + async ensureProperty(propertyCode, valueKind, isHierarchy, display) { + if (!propertyCode) return null; + if (this.propertyIdByCode.has(propertyCode)) { + return this.propertyIdByCode.get(propertyCode); + } + + await this.runSql( + `INSERT OR IGNORE INTO property_def (cs_id, property_code, value_kind, is_hierarchy, display) + VALUES (?, ?, ?, ?, ?)`, + [this.csId, propertyCode, valueKind, isHierarchy, display || propertyCode] + ); + + const row = await this.get( + `SELECT property_id + FROM property_def + WHERE cs_id = ? 
AND property_code = ?`, + [this.csId, propertyCode] + ); + if (!row) return null; + + this.propertyIdByCode.set(propertyCode, row.property_id); + return row.property_id; + } + + async bulkInsert(sqlPrefix, columnCount, rows, options = {}) { + if (!rows.length) return; + + const chunkSize = Math.max(1, Math.floor(MAX_SQL_PARAMS / columnCount)); + const wrapTransaction = options.wrapTransaction !== false; + + if (wrapTransaction) { + await this.exec('BEGIN TRANSACTION'); + } + + try { + for (let i = 0; i < rows.length; i += chunkSize) { + const chunk = rows.slice(i, i + chunkSize); + const placeholders = chunk.map(() => `(${new Array(columnCount).fill('?').join(',')})`).join(','); + const flat = []; + for (const row of chunk) { + for (const value of row) flat.push(value); + } + await this.runSql(`${sqlPrefix} VALUES ${placeholders}`, flat); + } + if (wrapTransaction) { + await this.exec('COMMIT'); + } + } catch (error) { + if (wrapTransaction) { + await this.exec('ROLLBACK'); + } + throw error; + } + } + + async runSql(sql, params = []) { + return new Promise((resolve, reject) => { + this.db.run(sql, params, function onRun(err) { + if (err) reject(err); + else resolve({ changes: this.changes || 0, lastID: this.lastID }); + }); + }); + } + + async get(sql, params = []) { + return new Promise((resolve, reject) => { + this.db.get(sql, params, (err, row) => { + if (err) reject(err); + else resolve(row); + }); + }); + } + + async exec(sql) { + return new Promise((resolve, reject) => { + this.db.exec(sql, (err) => { + if (err) reject(err); + else resolve(); + }); + }); + } + + log(message) { + if (!this.config.verbose) return; + console.log(message); + } +} + +function detectVersionFromPath(value) { + if (!value) return null; + const text = String(value); + const match = text.match(/Loinc[_-]?(\d+\.\d+(?:\.\d+)?)/i); + return match ? 
/**
 * Tells whether a LOINC STATUS value counts as active for this import.
 * ACTIVE and TRIAL are active; the comparison ignores case and surrounding whitespace.
 *
 * @param {*} status raw STATUS cell (may be null/undefined)
 * @returns {boolean}
 */
function isActiveLoincStatus(status) {
  const normalized = trim(status).toUpperCase();
  return normalized === 'ACTIVE' || normalized === 'TRIAL';
}

/**
 * Splits a raw literal into its typed representations.
 *
 * - empty / null / undefined  -> all three fields null
 * - 'true' / 'false'          -> valueBool 1 / 0
 * - plain decimal number      -> valueNum (null if not finite)
 * - anything else             -> valueText only
 *
 * The value is normalized with trim(), so non-string inputs such as the number 0
 * or the boolean false are parsed by their string form instead of being treated
 * as empty.
 *
 * @param {*} raw raw literal value
 * @returns {{valueText: (string|null), valueNum: (number|null), valueBool: (number|null)}}
 */
function parseLiteralValue(raw) {
  const text = trim(raw);
  if (!text) {
    return { valueText: null, valueNum: null, valueBool: null };
  }

  if (text === 'true' || text === 'false') {
    return { valueText: text, valueNum: null, valueBool: text === 'true' ? 1 : 0 };
  }

  if (/^-?\d+(\.\d+)?$/.test(text)) {
    const n = Number(text);
    return {
      valueText: text,
      valueNum: Number.isFinite(n) ? n : null,
      valueBool: null
    };
  }

  return { valueText: text, valueNum: null, valueBool: null };
}

/**
 * Converts any value to a trimmed string; null and undefined become ''.
 *
 * @param {*} value
 * @returns {string}
 */
function trim(value) {
  if (value === null || value === undefined) return '';
  return String(value).trim();
}

/**
 * Recursively scans `dir` and records the paths of the known LOINC CSV files in
 * `files` (mutated in place). Directories whose name starts with '.' are skipped.
 *
 * Entries are visited in name order, so the order of `files.linguisticVariants`
 * and the winner among same-named files in different directories (the last one
 * visited) do not depend on the order the filesystem returns entries in.
 *
 * @param {string} dir directory to scan; a missing directory is ignored
 * @param {Object} files result object with the keys created by discoverCsvFiles()
 * @returns {void}
 */
function scanDirectoryForLoincFiles(dir, files) {
  if (!dir || !fs.existsSync(dir)) return;

  // Exact file name -> key in `files`.
  const keyByFileName = {
    'Loinc.csv': 'loinc',
    'Part.csv': 'part',
    'LoincPartLink_Primary.csv': 'partLink',
    'ComponentHierarchyBySystem.csv': 'hierarchy',
    'ConsumerName.csv': 'consumerName',
    'AnswerList.csv': 'answerList',
    'LoincAnswerListLink.csv': 'answerListLink'
  };

  const entries = fs.readdirSync(dir, { withFileTypes: true });
  entries.sort((a, b) => {
    if (a.name < b.name) return -1;
    return a.name > b.name ? 1 : 0;
  });

  for (const entry of entries) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      if (!entry.name.startsWith('.')) {
        scanDirectoryForLoincFiles(fullPath, files);
      }
      continue;
    }
    if (!entry.isFile()) continue;

    if (Object.prototype.hasOwnProperty.call(keyByFileName, entry.name)) {
      files[keyByFileName[entry.name]] = fullPath;
    } else if (entry.name.endsWith('LinguisticVariant.csv')) {
      files.linguisticVariants.push(fullPath);
    }
  }
}

/**
 * Derives a language tag from a linguistic-variant file name: two lower-case
 * letters followed by two upper-case letters at the start of the name become
 * 'll-CC' (e.g. 'deDE15LinguisticVariant.csv' -> 'de-DE').
 *
 * @param {string} fileName base name of the file
 * @returns {string|null} the tag, or null when the name does not start with that pattern
 */
function languageFromVariantFilename(fileName) {
  const match = fileName.match(/^([a-z]{2})([A-Z]{2})/);
  if (!match) return null;
  return `${match[1]}-${match[2]}`;
}
/**
 * Splits one CSV record into its fields.
 *
 * Inside a quoted section a doubled quote ("") is a literal quote and commas
 * are data; outside quotes a comma ends the field.
 *
 * @param {string} line one complete CSV record
 * @returns {string[]} field values with the quoting removed
 */
function parseCsvLine(line) {
  const result = [];
  let field = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i += 1) {
    const c = line[i];
    if (inQuotes) {
      if (c === '"') {
        if (i + 1 < line.length && line[i + 1] === '"') {
          field += '"';
          i += 1;
        } else {
          inQuotes = false;
        }
      } else {
        field += c;
      }
    } else if (c === '"') {
      inQuotes = true;
    } else if (c === ',') {
      result.push(field);
      field = '';
    } else {
      field += c;
    }
  }
  result.push(field);
  return result;
}

/**
 * True when `text` ends inside a quoted section, i.e. it holds an odd number
 * of quote characters. This is the same toggle rule parseCsvLine applies.
 */
function hasUnterminatedQuote(text) {
  let open = false;
  for (let i = 0; i < text.length; i += 1) {
    if (text[i] === '"') open = !open;
  }
  return open;
}

/**
 * Streams a CSV file as objects keyed by the header row.
 *
 * The first record supplies the column names (a leading BOM is removed).
 * Blank lines between records are skipped. A quoted field may contain line
 * breaks: physical lines are joined with "\n" until the quotes balance.
 * Missing or empty cells are reported as ''.
 *
 * @param {string} filePath path of the CSV file
 * @yields {Object<string, string>} one object per data record
 */
async function* readCsv(filePath) {
  const stream = fs.createReadStream(filePath);
  const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });

  let headers = null;
  // Text of a record whose quoted field is still open; null when not inside one.
  let pending = null;

  const toRow = (values) => {
    const row = {};
    for (let i = 0; i < headers.length; i += 1) {
      row[headers[i]] = values[i] || '';
    }
    return row;
  };

  try {
    for await (const line of rl) {
      let record = line;
      if (pending !== null) {
        record = `${pending}\n${line}`;
      } else if (headers && !line) {
        // Blank separator line outside of any quoted field.
        continue;
      }

      if (hasUnterminatedQuote(record)) {
        pending = record;
        continue;
      }
      pending = null;

      if (!headers) {
        headers = parseCsvLine(record).map((h) => h.replace(/^\uFEFF/, ''));
        continue;
      }
      yield toRow(parseCsvLine(record));
    }

    // The file ended inside a quoted field: emit what is there rather than drop it.
    if (pending !== null && headers) {
      yield toRow(parseCsvLine(pending));
    }
  } finally {
    // Also runs when the consumer stops iterating early or throws.
    rl.close();
    stream.destroy();
  }
}

/**
 * Opens (creating if necessary) a SQLite database file.
 *
 * @param {string} filePath database path
 * @returns {Promise<Object>} resolves with the sqlite3 Database once it is open
 */
function openSqlite(filePath) {
  return new Promise((resolve, reject) => {
    const db = new sqlite3.Database(filePath, (err) => {
      if (err) reject(err);
      else resolve(db);
    });
  });
}

/**
 * Closes a sqlite3 Database.
 *
 * @param {Object} db handle returned by openSqlite
 * @returns {Promise<void>} resolves once the handle is closed
 */
function closeSqlite(db) {
  return new Promise((resolve, reject) => {
    db.close((err) => {
      if (err) reject(err);
      else resolve();
    });
  });
}

/** Renders a truthy/falsy value as 'yes' / 'no' for log output. */
function bool(value) {
  return value ? 'yes' : 'no';
}
'yes' : 'no'; +} + +module.exports = { + LoincSqliteV0Importer, + constants: { + BASE_URI, + PARENT_PROPERTY_CODE, + EDGE_SET_PRIMARY, + PART_TYPE_PROPERTIES, + PART_TYPE_NORMALIZATION, + LITERAL_COLUMN_MAP + } +}; diff --git a/tx/importers/sqlite-v2/import-rxnorm-v0.js b/tx/importers/sqlite-v2/import-rxnorm-v0.js new file mode 100644 index 0000000..0d9d8ec --- /dev/null +++ b/tx/importers/sqlite-v2/import-rxnorm-v0.js @@ -0,0 +1,1343 @@ +'use strict'; + +const fs = require('fs'); +const os = require('os'); +const path = require('path'); +const readline = require('readline'); +const { execFileSync } = require('child_process'); +const sqlite3 = require('sqlite3').verbose(); + +const BASE_URI = 'http://www.nlm.nih.gov/research/umls/rxnorm'; +const IS_A_PROPERTY_CODE = 'isa'; +const TTY_PROPERTY_CODE = 'TTY'; +const STY_PROPERTY_CODE = 'STY'; + +const EDGE_SET_INFERRED = 1; + +const MAX_SQL_PARAMS = 900; +const FLUSH_ROW_TARGET = 5000; + +const TTY_PRIORITY = ['PSN', 'SCD', 'SBD', 'GPCK', 'BPCK', 'IN', 'MIN', 'PIN', 'BN']; +const PREFERRED_TTYS = new Set(['PSN', 'SCD', 'SBD']); + +const IMPORTABLE_ATNS = new Set([ + 'NDC', + 'RXN_AVAILABLE_STRENGTH', + 'RXN_HUMAN_DRUG', + 'RXN_VET_DRUG', + 'RXN_STRENGTH', + 'RXN_QUANTITY', + 'RXTERM_FORM', + 'RXN_ACTIVATED', + 'RXN_OBSOLETED', + 'RXN_QUALITATIVE_DISTINCTION', + 'RXN_BN_CARDINALITY', + 'RXN_IN_EXPRESSED_FLAG', + 'RXCUI_STATUS' +]); + +class RxNormSqliteV0Importer { + constructor(config = {}) { + this.config = { + source: config.source, + dest: config.dest, + version: normalizeVersion(config.version) || detectVersionFromPath(config.source), + uri: config.uri, + skipClosure: false, // Closure is always built — required for is-a queries + verbose: !!config.verbose, + overwrite: !!config.overwrite + }; + + if (!this.config.uri) { + this.config.uri = this.config.version ? 
`${BASE_URI}|${this.config.version}` : BASE_URI; + } + + this.db = null; + this.csId = null; + this.auditRunId = null; + + this.sourceRoot = null; + this.extractedTempDir = null; + + this.propertyIdByCode = new Map(); + this.conceptIdByCode = new Map(); + this.nextConceptId = 1; + this.isAPropertyId = null; + this.ttyPropertyId = null; + + this.stats = { + concepts: 0, + designations: 0, + relationships: 0, + literals: 0, + closureRows: 0, + ftsDisplayRows: 0, + ftsDesignationRows: 0, + ftsLiteralRows: 0 + }; + } + + static discoverRrfFiles(source) { + const files = { + rxnconso: null, + rxnrel: null, + rxnsat: null, + rxnsab: null, + rxnsty: null, + rxnatomarchive: null + }; + scanDirectoryForRrf(source, files); + return files; + } + + async run() { + if (!this.config.source || !this.config.dest) { + throw new Error('source and dest are required'); + } + + await this.prepareSource(); + const files = RxNormSqliteV0Importer.discoverRrfFiles(this.sourceRoot); + + if (!files.rxnconso) { + throw new Error('RXNCONSO.RRF was not found'); + } + + if (!this.config.version) { + this.config.version = await detectVersionFromRxnSab(files.rxnsab); + if (this.config.version && this.config.uri === BASE_URI) { + this.config.uri = `${BASE_URI}|${this.config.version}`; + } + } + + await this.openDatabase(); + await this.createSchema(); + + try { + await this.startAudit(); + await this.createCodeSystem(); + + this.log( + `Discovered files: RXNCONSO=${bool(files.rxnconso)}, RXNREL=${bool(files.rxnrel)}, ` + + `RXNSAT=${bool(files.rxnsat)}, RXNSAB=${bool(files.rxnsab)}, RXNSTY=${bool(files.rxnsty)}, ` + + `RXNATOMARCHIVE=${bool(files.rxnatomarchive)}` + ); + + await this.importConcepts(files.rxnconso); + await this.importArchivedConcepts(files.rxnatomarchive); + await this.importDesignations(files.rxnconso); + await this.importRelationships(files.rxnrel); + await this.importAttributes(files.rxnsat); + await this.importSemanticTypes(files.rxnsty); + await this.buildSearchIndexes(); + 
await this.buildClosure(); + + await this.writeCsConfig(); + await this.finalizeDatabase(); + await this.completeAudit('success', null); + } catch (error) { + await this.completeAudit('failed', error); + throw error; + } finally { + await this.closeDatabase(); + await this.cleanupSource(); + } + + return { + csId: this.csId, + uri: this.config.uri, + stats: this.stats + }; + } + + async prepareSource() { + const src = path.resolve(this.config.source); + if (!fs.existsSync(src)) { + throw new Error(`Source does not exist: ${src}`); + } + + const stat = fs.statSync(src); + if (stat.isDirectory()) { + this.sourceRoot = src; + return; + } + + if (!stat.isFile()) { + throw new Error(`Unsupported source type: ${src}`); + } + + if (!src.toLowerCase().endsWith('.zip')) { + throw new Error('Source must be an RXNORM directory or a .zip file'); + } + + this.extractedTempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'rxnorm-sqlite-v0-')); + this.log(`Extracting ${src} to ${this.extractedTempDir} ...`); + + try { + execFileSync('unzip', ['-q', src, '-d', this.extractedTempDir], { + stdio: 'pipe' + }); + } catch (error) { + throw new Error(`Failed to extract zip '${src}': ${error.message}`); + } + + this.sourceRoot = this.extractedTempDir; + } + + async cleanupSource() { + if (this.extractedTempDir && fs.existsSync(this.extractedTempDir)) { + fs.rmSync(this.extractedTempDir, { recursive: true, force: true }); + this.log(`Removed temporary extraction directory: ${this.extractedTempDir}`); + } + this.extractedTempDir = null; + this.sourceRoot = null; + } + + async openDatabase() { + const dir = path.dirname(this.config.dest); + fs.mkdirSync(dir, { recursive: true }); + + if (fs.existsSync(this.config.dest)) { + if (!this.config.overwrite) { + throw new Error(`Destination exists: ${this.config.dest} (use --overwrite)`); + } + fs.unlinkSync(this.config.dest); + } + + this.db = await openSqlite(this.config.dest); + await this.exec('PRAGMA foreign_keys = OFF'); + await this.exec('PRAGMA 
  /** Closes the database handle if one is open and clears `this.db`. */
  async closeDatabase() {
    if (!this.db) return;
    await closeSqlite(this.db);
    this.db = null;
  }

  /** Executes the DDL in `schema-v0.sql`, read from the directory of this module. */
  async createSchema() {
    const schemaPath = path.join(__dirname, 'schema-v0.sql');
    const ddl = fs.readFileSync(schemaPath, 'utf8');
    await this.exec(ddl);
  }

  /** Inserts a 'running' row into load_audit and remembers its id in `auditRunId`. */
  async startAudit() {
    const result = await this.runSql(
      `INSERT INTO load_audit (started_at, source_path, target_db, terminology, edition_code, version, status)
       VALUES (CURRENT_TIMESTAMP, ?, ?, 'rxnorm', NULL, ?, 'running')`,
      [this.config.source, this.config.dest, this.config.version || null]
    );
    this.auditRunId = result.lastID;
  }

  /**
   * Marks the audit row as finished and stores uri, version and stats as JSON.
   * Does nothing when no audit row was started.
   *
   * @param {string} status final status text
   * @param {Error|null} error failure to record; its stack is stored only in verbose mode
   */
  async completeAudit(status, error) {
    if (!this.auditRunId) return;

    const payload = {
      uri: this.config.uri,
      version: this.config.version || null,
      stats: this.stats
    };

    if (error) {
      payload.error = {
        message: error.message,
        stack: this.config.verbose ? error.stack : undefined
      };
    }

    await this.runSql(
      `UPDATE load_audit
       SET completed_at = CURRENT_TIMESTAMP,
           status = ?,
           stats_json = ?
       WHERE run_id = ?`,
      [status, JSON.stringify(payload), this.auditRunId]
    );
  }

  /**
   * Inserts the code_system row (id kept in `csId`) and defines the three
   * built-in properties: is-a (concept-valued hierarchy), TTY and STY
   * (literals). Their ids are cached on the instance and in `propertyIdByCode`.
   *
   * @throws {Error} when a property id cannot be read back after its insert
   */
  async createCodeSystem() {
    const result = await this.runSql(
      `INSERT INTO code_system (base_uri, edition_code, version, canonical_uri, name, source_kind)
       VALUES (?, NULL, ?, ?, 'RxNorm', 'rrf')`,
      [BASE_URI, this.config.version || null, this.config.uri]
    );
    this.csId = result.lastID;

    await this.runSql(
      `INSERT OR IGNORE INTO property_def (cs_id, property_code, value_kind, is_hierarchy, display)
       VALUES (?, ?, 'concept', 1, 'is-a')`,
      [this.csId, IS_A_PROPERTY_CODE]
    );
    const isARow = await this.get(
      `SELECT property_id
       FROM property_def
       WHERE cs_id = ? AND property_code = ?`,
      [this.csId, IS_A_PROPERTY_CODE]
    );
    if (!isARow) {
      throw new Error(`Unable to resolve property_id for '${IS_A_PROPERTY_CODE}'`);
    }
    this.isAPropertyId = isARow.property_id;
    this.propertyIdByCode.set(IS_A_PROPERTY_CODE, isARow.property_id);

    await this.runSql(
      `INSERT OR IGNORE INTO property_def (cs_id, property_code, value_kind, is_hierarchy, display)
       VALUES (?, ?, 'literal', 0, 'TTY')`,
      [this.csId, TTY_PROPERTY_CODE]
    );
    const ttyRow = await this.get(
      `SELECT property_id
       FROM property_def
       WHERE cs_id = ? AND property_code = ?`,
      [this.csId, TTY_PROPERTY_CODE]
    );
    if (!ttyRow) {
      throw new Error(`Unable to resolve property_id for '${TTY_PROPERTY_CODE}'`);
    }
    this.ttyPropertyId = ttyRow.property_id;
    this.propertyIdByCode.set(TTY_PROPERTY_CODE, ttyRow.property_id);

    await this.runSql(
      `INSERT OR IGNORE INTO property_def (cs_id, property_code, value_kind, is_hierarchy, display)
       VALUES (?, ?, 'literal', 0, 'STY')`,
      [this.csId, STY_PROPERTY_CODE]
    );
    const styRow = await this.get(
      `SELECT property_id
       FROM property_def
       WHERE cs_id = ? AND property_code = ?`,
      [this.csId, STY_PROPERTY_CODE]
    );
    if (!styRow) {
      throw new Error(`Unable to resolve property_id for '${STY_PROPERTY_CODE}'`);
    }
    this.styPropertyId = styRow.property_id;
    this.propertyIdByCode.set(STY_PROPERTY_CODE, styRow.property_id);
  }
AND property_code = ?`, + [this.csId, IS_A_PROPERTY_CODE] + ); + if (!isARow) { + throw new Error(`Unable to resolve property_id for '${IS_A_PROPERTY_CODE}'`); + } + this.isAPropertyId = isARow.property_id; + this.propertyIdByCode.set(IS_A_PROPERTY_CODE, isARow.property_id); + + await this.runSql( + `INSERT OR IGNORE INTO property_def (cs_id, property_code, value_kind, is_hierarchy, display) + VALUES (?, ?, 'literal', 0, 'TTY')`, + [this.csId, TTY_PROPERTY_CODE] + ); + const ttyRow = await this.get( + `SELECT property_id + FROM property_def + WHERE cs_id = ? AND property_code = ?`, + [this.csId, TTY_PROPERTY_CODE] + ); + if (!ttyRow) { + throw new Error(`Unable to resolve property_id for '${TTY_PROPERTY_CODE}'`); + } + this.ttyPropertyId = ttyRow.property_id; + this.propertyIdByCode.set(TTY_PROPERTY_CODE, ttyRow.property_id); + + await this.runSql( + `INSERT OR IGNORE INTO property_def (cs_id, property_code, value_kind, is_hierarchy, display) + VALUES (?, ?, 'literal', 0, 'STY')`, + [this.csId, STY_PROPERTY_CODE] + ); + const styRow = await this.get( + `SELECT property_id + FROM property_def + WHERE cs_id = ? AND property_code = ?`, + [this.csId, STY_PROPERTY_CODE] + ); + if (!styRow) { + throw new Error(`Unable to resolve property_id for '${STY_PROPERTY_CODE}'`); + } + this.styPropertyId = styRow.property_id; + this.propertyIdByCode.set(STY_PROPERTY_CODE, styRow.property_id); + } + + async importConcepts(rxnconsoFile) { + this.log('Importing concepts from RXNCONSO.RRF...'); + + const concepts = new Map(); + let scanned = 0; + let matched = 0; + + for await (const cols of readRrf(rxnconsoFile)) { + if (cols.length < 17) continue; + + scanned += 1; + + const rxcui = cols[0]; + const sab = cols[11]; + const tty = cols[12]; + const str = (cols[14] || '').trim(); + const suppress = cols[16]; + + if (sab !== 'RXNORM' || !rxcui) continue; + matched += 1; + + const rank = ttyRank(tty); + const active = isSuppressed(suppress) ? 
0 : 1; + const existing = concepts.get(rxcui); + if (!existing) { + const ttys = new Map(); + if (tty) { + ttys.set(tty, active); + } + concepts.set(rxcui, { + display: str || rxcui, + active, + tty: tty || null, + rank, + ttys + }); + } else { + if (active === 1) { + existing.active = 1; + } + if (tty) { + const prev = existing.ttys.get(tty) || 0; + if (!existing.ttys.has(tty) || active > prev) { + existing.ttys.set(tty, active); + } + } + + // Keep the best display according to configured TTY priority. + if ((str && rank < existing.rank) || !existing.display) { + existing.display = str || rxcui; + existing.rank = rank; + existing.tty = tty || existing.tty; + } + } + } + + this.log(` scanned rows: ${scanned.toLocaleString()}, RXNORM rows: ${matched.toLocaleString()}`); + this.log(` unique RXCUIs: ${concepts.size.toLocaleString()}`); + + const conceptRows = []; + const ttyLiteralRows = []; + let ttyLiteralCount = 0; + let imported = 0; + + for (const [rxcui, info] of concepts.entries()) { + const conceptId = this.nextConceptId++; + this.conceptIdByCode.set(rxcui, conceptId); + + conceptRows.push([ + conceptId, + this.csId, + rxcui, + info.active, + info.display || rxcui, + null + ]); + + for (const [ttyValue, ttyActive] of info.ttys.entries()) { + ttyLiteralRows.push([ + EDGE_SET_INFERRED, + conceptId, + this.ttyPropertyId, + 0, + ttyActive, + ttyValue, + ttyValue, + null, + null + ]); + ttyLiteralCount += 1; + } + + imported += 1; + + if (conceptRows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept (concept_id, cs_id, code, active, display, definition)`, + 6, + conceptRows + ); + conceptRows.length = 0; + } + + if (ttyLiteralRows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, + ttyLiteralRows + ); + ttyLiteralRows.length = 0; + } + } + + if (conceptRows.length > 0) { + await 
this.bulkInsert( + `INSERT INTO concept (concept_id, cs_id, code, active, display, definition)`, + 6, + conceptRows + ); + } + if (ttyLiteralRows.length > 0) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, + ttyLiteralRows + ); + } + + this.stats.concepts = imported; + this.stats.literals += ttyLiteralCount; + this.log(`Concept import complete: ${imported.toLocaleString()} concepts`); + } + + async importArchivedConcepts(archiveFile) { + if (!archiveFile) { + this.log('RXNATOMARCHIVE.RRF not found — skipping archived concept import'); + return; + } + this.log('Importing archived concepts from RXNATOMARCHIVE.RRF...'); + + // RXNATOMARCHIVE columns: RXAUI[0] AUI[1] STR[2] ARCHIVE_TIMESTAMP[3] + // CREATED_TIMESTAMP[4] UPDATED_TIMESTAMP[5] CODE[6] IS_BRAND[7] LAT[8] + // LAST_RELEASED[9] SAUI[10] VSAB[11] RXCUI[12] SAB[13] TTY[14] MERGED_TO_RXCUI[15] + + // First pass: collect best display per RXCUI not already in concept table + const archived = new Map(); + let scanned = 0; + + for await (const cols of readRrf(archiveFile)) { + if (cols.length < 16) continue; + scanned += 1; + + const sab = cols[13]; + if (sab !== 'RXNORM') continue; + + const rxcui = cols[12]; + if (!rxcui || this.conceptIdByCode.has(rxcui)) continue; + + const str = (cols[2] || '').trim(); + const tty = cols[14] || null; + const rank = ttyRank(tty); + + const existing = archived.get(rxcui); + if (!existing || rank < existing.rank) { + archived.set(rxcui, { display: str || rxcui, tty, rank }); + } + } + + if (archived.size === 0) { + this.log(` scanned ${scanned.toLocaleString()} rows, no new archived concepts`); + return; + } + + const conceptRows = []; + const ttyLiteralRows = []; + let imported = 0; + + for (const [rxcui, info] of archived.entries()) { + const conceptId = this.nextConceptId++; + this.conceptIdByCode.set(rxcui, conceptId); + + conceptRows.push([conceptId, 
this.csId, rxcui, 0, info.display, null]); + + if (info.tty) { + ttyLiteralRows.push([ + EDGE_SET_INFERRED, conceptId, this.ttyPropertyId, 0, 0, + info.tty, info.tty, null, null + ]); + } + imported += 1; + + if (conceptRows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept (concept_id, cs_id, code, active, display, definition)`, + 6, conceptRows + ); + conceptRows.length = 0; + } + if (ttyLiteralRows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, ttyLiteralRows + ); + ttyLiteralRows.length = 0; + } + } + + if (conceptRows.length > 0) { + await this.bulkInsert( + `INSERT INTO concept (concept_id, cs_id, code, active, display, definition)`, + 6, conceptRows + ); + } + if (ttyLiteralRows.length > 0) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, ttyLiteralRows + ); + } + + this.stats.concepts += imported; + this.stats.literals += ttyLiteralRows.length; + this.log(`Archived concept import complete: ${imported.toLocaleString()} concepts from ${scanned.toLocaleString()} archive rows`); + } + + async importDesignations(rxnconsoFile) { + this.log('Importing designations from RXNCONSO.RRF...'); + + const rows = []; + let imported = 0; + + for await (const cols of readRrf(rxnconsoFile)) { + if (cols.length < 17) continue; + + const rxcui = cols[0]; + const sab = cols[11]; + const tty = cols[12]; + const str = (cols[14] || '').trim(); + const suppress = cols[16]; + + if (sab !== 'RXNORM' || !rxcui || !str) continue; + + const conceptId = this.conceptIdByCode.get(rxcui); + if (!conceptId) continue; + + rows.push([ + conceptId, + isSuppressed(suppress) ? 0 : 1, + 'en', + tty || null, + str, + PREFERRED_TTYS.has(tty) ? 
  /**
   * Imports SAB=RXNORM rows of RXNREL.RRF that carry a RELA as concept links.
   *
   * The file is read twice: once to register every RELA as a concept-valued
   * property, once to insert the links. The link source is column 4 and the
   * target is column 0. Rows referring to an unknown concept or property are
   * counted as skipped.
   *
   * @param {string|null} rxnrelFile path of RXNREL.RRF, or null to skip
   */
  async importRelationships(rxnrelFile) {
    if (!rxnrelFile) {
      this.log('RXNREL.RRF not found; skipping relationships');
      return;
    }

    this.log('Importing relationships from RXNREL.RRF...');

    const relaCodes = new Set();
    for await (const cols of readRrf(rxnrelFile)) {
      if (cols.length < 15) continue;
      const sab = cols[10];
      const rela = cols[7];
      if (sab === 'RXNORM' && rela) {
        relaCodes.add(rela);
      }
    }

    for (const rela of relaCodes) {
      await this.ensureProperty(rela, 'concept', rela === IS_A_PROPERTY_CODE ? 1 : 0);
    }

    const rows = [];
    let imported = 0;
    let skipped = 0;

    for await (const cols of readRrf(rxnrelFile)) {
      if (cols.length < 15) continue;

      const rxcui1 = cols[0];
      const rxcui2 = cols[4];
      const rela = cols[7];
      const sab = cols[10];
      const suppress = cols[14];

      if (sab !== 'RXNORM' || !rela) {
        skipped += 1;
        continue;
      }

      const sourceConceptId = this.conceptIdByCode.get(rxcui2);
      const targetConceptId = this.conceptIdByCode.get(rxcui1);
      const propertyId = this.propertyIdByCode.get(rela);

      if (!sourceConceptId || !targetConceptId || !propertyId) {
        skipped += 1;
        continue;
      }

      rows.push([
        EDGE_SET_INFERRED,
        sourceConceptId,
        propertyId,
        targetConceptId,
        0,
        isSuppressed(suppress) ? 0 : 1
      ]);
      // NOTE(review): counts rows handed to INSERT OR IGNORE; rows the database
      // ignores are still included in this number.
      imported += 1;

      if (rows.length >= FLUSH_ROW_TARGET) {
        await this.bulkInsert(
          `INSERT OR IGNORE INTO concept_link (edge_set_id, source_concept_id, property_id, target_concept_id, group_id, active)`,
          6,
          rows
        );
        rows.length = 0;
      }
    }

    if (rows.length > 0) {
      await this.bulkInsert(
        `INSERT OR IGNORE INTO concept_link (edge_set_id, source_concept_id, property_id, target_concept_id, group_id, active)`,
        6,
        rows
      );
    }

    this.stats.relationships = imported;
    this.log(`Relationship import complete: ${imported.toLocaleString()} rows (skipped ${skipped.toLocaleString()})`);
  }

  /**
   * Imports SAB=RXNORM attributes of RXNSAT.RRF whose name is in
   * IMPORTABLE_ATNS as concept literals.
   *
   * The file is read twice: once to register the attribute names as literal
   * properties, once to insert the values. Empty values and rows for unknown
   * concepts are skipped.
   *
   * @param {string|null} rxnsatFile path of RXNSAT.RRF, or null to skip
   */
  async importAttributes(rxnsatFile) {
    if (!rxnsatFile) {
      this.log('RXNSAT.RRF not found; skipping attributes');
      return;
    }

    this.log('Importing attributes from RXNSAT.RRF...');

    const atnCodes = new Set();
    for await (const cols of readRrf(rxnsatFile)) {
      if (cols.length < 13) continue;
      const atn = cols[8];
      const sab = cols[9];
      if (sab === 'RXNORM' && atn && IMPORTABLE_ATNS.has(atn)) {
        atnCodes.add(atn);
      }
    }

    for (const atn of atnCodes) {
      await this.ensureProperty(atn, 'literal', 0);
    }

    const rows = [];
    let imported = 0;

    for await (const cols of readRrf(rxnsatFile)) {
      if (cols.length < 13) continue;

      const rxcui = cols[0];
      const atn = cols[8];
      const sab = cols[9];
      const atv = (cols[10] || '').trim();
      const suppress = cols[11];

      if (sab !== 'RXNORM' || !IMPORTABLE_ATNS.has(atn) || !atv) continue;

      const conceptId = this.conceptIdByCode.get(rxcui);
      const propertyId = this.propertyIdByCode.get(atn);
      if (!conceptId || !propertyId) continue;

      const value = parseAttributeLiteral(atv);
      rows.push([
        EDGE_SET_INFERRED,
        conceptId,
        propertyId,
        0,
        isSuppressed(suppress) ? 0 : 1,
        atv,
        value.valueText,
        value.valueNum,
        value.valueBool
      ]);
      imported += 1;

      if (rows.length >= FLUSH_ROW_TARGET) {
        await this.bulkInsert(
          `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`,
          9,
          rows
        );
        rows.length = 0;
      }
    }

    if (rows.length > 0) {
      await this.bulkInsert(
        `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`,
        9,
        rows
      );
    }

    this.stats.literals += imported;
    this.log(`Attribute import complete: ${imported.toLocaleString()} rows`);
  }
0 : 1, + atv, + value.valueText, + value.valueNum, + value.valueBool + ]); + imported += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, + rows + ); + rows.length = 0; + } + } + + if (rows.length > 0) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, + rows + ); + } + + this.stats.literals += imported; + this.log(`Attribute import complete: ${imported.toLocaleString()} rows`); + } + + async importSemanticTypes(rxnstyFile) { + if (!rxnstyFile) { + this.log('RXNSTY.RRF not found; skipping semantic types'); + return; + } + + this.log('Importing semantic types from RXNSTY.RRF...'); + + const rows = []; + let imported = 0; + + for await (const cols of readRrf(rxnstyFile)) { + if (cols.length < 4) continue; + + const rxcui = cols[0]; + const tui = cols[1]; + if (!rxcui || !tui) continue; + + const conceptId = this.conceptIdByCode.get(rxcui); + if (!conceptId) continue; + + rows.push([ + EDGE_SET_INFERRED, + conceptId, + this.styPropertyId, + 0, + 1, + tui, + tui, + null, + null + ]); + imported += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, + rows + ); + rows.length = 0; + } + } + + if (rows.length > 0) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, + rows + ); + } + + this.stats.literals += imported; + this.log(`Semantic type import complete: ${imported.toLocaleString()} rows`); + } + + async buildClosure() { + this.log('Building transitive closure (is-a)...'); + if 
(!this.isAPropertyId) { + this.log('No is-a property id found; skipping closure'); + return; + } + + await this.exec('BEGIN TRANSACTION'); + try { + await this.exec('DELETE FROM closure'); + + await this.exec(` + CREATE TEMP TABLE IF NOT EXISTS _closure_frontier ( + ancestor_id INTEGER NOT NULL, + descendant_id INTEGER NOT NULL, + depth INTEGER NOT NULL, + PRIMARY KEY (ancestor_id, descendant_id) + ) WITHOUT ROWID; + + CREATE TEMP TABLE IF NOT EXISTS _closure_next ( + ancestor_id INTEGER NOT NULL, + descendant_id INTEGER NOT NULL, + depth INTEGER NOT NULL, + PRIMARY KEY (ancestor_id, descendant_id) + ) WITHOUT ROWID; + + CREATE INDEX IF NOT EXISTS _idx_closure_frontier_desc + ON _closure_frontier(descendant_id, ancestor_id); + `); + + await this.exec('DELETE FROM _closure_frontier'); + await this.exec('DELETE FROM _closure_next'); + + await this.runSql( + `INSERT OR IGNORE INTO closure (ancestor_id, descendant_id) + SELECT concept_id, concept_id + FROM concept + WHERE cs_id = ?`, + [this.csId] + ); + + await this.runSql( + `INSERT OR IGNORE INTO closure (ancestor_id, descendant_id) + SELECT l.target_concept_id, l.source_concept_id + FROM concept_link l + JOIN concept c ON c.concept_id = l.source_concept_id + WHERE c.cs_id = ? + AND l.active = 1 + AND l.property_id = ? + AND l.edge_set_id = ?`, + [this.csId, this.isAPropertyId, EDGE_SET_INFERRED] + ); + + await this.runSql( + `INSERT OR IGNORE INTO _closure_frontier (ancestor_id, descendant_id, depth) + SELECT l.target_concept_id, l.source_concept_id, 1 + FROM concept_link l + JOIN concept c ON c.concept_id = l.source_concept_id + WHERE c.cs_id = ? + AND l.active = 1 + AND l.property_id = ? 
+ AND l.edge_set_id = ?`, + [this.csId, this.isAPropertyId, EDGE_SET_INFERRED] + ); + + let iteration = 0; + let cumulativeNew = 0; + let hasNext = true; + while (hasNext) { + await this.exec('DELETE FROM _closure_next'); + + await this.runSql( + `INSERT OR IGNORE INTO _closure_next (ancestor_id, descendant_id, depth) + SELECT f.ancestor_id, l.source_concept_id, f.depth + 1 + FROM _closure_frontier f + JOIN concept_link l + ON l.property_id = ? + AND l.edge_set_id = ? + AND l.active = 1 + AND l.target_concept_id = f.descendant_id + WHERE NOT EXISTS ( + SELECT 1 + FROM closure c + WHERE c.ancestor_id = f.ancestor_id + AND c.descendant_id = l.source_concept_id + )`, + [this.isAPropertyId, EDGE_SET_INFERRED] + ); + + const nextRow = await this.get('SELECT COUNT(*) AS n FROM _closure_next', []); + const nextCount = nextRow ? nextRow.n : 0; + if (nextCount === 0) { + hasNext = false; + if (this.config.verbose) { + this.log(` closure iteration ${iteration + 1}: +0 rows`); + } + break; + } + + await this.runSql( + `INSERT OR IGNORE INTO closure (ancestor_id, descendant_id) + SELECT ancestor_id, descendant_id + FROM _closure_next`, + [] + ); + + await this.exec('DELETE FROM _closure_frontier'); + await this.exec( + `INSERT OR IGNORE INTO _closure_frontier (ancestor_id, descendant_id, depth) + SELECT ancestor_id, descendant_id, depth + FROM _closure_next` + ); + + iteration += 1; + cumulativeNew += nextCount; + if (this.config.verbose || iteration % 5 === 0) { + this.log(` closure iteration ${iteration}: +${nextCount.toLocaleString()} rows (cumulative ${cumulativeNew.toLocaleString()})`); + } + } + + await this.exec('DELETE FROM _closure_frontier'); + await this.exec('DELETE FROM _closure_next'); + + await this.exec('COMMIT'); + } catch (error) { + await this.exec('ROLLBACK'); + throw error; + } + + const row = await this.get('SELECT COUNT(*) AS n FROM closure', []); + this.stats.closureRows = row ? 
row.n : 0; + this.log(`Closure complete: ${this.stats.closureRows.toLocaleString()} rows`); + } + + async buildSearchIndexes() { + this.log('Building broad text search indexes (display/designation/literal)...'); + + await this.exec('BEGIN TRANSACTION'); + try { + await this.exec('DELETE FROM search_fts_display'); + await this.exec('DELETE FROM search_fts_designation'); + await this.exec('DELETE FROM search_fts_literal'); + + const display = await this.runSql( + `INSERT INTO search_fts_display(rowid, term) + SELECT concept_id, trim(display) + FROM concept + WHERE cs_id = ? + AND display IS NOT NULL + AND trim(display) <> ''`, + [this.csId] + ); + + const designation = await this.runSql( + `INSERT INTO search_fts_designation(rowid, term) + SELECT d.designation_id, trim(d.term) + FROM designation d + JOIN concept c ON c.concept_id = d.concept_id + WHERE c.cs_id = ? + AND d.term IS NOT NULL + AND trim(d.term) <> ''`, + [this.csId] + ); + + const literal = await this.runSql( + `INSERT INTO search_fts_literal(rowid, term) + SELECT literal_id, txt + FROM ( + SELECT cl.literal_id AS literal_id, + trim(COALESCE(NULLIF(cl.value_text, ''), NULLIF(cl.value_raw, ''))) AS txt + FROM concept_literal cl + JOIN concept c ON c.concept_id = cl.source_concept_id + WHERE c.cs_id = ? 
+ ) x + WHERE txt IS NOT NULL + AND txt <> ''`, + [this.csId] + ); + + await this.exec(`INSERT INTO search_fts_display(search_fts_display) VALUES ('optimize')`); + await this.exec(`INSERT INTO search_fts_designation(search_fts_designation) VALUES ('optimize')`); + await this.exec(`INSERT INTO search_fts_literal(search_fts_literal) VALUES ('optimize')`); + + await this.exec('COMMIT'); + + this.stats.ftsDisplayRows = display.changes || 0; + this.stats.ftsDesignationRows = designation.changes || 0; + this.stats.ftsLiteralRows = literal.changes || 0; + + this.log( + `Search index complete: display=${this.stats.ftsDisplayRows.toLocaleString()}, ` + + `designation=${this.stats.ftsDesignationRows.toLocaleString()}, ` + + `literal=${this.stats.ftsLiteralRows.toLocaleString()}` + ); + } catch (error) { + await this.exec('ROLLBACK'); + throw error; + } + } + + async writeCsConfig() { + const runtimeSearch = { + mode: 'fts-broad', + activeOnly: true, + designationActiveOnly: true, + literalActiveOnly: true, + sources: ['display', 'designation', 'literal'], + ftsTables: { + display: 'search_fts_display', + designation: 'search_fts_designation', + literal: 'search_fts_literal' + }, + likeFallback: { enabled: true, caseInsensitive: true } + }; + + const runtimeFilters = { + concept: { operators: ['=', 'is-a', 'descendent-of'] }, + code: { operators: ['regex'] }, + properties: { + aliases: { + tty: TTY_PROPERTY_CODE, + TTY: TTY_PROPERTY_CODE, + sty: STY_PROPERTY_CODE + }, + byCode: { + [TTY_PROPERTY_CODE]: { + operators: ['=', 'in'], + sources: ['literal'], + value: { + normalizeCase: true + } + }, + [STY_PROPERTY_CODE]: { + operators: ['=', 'in'], + sources: ['literal'] + } + } + } + }; + + const runtimeDesignations = { + defaultSystem: BASE_URI, + useMapping: { + PSN: { system: BASE_URI, code: 'PSN', display: 'Prescribable Name' }, + SCD: { system: BASE_URI, code: 'SCD', display: 'Semantic Clinical Drug' }, + SBD: { system: BASE_URI, code: 'SBD', display: 'Semantic Branded 
Drug' }, + SY: { system: BASE_URI, code: 'SY', display: 'Synonym' } + } + }; + + const configRows = [ + ['runtime.versioning', JSON.stringify({ algorithm: 'string', partialMatch: false })], + ['runtime.languages', JSON.stringify({ default: 'en' })], + ['runtime.designations', JSON.stringify(runtimeDesignations)], + ['runtime.hierarchy', JSON.stringify({ + propertyCode: IS_A_PROPERTY_CODE, + edgeSetId: EDGE_SET_INFERRED, + closure: { enabled: true, fallbackRecursive: false } + })], + ['runtime.filters', JSON.stringify(runtimeFilters)], + ['runtime.implicitValueSets', JSON.stringify({ + all: { queries: ['fhir_vs', 'fhir_vs=all'] }, + isa: { queryPrefix: 'fhir_vs=isa/', filter: { property: 'concept', op: 'is-a', valueFromSuffix: true } } + })], + ['runtime.status', JSON.stringify({ + inactive: { source: 'concept.active', invert: true }, + deprecated: { source: 'constant', value: false }, + abstract: { source: 'constant', value: false } + })], + ['runtime.search', JSON.stringify(runtimeSearch)], + ['runtime.behaviorFlags', JSON.stringify({ + tags: ['rxnorm'] + })] + ]; + + for (const [key, value] of configRows) { + await this.runSql( + `INSERT OR REPLACE INTO cs_config (cs_id, key, value) + VALUES (?, ?, ?)`, + [this.csId, key, typeof value === 'string' ? 
value : JSON.stringify(value)] + ); + } + } + + async finalizeDatabase() { + this.log('Finalizing SQLite database...'); + await this.exec('ANALYZE'); + await this.exec('PRAGMA journal_mode = DELETE'); + await this.exec('PRAGMA synchronous = NORMAL'); + await this.exec('VACUUM'); + } + + async ensureProperty(propertyCode, valueKind, isHierarchy) { + if (!propertyCode) return null; + if (this.propertyIdByCode.has(propertyCode)) { + return this.propertyIdByCode.get(propertyCode); + } + + await this.runSql( + `INSERT OR IGNORE INTO property_def (cs_id, property_code, value_kind, is_hierarchy) + VALUES (?, ?, ?, ?)`, + [this.csId, propertyCode, valueKind, isHierarchy] + ); + + const row = await this.get( + `SELECT property_id + FROM property_def + WHERE cs_id = ? AND property_code = ?`, + [this.csId, propertyCode] + ); + if (!row) return null; + + this.propertyIdByCode.set(propertyCode, row.property_id); + if (propertyCode === IS_A_PROPERTY_CODE) { + this.isAPropertyId = row.property_id; + } + return row.property_id; + } + + async bulkInsert(sqlPrefix, columnCount, rows) { + if (!rows.length) return; + + const chunkSize = Math.max(1, Math.floor(MAX_SQL_PARAMS / columnCount)); + await this.exec('BEGIN TRANSACTION'); + + try { + for (let i = 0; i < rows.length; i += chunkSize) { + const chunk = rows.slice(i, i + chunkSize); + const placeholders = chunk.map(() => `(${new Array(columnCount).fill('?').join(',')})`).join(','); + const flat = []; + for (const row of chunk) { + for (const value of row) flat.push(value); + } + await this.runSql(`${sqlPrefix} VALUES ${placeholders}`, flat); + } + + await this.exec('COMMIT'); + } catch (error) { + await this.exec('ROLLBACK'); + throw error; + } + } + + async runSql(sql, params = []) { + return new Promise((resolve, reject) => { + this.db.run(sql, params, function onRun(err) { + if (err) { + reject(err); + } else { + resolve({ changes: this.changes || 0, lastID: this.lastID }); + } + }); + }); + } + + async get(sql, params = []) { 
/**
 * Extracts an 8-digit release stamp from a file or directory path.
 *
 * A `RxNorm_full_<8 digits>` segment (case-insensitive, `_` or `-` separators)
 * wins; otherwise the first standalone run of exactly eight digits is used.
 * Runs of more than eight digits (for example millisecond timestamps in a
 * temp directory name) are ignored instead of being truncated to a bogus stamp.
 *
 * @param {*} value path-like value; converted with String()
 * @returns {string|null} the eight digits as found in the path, or null when none is present
 */
function detectVersionFromPath(value) {
  if (!value) return null;
  const text = String(value);

  const specific = text.match(/RxNorm[_-]full[_-](\d{8})(?!\d)/i);
  if (specific) {
    return specific[1];
  }

  // Digit boundaries on both sides: only a run of exactly eight digits counts.
  const generic = text.match(/(?<!\d)(\d{8})(?!\d)/);
  return generic ? generic[1] : null;
}
/**
 * Streams an RRF file line by line, yielding each non-empty line split on `|`.
 *
 * The reader and the underlying file stream are released in a `finally`
 * block, so a consumer that stops early (break / return / throw inside its
 * `for await` loop) does not leave the file descriptor open.
 *
 * @param {string} filePath path of the RRF file to read
 * @yields {string[]} the pipe-separated columns of one line
 */
async function* readRrf(filePath) {
  const stream = fs.createReadStream(filePath);
  const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });

  try {
    for await (const line of rl) {
      if (!line) continue;
      yield line.split('|');
    }
  } finally {
    // Closing the readline interface does not close its input stream.
    rl.close();
    stream.destroy();
  }
}
// Base URI of the code system; the importer derives the canonical URI as
// `${BASE_URI}/{edition}/version/{version}` when no explicit uri is configured.
const BASE_URI = 'http://snomed.info/sct';

// Relationship type stored as the hierarchy property and used to build the closure.
const IS_A_TYPE_ID = '116680003';
// Description types that mapUseCode() translates to the 'fsn' and 'synonym' use codes.
const FSN_TYPE_ID = '900000000000003001';
const SYNONYM_TYPE_ID = '900000000000013009';

// characteristicTypeId values; edgeSetIdFromCharacteristic() maps each one to
// the EDGE_SET_* id below.
const CHAR_INFERRED = '900000000000011006';
const CHAR_STATED = '900000000000010007';
const CHAR_ADDITIONAL = '900000000000227009';

// acceptabilityId value that marks a language refset row as "preferred".
const ACCEPTABILITY_PREFERRED = '900000000000548007';

// edge_set_id values written to concept_link / concept_literal rows.
const EDGE_SET_INFERRED = 1;
const EDGE_SET_STATED = 2;
const EDGE_SET_ADDITIONAL = 3;

// Upper bound on bound parameters per INSERT; bulkInsert() sizes its chunks from it.
// NOTE(review): presumably chosen to stay under SQLite's host-parameter limit - confirm.
const MAX_SQL_PARAMS = 900;
// Number of buffered rows after which the import loops flush to the database.
const FLUSH_ROW_TARGET = 5000;
ftsDesignationRows: 0, + ftsLiteralRows: 0 + }; + + const parsed = parseEditionAndVersion(this.config.uri); + if (parsed.edition && !config.edition) { + this.config.edition = parsed.edition; + } + if (parsed.version && !config.version) { + this.config.version = parsed.version; + } + if (!this.config.uri && this.config.edition && this.config.version) { + this.config.uri = `${BASE_URI}/${this.config.edition}/version/${this.config.version}`; + } + } + + static discoverRf2Files(source, { snapshotOnly = true } = {}) { + const files = { + concepts: [], + descriptions: [], + relationships: [], + concreteValues: [], + languageRefsets: [], + refsets: [] + }; + + scanDirectory(source, files, snapshotOnly); + return files; + } + + async run() { + if (!this.config.source || !this.config.dest) { + throw new Error('source and dest are required'); + } + if (!this.config.uri) { + throw new Error('Either uri or (edition + version) is required'); + } + if (!this.config.version) { + throw new Error('Version (YYYYMMDD) is required for v0 imports'); + } + + await this.openDatabase(); + await this.createSchema(); + + try { + await this.startAudit(); + await this.createCodeSystem(); + + const files = SnomedSqliteV0Importer.discoverRf2Files(this.config.source, { + snapshotOnly: this.config.snapshotOnly + }); + + this.log(`Discovered files: concepts=${files.concepts.length}, descriptions=${files.descriptions.length}, relationships=${files.relationships.length}, concrete=${files.concreteValues.length}, languageRefsets=${files.languageRefsets.length}, refsets=${files.refsets.length}`); + + if (files.concepts.length === 0) { + throw new Error('No concept Snapshot files found'); + } + if (files.descriptions.length === 0) { + throw new Error('No description Snapshot files found'); + } + + await this.importLanguagePreferences(files.languageRefsets); + await this.importConcepts(files.concepts); + await this.importDescriptions(files.descriptions); + await this.deriveConceptDisplays(); + await 
this.importRelationships(files.relationships); + await this.importConcreteValues(files.concreteValues); + + if (!this.config.skipRefsets) { + await this.importRefsets(files.refsets); + } + + await this.buildSearchIndexes(); + + if (!this.config.skipClosure) { + await this.buildClosure(); + } + + await this.writeCsConfig(); + await this.finalizeDatabase(); + await this.completeAudit('success', null); + } catch (error) { + await this.completeAudit('failed', error); + throw error; + } finally { + await this.closeDatabase(); + } + + return { + csId: this.csId, + uri: this.config.uri, + stats: this.stats + }; + } + + async openDatabase() { + const dir = path.dirname(this.config.dest); + fs.mkdirSync(dir, { recursive: true }); + + if (fs.existsSync(this.config.dest)) { + if (!this.config.overwrite) { + throw new Error(`Destination exists: ${this.config.dest} (use --overwrite)`); + } + fs.unlinkSync(this.config.dest); + } + + this.db = await openSqlite(this.config.dest); + + await this.exec('PRAGMA foreign_keys = OFF'); + await this.exec('PRAGMA journal_mode = WAL'); + await this.exec('PRAGMA synchronous = OFF'); + await this.exec('PRAGMA cache_size = -64000'); + await this.exec('PRAGMA temp_store = MEMORY'); + } + + async closeDatabase() { + if (!this.db) return; + await closeSqlite(this.db); + this.db = null; + } + + async createSchema() { + const schemaPath = path.join(__dirname, 'schema-v0.sql'); + const ddl = fs.readFileSync(schemaPath, 'utf8'); + await this.exec(ddl); + } + + async startAudit() { + const result = await this.runSql( + `INSERT INTO load_audit (started_at, source_path, target_db, terminology, edition_code, version, status) + VALUES (CURRENT_TIMESTAMP, ?, ?, 'snomed', ?, ?, 'running')`, + [this.config.source, this.config.dest, this.config.edition || null, this.config.version || null] + ); + this.auditRunId = result.lastID; + } + + async completeAudit(status, error) { + if (!this.auditRunId) return; + + const payload = { + uri: this.config.uri, + stats: 
this.stats + }; + + if (error) { + payload.error = { + message: error.message, + stack: this.config.verbose ? error.stack : undefined + }; + } + + await this.runSql( + `UPDATE load_audit + SET completed_at = CURRENT_TIMESTAMP, + status = ?, + stats_json = ? + WHERE run_id = ?`, + [status, JSON.stringify(payload), this.auditRunId] + ); + } + + async createCodeSystem() { + const result = await this.runSql( + `INSERT INTO code_system (base_uri, edition_code, version, canonical_uri, name, source_kind) + VALUES (?, ?, ?, ?, ?, ?)`, + [ + BASE_URI, + this.config.edition || null, + this.config.version || null, + this.config.uri, + snomedName(this.config.edition), + 'rf2-snapshot' + ] + ); + + this.csId = result.lastID; + + await this.runSql( + `INSERT OR IGNORE INTO property_def (cs_id, property_code, value_kind, is_hierarchy, display) + VALUES (?, ?, 'concept', 1, 'is-a')`, + [this.csId, IS_A_TYPE_ID] + ); + this.seenPropertyCodes.add(IS_A_TYPE_ID); + const isARow = await this.get( + `SELECT property_id + FROM property_def + WHERE cs_id = ? 
AND property_code = ?`, + [this.csId, IS_A_TYPE_ID] + ); + if (!isARow) { + throw new Error(`Unable to resolve property_id for ${IS_A_TYPE_ID}`); + } + this.propertyIdByCode.set(IS_A_TYPE_ID, isARow.property_id); + this.isAPropertyId = isARow.property_id; + } + + async importLanguagePreferences(files) { + if (!files || files.length === 0) { + this.log('No language refset files found; preferred flags will be limited'); + return; + } + + this.log(`Importing language preference markers from ${files.length} files...`); + + let count = 0; + for (const file of files) { + for await (const cols of readTsv(file)) { + if (cols.length < 7) continue; + const active = cols[2] === '1'; + const descriptionId = cols[5]; + const acceptabilityId = cols[6]; + + if (!active) continue; + if (acceptabilityId !== ACCEPTABILITY_PREFERRED) continue; + + this.preferredDescriptions.add(descriptionId); + count += 1; + } + } + + this.log(`Captured ${this.preferredDescriptions.size.toLocaleString()} preferred description ids (${count.toLocaleString()} active rows)`); + } + + async importConcepts(files) { + this.log(`Importing concepts from ${files.length} files...`); + + const rows = []; + let imported = 0; + + for (const file of files) { + for await (const cols of readTsv(file)) { + if (cols.length < 5) continue; + + const code = cols[0]; + const active = cols[2] === '1' ? 
1 : 0; + const conceptId = this.nextConceptId++; + + rows.push([ + conceptId, + this.csId, + code, + active, + null, + null + ]); + + this.conceptIdByCode.set(code, conceptId); + imported += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept (concept_id, cs_id, code, active, display, definition)`, + 6, + rows + ); + rows.length = 0; + this.log(` concepts imported: ${imported.toLocaleString()}`); + } + } + } + + if (rows.length > 0) { + await this.bulkInsert( + `INSERT INTO concept (concept_id, cs_id, code, active, display, definition)`, + 6, + rows + ); + } + + this.stats.concepts = imported; + this.log(`Concept import complete: ${imported.toLocaleString()}`); + } + + async importDescriptions(files) { + this.log(`Importing descriptions from ${files.length} files...`); + + const rows = []; + let imported = 0; + + for (const file of files) { + for await (const cols of readTsv(file)) { + if (cols.length < 9) continue; + + const descriptionId = cols[0]; + const active = cols[2] === '1' ? 1 : 0; + const conceptCode = cols[4]; + const languageCode = cols[5] || null; + const typeId = cols[6] || null; + const term = cols[7] || ''; + const conceptId = this.conceptIdByCode.get(conceptCode); + + if (!conceptId) continue; + + const useCode = mapUseCode(typeId); + const preferred = (typeId === FSN_TYPE_ID || this.preferredDescriptions.has(descriptionId)) ? 
1 : 0; + + rows.push([ + conceptId, + active, + languageCode, + useCode, + term, + preferred + ]); + + imported += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO designation (concept_id, active, language_code, use_code, term, preferred)`, + 6, + rows + ); + rows.length = 0; + this.log(` descriptions imported: ${imported.toLocaleString()}`); + } + } + } + + if (rows.length > 0) { + await this.bulkInsert( + `INSERT INTO designation (concept_id, active, language_code, use_code, term, preferred)`, + 6, + rows + ); + } + + this.stats.descriptions = imported; + this.log(`Description import complete: ${imported.toLocaleString()}`); + } + + async deriveConceptDisplays() { + this.log('Deriving concept display values from designations...'); + + await this.runSql( + `UPDATE concept + SET display = COALESCE( + ( + SELECT d.term + FROM designation d + WHERE d.concept_id = concept.concept_id + AND d.active = 1 + ORDER BY d.designation_id ASC + LIMIT 1 + ), + concept.code + ) + WHERE cs_id = ?`, + [this.csId] + ); + } + + async importRelationships(files) { + this.log(`Importing relationships from ${files.length} files...`); + + const rows = []; + let imported = 0; + + for (const file of files) { + for await (const cols of readTsv(file)) { + if (cols.length < 10) continue; + + const active = cols[2] === '1' ? 1 : 0; + const sourceCode = cols[4]; + const targetCode = cols[5]; + const groupId = parseInt(cols[6], 10) || 0; + const typeId = cols[7] || null; + const characteristicTypeId = cols[8] || null; + const sourceConceptId = this.conceptIdByCode.get(sourceCode); + const targetConceptId = this.conceptIdByCode.get(targetCode); + if (!sourceConceptId || !targetConceptId) continue; + + const propertyId = await this.ensureProperty(typeId, 'concept', typeId === IS_A_TYPE_ID ? 
1 : 0); + if (!propertyId) continue; + + rows.push([ + edgeSetIdFromCharacteristic(characteristicTypeId), + sourceConceptId, + propertyId, + targetConceptId, + groupId, + active, + ]); + + imported += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept_link (edge_set_id, source_concept_id, property_id, target_concept_id, group_id, active)`, + 6, + rows + ); + rows.length = 0; + this.log(` relationships imported: ${imported.toLocaleString()}`); + } + } + } + + if (rows.length > 0) { + await this.bulkInsert( + `INSERT INTO concept_link (edge_set_id, source_concept_id, property_id, target_concept_id, group_id, active)`, + 6, + rows + ); + } + + this.stats.relationships = imported; + this.log(`Relationship import complete: ${imported.toLocaleString()}`); + } + + async importConcreteValues(files) { + if (!files || files.length === 0) { + this.log('No concrete value files found; skipping'); + return; + } + + this.log(`Importing concrete values from ${files.length} files...`); + + const rows = []; + let imported = 0; + + for (const file of files) { + for await (const cols of readTsv(file)) { + if (cols.length < 10) continue; + + const active = cols[2] === '1' ? 
1 : 0; + const sourceCode = cols[4]; + const rawValue = cols[5]; + const groupId = parseInt(cols[6], 10) || 0; + const typeId = cols[7] || null; + const characteristicTypeId = cols[8] || null; + const sourceConceptId = this.conceptIdByCode.get(sourceCode); + if (!sourceConceptId) continue; + + const propertyId = await this.ensureProperty(typeId, 'literal', 0); + if (!propertyId) continue; + + const parsed = parseConcreteValue(rawValue); + + rows.push([ + edgeSetIdFromCharacteristic(characteristicTypeId), + sourceConceptId, + propertyId, + groupId, + active, + rawValue, + parsed.valueText, + parsed.valueNum, + parsed.valueBool, + ]); + + imported += 1; + + if (rows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, + rows + ); + rows.length = 0; + this.log(` concrete values imported: ${imported.toLocaleString()}`); + } + } + } + + if (rows.length > 0) { + await this.bulkInsert( + `INSERT INTO concept_literal (edge_set_id, source_concept_id, property_id, group_id, active, value_raw, value_text, value_num, value_bool)`, + 9, + rows + ); + } + + this.stats.concreteValues = imported; + this.log(`Concrete value import complete: ${imported.toLocaleString()}`); + } + + async importRefsets(files) { + if (!files || files.length === 0) { + this.log('No refset files found; skipping'); + return; + } + + this.log(`Importing refsets from ${files.length} files...`); + + const memberRows = []; + const seenRefsets = new Map(); + let memberCount = 0; + + for (const file of files) { + for await (const cols of readTsv(file)) { + if (cols.length < 6) continue; + + const active = cols[2] === '1' ? 
1 : 0; + if (!active) continue; + + const refsetId = cols[4]; + const componentId = cols[5]; + const conceptId = this.conceptIdByCode.get(componentId); + + if (!refsetId || !componentId) continue; + if (!conceptId) continue; + + const vsUrl = `${BASE_URI}?fhir_vs=refset/${refsetId}`; + + if (!seenRefsets.has(vsUrl)) { + await this.runSql( + `INSERT OR IGNORE INTO value_set (cs_id, url, version, name) + VALUES (?, ?, ?, ?)`, + [this.csId, vsUrl, this.config.version || null, `SNOMED Refset ${refsetId}`] + ); + const row = await this.get( + `SELECT vs_id + FROM value_set + WHERE cs_id = ? AND url = ? AND version = ?`, + [this.csId, vsUrl, this.config.version || null] + ); + if (!row) continue; + seenRefsets.set(vsUrl, row.vs_id); + } + + memberRows.push([ + seenRefsets.get(vsUrl), + conceptId, + 1 + ]); + memberCount += 1; + + if (memberRows.length >= FLUSH_ROW_TARGET) { + await this.bulkInsert( + `INSERT OR IGNORE INTO value_set_member (vs_id, concept_id, active)`, + 3, + memberRows + ); + memberRows.length = 0; + this.log(` refset members imported: ${memberCount.toLocaleString()}`); + } + } + } + + if (memberRows.length > 0) { + await this.bulkInsert( + `INSERT OR IGNORE INTO value_set_member (vs_id, concept_id, active)`, + 3, + memberRows + ); + } + + this.stats.refsets = seenRefsets.size; + this.stats.refsetMembers = memberCount; + this.log(`Refset import complete: ${seenRefsets.size.toLocaleString()} refsets, ${memberCount.toLocaleString()} members`); + } + + async buildClosure() { + this.log('Building transitive closure (is-a, inferred)...'); + if (!this.isAPropertyId) { + throw new Error('Cannot build closure: is-a property_id not resolved'); + } + + await this.exec('BEGIN TRANSACTION'); + try { + await this.exec('DELETE FROM closure'); + + // Temp frontier tables for iterative breadth expansion. 
+ await this.exec(` + CREATE TEMP TABLE IF NOT EXISTS _closure_frontier ( + ancestor_id INTEGER NOT NULL, + descendant_id INTEGER NOT NULL, + depth INTEGER NOT NULL, + PRIMARY KEY (ancestor_id, descendant_id) + ) WITHOUT ROWID; + + CREATE TEMP TABLE IF NOT EXISTS _closure_next ( + ancestor_id INTEGER NOT NULL, + descendant_id INTEGER NOT NULL, + depth INTEGER NOT NULL, + PRIMARY KEY (ancestor_id, descendant_id) + ) WITHOUT ROWID; + + CREATE INDEX IF NOT EXISTS _idx_closure_frontier_desc + ON _closure_frontier(descendant_id, ancestor_id); + `); + + await this.exec('DELETE FROM _closure_frontier'); + await this.exec('DELETE FROM _closure_next'); + + // Self rows (depth 0) go directly into closure. + await this.runSql( + `INSERT OR IGNORE INTO closure (ancestor_id, descendant_id) + SELECT concept_id, concept_id + FROM concept + WHERE cs_id = ?`, + [this.csId] + ); + + // Direct is-a edges (depth 1) populate closure + initial frontier. + await this.runSql( + `INSERT OR IGNORE INTO closure (ancestor_id, descendant_id) + SELECT target_concept_id, source_concept_id + FROM concept_link + WHERE active = 1 + AND property_id = ? + AND edge_set_id = ?`, + [this.isAPropertyId, EDGE_SET_INFERRED] + ); + + await this.runSql( + `INSERT OR IGNORE INTO _closure_frontier (ancestor_id, descendant_id, depth) + SELECT target_concept_id, source_concept_id, 1 + FROM concept_link + WHERE active = 1 + AND property_id = ? + AND edge_set_id = ?`, + [this.isAPropertyId, EDGE_SET_INFERRED] + ); + + let iteration = 0; + let cumulativeNew = 0; + while (true) { + await this.exec('DELETE FROM _closure_next'); + + await this.runSql( + `INSERT OR IGNORE INTO _closure_next (ancestor_id, descendant_id, depth) + SELECT f.ancestor_id, l.source_concept_id, f.depth + 1 + FROM _closure_frontier f + JOIN concept_link l + ON l.property_id = ? + AND l.edge_set_id = ? 
+ AND l.active = 1 + AND l.target_concept_id = f.descendant_id + WHERE NOT EXISTS ( + SELECT 1 + FROM closure c + WHERE c.ancestor_id = f.ancestor_id + AND c.descendant_id = l.source_concept_id + )`, + [this.isAPropertyId, EDGE_SET_INFERRED] + ); + + const nextCountRow = await this.get(`SELECT COUNT(*) AS n FROM _closure_next`); + const nextCount = nextCountRow ? nextCountRow.n : 0; + if (nextCount === 0) { + if (this.config.verbose) { + this.log(` closure iteration ${iteration + 1}: +0 rows`); + } + break; + } + + await this.runSql( + `INSERT OR IGNORE INTO closure (ancestor_id, descendant_id) + SELECT ancestor_id, descendant_id + FROM _closure_next`, + [] + ); + + await this.exec('DELETE FROM _closure_frontier'); + await this.exec( + `INSERT OR IGNORE INTO _closure_frontier (ancestor_id, descendant_id, depth) + SELECT ancestor_id, descendant_id, depth + FROM _closure_next` + ); + + iteration += 1; + cumulativeNew += nextCount; + if (this.config.verbose || iteration % 5 === 0) { + this.log(` closure iteration ${iteration}: +${nextCount.toLocaleString()} rows (cumulative ${cumulativeNew.toLocaleString()})`); + } + } + + await this.exec('DELETE FROM _closure_frontier'); + await this.exec('DELETE FROM _closure_next'); + + await this.exec('COMMIT'); + } catch (error) { + await this.exec('ROLLBACK'); + throw error; + } + + const row = await this.get( + `SELECT COUNT(*) AS n FROM closure`, + [] + ); + + this.stats.closureRows = row ? 
row.n : 0; + this.log(`Closure complete: ${this.stats.closureRows.toLocaleString()} rows`); + } + + async writeCsConfig() { + const runtimeFilters = { + concept: { operators: ['=', 'is-a', 'descendent-of', 'in'] }, + code: { operators: ['regex'] } + }; + + const runtimeSearch = { + mode: 'fts-broad', + activeOnly: true, + designationActiveOnly: true, + literalActiveOnly: true, + sources: ['display', 'designation', 'literal'], + ftsTables: { + display: 'search_fts_display', + designation: 'search_fts_designation', + literal: 'search_fts_literal' + }, + likeFallback: { enabled: true, caseInsensitive: true } + }; + + const configRows = [ + ['runtime.versioning', JSON.stringify({ algorithm: 'date', partialMatch: true })], + ['runtime.languages', JSON.stringify({ default: 'en' })], + ['runtime.designations', JSON.stringify({ + useMapping: { + fsn: { system: BASE_URI, code: FSN_TYPE_ID, display: 'Fully specified name' }, + synonym: { system: BASE_URI, code: SYNONYM_TYPE_ID, display: 'Synonym (core metadata concept)' } + }, + primaryDisplay: { + source: 'designation', + strategy: 'first-active', + activeOnly: true, + order: 'designation_id_asc' + } + })], + ['runtime.hierarchy', JSON.stringify({ + propertyCode: IS_A_TYPE_ID, + edgeSetId: EDGE_SET_INFERRED, + closure: { enabled: true, fallbackRecursive: false } + })], + ['runtime.filters', JSON.stringify(runtimeFilters)], + ['runtime.implicitValueSets', JSON.stringify({ + all: { queries: ['fhir_vs', 'fhir_vs=all'] }, + isa: { queryPrefix: 'fhir_vs=isa/', filter: { property: 'concept', op: 'is-a', valueFromSuffix: true } }, + refset: { queryPrefix: 'fhir_vs=refset/', filter: { property: 'concept', op: 'in', valueFromSuffix: true } } + })], + ['runtime.status', JSON.stringify({ + inactive: { source: 'concept.active', invert: true }, + deprecated: { source: 'constant', value: false }, + abstract: { source: 'constant', value: false } + })], + ['runtime.search', JSON.stringify(runtimeSearch)], + ['runtime.behaviorFlags', 
JSON.stringify({ + tags: ['snomed'] + })] + ]; + + for (const [key, value] of configRows) { + await this.runSql( + `INSERT OR REPLACE INTO cs_config (cs_id, key, value) VALUES (?, ?, ?)`, + [this.csId, key, typeof value === 'string' ? value : JSON.stringify(value)] + ); + } + } + + async finalizeDatabase() { + this.log('Finalizing SQLite database...'); + await this.exec('ANALYZE'); + await this.exec('PRAGMA journal_mode = DELETE'); + await this.exec('PRAGMA synchronous = NORMAL'); + await this.exec('VACUUM'); + } + + async buildSearchIndexes() { + this.log('Building broad text search indexes (display/designation/literal)...'); + + await this.exec('BEGIN TRANSACTION'); + try { + await this.exec('DELETE FROM search_fts_display'); + await this.exec('DELETE FROM search_fts_designation'); + await this.exec('DELETE FROM search_fts_literal'); + + const display = await this.runSql( + `INSERT INTO search_fts_display(rowid, term) + SELECT concept_id, trim(display) + FROM concept + WHERE cs_id = ? + AND display IS NOT NULL + AND trim(display) <> ''`, + [this.csId] + ); + + const designation = await this.runSql( + `INSERT INTO search_fts_designation(rowid, term) + SELECT d.designation_id, trim(d.term) + FROM designation d + JOIN concept c ON c.concept_id = d.concept_id + WHERE c.cs_id = ? + AND d.term IS NOT NULL + AND trim(d.term) <> ''`, + [this.csId] + ); + + const literal = await this.runSql( + `INSERT INTO search_fts_literal(rowid, term) + SELECT literal_id, txt + FROM ( + SELECT cl.literal_id AS literal_id, + trim(COALESCE(NULLIF(cl.value_text, ''), NULLIF(cl.value_raw, ''))) AS txt + FROM concept_literal cl + JOIN concept c ON c.concept_id = cl.source_concept_id + WHERE c.cs_id = ? 
+ ) x + WHERE txt IS NOT NULL + AND txt <> ''`, + [this.csId] + ); + + await this.exec(`INSERT INTO search_fts_display(search_fts_display) VALUES ('optimize')`); + await this.exec(`INSERT INTO search_fts_designation(search_fts_designation) VALUES ('optimize')`); + await this.exec(`INSERT INTO search_fts_literal(search_fts_literal) VALUES ('optimize')`); + + await this.exec('COMMIT'); + + this.stats.ftsDisplayRows = display.changes || 0; + this.stats.ftsDesignationRows = designation.changes || 0; + this.stats.ftsLiteralRows = literal.changes || 0; + + this.log( + `Search index complete: display=${this.stats.ftsDisplayRows.toLocaleString()}, ` + + `designation=${this.stats.ftsDesignationRows.toLocaleString()}, ` + + `literal=${this.stats.ftsLiteralRows.toLocaleString()}` + ); + } catch (error) { + await this.exec('ROLLBACK'); + throw error; + } + } + + async ensureProperty(propertyCode, valueKind, isHierarchy) { + if (!propertyCode) return null; + if (this.propertyIdByCode.has(propertyCode)) { + return this.propertyIdByCode.get(propertyCode); + } + + await this.runSql( + `INSERT OR IGNORE INTO property_def (cs_id, property_code, value_kind, is_hierarchy) + VALUES (?, ?, ?, ?)`, + [this.csId, propertyCode, valueKind, isHierarchy] + ); + + const row = await this.get( + `SELECT property_id + FROM property_def + WHERE cs_id = ? 
/**
 * Extracts the edition id and version stamp from a SNOMED CT URI.
 *
 * Accepts both version URIs (`.../sct/{edition}/version/{YYYYMMDD}`) and
 * edition-only URIs (`.../sct/{edition}`). For an edition-only URI the edition
 * is returned with a null version, so a caller that supplies the version
 * separately still gets the edition named by the URI.
 *
 * @param {*} uri URI to inspect; converted with String()
 * @returns {{edition: (string|null), version: (string|null)}} parts found in
 *   the URI; both are null when the URI is empty or carries no edition
 */
function parseEditionAndVersion(uri) {
  if (!uri) return { edition: null, version: null };

  // The `/version/...` segment is optional so edition-only URIs still match.
  const match = String(uri).match(/\/sct\/(\d+)(?:\/version\/(\d{8}))?/);
  if (!match) return { edition: null, version: null };

  return {
    edition: match[1],
    version: match[2] || null
  };
}
/**
 * Splits a raw concrete value into typed columns.
 *
 * - `#<number>`  -> valueNum (null when the text after `#` is empty or not a finite number)
 * - `true` / `false` -> valueBool 1 / 0
 * - `"text"` -> valueText without the surrounding quotes
 * - anything else -> valueText unchanged
 *
 * @param {*} rawValue raw value column; non-string values are converted with String()
 * @returns {{valueText: (string|null), valueNum: (number|null), valueBool: (number|null)}}
 */
function parseConcreteValue(rawValue) {
  if (rawValue === null || rawValue === undefined) {
    return { valueText: null, valueNum: null, valueBool: null };
  }

  // Guard against non-string input, which has no startsWith().
  const raw = typeof rawValue === 'string' ? rawValue : String(rawValue);

  if (raw.startsWith('#')) {
    const numericText = raw.slice(1).trim();
    // Number('') is 0, so a bare '#' must be rejected explicitly.
    const n = numericText === '' ? NaN : Number(numericText);
    return {
      valueText: null,
      valueNum: Number.isFinite(n) ? n : null,
      valueBool: null
    };
  }

  if (raw === 'true' || raw === 'false') {
    return {
      valueText: null,
      valueNum: null,
      valueBool: raw === 'true' ? 1 : 0
    };
  }

  if (raw.length >= 2 && raw.startsWith('"') && raw.endsWith('"')) {
    return {
      valueText: raw.slice(1, -1),
      valueNum: null,
      valueBool: null
    };
  }

  return {
    valueText: raw,
    valueNum: null,
    valueBool: null
  };
}
!fullPath.toLowerCase().includes('snapshot')) { + continue; + } + + const firstLine = readFirstLine(fullPath); + classifyRf2File(fullPath, firstLine, files); + } +} + +function readFirstLine(filePath) { + const fd = fs.openSync(filePath, 'r'); + try { + const buf = Buffer.alloc(1024); + const count = fs.readSync(fd, buf, 0, buf.length, 0); + if (count <= 0) return ''; + + const text = buf.toString('utf8', 0, count); + const index = text.indexOf('\n'); + if (index < 0) return text.trim(); + return text.slice(0, index).replace(/\r$/, ''); + } finally { + fs.closeSync(fd); + } +} + +async function* readTsv(filePath) { + const stream = fs.createReadStream(filePath); + const rl = readline.createInterface({ input: stream, crlfDelay: Infinity }); + + let lineNumber = 0; + for await (const line of rl) { + lineNumber += 1; + if (lineNumber === 1) continue; + if (!line) continue; + yield line.split('\t'); + } +} + +function openSqlite(filePath) { + return new Promise((resolve, reject) => { + const db = new sqlite3.Database(filePath, (err) => { + if (err) reject(err); + else resolve(db); + }); + }); +} + +function closeSqlite(db) { + return new Promise((resolve, reject) => { + db.close((err) => { + if (err) reject(err); + else resolve(); + }); + }); +} + +module.exports = { + SnomedSqliteV0Importer, + constants: { + BASE_URI, + IS_A_TYPE_ID, + FSN_TYPE_ID, + SYNONYM_TYPE_ID, + CHAR_INFERRED, + CHAR_STATED, + CHAR_ADDITIONAL, + ACCEPTABILITY_PREFERRED, + EDGE_SET_INFERRED, + EDGE_SET_STATED, + EDGE_SET_ADDITIONAL + } +}; diff --git a/tx/importers/sqlite-v2/schema-v0.sql b/tx/importers/sqlite-v2/schema-v0.sql new file mode 100644 index 0000000..c50bed6 --- /dev/null +++ b/tx/importers/sqlite-v2/schema-v0.sql @@ -0,0 +1,183 @@ +PRAGMA foreign_keys = OFF; + +CREATE TABLE IF NOT EXISTS code_system ( + cs_id INTEGER PRIMARY KEY AUTOINCREMENT, + base_uri TEXT NOT NULL, + edition_code TEXT, + version TEXT, + canonical_uri TEXT NOT NULL, + name TEXT, + source_kind TEXT, + loaded_at 
TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE UNIQUE INDEX IF NOT EXISTS idx_code_system_base_version + ON code_system(base_uri, version); + +CREATE TABLE IF NOT EXISTS cs_config ( + cs_id INTEGER NOT NULL, + key TEXT NOT NULL, + value TEXT NOT NULL, + PRIMARY KEY (cs_id, key), + FOREIGN KEY (cs_id) REFERENCES code_system(cs_id) +); + +CREATE TABLE IF NOT EXISTS concept ( + concept_id INTEGER PRIMARY KEY, + cs_id INTEGER NOT NULL, + code TEXT NOT NULL, + active INTEGER NOT NULL DEFAULT 1, + display TEXT, + definition TEXT, + FOREIGN KEY (cs_id) REFERENCES code_system(cs_id) +); + +CREATE UNIQUE INDEX IF NOT EXISTS idx_concept_cs_code + ON concept(cs_id, code); + +CREATE INDEX IF NOT EXISTS idx_concept_active + ON concept(cs_id, active); + +-- Case-insensitive lookups used by generic property/link filters. +CREATE INDEX IF NOT EXISTS idx_concept_cs_code_nocase + ON concept(cs_id, code COLLATE NOCASE, concept_id); + +CREATE INDEX IF NOT EXISTS idx_concept_cs_display_nocase + ON concept(cs_id, display COLLATE NOCASE, concept_id); + +CREATE TABLE IF NOT EXISTS designation ( + designation_id INTEGER PRIMARY KEY AUTOINCREMENT, + concept_id INTEGER NOT NULL, + active INTEGER NOT NULL DEFAULT 1, + language_code TEXT, + use_code TEXT, + term TEXT NOT NULL, + preferred INTEGER NOT NULL DEFAULT 0, + FOREIGN KEY (concept_id) REFERENCES concept(concept_id) +); + +CREATE INDEX IF NOT EXISTS idx_designation_concept + ON designation(concept_id, active); +CREATE INDEX IF NOT EXISTS idx_designation_concept_pref_term + ON designation(concept_id, preferred DESC, term); + +CREATE TABLE IF NOT EXISTS property_def ( + property_id INTEGER PRIMARY KEY AUTOINCREMENT, + cs_id INTEGER NOT NULL, + property_code TEXT NOT NULL, + value_kind TEXT NOT NULL DEFAULT 'concept', + is_hierarchy INTEGER NOT NULL DEFAULT 0, + display TEXT, + FOREIGN KEY (cs_id) REFERENCES code_system(cs_id) +); + +CREATE UNIQUE INDEX IF NOT EXISTS idx_propdef_cs_code + ON property_def(cs_id, property_code); + 
+CREATE TABLE IF NOT EXISTS concept_link ( + edge_id INTEGER PRIMARY KEY AUTOINCREMENT, + edge_set_id INTEGER NOT NULL DEFAULT 1, + source_concept_id INTEGER NOT NULL, + property_id INTEGER NOT NULL, + target_concept_id INTEGER NOT NULL, + group_id INTEGER NOT NULL DEFAULT 0, + active INTEGER NOT NULL DEFAULT 1, + FOREIGN KEY (source_concept_id) REFERENCES concept(concept_id), + FOREIGN KEY (target_concept_id) REFERENCES concept(concept_id), + FOREIGN KEY (property_id) REFERENCES property_def(property_id) +); + +CREATE INDEX IF NOT EXISTS idx_concept_link_source + ON concept_link(source_concept_id, property_id, edge_set_id, active); + +CREATE INDEX IF NOT EXISTS idx_concept_link_target + ON concept_link(target_concept_id, property_id, edge_set_id, active); + +-- Property-driven link filters perform better with property-first access. +CREATE INDEX IF NOT EXISTS idx_concept_link_prop_active_source + ON concept_link(property_id, edge_set_id, active, source_concept_id, target_concept_id); + +CREATE INDEX IF NOT EXISTS idx_concept_link_prop_active_target + ON concept_link(property_id, edge_set_id, active, target_concept_id, source_concept_id); + +CREATE TABLE IF NOT EXISTS concept_literal ( + literal_id INTEGER PRIMARY KEY AUTOINCREMENT, + edge_set_id INTEGER NOT NULL DEFAULT 1, + source_concept_id INTEGER NOT NULL, + property_id INTEGER NOT NULL, + value_raw TEXT, + value_text TEXT, + value_num REAL, + value_bool INTEGER, + group_id INTEGER NOT NULL DEFAULT 0, + active INTEGER NOT NULL DEFAULT 1, + FOREIGN KEY (source_concept_id) REFERENCES concept(concept_id), + FOREIGN KEY (property_id) REFERENCES property_def(property_id) +); + +CREATE INDEX IF NOT EXISTS idx_concept_literal_source + ON concept_literal(source_concept_id, property_id, edge_set_id, active); + +-- Property/text predicates need value-oriented access paths. 
+CREATE INDEX IF NOT EXISTS idx_concept_literal_prop_active_text_nocase + ON concept_literal(property_id, active, value_text COLLATE NOCASE, source_concept_id); + +CREATE INDEX IF NOT EXISTS idx_concept_literal_prop_active_raw_nocase + ON concept_literal(property_id, active, value_raw COLLATE NOCASE, source_concept_id); + +-- Broad text search surfaces (rowid-linked, contentless FTS5). +-- These power fast filter text matching across display/designation/literal. +CREATE VIRTUAL TABLE IF NOT EXISTS search_fts_display + USING fts5(term, tokenize='trigram', content=''); + +CREATE VIRTUAL TABLE IF NOT EXISTS search_fts_designation + USING fts5(term, tokenize='trigram', content=''); + +CREATE VIRTUAL TABLE IF NOT EXISTS search_fts_literal + USING fts5(term, tokenize='trigram', content=''); + +CREATE TABLE IF NOT EXISTS closure ( + ancestor_id INTEGER NOT NULL, + descendant_id INTEGER NOT NULL, + PRIMARY KEY (ancestor_id, descendant_id) +) WITHOUT ROWID; + +CREATE TABLE IF NOT EXISTS value_set ( + vs_id INTEGER PRIMARY KEY AUTOINCREMENT, + cs_id INTEGER NOT NULL, + url TEXT NOT NULL, + version TEXT, + name TEXT, + FOREIGN KEY (cs_id) REFERENCES code_system(cs_id) +); + +CREATE UNIQUE INDEX IF NOT EXISTS idx_value_set_cs_url_version + ON value_set(cs_id, url, version); + +CREATE TABLE IF NOT EXISTS value_set_member ( + member_id INTEGER PRIMARY KEY AUTOINCREMENT, + vs_id INTEGER NOT NULL, + concept_id INTEGER NOT NULL, + active INTEGER NOT NULL DEFAULT 1, + FOREIGN KEY (vs_id) REFERENCES value_set(vs_id), + FOREIGN KEY (concept_id) REFERENCES concept(concept_id) +); + +CREATE UNIQUE INDEX IF NOT EXISTS idx_vsm_unique + ON value_set_member(vs_id, concept_id); + +CREATE INDEX IF NOT EXISTS idx_vsm_vs + ON value_set_member(vs_id); + +CREATE TABLE IF NOT EXISTS load_audit ( + run_id INTEGER PRIMARY KEY AUTOINCREMENT, + started_at TEXT NOT NULL, + completed_at TEXT, + source_path TEXT, + target_db TEXT, + terminology TEXT, + edition_code TEXT, + version TEXT, + status TEXT NOT 
NULL, + stats_json TEXT +); diff --git a/tx/library.js b/tx/library.js index c1cada9..66055f1 100644 --- a/tx/library.js +++ b/tx/library.js @@ -19,6 +19,7 @@ const {RxNormServicesFactory} = require("./cs/cs-rxnorm"); const {NdcServicesFactory} = require("./cs/cs-ndc"); const {UniiServicesFactory} = require("./cs/cs-unii"); const {SnomedServicesFactory} = require("./cs/cs-snomed"); +const {SqliteRuntimeV0FactoryProvider} = require("./cs/cs-sqlite-runtime-v0"); const {CPTServicesFactory} = require("./cs/cs-cpt"); const {OMOPServicesFactory} = require("./cs/cs-omop"); const {PackageValueSetProvider} = require("./vs/vs-package"); @@ -239,6 +240,10 @@ class Library { await this.loadSnomed(details, isDefault, mode); break; + case 'sqlite-v0': + await this.loadSqliteV0(details, isDefault, mode); + break; + case 'cpt': await this.loadCpt(details, isDefault, mode); break; @@ -404,6 +409,26 @@ class Library { this.registerProvider(sctFN, sct, isDefault); } + async loadSqliteV0(details, isDefault, mode) { + // Parse optional ?specialization=id or ?specialization=none + let filePart = details; + let specialization; + const qIdx = details.indexOf('?'); + if (qIdx !== -1) { + filePart = details.substring(0, qIdx); + const params = new URLSearchParams(details.substring(qIdx + 1)); + specialization = params.get('specialization') || undefined; + } + const sqliteFN = await this.getOrDownloadFile(filePart); + if (mode === "fetch" || mode === "npm") { + return; + } + const factory = await SqliteRuntimeV0FactoryProvider.createFromMetadata( + this.i18n, sqliteFN, { idPrefix: 'sqlite-v0', specialization } + ); + this.registerProvider(sqliteFN, factory, isDefault); + } + async loadCpt(details, isDefault, mode) { const cptFN = await this.getOrDownloadFile(details); if (mode === "fetch" || mode === "npm") { diff --git a/tx/params.js b/tx/params.js index 2c0528e..62f9837 100644 --- a/tx/params.js +++ b/tx/params.js @@ -557,6 +557,17 @@ class TxParameters { } assign(other) { + this.count = 
other.count; + this.offset = other.offset; + this.limit = other.limit; + this.filter = other.filter; + this.limitedExpansion = other.limitedExpansion; + this.incompleteOK = other.incompleteOK; + this.abstractOk = other.abstractOk; + this.inferSystem = other.inferSystem; + if (other.supplements) { + this.supplements = new Set(other.supplements); + } if (other.FVersionRules) { this.FVersionRules = [...other.FVersionRules]; } diff --git a/tx/perf-counters.js b/tx/perf-counters.js new file mode 100644 index 0000000..533796f --- /dev/null +++ b/tx/perf-counters.js @@ -0,0 +1,49 @@ +/** + * Lightweight opt-in counters and timers for new code paths. + * Disabled by default; call enable() from test harnesses. + * + * bump(name) — record that a branch was taken + * begin(name) — start a timer, returns a token + * end(token) — stop the timer, accumulate elapsed ms + * snapshot() — { counts: {name: N}, timings: {name: {calls, totalMs}} } + */ + +let enabled = false; +const counts = {}; +const timings = {}; + +function bump(name) { + if (!enabled) return; + counts[name] = (counts[name] || 0) + 1; +} + +function begin(name) { + if (!enabled) return null; + return { name, t0: performance.now() }; +} + +function end(token) { + if (!token) return; + const ms = performance.now() - token.t0; + const entry = timings[token.name] || (timings[token.name] = { calls: 0, totalMs: 0 }); + entry.calls++; + entry.totalMs += ms; +} + +function reset() { + for (const k of Object.keys(counts)) delete counts[k]; + for (const k of Object.keys(timings)) delete timings[k]; +} + +function snapshot() { + const t = {}; + for (const [k, v] of Object.entries(timings)) { + t[k] = { calls: v.calls, totalMs: +v.totalMs.toFixed(2) }; + } + return { counts: { ...counts }, timings: t }; +} + +function enable() { enabled = true; } +function disable() { enabled = false; } + +module.exports = { bump, begin, end, reset, snapshot, enable, disable }; diff --git a/tx/tx.all-v0.yml b/tx/tx.all-v0.yml new file mode 
100644 index 0000000..391b003 --- /dev/null +++ b/tx/tx.all-v0.yml @@ -0,0 +1,7 @@ +base: + url: https://storage.googleapis.com/tx-fhir-org + +sources: + - sqlite-v0:rxnorm_02022026.v0.db + - sqlite-v0:loinc_281_full.v0.db + - sqlite-v0:sct_intl_20250201.v0.db diff --git a/tx/tx.js b/tx/tx.js index 95a697e..085a3bd 100644 --- a/tx/tx.js +++ b/tx/tx.js @@ -144,7 +144,10 @@ class TXModule { // Load HTML template txHtml.loadTemplate(); - // Validate config + // Validate config — allow env var override for library source + if (process.env.TX_LIBRARY_SOURCE) { + config.librarySource = process.env.TX_LIBRARY_SOURCE; + } if (!config.librarySource) { throw new Error('TX module requires librarySource configuration'); } diff --git a/tx/tx.loinc-only.yml b/tx/tx.loinc-only.yml new file mode 100644 index 0000000..be65aa0 --- /dev/null +++ b/tx/tx.loinc-only.yml @@ -0,0 +1,5 @@ +base: + url: https://storage.googleapis.com/tx-fhir-org + +sources: + - loinc:loinc-2.77-a.db diff --git a/tx/tx.rxnorm-loinc-matched.yml b/tx/tx.rxnorm-loinc-matched.yml new file mode 100644 index 0000000..e8caedb --- /dev/null +++ b/tx/tx.rxnorm-loinc-matched.yml @@ -0,0 +1,6 @@ +base: + url: https://storage.googleapis.com/tx-fhir-org + +sources: + - rxnorm:rxnorm_02022026-a.db + - loinc:loinc-2.81-a.db diff --git a/tx/tx.rxnorm-loinc-v0.yml b/tx/tx.rxnorm-loinc-v0.yml new file mode 100644 index 0000000..618d256 --- /dev/null +++ b/tx/tx.rxnorm-loinc-v0.yml @@ -0,0 +1,6 @@ +base: + url: https://storage.googleapis.com/tx-fhir-org + +sources: + - sqlite-v0:rxnorm_02022026.v0.db + - sqlite-v0:loinc_281_full.v0.db diff --git a/tx/tx.rxnorm-loinc.yml b/tx/tx.rxnorm-loinc.yml new file mode 100644 index 0000000..c487991 --- /dev/null +++ b/tx/tx.rxnorm-loinc.yml @@ -0,0 +1,6 @@ +base: + url: https://storage.googleapis.com/tx-fhir-org + +sources: + - rxnorm:rxnorm_02032025-a.db + - loinc:loinc-2.77-a.db diff --git a/tx/tx.rxnorm-only.yml b/tx/tx.rxnorm-only.yml new file mode 100644 index 0000000..df11245 --- 
/dev/null +++ b/tx/tx.rxnorm-only.yml @@ -0,0 +1,5 @@ +base: + url: https://storage.googleapis.com/tx-fhir-org + +sources: + - rxnorm:rxnorm_02032025-a.db diff --git a/tx/tx.rxnorm-v0-only.yml b/tx/tx.rxnorm-v0-only.yml new file mode 100644 index 0000000..460cea7 --- /dev/null +++ b/tx/tx.rxnorm-v0-only.yml @@ -0,0 +1,5 @@ +base: + url: https://storage.googleapis.com/tx-fhir-org + +sources: + - sqlite-v0:rxnorm_02022026.v0.db diff --git a/tx/tx.snomed-legacy-only.yml b/tx/tx.snomed-legacy-only.yml new file mode 100644 index 0000000..db09ee1 --- /dev/null +++ b/tx/tx.snomed-legacy-only.yml @@ -0,0 +1,5 @@ +base: + url: https://storage.googleapis.com/tx-fhir-org + +sources: + - snomed:sct_intl_20250201.cache diff --git a/tx/tx.snomed-v0-only.yml b/tx/tx.snomed-v0-only.yml new file mode 100644 index 0000000..b427c0b --- /dev/null +++ b/tx/tx.snomed-v0-only.yml @@ -0,0 +1,5 @@ +base: + url: https://storage.googleapis.com/tx-fhir-org + +sources: + - sqlite-v0:sct_intl_20250201.v0.db diff --git a/tx/tx.test-lite.yml b/tx/tx.test-lite.yml new file mode 100644 index 0000000..a2225c1 --- /dev/null +++ b/tx/tx.test-lite.yml @@ -0,0 +1,28 @@ +base: + url: https://storage.googleapis.com/tx-fhir-org + +sources: + - internal:lang + - internal:country + - internal:currency + - internal:areacode + - internal:mimetypes + - internal:usstates + - internal:hgvs + - ucum:tx/data/ucum-essence.xml + - loinc:loinc-2.77-a.db + - loinc!:loinc-2.81-b.db + - rxnorm:rxnorm_02032025-a.db + - ndc:ndc-20211101.db + - unii:unii_20240622.db + - snomed:sct_intl_20240201.cache + - snomed!:sct_intl_20250201.cache + - cpt:CodeSystem-cpt.db|cpt-2023-fragment-0.1.db + - omop:omop_v20250227.db + - npm:hl7.terminology + - npm:fhir.tx.support.r4 + - npm:ihe.formatcode.fhir + - npm:fhir.dicom + - npm:hl7.fhir.us.core + - npm:us.cdc.phinvads + - npm:hl7.fhir.uv.sdc diff --git a/tx/tx.upstream-baseline.yml b/tx/tx.upstream-baseline.yml new file mode 100644 index 0000000..b8ec9bb --- /dev/null +++ 
b/tx/tx.upstream-baseline.yml @@ -0,0 +1,7 @@ +base: + url: https://storage.googleapis.com/tx-fhir-org + +sources: + - rxnorm:rxnorm_02022026-a.db + - loinc:loinc-2.81-a.db + - snomed:sct_intl_20250201.cache diff --git a/tx/workers/expand.js b/tx/workers/expand.js index 8ca3f14..05aa2ee 100644 --- a/tx/workers/expand.js +++ b/tx/workers/expand.js @@ -13,16 +13,19 @@ const {TxParameters} = require("../params"); const {Designations, SearchFilterText} = require("../library/designations"); const {Extensions} = require("../library/extensions"); const {getValuePrimitive, getValueName} = require("../../library/utilities"); +const perfCounters = require('../perf-counters'); const {div} = require("../../library/html"); const {Issue, OperationOutcome} = require("../library/operation-outcome"); const crypto = require('crypto'); const ValueSet = require("../library/valueset"); const {VersionUtilities} = require("../../library/version-utilities"); -// Expansion limits (from Pascal constants) -const UPPER_LIMIT_NO_TEXT = 1000; -const UPPER_LIMIT_TEXT = 1000; -const INTERNAL_LIMIT = 10000; +// Expansion limits — raised to 1M to allow full code-system expansions +// (e.g., SNOMED Clinical Findings ~124K codes). The v0 SQLite provider +// handles large expansions efficiently via streaming pagination. 
+const UPPER_LIMIT_NO_TEXT = 1000000; +const UPPER_LIMIT_TEXT = 1000000; +const INTERNAL_LIMIT = 1000000; const EXPANSION_DEAD_TIME_SECS = 30; const CACHE_WHEN_DEBUGGING = false; @@ -226,7 +229,17 @@ class ValueSetExpander { } async listDisplaysFromProvider(displays, cs, context) { - await cs.designations(context, displays); + const langs = this.params.workingLanguages?.(); + if (!this.params.includeDesignations && langs && cs.hasAnyDisplays(langs)) { + perfCounters.bump('display.fastPath'); + const d = await cs.display(context); + if (d) { + displays.addDesignation(true, 'active', null, null, d); + } + } else { + perfCounters.bump('display.fullPath'); + await cs.designations(context, displays); + } displays.source = cs; } @@ -664,7 +677,7 @@ class ValueSetExpander { } } - async includeCodes(cset, path, vsSrc, compose, filter, expansion, excludeInactive, notClosed, vsInfo) { + async includeCodes(cset, path, vsSrc, filter, expansion, excludeInactive, notClosed, compose, vsInfo) { this.worker.deadCheck('processCodes#1'); const valueSets = []; @@ -756,69 +769,37 @@ class ValueSetExpander { } const prep = await cs.getPrepContext(true); const ctxt = await cs.searchFilter(prep, filter, false); - await cs.filterExclude(prep, ) let set = await cs.executeFilters(prep); this.worker.opContext.log('iterate filters'); while (await cs.filterMore(ctxt, set)) { - this.worker.deadCheck('processCodes#4'); const c = await cs.filterConcept(ctxt, set); + this.worker.deadCheck('processCodes#4'); if (await this.passesFilters(cs, c, prep, filters, 0)) { const cds = new Designations(this.worker.i18n.languageDefinitions); await this.listDisplaysFromProvider(cds, cs, c); - await this.includeCode(cs, null, await cs.system(), await cs.version(), await cs.code(c), await cs.isAbstract(c), await cs.isInactive(c), await cs.deprecated(c), await cs.getCodeStatus(c), - cds, await cs.definition(c), await cs.itemWeight(c), expansion, valueSets, await cs.getExtensions(c), null, await 
cs.getProperties(c), null, excludeInactive, vsSrc.url); + await this.includeCode(cs, null, await cs.system(), await cs.version(), await cs.code(c), await cs.isAbstract(c), await cs.isInactive(c), await cs.deprecated(c), await cs.getStatus(c), + cds, await cs.definition(c), await cs.itemWeight(c), expansion, valueSets, await cs.extensions(c), null, await this._propsIfRequested(cs, c), null, excludeInactive, vsSrc.url); } } this.worker.opContext.log('iterate filters done'); } } - if (cset.concept) { - this.worker.opContext.log('iterate concepts'); - const cds = new Designations(this.worker.i18n.languageDefinitions); + // --- Unified include: register all concepts + filters as intent, execute once --- + const hasConcepts = cset.concept && cset.concept.length > 0; + const hasFilters = cset.filter && cset.filter.length > 0; + const canUnify = hasConcepts && typeof cs.includeConcepts === 'function'; - for (const cc of cset.concept) { - this.worker.deadCheck('processCodes#3'); - cds.clear(); - Extensions.checkNoModifiers(cc, 'ValueSetExpander.processCodes', 'set concept reference'); - const cctxt = await cs.locate(cc.code, this.allAltCodes); - if (cctxt && cctxt.context && (!this.params.activeOnly || !await cs.isInactive(cctxt.context)) && await this.passesFilters(cs, cctxt.context, prep, filters, 0)) { - await this.listDisplaysFromProvider(cds, cs, cctxt.context); - this.listDisplaysFromIncludeConcept(cds, cc, vsSrc); - if (filter.passesDesignations(cds) || filter.passes(cc.code)) { - let ov = Extensions.readString(cc, 'http://hl7.org/fhir/StructureDefinition/itemWeight'); - if (!ov) { - ov = await cs.itemWeight(cctxt.context); - } - let added = await this.includeCode(cs, null, cs.system(), cs.version(), cc.code, await cs.isAbstract(cctxt.context), await cs.isInactive(cctxt.context), await cs.isDeprecated(cctxt.context), await cs.getStatus(cctxt.context), cds, - await cs.definition(cctxt.context), ov, expansion, valueSets, await cs.extensions(cctxt.context), 
cc.extension, await cs.properties(cctxt.context), null, excludeInactive, vsSrc.url); - if (added) { - this.addToTotal(); - } - } - } - } - this.worker.opContext.log('iterate concepts done'); - } - - if (cset.filter) { + if (canUnify || hasFilters) { this.worker.opContext.log('prepare filters'); - const fcl = cset.filter; - const prep = await cs.getPrepContext(true, - this.params, excludeInactive, vsInfo.csDoOffset ? this.offset : -1, cs.handlesOffset() && vsInfo.csDoExcludes ? this.count : -1); - + // Per-CS LIMIT is safe: excludes are system-scoped, so excludes from + // other systems can't drain this CS's results. The worker's overall + // count management handles cross-system totals. + const prep = cs.handlesOffset() + ? await cs.getPrepContext(true, this.params, excludeInactive, this.offset, this.count) + : await cs.getPrepContext(true, this.params, excludeInactive); if (!filter.isNull) { - await cs.searchFilter(filter, prep, true); - } - if (vsInfo.csDoExcludes) { - for (let exc of compose.exclude || []) { - if (exc.filter) { - await cs.filterExcludeFilters(prep, this.excludeFilterList(exc)); - } - if (exc.concept) { - await cs.filterExcludeConcepts(prep, exc.concept.map(c => c.code)); - } - } + await cs.searchFilter(prep, filter, true); } if (cs.specialEnumeration()) { @@ -826,49 +807,139 @@ class ValueSetExpander { notClosed.value = true; } - for (let i = 0; i < fcl.length; i++) { - this.worker.deadCheck('processCodes#4a'); - const fc = fcl[i]; - if (!fc.value) { - throw new Issue('error', 'invalid', path+".filter["+i+"]", 'UNABLE_TO_HANDLE_SYSTEM_FILTER_WITH_NO_VALUE', this.worker.i18n.translate('UNABLE_TO_HANDLE_SYSTEM_FILTER_WITH_NO_VALUE', this.params.httpLanguages, [cs.system(), fc.property, fc.op]), 'vs-invalid', 400); + // Register include concepts + if (canUnify) { + await cs.includeConcepts(prep, cset.concept.map(c => c.code)); + } + + // Register include filters + if (hasFilters) { + const fcl = cset.filter; + for (let i = 0; i < fcl.length; i++) { 
+ this.worker.deadCheck('processCodes#4a'); + const fc = fcl[i]; + if (!fc.value) { + throw new Issue('error', 'invalid', path+".filter["+i+"]", 'UNABLE_TO_HANDLE_SYSTEM_FILTER_WITH_NO_VALUE', this.worker.i18n.translate('UNABLE_TO_HANDLE_SYSTEM_FILTER_WITH_NO_VALUE', this.params.httpLanguages, [cs.system(), fc.property, fc.op]), 'vs-invalid', 400); + } + Extensions.checkNoModifiers(fc, 'ValueSetExpander.processCodes', 'filter'); + await cs.filter(prep, fc.property, fc.op, fc.value); } - Extensions.checkNoModifiers(fc, 'ValueSetExpander.processCodes', 'filter'); - await cs.filter(prep, fc.property, fc.op, fc.value); } + // Register all excludes (concepts + filters) for this system + if (vsInfo && vsInfo.csDoExcludes && compose) { + for (const exc of compose.exclude || []) { + if (exc.system === cset.system) { + if (exc.concept && exc.concept.length > 0) { + await cs.filterExcludeConcepts(prep, exc.concept.map(c => c.code)); + } + const filterList = (exc.filter || []).map(f => ({ prop: f.property, op: f.op, value: f.value })); + if (filterList.length > 0) { + await cs.filterExcludeFilters(prep, filterList); + } + } + } + } + + // Execute — provider sees full picture: all concepts, filters, excludes const fset = await cs.executeFilters(prep); if (await cs.filtersNotClosed(prep)) { notClosed.value = true; - } else if (fset.length === 1 && !excludeInactive && !this.params.activeOnly) { - // this.addToTotal(await cs.filterSize(prep, fset[0])); } - this.worker.opContext.log('iterate filters'); - while (await cs.filterMore(prep, fset[0])) { - this.worker.deadCheck('processCodes#5'); - const c = await cs.filterConcept(prep, fset[0]); - const ok = (!this.params.activeOnly || !await cs.isInactive(c)) && (await this.passesFilters(cs, c, prep, fset, 1)); - if (ok) { - // count++; - const cds = new Designations(this.worker.i18n.languageDefinitions); - if (this.passesImports(valueSets, cs.system(), await cs.code(c), 0)) { - await this.listDisplaysFromProvider(cds, cs, c); - let 
parent = null; - if (cs.hasParents()) { - parent = this.map.get(this.keyS(cs.system(), cs.version(), await cs.parent(c))); - } else { - this.canBeHierarchy = false; + // Build compose concept lookup for metadata merging + const conceptDefs = new Map(); + if (hasConcepts) { + for (const cc of cset.concept) { + conceptDefs.set(cc.code, cc); + } + } + + // Iterate concept-list results first (if unified): preserves compose ordering + metadata + if (canUnify) { + this.worker.opContext.log('iterate concepts'); + const cds = new Designations(this.worker.i18n.languageDefinitions); + for (const cc of cset.concept) { + this.worker.deadCheck('processCodes#3'); + cds.clear(); + Extensions.checkNoModifiers(cc, 'ValueSetExpander.processCodes', 'set concept reference'); + const located = await cs.locate(cc.code); + const ctx = located?.context || null; + if (ctx && (!this.params.activeOnly || !await cs.isInactive(ctx))) { + await this.listDisplaysFromProvider(cds, cs, ctx); + this.listDisplaysFromIncludeConcept(cds, cc, vsSrc); + if (filter.passesDesignations(cds) || filter.passes(cc.code)) { + let ov = Extensions.readString(cc, 'http://hl7.org/fhir/StructureDefinition/itemWeight'); + if (!ov) { + ov = await cs.itemWeight(ctx); + } + let added = await this.includeCode(cs, null, cs.system(), cs.version(), cc.code, await cs.isAbstract(ctx), await cs.isInactive(ctx), await cs.isDeprecated(ctx), await cs.getStatus(ctx), cds, + await cs.definition(ctx), ov, expansion, valueSets, await cs.extensions(ctx), cc.extension, await this._propsIfRequested(cs, ctx), null, excludeInactive, vsSrc.url); + if (added) { + this.addToTotal(); + } + } + } + } + this.worker.opContext.log('iterate concepts done'); + } + + // Iterate filter results (if any filters were registered) + if (hasFilters && fset.length > 0) { + this.worker.opContext.log('iterate filters'); + while (await cs.filterMore(prep, fset[0])) { + const c = await cs.filterConcept(prep, fset[0]); + this.worker.deadCheck('processCodes#5'); 
+ // Skip codes already emitted from concept list + if (canUnify && conceptDefs.has(await cs.code(c))) continue; + const ok = (!this.params.activeOnly || !await cs.isInactive(c)) && (await this.passesFilters(cs, c, prep, fset, 1)); + if (ok) { + const cds = new Designations(this.worker.i18n.languageDefinitions); + if (this.passesImports(valueSets, cs.system(), await cs.code(c), 0)) { + await this.listDisplaysFromProvider(cds, cs, c); + let parent = null; + if (cs.hasParents()) { + parent = this.map.get(this.keyS(cs.system(), cs.version(), await cs.parent(c))); + } else { + this.canBeHierarchy = false; + } + let added = await this.includeCode(cs, parent, await cs.system(), await cs.version(), await cs.code(c), await cs.isAbstract(c), await cs.isInactive(c), + await cs.isDeprecated(c), await cs.getStatus(c), cds, await cs.definition(c), await cs.itemWeight(c), + expansion, null, await cs.extensions(c), null, await this._propsIfRequested(cs, c), null, excludeInactive, vsSrc.url); + if (added) { + this.addToTotal(); + } + } + } + } + this.worker.opContext.log('iterate filters done'); + } + } else if (hasConcepts) { + // Fallback for providers without includeConcepts (non-v0) + this.worker.opContext.log('iterate concepts'); + const cds = new Designations(this.worker.i18n.languageDefinitions); + for (const cc of cset.concept) { + this.worker.deadCheck('processCodes#3'); + cds.clear(); + Extensions.checkNoModifiers(cc, 'ValueSetExpander.processCodes', 'set concept reference'); + let cctxt = await cs.locate(cc.code, this.allAltCodes); + if (cctxt && cctxt.context && (!this.params.activeOnly || !await cs.isInactive(cctxt.context)) && await this.passesFilters(cs, cctxt.context, prep, filters, 0)) { + await this.listDisplaysFromProvider(cds, cs, cctxt.context); + this.listDisplaysFromIncludeConcept(cds, cc, vsSrc); + if (filter.passesDesignations(cds) || filter.passes(cc.code)) { + let ov = Extensions.readString(cc, 'http://hl7.org/fhir/StructureDefinition/itemWeight'); + if 
(!ov) { + ov = await cs.itemWeight(cctxt.context); } - let added = await this.includeCode(cs, parent, await cs.system(), await cs.version(), await cs.code(c), await cs.isAbstract(c), await cs.isInactive(c), - await cs.isDeprecated(c), await cs.getStatus(c), cds, await cs.definition(c), await cs.itemWeight(c), - expansion, null, await cs.extensions(c), null, await cs.properties(c), null, excludeInactive, vsSrc.url); + let added = await this.includeCode(cs, null, cs.system(), cs.version(), cc.code, await cs.isAbstract(cctxt.context), await cs.isInactive(cctxt.context), await cs.isDeprecated(cctxt.context), await cs.getStatus(cctxt.context), cds, + await cs.definition(cctxt.context), ov, expansion, valueSets, await cs.extensions(cctxt.context), cc.extension, await this._propsIfRequested(cs, cctxt.context), null, excludeInactive, vsSrc.url); if (added) { this.addToTotal(); } } } } - this.worker.opContext.log('iterate filters done'); + this.worker.opContext.log('iterate concepts done'); } } } @@ -897,6 +968,15 @@ class ValueSetExpander { return true; } + async _propsIfRequested(cs, context) { + if (this.params.properties.length) { + perfCounters.bump('props.loaded'); + return await cs.properties(context); + } + perfCounters.bump('props.skipped'); + return null; + } + async excludeCodes(cset, path, vsSrc, filter, expansion, excludeInactive, notClosed) { this.worker.deadCheck('processCodes#1'); const valueSets = []; @@ -979,7 +1059,7 @@ class ValueSetExpander { notClosed.value = true; } const prep = await cs.getPrepContext(true); - const ctxt = await cs.searchFilter(filter, prep, false); + const ctxt = await cs.searchFilter(prep, filter, false); await cs.prepare(prep); while (await cs.filterMore(ctxt)) { this.worker.deadCheck('processCodes#4'); @@ -1015,7 +1095,7 @@ class ValueSetExpander { this.worker.opContext.log('prep filters'); const prep = await cs.getPrepContext(true); if (!filter.isNull) { - await cs.searchFilter(filter, prep, true); + await cs.searchFilter(prep, 
filter, true); } if (cs.specialEnumeration()) { @@ -1070,7 +1150,7 @@ class ValueSetExpander { const cds = new Designations(this.worker.i18n.languageDefinitions); await this.listDisplaysFromProvider(cds, cs, context); const t = await this.includeCode(cs, parent, await cs.system(), await cs.version(), context.code, await cs.isAbstract(context), await cs.isInactive(context), await cs.isDeprecated(context), await cs.getStatus(context), cds, await cs.definition(context), - await cs.itemWeight(context), expansion, imports, await cs.extensions(context), null, await cs.properties(context), null, excludeInactive, srcUrl); + await cs.itemWeight(context), expansion, imports, await cs.extensions(context), null, await this._propsIfRequested(cs, context), null, excludeInactive, srcUrl); if (t != null) { result++; } @@ -1123,15 +1203,18 @@ class ValueSetExpander { } } - async handleCompose(source, filter, expansion, notClosed, vsInfo) { + async handleCompose(source, filter, expansion, notClosed) { this.worker.opContext.log('compose #1'); + const compose = source.jsonObj.compose; + const vsInfo = this.scanValueSet(compose); + const ts = new Map(); - for (const c of source.jsonObj.compose.include || []) { + for (const c of compose.include || []) { this.worker.deadCheck('handleCompose#2'); await this.checkSource(c, expansion, filter, source.url, ts, vsInfo); } - for (const c of source.jsonObj.compose.exclude || []) { + for (const c of compose.exclude || []) { this.worker.deadCheck('handleCompose#3'); this.hasExclusions = true; await this.checkSource(c, expansion, filter, source.url, ts, null); @@ -1139,18 +1222,24 @@ class ValueSetExpander { this.worker.opContext.log('compose #2'); + const excludeInactive = this.excludeInactives(source); + + // Process excludes — only iterate when the provider does NOT handle excludes itself. 
+ // When csDoExcludes is true, excludes are handled in SQL via filterExcludeFilters/executeFilters + // inside includeCodes, so iterating them here would be redundant work. if (!vsInfo.csDoExcludes) { - let i = 0; - for (const c of source.jsonObj.compose.exclude || []) { + let j = 0; + for (const c of compose.exclude || []) { this.worker.deadCheck('handleCompose#4'); - await this.excludeCodes(c, "ValueSet.compose.exclude["+i+"]", source, source.jsonObj.compose, filter, expansion, this.excludeInactives(source), notClosed); + await this.excludeCodes(c, "ValueSet.compose.exclude["+j+"]", source, filter, expansion, excludeInactive, notClosed); + j++; } } let i = 0; - for (const c of source.jsonObj.compose.include || []) { + for (const c of compose.include || []) { this.worker.deadCheck('handleCompose#5'); - await this.includeCodes(c, "ValueSet.compose.include["+i+"]", source, filter, expansion, this.excludeInactives(source), notClosed, vsInfo); + await this.includeCodes(c, "ValueSet.compose.include["+i+"]", source, filter, expansion, excludeInactive, notClosed, compose, vsInfo); i++; } } @@ -1278,11 +1367,10 @@ class ValueSetExpander { let notClosed = { value : false}; - let vsInfo = this.scanValueSet(source.jsonObj.compose); try { if (source.jsonObj.compose && Extensions.checkNoModifiers(source.jsonObj.compose, 'ValueSetExpander.Expand', 'compose') && this.worker.checkNoLockedDate(source.url, source.jsonObj.compose)) { - await this.handleCompose(source, filter, exp, notClosed, vsInfo); + await this.handleCompose(source, filter, exp, notClosed); } const unused = new Set([...this.requiredSupplements].filter(s => !this.usedSupplements.has(s))); @@ -1358,7 +1446,7 @@ class ValueSetExpander { const c = list[i]; if (this.map.has(this.keyC(c))) { o++; - if ((vsInfo.csDoOffset) || (o > this.offset && (this.count < 0 || t < this.count))) { + if (o > this.offset && (this.count < 0 || t < this.count)) { t++; if (!exp.contains) { exp.contains = []; @@ -1554,45 +1642,41 @@ class 
ValueSetExpander { } /** - * we have a look at the value set compose to see what we have. - * If it's all one code system(|version), and has no value set dependencies, - * then we call it simple - this will affect how it can be handled later - * - * @param compose - * @returns {undefined} + * Scan the ValueSet compose to determine if it's "simple" — all one code system + * with no value set dependencies. This affects whether excludes/offset can be + * pushed down to the CS provider. */ scanValueSet(compose) { - let result = { isSimple : false, hasExcludes : true, csset : new Set(), csDoExcludes : false, csDoOffset : false}; + const result = { isSimple: false, hasExcludes: false, csset: new Set(), csDoExcludes: false, csDoOffset: false }; let simple = true; - for (let inc of compose.include) { - if (!this.isSimpleInclude(inc, result.csset, false)) { + for (const inc of compose.include || []) { + if (!this._isSimpleInclude(inc, result.csset, false)) { simple = false; } } - for (let exc of compose.exclude) { - if (!this.isSimpleInclude(exc, result.csset, true)) { + for (const exc of compose.exclude || []) { + if (!this._isSimpleInclude(exc, result.csset, true)) { simple = false; } result.hasExcludes = true; } - if (simple && result.csset.size == 1) { + if (simple && result.csset.size === 1) { result.isSimple = true; } return result; } - isSimpleInclude(inc, set, isExclude) { - set.add(inc.system+"|"+inc.version); - return (!inc.valueset || inc.valueset.length == 0) && ((inc.filter && inc.filter.length > 0) || (isExclude && inc.concept && inc.filter.concept > 0)); + _isSimpleInclude(inc, set, isExclude) { + set.add(inc.system + '|' + (inc.version || '')); + return (!inc.valueSet || inc.valueSet.length === 0) + && ((inc.filter && inc.filter.length > 0) || (isExclude && inc.concept && inc.concept.length > 0)); } excludeFilterList(exc) { const results = []; - for (const f of exc.filter || []) { results.push({ prop: f.property, op: f.op, value: f.value }); } - return 
results; } } diff --git a/tx/workers/validate.js b/tx/workers/validate.js index 36ba555..a81652e 100644 --- a/tx/workers/validate.js +++ b/tx/workers/validate.js @@ -768,7 +768,7 @@ class ValueSetChecker { ver.value = cs.version(); contentMode.value = cs.contentMode(); let msg = ''; - excluded = (system === '%%null%%' || cs.system() === system) && await this.checkConceptSet(path, 'not in', cs, cc, code, displays, this.valueSet, msg, inactive, normalForm, vstatus, op, vcc); + excluded = (system === '%%null%%' || cs.system() === system) && await this.checkConceptSet(path, 'not in', cs, cc, code, displays, this.valueSet, msg, inactive, normalForm, vstatus, op, vcc, messages); if (msg) { messages.push(msg); } diff --git a/tx/workers/worker.js b/tx/workers/worker.js index c1c749d..ee34ca3 100644 --- a/tx/workers/worker.js +++ b/tx/workers/worker.js @@ -7,6 +7,7 @@ const {Issue} = require("../library/operation-outcome"); const {Languages} = require("../../library/languages"); const {ConceptMap} = require("../library/conceptmap"); const {Renderer} = require("../library/renderer"); +const perfCounters = require('../perf-counters'); /** * Custom error for terminology setup issues @@ -43,6 +44,7 @@ class TerminologyWorker { this.noCacheThisOne = false; this.params = null; // Will be set by subclasses this.renderer = new Renderer(i18n, languages, provider); + this._providerCache = new Map(); } /** @@ -144,6 +146,17 @@ class TerminologyWorker { if (!noVParams) { version = this.determineVersionBase(url, version, params); } + + // Memoize by resolved url|version|supplements within a single request + const suppKey = statedSupplements ? [...statedSupplements].sort().join(',') : ''; + const kindsKey = Array.isArray(kinds) ? 
kinds.join(',') : String(kinds); + const cacheKey = `${url}|${version}|${kindsKey}|${suppKey}`; + if (this._providerCache.has(cacheKey)) { + perfCounters.bump('cache.hit'); + return this._providerCache.get(cacheKey); + } + perfCounters.bump('cache.miss'); + let codeSystemResource = null; let provider = null; const supplements = this.loadSupplements(url, version, statedSupplements); @@ -184,6 +197,7 @@ class TerminologyWorker { if (checkVer) { this.checkVersion(url, provider.version(), params, provider.versionAlgorithm(), op); } + this._providerCache.set(cacheKey, provider); } return provider;