From 21ba44ebb4c82c55dbfd880c56bcb661f527be2b Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 00:20:51 -0500 Subject: [PATCH 1/7] Batch SQLite INSERTs for semantic ref and property indexing Add add_terms_batch / add_properties_batch to the index interfaces with executemany-based SQLite implementations. Restructure add_metadata_to_index_from_list and add_to_property_index to collect all items first, then batch-insert via extend() and the new batch methods. Eliminates ~1000 individual INSERT round-trips during indexing. --- src/typeagent/knowpro/interfaces_core.py | 5 ++ src/typeagent/knowpro/interfaces_indexes.py | 5 ++ src/typeagent/storage/memory/propindex.py | 75 ++++++++++++++++-- src/typeagent/storage/memory/semrefindex.py | 87 ++++++++++++++++++++- src/typeagent/storage/sqlite/propindex.py | 30 +++++++ src/typeagent/storage/sqlite/semrefindex.py | 23 ++++++ 6 files changed, 215 insertions(+), 10 deletions(-) diff --git a/src/typeagent/knowpro/interfaces_core.py b/src/typeagent/knowpro/interfaces_core.py index 105e45b6..72f11f8c 100644 --- a/src/typeagent/knowpro/interfaces_core.py +++ b/src/typeagent/knowpro/interfaces_core.py @@ -168,6 +168,11 @@ async def add_term( semantic_ref_ordinal: SemanticRefOrdinal | ScoredSemanticRefOrdinal, ) -> str: ... + async def add_terms_batch( + self, + terms: list[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + ) -> None: ... + async def remove_term( self, term: str, semantic_ref_ordinal: SemanticRefOrdinal ) -> None: ... diff --git a/src/typeagent/knowpro/interfaces_indexes.py b/src/typeagent/knowpro/interfaces_indexes.py index a894ab88..3ae62024 100644 --- a/src/typeagent/knowpro/interfaces_indexes.py +++ b/src/typeagent/knowpro/interfaces_indexes.py @@ -59,6 +59,11 @@ async def add_property( semantic_ref_ordinal: SemanticRefOrdinal | ScoredSemanticRefOrdinal, ) -> None: ... + async def add_properties_batch( + self, + properties: list[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + ) -> None: ... + async def lookup_property( self, property_name: str, value: str ) -> list[ScoredSemanticRefOrdinal] | None: ... diff --git a/src/typeagent/storage/memory/propindex.py b/src/typeagent/storage/memory/propindex.py index acc7b89a..1a1c7968 100644 --- a/src/typeagent/storage/memory/propindex.py +++ b/src/typeagent/storage/memory/propindex.py @@ -109,6 +109,57 @@ async def build_property_index(conversation: IConversation) -> None: await add_to_property_index(conversation, 0) +def _collect_facet_properties( + facet: kplib.Facet | None, + ordinal: SemanticRefOrdinal, +) -> list[tuple[str, str, SemanticRefOrdinal]]: + """Collect property tuples from a facet without touching any index.""" + if facet is None: + return [] + props: list[tuple[str, str, SemanticRefOrdinal]] = [ + (PropertyNames.FacetName.value, facet.name, ordinal) + ] + value = facet.value + if value is not None: + if isinstance(value, float) and value: + value = f"{value:g}" + props.append((PropertyNames.FacetValue.value, str(value), ordinal)) + return props + + +def _collect_entity_properties( + entity: kplib.ConcreteEntity, + ordinal: SemanticRefOrdinal, +) -> list[tuple[str, str, SemanticRefOrdinal]]: + """Collect all property tuples for an entity.""" + props: list[tuple[str, str, SemanticRefOrdinal]] = [ + (PropertyNames.EntityName.value, entity.name, ordinal) + ] + for t in entity.type: + props.append((PropertyNames.EntityType.value, t, ordinal)) + if entity.facets: + for facet in entity.facets: + props.extend(_collect_facet_properties(facet, ordinal)) + return props + + +def _collect_action_properties( + action: kplib.Action, + ordinal: SemanticRefOrdinal, +) -> list[tuple[str, str, SemanticRefOrdinal]]: + """Collect all property tuples for an action.""" + props: list[tuple[str, str, SemanticRefOrdinal]] = [ + (PropertyNames.Verb.value, " ".join(action.verbs), ordinal) + ] + if action.subject_entity_name != "none": + props.append((PropertyNames.Subject.value, action.subject_entity_name, ordinal)) + if action.object_entity_name != "none": + props.append((PropertyNames.Object.value, action.object_entity_name, ordinal)) + if action.indirect_object_entity_name != "none": + props.append((PropertyNames.IndirectObject.value, action.indirect_object_entity_name, ordinal)) + return props + + async def add_to_property_index( conversation: IConversation, start_at_ordinal: SemanticRefOrdinal, @@ -127,29 +178,32 @@ async def add_to_property_index( semantic_refs = conversation.semantic_refs size = await semantic_refs.size() + collected: list[tuple[str, str, SemanticRefOrdinal]] = [] for semantic_ref_ordinal, semantic_ref in enumerate( await semantic_refs.get_slice(start_at_ordinal, size), start_at_ordinal, ): assert semantic_ref.semantic_ref_ordinal == semantic_ref_ordinal if isinstance(semantic_ref.knowledge, kplib.Action): - await add_action_properties_to_index( - semantic_ref.knowledge, property_index, semantic_ref_ordinal + collected.extend( + _collect_action_properties(semantic_ref.knowledge, semantic_ref_ordinal) ) elif isinstance(semantic_ref.knowledge, kplib.ConcreteEntity): - await add_entity_properties_to_index( - semantic_ref.knowledge, property_index, semantic_ref_ordinal + collected.extend( + _collect_entity_properties(semantic_ref.knowledge, semantic_ref_ordinal) ) elif isinstance(semantic_ref.knowledge, Tag): - tag = semantic_ref.knowledge - await property_index.add_property( - PropertyNames.Tag.value, tag.text, semantic_ref_ordinal + collected.append( + (PropertyNames.Tag.value, semantic_ref.knowledge.text, semantic_ref_ordinal) ) elif isinstance(semantic_ref.knowledge, Topic): pass else: assert_never(semantic_ref.knowledge) + if collected: + await property_index.add_properties_batch(collected) + class PropertyIndex(IPropertyToSemanticRefIndex): def __init__(self): @@ -183,6 +237,13 @@ async def add_property( else: self._map[term_text] = [semantic_ref_ordinal] + async def add_properties_batch( + self, + properties: list[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + ) -> None: + for name, value, ordinal in properties: + await self.add_property(name, value, ordinal) + async def clear(self) -> None: self._map = {} diff --git a/src/typeagent/storage/memory/semrefindex.py b/src/typeagent/storage/memory/semrefindex.py index 6c42022d..7f131d9a 100644 --- a/src/typeagent/storage/memory/semrefindex.py +++ b/src/typeagent/storage/memory/semrefindex.py @@ -577,6 +577,48 @@ async def add_metadata_to_index[TMessage: IMessage]( i += 1 +def _collect_facet_terms(facet: kplib.Facet | None) -> list[str]: + """Collect terms from a facet without touching any index.""" + if facet is None: + return [] + terms = [facet.name] + if facet.value is not None: + terms.append(str(facet.value)) + return terms + + +def _collect_entity_terms(entity: kplib.ConcreteEntity) -> list[str]: + """Collect all terms an entity would add to the semantic ref index.""" + terms = [entity.name] + for t in entity.type: + terms.append(t) + if entity.facets: + for facet in entity.facets: + terms.extend(_collect_facet_terms(facet)) + return terms + + +def _collect_action_terms(action: kplib.Action) -> list[str]: + """Collect all terms an action would add to the semantic ref index.""" + terms = [" ".join(action.verbs)] + if action.subject_entity_name != "none": + terms.append(action.subject_entity_name) + if action.object_entity_name != "none": + terms.append(action.object_entity_name) + if action.indirect_object_entity_name != "none": + terms.append(action.indirect_object_entity_name) + if action.params: + for param in action.params: + if isinstance(param, str): + terms.append(param) + else: + terms.append(param.name) + if isinstance(param.value, str): + terms.append(param.value) + terms.extend(_collect_facet_terms(action.subject_entity_facet)) + return terms + + async def add_metadata_to_index_from_list[TMessage: IMessage]( messages: list[TMessage], semantic_refs: ISemanticRefCollection, @@ -585,18 +627,50 @@ async def add_metadata_to_index_from_list[TMessage: IMessage]( knowledge_validator: KnowledgeValidator | None = None, ) -> None: """Extract metadata knowledge from a list of messages starting at ordinal.""" + next_ordinal = await semantic_refs.size() + collected_refs: list[SemanticRef] = [] + collected_terms: list[tuple[str, SemanticRefOrdinal]] = [] + for i, msg in enumerate(messages, start_from_ordinal): knowledge_response = msg.get_knowledge() for entity in knowledge_response.entities: if knowledge_validator is None or knowledge_validator("entity", entity): - await add_entity_to_index(entity, semantic_refs, semantic_ref_index, i) + ref = SemanticRef( + semantic_ref_ordinal=next_ordinal, + range=text_range_from_location(i), + knowledge=entity, + ) + collected_refs.append(ref) + for term in _collect_entity_terms(entity): + collected_terms.append((term, next_ordinal)) + next_ordinal += 1 for action in knowledge_response.actions: if knowledge_validator is None or knowledge_validator("action", action): - await add_action_to_index(action, semantic_refs, semantic_ref_index, i) + ref = SemanticRef( + semantic_ref_ordinal=next_ordinal, + range=text_range_from_location(i), + knowledge=action, + ) + collected_refs.append(ref) + for term in _collect_action_terms(action): + collected_terms.append((term, next_ordinal)) + next_ordinal += 1 for topic_response in knowledge_response.topics: topic = Topic(text=topic_response) if knowledge_validator is None or knowledge_validator("topic", topic): - await add_topic_to_index(topic, semantic_refs, semantic_ref_index, i) + ref = SemanticRef( + semantic_ref_ordinal=next_ordinal, + range=text_range_from_location(i), + knowledge=topic, + ) + collected_refs.append(ref) + collected_terms.append((topic.text, next_ordinal)) + next_ordinal += 1 + + if collected_refs: + await semantic_refs.extend(collected_refs) + if collected_terms: + await semantic_ref_index.add_terms_batch(collected_terms) class TermToSemanticRefIndex(ITermToSemanticRefIndex): @@ -635,6 +709,13 @@ async def add_term( self._map[term] = [semantic_ref_ordinal] return term + async def add_terms_batch( + self, + terms: list[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + ) -> None: + for term, ordinal in terms: + await self.add_term(term, ordinal) + async def lookup_term(self, term: str) -> list[ScoredSemanticRefOrdinal] | None: return self._map.get(self._prepare_term(term)) or [] diff --git a/src/typeagent/storage/sqlite/propindex.py b/src/typeagent/storage/sqlite/propindex.py index 5a0fa63a..6a619869 100644 --- a/src/typeagent/storage/sqlite/propindex.py +++ b/src/typeagent/storage/sqlite/propindex.py @@ -67,6 +67,36 @@ async def add_property( (property_name, value, score, semref_id), ) + async def add_properties_batch( + self, + properties: list[tuple[str, str, interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal]], + ) -> None: + if not properties: + return + from ...storage.memory.propindex import ( + make_property_term_text, + split_property_term_text, + ) + rows = [] + for property_name, value, ordinal in properties: + if isinstance(ordinal, interfaces.ScoredSemanticRefOrdinal): + semref_id = ordinal.semantic_ref_ordinal + score = ordinal.score + else: + semref_id = ordinal + score = 1.0 + term_text = make_property_term_text(property_name, value) + term_text = term_text.lower() + property_name, value = split_property_term_text(term_text) + if property_name.startswith("prop."): + property_name = property_name[5:] + rows.append((property_name, value, score, semref_id)) + cursor = self.db.cursor() + cursor.executemany( + "INSERT INTO PropertyIndex (prop_name, value_str, score, semref_id) VALUES (?, ?, ?, ?)", + rows, + ) + async def clear(self) -> None: cursor = self.db.cursor() cursor.execute("DELETE FROM PropertyIndex") diff --git a/src/typeagent/storage/sqlite/semrefindex.py b/src/typeagent/storage/sqlite/semrefindex.py index 682b8e7d..0925eb45 100644 --- a/src/typeagent/storage/sqlite/semrefindex.py +++ b/src/typeagent/storage/sqlite/semrefindex.py @@ -56,6 +56,29 @@ async def add_term( return term + async def add_terms_batch( + self, + terms: list[tuple[str, interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal]], + ) -> None: + if not terms: + return + rows = [] + for term, ordinal in terms: + if not term: + continue + term = self._prepare_term(term) + if isinstance(ordinal, interfaces.ScoredSemanticRefOrdinal): + semref_id = ordinal.semantic_ref_ordinal + else: + semref_id = ordinal + rows.append((term, semref_id)) + if rows: + cursor = self.db.cursor() + cursor.executemany( + "INSERT OR IGNORE INTO SemanticRefIndex (term, semref_id) VALUES (?, ?)", + rows, + ) + async def remove_term( self, term: str, semantic_ref_ordinal: interfaces.SemanticRefOrdinal ) -> None: From 0d9871b1499a104e1dc3b67aec6d07e6f4eb423d Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 01:41:33 -0500 Subject: [PATCH 2/7] Remove underscore prefix from collect helper functions Rename _collect_{facet,entity,action}_{terms,properties} to drop the leading underscore in propindex.py and semrefindex.py. --- src/typeagent/storage/memory/propindex.py | 12 ++++++------ src/typeagent/storage/memory/semrefindex.py | 14 +++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/typeagent/storage/memory/propindex.py b/src/typeagent/storage/memory/propindex.py index 1a1c7968..7ff49e29 100644 --- a/src/typeagent/storage/memory/propindex.py +++ b/src/typeagent/storage/memory/propindex.py @@ -109,7 +109,7 @@ async def build_property_index(conversation: IConversation) -> None: await add_to_property_index(conversation, 0) -def _collect_facet_properties( +def collect_facet_properties( facet: kplib.Facet | None, ordinal: SemanticRefOrdinal, ) -> list[tuple[str, str, SemanticRefOrdinal]]: @@ -127,7 +127,7 @@ def _collect_facet_properties( return props -def _collect_entity_properties( +def collect_entity_properties( entity: kplib.ConcreteEntity, ordinal: SemanticRefOrdinal, ) -> list[tuple[str, str, SemanticRefOrdinal]]: @@ -139,11 +139,11 @@ def _collect_entity_properties( props.append((PropertyNames.EntityType.value, t, ordinal)) if entity.facets: for facet in entity.facets: - props.extend(_collect_facet_properties(facet, ordinal)) + props.extend(collect_facet_properties(facet, ordinal)) return props -def _collect_action_properties( +def collect_action_properties( action: kplib.Action, ordinal: SemanticRefOrdinal, ) -> list[tuple[str, str, SemanticRefOrdinal]]: @@ -186,11 +186,11 @@ async def add_to_property_index( assert semantic_ref.semantic_ref_ordinal == semantic_ref_ordinal if isinstance(semantic_ref.knowledge, kplib.Action): collected.extend( - _collect_action_properties(semantic_ref.knowledge, semantic_ref_ordinal) + collect_action_properties(semantic_ref.knowledge, semantic_ref_ordinal) ) elif isinstance(semantic_ref.knowledge, kplib.ConcreteEntity): collected.extend( - _collect_entity_properties(semantic_ref.knowledge, semantic_ref_ordinal) + collect_entity_properties(semantic_ref.knowledge, semantic_ref_ordinal) ) elif isinstance(semantic_ref.knowledge, Tag): collected.append( diff --git a/src/typeagent/storage/memory/semrefindex.py b/src/typeagent/storage/memory/semrefindex.py index 7f131d9a..a98a3b21 100644 --- a/src/typeagent/storage/memory/semrefindex.py +++ b/src/typeagent/storage/memory/semrefindex.py @@ -577,7 +577,7 @@ async def add_metadata_to_index[TMessage: IMessage]( i += 1 -def _collect_facet_terms(facet: kplib.Facet | None) -> list[str]: +def collect_facet_terms(facet: kplib.Facet | None) -> list[str]: """Collect terms from a facet without touching any index.""" if facet is None: return [] @@ -587,18 +587,18 @@ def _collect_facet_terms(facet: kplib.Facet | None) -> list[str]: return terms -def _collect_entity_terms(entity: kplib.ConcreteEntity) -> list[str]: +def collect_entity_terms(entity: kplib.ConcreteEntity) -> list[str]: """Collect all terms an entity would add to the semantic ref index.""" terms = [entity.name] for t in entity.type: terms.append(t) if entity.facets: for facet in entity.facets: - terms.extend(_collect_facet_terms(facet)) + terms.extend(collect_facet_terms(facet)) return terms -def _collect_action_terms(action: kplib.Action) -> list[str]: +def collect_action_terms(action: kplib.Action) -> list[str]: """Collect all terms an action would add to the semantic ref index.""" terms = [" ".join(action.verbs)] if action.subject_entity_name != "none": @@ -615,7 +615,7 @@ def _collect_action_terms(action: kplib.Action) -> list[str]: terms.append(param.name) if isinstance(param.value, str): terms.append(param.value) - terms.extend(_collect_facet_terms(action.subject_entity_facet)) + terms.extend(collect_facet_terms(action.subject_entity_facet)) return terms @@ -641,7 +641,7 @@ async def add_metadata_to_index_from_list[TMessage: IMessage]( knowledge=entity, ) collected_refs.append(ref) - for term in _collect_entity_terms(entity): + for term in collect_entity_terms(entity): collected_terms.append((term, next_ordinal)) next_ordinal += 1 for action in knowledge_response.actions: @@ -652,7 +652,7 @@ async def add_metadata_to_index_from_list[TMessage: IMessage]( knowledge=action, ) collected_refs.append(ref) - for term in _collect_action_terms(action): + for term in collect_action_terms(action): collected_terms.append((term, next_ordinal)) next_ordinal += 1 for topic_response in knowledge_response.topics: From bc6b9663e5ee49d31056455b6ba0919f3e5e9648 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 01:52:09 -0500 Subject: [PATCH 3/7] Fix pyright errors: use Sequence for batch method signatures Change list to Sequence in add_terms_batch and add_properties_batch interfaces and implementations to satisfy covariance. Add missing add_terms_batch to FakeTermIndex in conftest.py. --- src/typeagent/knowpro/interfaces_core.py | 3 ++- src/typeagent/knowpro/interfaces_indexes.py | 2 +- src/typeagent/storage/memory/propindex.py | 27 +++++++++++++++++---- src/typeagent/storage/memory/semrefindex.py | 8 +++--- src/typeagent/storage/sqlite/propindex.py | 10 +++++++- src/typeagent/storage/sqlite/semrefindex.py | 7 +++++- tests/conftest.py | 9 ++++++- 7 files changed, 53 insertions(+), 13 deletions(-) diff --git a/src/typeagent/knowpro/interfaces_core.py b/src/typeagent/knowpro/interfaces_core.py index 72f11f8c..cd9e885c 100644 --- a/src/typeagent/knowpro/interfaces_core.py +++ b/src/typeagent/knowpro/interfaces_core.py @@ -4,6 +4,7 @@ from __future__ import annotations +from collections.abc import Sequence from datetime import datetime as Datetime from typing import ( Any, @@ -170,7 +171,7 @@ async def add_term( async def add_terms_batch( self, - terms: list[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + terms: Sequence[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], ) -> None: ... async def remove_term( diff --git a/src/typeagent/knowpro/interfaces_indexes.py b/src/typeagent/knowpro/interfaces_indexes.py index 3ae62024..6c348a01 100644 --- a/src/typeagent/knowpro/interfaces_indexes.py +++ b/src/typeagent/knowpro/interfaces_indexes.py @@ -61,7 +61,7 @@ async def add_property( async def add_properties_batch( self, - properties: list[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + properties: Sequence[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], ) -> None: ... async def lookup_property( diff --git a/src/typeagent/storage/memory/propindex.py b/src/typeagent/storage/memory/propindex.py index 7ff49e29..f9717b24 100644 --- a/src/typeagent/storage/memory/propindex.py +++ b/src/typeagent/storage/memory/propindex.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. +from collections.abc import Sequence import enum from typing import assert_never @@ -156,7 +157,13 @@ def collect_action_properties( if action.object_entity_name != "none": props.append((PropertyNames.Object.value, action.object_entity_name, ordinal)) if action.indirect_object_entity_name != "none": - props.append((PropertyNames.IndirectObject.value, action.indirect_object_entity_name, ordinal)) + props.append( + ( + PropertyNames.IndirectObject.value, + action.indirect_object_entity_name, + ordinal, + ) + ) return props @@ -186,15 +193,23 @@ async def add_to_property_index( assert semantic_ref.semantic_ref_ordinal == semantic_ref_ordinal if isinstance(semantic_ref.knowledge, kplib.Action): collected.extend( - collect_action_properties(semantic_ref.knowledge, semantic_ref_ordinal) + collect_action_properties( + semantic_ref.knowledge, semantic_ref_ordinal + ) ) elif isinstance(semantic_ref.knowledge, kplib.ConcreteEntity): collected.extend( - collect_entity_properties(semantic_ref.knowledge, semantic_ref_ordinal) + collect_entity_properties( + semantic_ref.knowledge, semantic_ref_ordinal + ) ) elif isinstance(semantic_ref.knowledge, Tag): collected.append( - (PropertyNames.Tag.value, semantic_ref.knowledge.text, semantic_ref_ordinal) + ( + PropertyNames.Tag.value, + semantic_ref.knowledge.text, + semantic_ref_ordinal, + ) ) elif isinstance(semantic_ref.knowledge, Topic): pass @@ -239,7 +254,9 @@ async def add_property( async def add_properties_batch( self, - properties: list[tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + properties: Sequence[ + tuple[str, str, SemanticRefOrdinal | ScoredSemanticRefOrdinal] + ], ) -> None: for name, value, ordinal in properties: await self.add_property(name, value, ordinal) diff --git a/src/typeagent/storage/memory/semrefindex.py b/src/typeagent/storage/memory/semrefindex.py index a98a3b21..8437bacd 100644 --- a/src/typeagent/storage/memory/semrefindex.py +++ b/src/typeagent/storage/memory/semrefindex.py @@ -3,11 +3,13 @@ from __future__ import annotations # TODO: Avoid -from collections.abc import AsyncIterable, Callable +from collections.abc import AsyncIterable, Callable, Sequence from typechat import Failure -from ...knowpro import convknowledge, knowledge_schema as kplib, secindex +from ...knowpro import convknowledge +from ...knowpro import knowledge_schema as kplib +from ...knowpro import secindex from ...knowpro.convsettings import ConversationSettings, SemanticRefIndexSettings from ...knowpro.interfaces import ( # Interfaces.; Other imports. IConversation, @@ -711,7 +713,7 @@ async def add_term( async def add_terms_batch( self, - terms: list[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], + terms: Sequence[tuple[str, SemanticRefOrdinal | ScoredSemanticRefOrdinal]], ) -> None: for term, ordinal in terms: await self.add_term(term, ordinal) diff --git a/src/typeagent/storage/sqlite/propindex.py b/src/typeagent/storage/sqlite/propindex.py index 6a619869..f9704b45 100644 --- a/src/typeagent/storage/sqlite/propindex.py +++ b/src/typeagent/storage/sqlite/propindex.py @@ -3,6 +3,7 @@ """SQLite-based property index implementation.""" +from collections.abc import Sequence import sqlite3 from ...knowpro import interfaces @@ -69,7 +70,13 @@ async def add_property( async def add_properties_batch( self, - properties: list[tuple[str, str, interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal]], + properties: Sequence[ + tuple[ + str, + str, + interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal, + ] + ], ) -> None: if not properties: return @@ -77,6 +84,7 @@ async def add_properties_batch( make_property_term_text, split_property_term_text, ) + rows = [] for property_name, value, ordinal in properties: if isinstance(ordinal, interfaces.ScoredSemanticRefOrdinal): diff --git a/src/typeagent/storage/sqlite/semrefindex.py b/src/typeagent/storage/sqlite/semrefindex.py index 0925eb45..ac68a1e0 100644 --- a/src/typeagent/storage/sqlite/semrefindex.py +++ b/src/typeagent/storage/sqlite/semrefindex.py @@ -3,6 +3,7 @@ """SQLite-based semantic reference index implementation.""" +from collections.abc import Sequence import re import sqlite3 import unicodedata @@ -58,7 +59,11 @@ async def add_term( async def add_terms_batch( self, - terms: list[tuple[str, interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal]], + terms: Sequence[ + tuple[ + str, interfaces.SemanticRefOrdinal | interfaces.ScoredSemanticRefOrdinal + ] + ], ) -> None: if not terms: return diff --git a/tests/conftest.py b/tests/conftest.py index dae619c1..7f0f11f5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -from collections.abc import AsyncGenerator, Callable, Iterator +from collections.abc import AsyncGenerator, Callable, Iterator, Sequence import os from pathlib import Path import tempfile @@ -236,6 +236,13 @@ async def add_term( self.term_to_refs[term].append(scored_ref) return term + async def add_terms_batch( + self, + terms: Sequence[tuple[str, int | ScoredSemanticRefOrdinal]], + ) -> None: + for term, ordinal in terms: + await self.add_term(term, ordinal) + async def remove_term(self, term: str, semantic_ref_ordinal: int) -> None: if term in self.term_to_refs: self.term_to_refs[term] = [ From 1c8a2949c8a065f9ce65ca64b43b5c5c127681c8 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 03:37:56 -0500 Subject: [PATCH 4/7] Add batch metadata query to avoid N+1 in lookup_term_filtered MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lookup_term_filtered called get_item() per scored ref — one SELECT and full deserialization per match. The filter only needs knowledge_type (a plain column) and range (json.loads of range_json), never the expensive knowledge_json deserialization (64% of per-row cost). Add get_metadata_multiple to ISemanticRefCollection that fetches only semref_id, range_json, knowledge_type in a single batch query. Replace the N+1 loop in lookup_term_filtered with one get_metadata_multiple call. Benchmark (200 matches, 200 rounds): 4.38ms → 1.32ms (3.3x speedup). --- src/typeagent/knowpro/interfaces_storage.py | 19 +++- src/typeagent/knowpro/query.py | 18 ++-- src/typeagent/storage/memory/collections.py | 13 +++ src/typeagent/storage/sqlite/collections.py | 25 +++++ tests/benchmarks/test_benchmark_query.py | 104 ++++++++++++++++++++ 5 files changed, 168 insertions(+), 11 deletions(-) create mode 100644 tests/benchmarks/test_benchmark_query.py diff --git a/src/typeagent/knowpro/interfaces_storage.py b/src/typeagent/knowpro/interfaces_storage.py index a82fe7ad..97f7b600 100644 --- a/src/typeagent/knowpro/interfaces_storage.py +++ b/src/typeagent/knowpro/interfaces_storage.py @@ -6,16 +6,18 @@ from collections.abc import AsyncIterable, Iterable from datetime import datetime as Datetime -from typing import Any, Protocol, Self +from typing import Any, NamedTuple, Protocol, Self from pydantic.dataclasses import dataclass from .interfaces_core import ( IMessage, ITermToSemanticRefIndex, + KnowledgeType, MessageOrdinal, SemanticRef, SemanticRefOrdinal, + TextRange, ) from .interfaces_indexes import ( IConversationSecondaryIndexes, @@ -57,6 +59,14 @@ class ConversationMetadata: extra: dict[str, str] | None = None +class SemanticRefMetadata(NamedTuple): + """Lightweight metadata for filtering without full knowledge deserialization.""" + + ordinal: SemanticRefOrdinal + range: TextRange + knowledge_type: KnowledgeType + + class IReadonlyCollection[T, TOrdinal](AsyncIterable[T], Protocol): async def size(self) -> int: ... @@ -91,6 +101,12 @@ class IMessageCollection[TMessage: IMessage]( class ISemanticRefCollection(ICollection[SemanticRef, SemanticRefOrdinal], Protocol): """A collection of SemanticRefs.""" + async def get_metadata_multiple( + self, ordinals: list[SemanticRefOrdinal] + ) -> list[SemanticRefMetadata]: + """Batch-fetch lightweight metadata without deserializing knowledge.""" + ... + class IStorageProvider[TMessage: IMessage](Protocol): """API spec for storage providers -- maybe in-memory or persistent.""" @@ -190,4 +206,5 @@ class IConversation[ "ISemanticRefCollection", "IStorageProvider", "STATUS_INGESTED", + "SemanticRefMetadata", ] diff --git a/src/typeagent/knowpro/query.py b/src/typeagent/knowpro/query.py index 44fa06ec..5859e3bc 100644 --- a/src/typeagent/knowpro/query.py +++ b/src/typeagent/knowpro/query.py @@ -37,6 +37,7 @@ ScoredSemanticRefOrdinal, SearchTerm, SemanticRef, + SemanticRefMetadata, SemanticRefOrdinal, SemanticRefSearchResult, Term, @@ -174,17 +175,14 @@ async def lookup_term_filtered( semantic_ref_index: ITermToSemanticRefIndex, term: Term, semantic_refs: ISemanticRefCollection, - filter: Callable[[SemanticRef, ScoredSemanticRefOrdinal], bool], + filter: Callable[[SemanticRefMetadata, ScoredSemanticRefOrdinal], bool], ) -> list[ScoredSemanticRefOrdinal] | None: """Look up a term in the semantic reference index and filter the results.""" scored_refs = await semantic_ref_index.lookup_term(term.text) if scored_refs: - filtered = [] - for sr in scored_refs: - semantic_ref = await semantic_refs.get_item(sr.semantic_ref_ordinal) - if filter(semantic_ref, sr): - filtered.append(sr) - return filtered + ordinals = [sr.semantic_ref_ordinal for sr in scored_refs] + metadata = await semantic_refs.get_metadata_multiple(ordinals) + return [sr for sr, m in zip(scored_refs, metadata) if filter(m, sr)] return None @@ -202,10 +200,10 @@ async def lookup_term( semantic_ref_index, term, semantic_refs, - lambda sr, _: ( - not knowledge_type or sr.knowledge.knowledge_type == knowledge_type + lambda m, _: ( + not knowledge_type or m.knowledge_type == knowledge_type ) - and ranges_in_scope.is_range_in_scope(sr.range), + and ranges_in_scope.is_range_in_scope(m.range), ) return await semantic_ref_index.lookup_term(term.text) diff --git a/src/typeagent/storage/memory/collections.py b/src/typeagent/storage/memory/collections.py index 9973a290..8a5b14eb 100644 --- a/src/typeagent/storage/memory/collections.py +++ b/src/typeagent/storage/memory/collections.py @@ -10,6 +10,7 @@ IMessage, MessageOrdinal, SemanticRef, + SemanticRefMetadata, SemanticRefOrdinal, ) @@ -63,6 +64,18 @@ async def extend(self, items: Iterable[T]) -> None: class MemorySemanticRefCollection(MemoryCollection[SemanticRef, SemanticRefOrdinal]): """A collection of semantic references.""" + async def get_metadata_multiple( + self, ordinals: list[SemanticRefOrdinal] + ) -> list[SemanticRefMetadata]: + return [ + SemanticRefMetadata( + ordinal=o, + range=self.items[o].range, + knowledge_type=self.items[o].knowledge.knowledge_type, + ) + for o in ordinals + ] + class MemoryMessageCollection[TMessage: IMessage]( MemoryCollection[TMessage, MessageOrdinal] diff --git a/src/typeagent/storage/sqlite/collections.py b/src/typeagent/storage/sqlite/collections.py index 9730f6d1..911fff94 100644 --- a/src/typeagent/storage/sqlite/collections.py +++ b/src/typeagent/storage/sqlite/collections.py @@ -340,6 +340,31 @@ async def get_multiple(self, arg: list[int]) -> list[interfaces.SemanticRef]: assert set(rowdict) == set(arg) return [self._deserialize_semantic_ref_from_row(rowdict[ordl]) for ordl in arg] + async def get_metadata_multiple( + self, ordinals: list[int] + ) -> list[interfaces.SemanticRefMetadata]: + if not ordinals: + return [] + cursor = self.db.cursor() + placeholders = ",".join("?" * len(ordinals)) + cursor.execute( + f""" + SELECT semref_id, range_json, knowledge_type + FROM SemanticRefs WHERE semref_id IN ({placeholders}) + """, + ordinals, + ) + rows = cursor.fetchall() + rowdict = {r[0]: r for r in rows} + return [ + interfaces.SemanticRefMetadata( + ordinal=rowdict[o][0], + range=interfaces.TextRange.deserialize(json.loads(rowdict[o][1])), + knowledge_type=rowdict[o][2], + ) + for o in ordinals + ] + async def append(self, item: interfaces.SemanticRef) -> None: cursor = self.db.cursor() semref_id, range_json, knowledge_type, knowledge_json = ( diff --git a/tests/benchmarks/test_benchmark_query.py b/tests/benchmarks/test_benchmark_query.py new file mode 100644 index 00000000..8c4dd137 --- /dev/null +++ b/tests/benchmarks/test_benchmark_query.py @@ -0,0 +1,104 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +"""Benchmark for lookup_term_filtered — measures the N+1 query pattern. + +After indexing 200 synthetic messages, looks up a high-frequency term +and filters results via lookup_term_filtered. Each call triggers +one get_item() SELECT per matching semantic ref (N+1 pattern). + +Run: + uv run python -m pytest tests/benchmarks/test_benchmark_query.py -v -s +""" + +import os +import tempfile + +import pytest + +from typeagent.aitools.model_adapters import create_test_embedding_model +from typeagent.knowpro.convsettings import ConversationSettings +from typeagent.knowpro.interfaces_core import Term +from typeagent.knowpro.query import lookup_term_filtered +from typeagent.storage.sqlite.provider import SqliteStorageProvider +from typeagent.transcripts.transcript import ( + Transcript, + TranscriptMessage, + TranscriptMessageMeta, +) + + +def make_settings() -> ConversationSettings: + model = create_test_embedding_model() + settings = ConversationSettings(model=model) + settings.semantic_ref_index_settings.auto_extract_knowledge = False + return settings + + +def synthetic_messages(n: int) -> list[TranscriptMessage]: + return [ + TranscriptMessage( + text_chunks=[f"Message {i} about topic {i % 10}"], + metadata=TranscriptMessageMeta(speaker=f"Speaker{i % 3}"), + tags=[f"tag{i % 5}"], + ) + for i in range(n) + ] + + +async def create_indexed_transcript( + db_path: str, settings: ConversationSettings, n_messages: int +) -> Transcript: + """Create and index a transcript, returning it ready for queries.""" + storage = SqliteStorageProvider( + db_path, + message_type=TranscriptMessage, + message_text_index_settings=settings.message_text_index_settings, + related_term_index_settings=settings.related_term_index_settings, + ) + settings.storage_provider = storage + transcript = await Transcript.create(settings, name="bench") + messages = synthetic_messages(n_messages) + await transcript.add_messages_with_indexing(messages) + return transcript + + +@pytest.mark.asyncio +async def test_benchmark_lookup_term_filtered(async_benchmark): + """Benchmark lookup_term_filtered with N+1 get_item pattern.""" + settings = make_settings() + tmpdir = tempfile.mkdtemp() + db_path = os.path.join(tmpdir, "query_bench.db") + + transcript = await create_indexed_transcript(db_path, settings, 200) + + # Find a high-frequency term to look up. + semref_index = transcript.semantic_ref_index + terms = await semref_index.get_terms() + # Pick the term with the most matches. + best_term = None + best_count = 0 + for t in terms: + refs = await semref_index.lookup_term(t) + if refs and len(refs) > best_count: + best_count = len(refs) + best_term = t + + assert best_term is not None, "No terms found after indexing" + print(f"\nBenchmarking term '{best_term}' with {best_count} matches") + + term = Term(text=best_term) + semantic_refs = transcript.semantic_refs + # Filter that accepts all — isolates the get_item overhead. + accept_all = lambda sr, scored: True + + async def target(): + await lookup_term_filtered(semref_index, term, semantic_refs, accept_all) + + try: + await async_benchmark.pedantic(target, rounds=200, warmup_rounds=20) + finally: + await settings.storage_provider.close() + import shutil + + shutil.rmtree(tmpdir, ignore_errors=True) From eb5323ba5fa94b95f1c68a2daad215c96f1fed89 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 03:49:46 -0500 Subject: [PATCH 5/7] Extend batch metadata query to remaining N+1 call sites Apply the same get_metadata_multiple pattern from lookup_term_filtered to four more sites that called get_item() in a loop: - propindex.lookup_property_in_property_index: filter by .range - SemanticRefAccumulator.group_matches_by_type: group by .knowledge_type - SemanticRefAccumulator.get_matches_in_scope: filter by .range - answers.get_scored_semantic_refs_from_ordinals_iter: two-phase metadata filter then batch get_multiple for matching full objects All sites now use a single batch query instead of N individual SELECTs, skipping knowledge_json deserialization where only range or knowledge_type is needed. --- src/typeagent/knowpro/answers.py | 29 +++++++++++++---------- src/typeagent/knowpro/collections.py | 23 +++++++++++------- src/typeagent/storage/memory/propindex.py | 13 +++++----- 3 files changed, 38 insertions(+), 27 deletions(-) diff --git a/src/typeagent/knowpro/answers.py b/src/typeagent/knowpro/answers.py index 58a536ed..663d5804 100644 --- a/src/typeagent/knowpro/answers.py +++ b/src/typeagent/knowpro/answers.py @@ -452,19 +452,22 @@ async def get_scored_semantic_refs_from_ordinals_iter( semantic_ref_matches: list[ScoredSemanticRefOrdinal], knowledge_type: KnowledgeType, ) -> list[Scored[SemanticRef]]: - result = [] - for semantic_ref_match in semantic_ref_matches: - semantic_ref = await semantic_refs.get_item( - semantic_ref_match.semantic_ref_ordinal - ) - if semantic_ref.knowledge.knowledge_type == knowledge_type: - result.append( - Scored( - item=semantic_ref, - score=semantic_ref_match.score, - ) - ) - return result + if not semantic_ref_matches: + return [] + ordinals = [m.semantic_ref_ordinal for m in semantic_ref_matches] + metadata = await semantic_refs.get_metadata_multiple(ordinals) + matching = [ + (sr_match, m.ordinal) + for sr_match, m in zip(semantic_ref_matches, metadata) + if m.knowledge_type == knowledge_type + ] + if not matching: + return [] + full_refs = await semantic_refs.get_multiple([o for _, o in matching]) + return [ + Scored(item=ref, score=sr_match.score) + for (sr_match, _), ref in zip(matching, full_refs) + ] def merge_scored_concrete_entities( diff --git a/src/typeagent/knowpro/collections.py b/src/typeagent/knowpro/collections.py index a2716577..6ff7b557 100644 --- a/src/typeagent/knowpro/collections.py +++ b/src/typeagent/knowpro/collections.py @@ -331,13 +331,17 @@ async def group_matches_by_type( self, semantic_refs: ISemanticRefCollection, ) -> dict[KnowledgeType, "SemanticRefAccumulator"]: + matches = list(self) + if not matches: + return {} + ordinals = [match.value for match in matches] + metadata = await semantic_refs.get_metadata_multiple(ordinals) groups: dict[KnowledgeType, SemanticRefAccumulator] = {} - for match in self: - semantic_ref = await semantic_refs.get_item(match.value) - group = groups.get(semantic_ref.knowledge.knowledge_type) + for match, m in zip(matches, metadata): + group = groups.get(m.knowledge_type) if group is None: group = SemanticRefAccumulator(self.search_term_matches) - groups[semantic_ref.knowledge.knowledge_type] = group + groups[m.knowledge_type] = group group.set_match(match) return groups @@ -346,11 +350,14 @@ async def get_matches_in_scope( semantic_refs: ISemanticRefCollection, ranges_in_scope: "TextRangesInScope", ) -> "SemanticRefAccumulator": + matches = list(self) + if not matches: + return SemanticRefAccumulator(self.search_term_matches) + ordinals = [match.value for match in matches] + metadata = await semantic_refs.get_metadata_multiple(ordinals) accumulator = SemanticRefAccumulator(self.search_term_matches) - for match in self: - if ranges_in_scope.is_range_in_scope( - (await semantic_refs.get_item(match.value)).range - ): + for match, m in zip(matches, metadata): + if ranges_in_scope.is_range_in_scope(m.range): accumulator.set_match(match) return accumulator diff --git a/src/typeagent/storage/memory/propindex.py b/src/typeagent/storage/memory/propindex.py index f9717b24..ecb3e85d 100644 --- a/src/typeagent/storage/memory/propindex.py +++ b/src/typeagent/storage/memory/propindex.py @@ -330,12 +330,13 @@ async def lookup_property_in_property_index( property_value, ) if ranges_in_scope is not None and scored_refs: - filtered_refs = [] - for sr in scored_refs: - semantic_ref = await semantic_refs.get_item(sr.semantic_ref_ordinal) - if ranges_in_scope.is_range_in_scope(semantic_ref.range): - filtered_refs.append(sr) - scored_refs = filtered_refs + ordinals = [sr.semantic_ref_ordinal for sr in scored_refs] + metadata = await semantic_refs.get_metadata_multiple(ordinals) + scored_refs = [ + sr + for sr, m in zip(scored_refs, metadata) + if ranges_in_scope.is_range_in_scope(m.range) + ] return scored_refs or None # Return None if no results From 2281ee7355dc16708311345d4c28171d082a2ecb Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 04:54:11 -0500 Subject: [PATCH 6/7] Speed up scope-filtering: bisect in contains_range, inline tuple comparisons - Use bisect_right with key=start in TextRangeCollection.contains_range to skip O(n) linear scan (O(log n) for non-overlapping point ranges) - Replace TextLocation allocations in TextRange __eq__/__lt__/__contains__ with a shared _effective_end returning tuples - Skip pydantic validation in get_metadata_multiple by constructing TextLocation/TextRange directly from JSON --- src/typeagent/knowpro/collections.py | 16 +++++---- src/typeagent/knowpro/interfaces_core.py | 36 +++++++-------------- src/typeagent/storage/sqlite/collections.py | 33 +++++++++++++++---- 3 files changed, 48 insertions(+), 37 deletions(-) diff --git a/src/typeagent/knowpro/collections.py b/src/typeagent/knowpro/collections.py index 6ff7b557..d7c07b19 100644 --- a/src/typeagent/knowpro/collections.py +++ b/src/typeagent/knowpro/collections.py @@ -526,12 +526,16 @@ def add_ranges(self, text_ranges: "list[TextRange] | TextRangeCollection") -> No self.add_range(text_range) def contains_range(self, inner_range: TextRange) -> bool: - # Since ranges are sorted by start, once we pass inner_range's start - # no further range can contain it. - for outer_range in self._ranges: - if outer_range.start > inner_range.start: - break - if inner_range in outer_range: + if not self._ranges: + return False + # Bisect on start only to find all ranges with start <= inner.start, + # then scan backwards — the most likely containing range has the + # largest start still <= inner's. + hi = bisect.bisect_right( + self._ranges, inner_range.start, key=lambda r: r.start + ) + for i in range(hi - 1, -1, -1): + if inner_range in self._ranges[i]: return True return False diff --git a/src/typeagent/knowpro/interfaces_core.py b/src/typeagent/knowpro/interfaces_core.py index cd9e885c..4dc8fc8e 100644 --- a/src/typeagent/knowpro/interfaces_core.py +++ b/src/typeagent/knowpro/interfaces_core.py @@ -255,32 +255,24 @@ def __repr__(self) -> str: else: return f"{self.__class__.__name__}({self.start}, {self.end})" + @staticmethod + def _effective_end(tr: "TextRange") -> tuple[int, int]: + """Return (message_ordinal, chunk_ordinal) for the effective end.""" + if tr.end is not None: + return (tr.end.message_ordinal, tr.end.chunk_ordinal) + return (tr.start.message_ordinal, tr.start.chunk_ordinal + 1) + def __eq__(self, other: object) -> bool: if not isinstance(other, TextRange): return NotImplemented - if self.start != other.start: return False - - # Get the effective end for both ranges - self_end = self.end or TextLocation( - self.start.message_ordinal, self.start.chunk_ordinal + 1 - ) - other_end = other.end or TextLocation( - other.start.message_ordinal, other.start.chunk_ordinal + 1 - ) - return self_end == other_end + return TextRange._effective_end(self) == TextRange._effective_end(other) def __lt__(self, other: Self) -> bool: if self.start != other.start: return self.start < other.start - self_end = self.end or TextLocation( - self.start.message_ordinal, self.start.chunk_ordinal + 1 - ) - other_end = other.end or TextLocation( - other.start.message_ordinal, other.start.chunk_ordinal + 1 - ) - return self_end < other_end + return TextRange._effective_end(self) < TextRange._effective_end(other) def __gt__(self, other: Self) -> bool: return other.__lt__(self) @@ -292,13 +284,9 @@ def __le__(self, other: Self) -> bool: return not other.__lt__(self) def __contains__(self, other: Self) -> bool: - other_end = other.end or TextLocation( - other.start.message_ordinal, other.start.chunk_ordinal + 1 - ) - self_end = self.end or TextLocation( - self.start.message_ordinal, self.start.chunk_ordinal + 1 - ) - return self.start <= other.start and other_end <= self_end + if not (self.start <= other.start): + return False + return TextRange._effective_end(other) <= TextRange._effective_end(self) def serialize(self) -> TextRangeData: return self.__pydantic_serializer__.to_python( # type: ignore diff --git a/src/typeagent/storage/sqlite/collections.py b/src/typeagent/storage/sqlite/collections.py index 911fff94..fe394dcb 100644 --- a/src/typeagent/storage/sqlite/collections.py +++ b/src/typeagent/storage/sqlite/collections.py @@ -356,14 +356,33 @@ async def get_metadata_multiple( ) rows = cursor.fetchall() rowdict = {r[0]: r for r in rows} - return [ - interfaces.SemanticRefMetadata( - ordinal=rowdict[o][0], - range=interfaces.TextRange.deserialize(json.loads(rowdict[o][1])), - knowledge_type=rowdict[o][2], + result = [] + for o in ordinals: + row = rowdict[o] + range_data = json.loads(row[1]) + start = range_data["start"] + end_data = range_data.get("end") + result.append( + interfaces.SemanticRefMetadata( + ordinal=row[0], + range=interfaces.TextRange( + start=interfaces.TextLocation( + start["messageOrdinal"], + start.get("chunkOrdinal", 0), + ), + end=( + interfaces.TextLocation( + end_data["messageOrdinal"], + end_data.get("chunkOrdinal", 0), + ) + if end_data + else None + ), + ), + knowledge_type=row[2], + ) ) - for o in ordinals - ] + return result async def append(self, item: interfaces.SemanticRef) -> None: cursor = self.db.cursor() From 743d8dbc6f1de91960437d144954e57642eb56e7 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Fri, 10 Apr 2026 05:16:38 -0500 Subject: [PATCH 7/7] Add pytest-async-benchmark as dev dependency --- pyproject.toml | 4 ++++ uv.lock | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 1339e34e..3cb25d7a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,9 @@ Documentation = "https://github.com/microsoft/typeagent-py/tree/main/docs/README [tool.uv.build-backend] module-root = "src" +[tool.uv.sources] +pytest-async-benchmark = { git = "https://github.com/KRRT7/pytest-async-benchmark.git", rev = "feat/pedantic-mode" } + [tool.pytest.ini_options] asyncio_default_fixture_loop_scope = "function" testpaths = ["tests"] @@ -91,6 +94,7 @@ dev = [ "opentelemetry-instrumentation-httpx>=0.57b0", "pyright>=1.1.408", # 407 has a regression "pytest>=8.3.5", + "pytest-async-benchmark", "pytest-asyncio>=0.26.0", "pytest-mock>=3.14.0", ] diff --git a/uv.lock b/uv.lock index 4eab7ead..e2b66a3d 100644 --- a/uv.lock +++ b/uv.lock @@ -1922,6 +1922,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, ] +[[package]] +name = "pytest-async-benchmark" +version = "0.2.0" +source = { git = "https://github.com/KRRT7/pytest-async-benchmark.git?rev=feat%2Fpedantic-mode#029d03634d140789baebc6c3c8f72d5c81a67f9a" } +dependencies = [ + { name = "pytest" }, + { name = "rich" }, +] + [[package]] name = "pytest-asyncio" version = "1.3.0" @@ -2398,6 +2407,7 @@ dev = [ { name = "opentelemetry-instrumentation-httpx" }, { name = "pyright" }, { name = "pytest" }, + { name = "pytest-async-benchmark" }, { name = "pytest-asyncio" }, { name = "pytest-mock" }, ] @@ -2436,6 +2446,7 @@ dev = [ { name = "opentelemetry-instrumentation-httpx", specifier = ">=0.57b0" }, { name = "pyright", specifier = ">=1.1.408" }, { name = "pytest", specifier = ">=8.3.5" }, + { name = "pytest-async-benchmark", git = "https://github.com/KRRT7/pytest-async-benchmark.git?rev=feat%2Fpedantic-mode" }, { name = "pytest-asyncio", specifier = ">=0.26.0" }, { name = "pytest-mock", specifier = ">=3.14.0" }, ]