Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<img src="src/front/static/global/img/ontobricks-icon.svg" alt="OntoBricks Logo" width="120" height="120">
</p>

<h1 align="center">OntoBricks 0.5.1</h1>
<h1 align="center">OntoBricks 0.5.2</h1>

<p align="center">
<strong>Digital Twin Builder for Databricks</strong>
Expand Down
24 changes: 24 additions & 0 deletions changelogs/v0.5.2/benoitcayladbx_2026-06-22.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# MCP Server: retry on 502/503 (Databricks Apps cold-start)

## Context
When the MCP server calls the main OntoBricks app during a Databricks Apps
cold-start or idle-sleep window, the platform proxy returns 502 or 503 until
the app is ready. Previously, `_get` and `_post` raised immediately on any
4xx/5xx response, so a single cold start failed the entire MCP tool call.

## Changes

1. `src/mcp-server/server/app.py` — added `import asyncio` (top-level imports)
2. `src/mcp-server/server/app.py` — added module-level constants `_RETRY_STATUSES = {502, 503}`
and `_RETRY_DELAYS = (5, 10, 20)` (seconds between attempts)
3. `src/mcp-server/server/app.py` — `_get`: retries up to 3 times on 502/503
(4 attempts in total), sleeping 5 s → 10 s → 20 s between attempts; each
502/503 response is logged at WARNING level; if the final attempt still
fails, the error is raised via `raise_for_status()`
4. `src/mcp-server/server/app.py` — `_post`: same retry logic as `_get`

## Modified files
- `src/mcp-server/server/app.py`

## Test results
2283 passed, 15 skipped (unit tests only; the e2e tests were skipped because of
a Python 3.9 union-type syntax incompatibility in conftest.py, which is
unrelated to this change)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "ontobricks"
version = "0.5.1"
version = "0.5.2"
description = "Ontology Management Tool for Databricks"
requires-python = ">=3.10"
dependencies = [
Expand Down
22 changes: 11 additions & 11 deletions src/back/core/w3c/r2rml/R2RMLGenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def _add_entity_mapping(
g.add((triples_map, RDFS.comment, Literal(comment)))

# Logical Table - using SQL query or table name
logical_table = BNode()
logical_table = BNode(f"lt_{map_name}")
g.add((triples_map, self.rr.logicalTable, logical_table))

if sql_query:
Expand All @@ -256,7 +256,7 @@ def _add_entity_mapping(
)

# Subject Map
subject_map = BNode()
subject_map = BNode(f"sm_{map_name}")
g.add((triples_map, self.rr.subjectMap, subject_map))

# Template for subject URI - ALWAYS use taxonomy base URI.
Expand Down Expand Up @@ -286,11 +286,11 @@ def _add_entity_mapping(

# Add label column mapping if specified
if label_column:
pom = BNode()
pom = BNode(f"pom_{map_name}_label")
g.add((triples_map, self.rr.predicateObjectMap, pom))
g.add((pom, self.rr.predicate, RDFS.label))

obj_map = BNode()
obj_map = BNode(f"om_{map_name}_label")
g.add((pom, self.rr.objectMap, obj_map))
g.add((obj_map, self.rr.column, Literal(label_column)))

Expand All @@ -299,11 +299,11 @@ def _add_entity_mapping(

# Add attribute mappings (DatatypeProperty mappings)
if attribute_mappings:
for attr_name, column_name in attribute_mappings.items():
for attr_name, column_name in sorted(attribute_mappings.items()):
if not column_name:
continue

pom = BNode()
pom = BNode(f"pom_{map_name}_{self._sanitize_name(attr_name)}")
g.add((triples_map, self.rr.predicateObjectMap, pom))

# Prefer the ontology property URI when it matches the
Expand All @@ -320,7 +320,7 @@ def _add_entity_mapping(
)
g.add((pom, self.rr.predicate, attr_uri))

obj_map = BNode()
obj_map = BNode(f"om_{map_name}_{self._sanitize_name(attr_name)}")
g.add((pom, self.rr.objectMap, obj_map))
g.add((obj_map, self.rr.column, Literal(column_name)))
g.add((obj_map, self.rr.datatype, XSD.string)) # Default to string
Expand Down Expand Up @@ -445,7 +445,7 @@ def _add_relationship_mapping(
)

# Logical Table - using SQL query if provided
logical_table = BNode()
logical_table = BNode(f"lt_{map_name}")
g.add((triples_map, self.rr.logicalTable, logical_table))

if sql_query:
Expand All @@ -468,7 +468,7 @@ def _add_relationship_mapping(
)

# Subject Map - references the source entity
subject_map = BNode()
subject_map = BNode(f"sm_{map_name}")
g.add((triples_map, self.rr.subjectMap, subject_map))

source_class_name = self._resolve_class_name(
Expand All @@ -489,7 +489,7 @@ def _add_relationship_mapping(
)

# Predicate Object Map
pom = BNode()
pom = BNode(f"pom_{map_name}")
g.add((triples_map, self.rr.predicateObjectMap, pom))

# Predicate Map - the relationship property.
Expand All @@ -511,7 +511,7 @@ def _add_relationship_mapping(
g.add((pom, self.rr.predicate, URIRef(f"{self.base_uri}{property_uri}")))

# Object Map - reference to target entity
obj_map = BNode()
obj_map = BNode(f"om_{map_name}")
g.add((pom, self.rr.objectMap, obj_map))

target_class_name = self._resolve_class_name(
Expand Down
7 changes: 7 additions & 0 deletions src/mcp-server/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,10 @@ build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["server"]

[dependency-groups]
dev = [
"anyio>=4.13.0",
"pytest>=9.1.0",
"pytest-asyncio>=1.4.0",
]
59 changes: 52 additions & 7 deletions src/mcp-server/server/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

from __future__ import annotations

import asyncio
import json
import logging
import os
Expand Down Expand Up @@ -423,6 +424,10 @@ def _get_auth_headers(mode: str) -> dict:
return {}


_RETRY_STATUSES = {502, 503}
_RETRY_DELAYS = (5, 10, 20) # seconds slept between attempts; ~35 s of back-off in total, excluding per-request time


async def _get(
client: httpx.AsyncClient, path: str, params: dict | None = None
) -> dict:
Expand All @@ -433,17 +438,35 @@ async def _get(
empty payloads in the Apps log stream. On non-2xx responses we
log a body excerpt before re-raising so the caller (and the LLM)
sees an actionable error instead of a bare ``HTTPStatusError``.

Retries up to 3 times on 502/503 (Databricks Apps cold-start window)
with exponential back-off: 5 s → 10 s → 20 s.
"""
logger.info("GET %s%s params=%s", client.base_url, path, params or {})
resp = await client.get(path, params=params, timeout=120)
resp: httpx.Response | None = None
for attempt, delay in enumerate((*_RETRY_DELAYS, None), start=1):
resp = await client.get(path, params=params, timeout=120)
if resp.status_code not in _RETRY_STATUSES:
break
body_excerpt = resp.text[:300].replace("\n", " ") if resp.text else ""
if delay is None:
logger.warning(
"GET %s%s → %s (attempt %d/%d, giving up) body=%r",
client.base_url, path, resp.status_code,
attempt, len(_RETRY_DELAYS) + 1, body_excerpt,
)
else:
logger.warning(
"GET %s%s → %s (attempt %d/%d, retrying in %ds) body=%r",
client.base_url, path, resp.status_code,
attempt, len(_RETRY_DELAYS) + 1, delay, body_excerpt,
)
await asyncio.sleep(delay)
if resp.status_code >= 400:
body_excerpt = resp.text[:500].replace("\n", " ") if resp.text else ""
logger.warning(
"GET %s%s → %s body=%r",
client.base_url,
path,
resp.status_code,
body_excerpt,
client.base_url, path, resp.status_code, body_excerpt,
)
else:
logger.info("GET %s%s → %s", client.base_url, path, resp.status_code)
Expand All @@ -454,9 +477,31 @@ async def _get(
async def _post(
client: httpx.AsyncClient, path: str, json: dict | None = None
) -> dict:
"""POST *path* on *client* with optional JSON body and return the JSON response."""
"""POST *path* on *client* with optional JSON body and return the JSON response.

Retries up to 3 times on 502/503 (Databricks Apps cold-start window)
with exponential back-off: 5 s → 10 s → 20 s.
"""
logger.info("POST %s%s", client.base_url, path)
resp = await client.post(path, json=json or {}, timeout=120)
resp: httpx.Response | None = None
for attempt, delay in enumerate((*_RETRY_DELAYS, None), start=1):
resp = await client.post(path, json=json or {}, timeout=120)
if resp.status_code not in _RETRY_STATUSES:
break
body_excerpt = resp.text[:300].replace("\n", " ") if resp.text else ""
if delay is None:
logger.warning(
"POST %s%s → %s (attempt %d/%d, giving up) body=%r",
client.base_url, path, resp.status_code,
attempt, len(_RETRY_DELAYS) + 1, body_excerpt,
)
else:
logger.warning(
"POST %s%s → %s (attempt %d/%d, retrying in %ds) body=%r",
client.base_url, path, resp.status_code,
attempt, len(_RETRY_DELAYS) + 1, delay, body_excerpt,
)
await asyncio.sleep(delay)
if resp.status_code >= 400:
body_excerpt = resp.text[:500].replace("\n", " ") if resp.text else ""
logger.warning("POST %s%s → %s body=%r", client.base_url, path, resp.status_code, body_excerpt)
Expand Down
Loading