diff --git a/.gitignore b/.gitignore index 4241e28..0727150 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,4 @@ CLAUDE.md DEVLOG*.md **/.ipynb_checkpoints/ +.claude/ diff --git a/coordinode-embedded/.gitignore b/coordinode-embedded/.gitignore index 2c96eb1..db4b861 100644 --- a/coordinode-embedded/.gitignore +++ b/coordinode-embedded/.gitignore @@ -1,2 +1,5 @@ target/ Cargo.lock +*.so +*.pyd +*.dylib diff --git a/coordinode-rs b/coordinode-rs index 21ae71c..e0694e5 160000 --- a/coordinode-rs +++ b/coordinode-rs @@ -1 +1 @@ -Subproject commit 21ae71c381a0f1cb2bd625410fc5585a9d6d080c +Subproject commit e0694e583a4ccb7e42fd29bfff89b51ce9964e72 diff --git a/coordinode/README.md b/coordinode/README.md index b2ad25f..575319f 100644 --- a/coordinode/README.md +++ b/coordinode/README.md @@ -105,7 +105,7 @@ db.cypher( params={"title": "RAG intro", "vec": [0.1] * 384}, ) -# Nearest-neighbour search (requires HNSW index — coming in v0.4) +# Nearest-neighbour search results = db.vector_search( label="Doc", property="embedding", @@ -117,6 +117,70 @@ for r in results: print(r.node.id, r.distance) ``` +## Hybrid Search (v0.4+) + +Fuse BM25 full-text and vector similarity using Cypher scoring functions: + +```python +# Full-text scoring (text_score / text_match) requires a TEXT INDEX on the +# queried property — without it those calls return zero/no matches. +db.create_text_index("idx_doc_body", "Doc", "body") + +# Reciprocal Rank Fusion of text + vector. Projecting `d AS doc_id` returns the +# internal node id (an integer) — fetch properties explicitly when needed. +rows = db.cypher(""" + MATCH (d:Doc) + WHERE text_match(d, $q) OR d.embedding IS NOT NULL + RETURN d AS doc_id, + d.title AS title, + rrf_score( + text_score(d, $q), + vec_score(d.embedding, $vec) + ) AS score + ORDER BY score DESC LIMIT 10 +""", params={"q": "graph neural network", "vec": [0.1] * 384}) +# Full node properties: db.get_node(rows[0]["doc_id"]). +``` + +Helpers available in Cypher (evaluated server-side in coordinode-rs ≥ v0.4.0): +``text_score``, ``vec_score``, ``doc_score``, ``text_match``, ``rrf_score``, +``hybrid_score``. These are built-in Cypher functions; nothing to import on the +Python side. + +## ATTACH / DETACH DOCUMENT (v0.4+) + +Promote a nested property to a graph node (and back): + +```python +db.cypher("MATCH (a:Article {id: $id}) DETACH DOCUMENT a.body AS (d:Body)", + params={"id": 1}) +db.cypher("MATCH (a:Article {id: $id})-[:HAS_BODY]->(d:Body) " + "ATTACH DOCUMENT d INTO a.body", params={"id": 1}) +``` + +## Consistency Controls + +```python +# Majority read for strict freshness. `n AS node_id` returns the integer id; +# use get_node(id) or project explicit properties (e.g. n.email AS email). +db.cypher( + "MATCH (n:Account) RETURN n AS node_id, n.email AS email", + read_concern="majority", +) + +# Majority write (required for causal reads) +db.cypher("CREATE (n:Event {t: timestamp()})", write_concern="majority") + +# Causal read: see at least state at raft index 42 +db.cypher("MATCH (n) RETURN count(n) AS total", after_index=42) +``` + +Accepted values: + +- ``read_concern``: ``local`` (default) · ``majority`` · ``linearizable`` · ``snapshot`` +- ``write_concern``: ``w0`` · ``w1`` (default) · ``majority`` +- ``read_preference``: ``primary`` (default) · ``primary_preferred`` · ``secondary`` · ``secondary_preferred`` · ``nearest`` + ## Related Packages | Package | Description | diff --git a/coordinode/coordinode/__init__.py b/coordinode/coordinode/__init__.py index 423f831..c728f12 100644 --- a/coordinode/coordinode/__init__.py +++ b/coordinode/coordinode/__init__.py @@ -23,7 +23,6 @@ CoordinodeClient, EdgeResult, EdgeTypeInfo, - HybridResult, LabelInfo, NodeResult, PropertyDefinitionInfo, @@ -44,7 +43,6 @@ "EdgeResult", "VectorResult", "TextResult", - "HybridResult", "LabelInfo", "EdgeTypeInfo", "PropertyDefinitionInfo", diff --git a/coordinode/coordinode/client.py b/coordinode/coordinode/client.py index 1587556..f65d598 100644 --- a/coordinode/coordinode/client.py +++ b/coordinode/coordinode/client.py @@ -113,23 +113,6 @@ def __repr__(self) -> str: return f"TextResult(node_id={self.node_id}, score={self.score:.4f}, snippet={self.snippet!r})" -class HybridResult: - """A single result from hybrid text + vector search (RRF-ranked).""" - - def __init__(self, proto_result: Any) -> None: - self.node_id: int = proto_result.node_id - # Combined RRF score: text_weight/(60+rank_text) + vector_weight/(60+rank_vec). - self.score: float = proto_result.score - # NOTE: proto HybridResult carries only node_id + score (no embedded Node - # message). A full node is not included by design — the server returns IDs - # for efficiency. Callers that need node properties should use the client - # API: `client.get_node(self.node_id)`, or match on an application-level - # property in Cypher (e.g. WHERE n.id = ). - - def __repr__(self) -> str: - return f"HybridResult(node_id={self.node_id}, score={self.score:.6f})" - - class PropertyDefinitionInfo: """A property definition from the schema (name, type, required, unique).""" @@ -270,16 +253,54 @@ async def cypher( self, query: str, params: dict[str, PyValue] | None = None, + *, + read_concern: str | None = None, + write_concern: str | None = None, + read_preference: str | None = None, + after_index: int | None = None, ) -> list[dict[str, Any]]: - """Execute an OpenCypher query. Returns rows as list of dicts.""" + """Execute an OpenCypher query. Returns rows as list of dicts. + + Consistency parameters (all optional; server defaults apply when omitted): + + - ``read_concern``: ``"local"`` (default), ``"majority"``, ``"linearizable"``, ``"snapshot"``. + - ``write_concern``: ``"w0"``, ``"w1"`` (default, leader-ack), ``"majority"``. Required + ``"majority"`` when using causal reads (``after_index`` > 0). + - ``read_preference``: ``"primary"`` (default), ``"primary_preferred"``, ``"secondary"``, + ``"secondary_preferred"``, ``"nearest"``. + - ``after_index``: raft log index for causal reads — returned rows reflect at least + the state at this index. + """ from coordinode._proto.coordinode.v1.query.cypher_pb2 import ( # type: ignore[import] ExecuteCypherRequest, ) + # Validate after_index type/range BEFORE any numeric comparison so that + # True (bool is a subclass of int) and "7" (str) produce a clear + # "must be a non-negative integer" error instead of a misleading + # causal-read violation or a raw TypeError. + if after_index is not None and ( + not isinstance(after_index, int) or isinstance(after_index, bool) or after_index < 0 + ): + raise ValueError(f"after_index must be a non-negative integer, got {after_index!r}") + # Causal reads (after_index > 0) are only satisfiable when writes were + # acknowledged by a majority; otherwise the referenced index may never + # replicate and the read would hang. Mirror the server's rejection. + if after_index is not None and after_index > 0 and (write_concern or "").strip().lower() != "majority": + raise ValueError( + "after_index > 0 requires write_concern='majority' — causal reads " + "depend on majority-committed writes. Pass write_concern='majority'." + ) req = ExecuteCypherRequest( query=query, parameters=dict_to_props(params or {}), ) + if read_concern is not None or after_index is not None: + req.read_concern.CopyFrom(_make_read_concern(read_concern, after_index)) + if write_concern is not None: + req.write_concern.CopyFrom(_make_write_concern(write_concern)) + if read_preference is not None: + req.read_preference = _make_read_preference(read_preference) resp = await self._cypher_stub.ExecuteCypher(req, timeout=self._timeout) columns = list(resp.columns) return [{col: from_property_value(val) for col, val in zip(columns, row.values)} for row in resp.rows] @@ -776,64 +797,6 @@ async def text_search( resp = await self._text_stub.TextSearch(req, timeout=self._timeout) return [TextResult(r) for r in resp.results] - async def hybrid_text_vector_search( - self, - label: str, - text_query: str, - vector: Sequence[float], - *, - limit: int = 10, - text_weight: float = 0.5, - vector_weight: float = 0.5, - vector_property: str = "embedding", - ) -> list[HybridResult]: - """Fuse BM25 text search and cosine vector search using Reciprocal Rank Fusion (RRF). - - Runs text and vector searches independently, then combines their ranked - lists:: - - rrf_score(node) = text_weight / (60 + rank_text) - + vector_weight / (60 + rank_vec) - - Args: - label: Node label to search (e.g. ``"Article"``). - text_query: Full-text query string (same syntax as :meth:`text_search`). - vector: Query embedding vector. Must match the dimensionality stored - in *vector_property*. - limit: Maximum fused results to return (default 10). The server may - apply its own upper bound; pass a reasonable value (e.g. ≤ 1000). - text_weight: Weight for the BM25 component (default 0.5). - vector_weight: Weight for the cosine component (default 0.5). - vector_property: Node property containing the embedding (default - ``"embedding"``). - - Returns: - List of :class:`HybridResult` ordered by RRF score descending. - - Note: - A full-text index covering *label* **must exist** before calling this - method — create one with :meth:`create_text_index` or a - ``CREATE TEXT INDEX`` Cypher statement. Calling this method on a - label without a text index returns an empty list. - """ - if not isinstance(limit, int) or isinstance(limit, bool) or limit < 1: - raise ValueError(f"limit must be an integer >= 1, got {limit!r}.") - from coordinode._proto.coordinode.v1.query.text_pb2 import ( # type: ignore[import] - HybridTextVectorSearchRequest, - ) - - req = HybridTextVectorSearchRequest( - label=label, - text_query=text_query, - vector=[float(v) for v in vector], - limit=limit, - text_weight=text_weight, - vector_weight=vector_weight, - vector_property=vector_property, - ) - resp = await self._text_stub.HybridTextVectorSearch(req, timeout=self._timeout) - return [HybridResult(r) for r in resp.results] - async def health(self) -> bool: from coordinode._proto.coordinode.v1.health.health_pb2 import ( # type: ignore[import] HealthCheckRequest, @@ -907,9 +870,23 @@ def cypher( self, query: str, params: dict[str, PyValue] | None = None, + *, + read_concern: str | None = None, + write_concern: str | None = None, + read_preference: str | None = None, + after_index: int | None = None, ) -> list[dict[str, Any]]: - """Execute an OpenCypher query. Returns rows as list of dicts.""" - return self._run(self._async.cypher(query, params)) + """Execute an OpenCypher query. See :meth:`AsyncCoordinodeClient.cypher` for consistency args.""" + return self._run( + self._async.cypher( + query, + params, + read_concern=read_concern, + write_concern=write_concern, + read_preference=read_preference, + after_index=after_index, + ) + ) def vector_search( self, @@ -1016,34 +993,67 @@ def text_search( """Run a full-text BM25 search over all indexed text properties for *label*.""" return self._run(self._async.text_search(label, query, limit=limit, fuzzy=fuzzy, language=language)) - def hybrid_text_vector_search( - self, - label: str, - text_query: str, - vector: Sequence[float], - *, - limit: int = 10, - text_weight: float = 0.5, - vector_weight: float = 0.5, - vector_property: str = "embedding", - ) -> list[HybridResult]: - """Fuse BM25 text search and cosine vector search using RRF ranking.""" - return self._run( - self._async.hybrid_text_vector_search( - label, - text_query, - vector, - limit=limit, - text_weight=text_weight, - vector_weight=vector_weight, - vector_property=vector_property, - ) - ) - def health(self) -> bool: return self._run(self._async.health()) +# ── Consistency helpers ────────────────────────────────────────────────────── + + +_READ_CONCERN_MAP = { + "local": "READ_CONCERN_LEVEL_LOCAL", + "majority": "READ_CONCERN_LEVEL_MAJORITY", + "linearizable": "READ_CONCERN_LEVEL_LINEARIZABLE", + "snapshot": "READ_CONCERN_LEVEL_SNAPSHOT", +} +_WRITE_CONCERN_MAP = { + "w0": "WRITE_CONCERN_LEVEL_W0", + "w1": "WRITE_CONCERN_LEVEL_W1", + "majority": "WRITE_CONCERN_LEVEL_MAJORITY", +} +_READ_PREFERENCE_MAP = { + "primary": "READ_PREFERENCE_PRIMARY", + "primary_preferred": "READ_PREFERENCE_PRIMARY_PREFERRED", + "secondary": "READ_PREFERENCE_SECONDARY", + "secondary_preferred": "READ_PREFERENCE_SECONDARY_PREFERRED", + "nearest": "READ_PREFERENCE_NEAREST", +} + + +def _normalize_consistency_key(value: Any, field: str, mapping: dict[str, str]) -> str: + if not isinstance(value, str) or not value.strip(): + raise ValueError(f"{field} must be a non-empty string; got {value!r}") + enum_name = mapping.get(value.strip().lower()) + if enum_name is None: + raise ValueError(f"invalid {field} {value!r}; expected one of {sorted(mapping)}") + return enum_name + + +def _make_read_concern(level: str | None, after_index: int | None) -> Any: + from coordinode._proto.coordinode.v1.replication import consistency_pb2 as pb # type: ignore[import] + + kwargs: dict[str, Any] = {} + if level is not None: + kwargs["level"] = getattr(pb, _normalize_consistency_key(level, "read_concern", _READ_CONCERN_MAP)) + if after_index is not None: + if not isinstance(after_index, int) or isinstance(after_index, bool) or after_index < 0: + raise ValueError(f"after_index must be a non-negative integer, got {after_index!r}") + kwargs["after_index"] = after_index + return pb.ReadConcern(**kwargs) + + +def _make_write_concern(level: str) -> Any: + from coordinode._proto.coordinode.v1.replication import consistency_pb2 as pb # type: ignore[import] + + return pb.WriteConcern(level=getattr(pb, _normalize_consistency_key(level, "write_concern", _WRITE_CONCERN_MAP))) + + +def _make_read_preference(pref: str) -> Any: + from coordinode._proto.coordinode.v1.replication import consistency_pb2 as pb # type: ignore[import] + + return getattr(pb, _normalize_consistency_key(pref, "read_preference", _READ_PREFERENCE_MAP)) + + # ── Stub factories (deferred import) ───────────────────────────────────────── diff --git a/demo/README.md b/demo/README.md index 974cfd7..c6992b9 100644 --- a/demo/README.md +++ b/demo/README.md @@ -13,8 +13,8 @@ Interactive notebooks for LlamaIndex, LangChain, and LangGraph integrations. > **Note:** First run installs `coordinode-embedded` from source (Rust build, ~5 min). > Subsequent runs use Colab's pip cache. -> The embedded Colab install is pinned to a specific commit that bundles coordinode-rs v0.3.17; the Colab notebook links above target `main`. -> The Docker Compose stack below uses the CoordiNode **server** image v0.3.17. +> The embedded Colab install is pinned to a specific commit that bundles coordinode-rs v0.4.1; the Colab notebook links above target `main`. +> The Docker Compose stack below uses the CoordiNode **server** image v0.4.1. ## Run locally (Docker Compose) diff --git a/demo/docker-compose.yml b/demo/docker-compose.yml index 48ba3f8..39b924a 100644 --- a/demo/docker-compose.yml +++ b/demo/docker-compose.yml @@ -1,7 +1,7 @@ services: coordinode: # Keep version in sync with root docker-compose.yml - image: ghcr.io/structured-world/coordinode:0.3.17 + image: ghcr.io/structured-world/coordinode:0.4.1 container_name: demo-coordinode ports: - "127.0.0.1:37080:7080" # gRPC (native API) — localhost-only diff --git a/demo/notebooks/00_seed_data.ipynb b/demo/notebooks/00_seed_data.ipynb index 6961f92..23e306c 100644 --- a/demo/notebooks/00_seed_data.ipynb +++ b/demo/notebooks/00_seed_data.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "a1b2c3d4-0000-0000-0000-000000000001", + "id": "0", "metadata": {}, "source": [ "# Seed Demo Data\n", @@ -11,26 +11,28 @@ "\n", "Populates CoordiNode with a **tech industry knowledge graph**.\n", "\n", - "> **Note:** When using `coordinode-embedded` (`LocalClient(\":memory:\")`), the seeded data\n", - "> lives only inside this notebook process — notebooks 01–03 will start with an empty graph.\n", - "> To share the graph across notebooks, point all of them at the same running CoordiNode\n", - "> server via `COORDINODE_ADDR`.\n", + "> **Note:** Embedded mode writes to `COORDINODE_EMBEDDED_PATH` (default\n", + "> `/content/coordinode-demo.db` in Colab, the OS temp dir locally), so\n", + "> the seeded graph persists across cell reruns and is visible to sibling demo\n", + "> notebooks within the same runtime. Delete the file or set a different path to\n", + "> reset. Connecting to a real CoordiNode server via `COORDINODE_ADDR` is also\n", + "> supported.\n", "\n", "**Graph contents:**\n", "- 10 people (engineers, researchers, founders)\n", "- 6 companies\n", "- 8 technologies / research areas\n", - "- ~35 relationships (WORKS_AT, FOUNDED, KNOWS, RESEARCHES, INVENTED, ACQUIRED, USES, …)\n", + "- ~35 relationships (WORKS_AT, FOUNDED, KNOWS, RESEARCHES, INVENTED, ACQUIRED, USES, \u2026)\n", "\n", "All nodes carry a `demo=true` property and a `demo_tag` equal to the `DEMO_TAG` variable\n", "set in the seed cell. MERGE operations and cleanup are scoped to that tag, so only nodes\n", "with the matching `demo_tag` are written or removed.\n", "\n", "**Environments:**\n", - "- **Google Colab** — uses `coordinode-embedded` (in-process Rust engine, no server needed). First run compiles from source (~5 min); subsequent runs use the pip cache.\n", - "- **Local / Docker Compose** — connects to a running CoordiNode server via gRPC.\n", + "- **Google Colab** \u2014 uses `coordinode-embedded` (in-process Rust engine, no server needed). First run compiles from source (~5 min); subsequent runs use the pip cache.\n", + "- **Local / Docker Compose** \u2014 connects to a running CoordiNode server via gRPC.\n", "\n", - "> **⚠️ Note for real-server use:** All writes and the cleanup step are scoped to `demo_tag`.\n", + "> **\u26a0\ufe0f Note for real-server use:** All writes and the cleanup step are scoped to `demo_tag`.\n", "> Collisions can occur if multiple runs reuse the same `demo_tag` value or if `demo_tag` is\n", "> empty. Run against a fresh/empty database or choose a unique `demo_tag` to avoid affecting\n", "> unrelated nodes." @@ -38,7 +40,7 @@ }, { "cell_type": "markdown", - "id": "a1b2c3d4-0000-0000-0000-000000000002", + "id": "1", "metadata": {}, "source": [ "## Install dependencies" @@ -47,7 +49,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a1b2c3d4-0000-0000-0000-000000000003", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -56,12 +58,12 @@ "IN_COLAB = \"google.colab\" in sys.modules\n", "\n", "# Install coordinode-embedded only when running in Colab AND no gRPC server is configured.\n", - "# If COORDINODE_ADDR is set, a live server is already available — skip the 5-min Rust build.\n", + "# If COORDINODE_ADDR is set, a live server is already available \u2014 skip the 5-min Rust build.\n", "if IN_COLAB and not os.environ.get(\"COORDINODE_ADDR\"):\n", " # Install Rust toolchain via rustup (https://rustup.rs).\n", - " # Colab's apt packages ship rustc ≤1.75, which cannot build coordinode-embedded\n", - " # (requires Rust ≥1.80 for maturin/pyo3). apt-get is not a viable alternative here.\n", - " # Download the installer to a temp file and execute it explicitly — this avoids\n", + " # Colab's apt packages ship rustc \u22641.75, which cannot build coordinode-embedded\n", + " # (requires Rust \u22651.80 for maturin/pyo3). apt-get is not a viable alternative here.\n", + " # Download the installer to a temp file and execute it explicitly \u2014 this avoids\n", " # piping remote content directly into a shell while maintaining HTTPS/TLS security\n", " # through Python's default ssl context (cert-verified, TLS 1.2+).\n", " # SHA256 pinning of rustup-init is intentionally omitted: rustup.rs does not\n", @@ -74,8 +76,13 @@ " # never runs when a live gRPC server is available, so there is no risk of\n", " # unintentional execution in local or server environments.\n", " # Security note: downloading rustup-init via HTTPS with cert verification and\n", - " # executing from a temp file (not piped to shell) is by design — this is the\n", + " # executing from a temp file (not piped to shell) is by design \u2014 this is the\n", " # rustup project's own recommended install method for automated environments.\n", + " # protoc is required by coordinode-raft build (prost-build). Skip if already present (faster reruns),\n", + " # otherwise refresh apt indexes first \u2014 Colab caches can go stale on long-lived runtimes.\n", + " if subprocess.run([\"which\", \"protoc\"], capture_output=True).returncode != 0:\n", + " subprocess.run([\"apt-get\", \"update\", \"-y\", \"-q\"], check=True, timeout=120)\n", + " subprocess.run([\"apt-get\", \"install\", \"-y\", \"-q\", \"protobuf-compiler\"], check=True, timeout=120)\n", " import ssl as _ssl, tempfile as _tmp, urllib.request as _ur\n", "\n", " _ctx = _ssl.create_default_context()\n", @@ -98,12 +105,17 @@ " \"pip\",\n", " \"install\",\n", " \"-q\",\n", - " \"git+https://github.com/structured-world/coordinode-python.git@8da94d694ecaabee6f8380147d02f08220061bfa#subdirectory=coordinode-embedded\",\n", + " \"git+https://github.com/structured-world/coordinode-python.git@50ddc08a89a21fca73be007cb22b57a0054225c3#subdirectory=coordinode-embedded\",\n", " ],\n", " check=True,\n", " timeout=600,\n", " )\n", "\n", + "_coordinode_spec = (\n", + " \"git+https://github.com/structured-world/coordinode-python.git@50ddc08a89a21fca73be007cb22b57a0054225c3#subdirectory=coordinode\"\n", + " if IN_COLAB\n", + " else \"coordinode\"\n", + ")\n", "subprocess.run(\n", " [\n", " sys.executable,\n", @@ -111,7 +123,7 @@ " \"pip\",\n", " \"install\",\n", " \"-q\",\n", - " \"coordinode\",\n", + " _coordinode_spec,\n", " \"nest_asyncio\",\n", " ],\n", " check=True,\n", @@ -127,27 +139,34 @@ }, { "cell_type": "markdown", - "id": "a1b2c3d4-0000-0000-0000-000000000004", + "id": "3", "metadata": {}, - "source": "## Connect to CoordiNode\n\n- **Colab**: uses `LocalClient(\":memory:\")` — in-process embedded engine, no server required.\n- **Local with server**: connects to an existing CoordiNode on port 7080 (set `COORDINODE_ADDR` to override).\n- **Local without server**: falls back to `coordinode-embedded` if already installed (see [coordinode-embedded](https://github.com/structured-world/coordinode-python/tree/main/coordinode-embedded)); otherwise shows a `RuntimeError` with install instructions." + "source": [ + "## Connect to CoordiNode\n", + "\n", + "- **Colab**: uses `LocalClient(COORDINODE_EMBEDDED_PATH)` \u2014 in-process embedded engine backed by a file under `/content/`, no server required.\n", + "- **Local with server**: set `COORDINODE_ADDR=host:port` to point at a running CoordiNode (no auto-probe \u2014 explicit only).\n", + "- **Local without server**: uses `coordinode-embedded` (file-backed at `COORDINODE_EMBEDDED_PATH`, defaulting to the OS temp dir). Raises `RuntimeError` with install instructions if the package is missing." + ] }, { "cell_type": "code", "execution_count": null, - "id": "a1b2c3d4-0000-0000-0000-000000000005", + "id": "4", "metadata": {}, "outputs": [], "source": [ - "import os, socket\n", - "\n", - "\n", - "def _port_open(port):\n", - " try:\n", - " with socket.create_connection((\"127.0.0.1\", port), timeout=1):\n", - " return True\n", - " except OSError:\n", - " return False\n", - "\n", + "import os, tempfile\n", + "\n", + "# Persistent embedded DB path. Colab has /content which persists across cell\n", + "# reruns within a runtime session; locally fall back to the OS temp dir\n", + "# (portable across Linux/macOS/Windows). Override via COORDINODE_EMBEDDED_PATH.\n", + "COORDINODE_EMBEDDED_PATH = os.environ.get(\n", + " \"COORDINODE_EMBEDDED_PATH\",\n", + " \"/content/coordinode-demo.db\"\n", + " if os.path.isdir(\"/content\")\n", + " else os.path.join(tempfile.gettempdir(), \"coordinode-demo.db\"),\n", + ")\n", "\n", "if os.environ.get(\"COORDINODE_ADDR\"):\n", " COORDINODE_ADDR = os.environ[\"COORDINODE_ADDR\"]\n", @@ -155,59 +174,59 @@ "\n", " client = CoordinodeClient(COORDINODE_ADDR)\n", " if not client.health():\n", + " client.close()\n", " raise RuntimeError(f\"Health check failed for {COORDINODE_ADDR}\")\n", " print(f\"Connected to {COORDINODE_ADDR}\")\n", "else:\n", + " # No explicit server \u2014 use the embedded in-process engine backed by a file\n", + " # so the graph persists across cell reruns and between sibling demo\n", + " # notebooks within the same runtime.\n", " try:\n", - " grpc_port = int(os.environ.get(\"COORDINODE_PORT\", \"7080\"))\n", - " except ValueError as exc:\n", - " raise RuntimeError(\"COORDINODE_PORT must be an integer\") from exc\n", - "\n", - " if _port_open(grpc_port):\n", - " COORDINODE_ADDR = f\"localhost:{grpc_port}\"\n", - " from coordinode import CoordinodeClient\n", - "\n", - " client = CoordinodeClient(COORDINODE_ADDR)\n", - " if not client.health():\n", - " raise RuntimeError(f\"Health check failed for {COORDINODE_ADDR}\")\n", - " print(f\"Connected to {COORDINODE_ADDR}\")\n", - " else:\n", - " # No server available — use the embedded in-process engine.\n", - " try:\n", - " from coordinode_embedded import LocalClient\n", - " except ImportError as exc:\n", - " raise RuntimeError(\n", - " \"coordinode-embedded is not installed. \"\n", - " \"Run: pip install git+https://github.com/structured-world/coordinode-python.git@8da94d694ecaabee6f8380147d02f08220061bfa#subdirectory=coordinode-embedded\"\n", - " \" — or start a CoordiNode server and set COORDINODE_ADDR.\"\n", - " ) from exc\n", - "\n", - " client = LocalClient(\":memory:\")\n", - " print(\"Using embedded LocalClient (in-process)\")" + " from coordinode_embedded import LocalClient\n", + " except ImportError as exc:\n", + " raise RuntimeError(\n", + " \"coordinode-embedded is not installed. \"\n", + " \"In Colab, rerun the install cell above. \"\n", + " \"Locally, install from source: \"\n", + " \"pip install 'git+https://github.com/structured-world/coordinode-python.git#subdirectory=coordinode-embedded' \"\n", + " \"(requires Rust toolchain, ~5 min build). \"\n", + " \"Alternatively start a CoordiNode server and set COORDINODE_ADDR.\"\n", + " ) from exc\n", + "\n", + " client = LocalClient(COORDINODE_EMBEDDED_PATH)\n", + " print(f\"Using embedded LocalClient at {COORDINODE_EMBEDDED_PATH}\")\n" ] }, { "cell_type": "markdown", - "id": "a1b2c3d4-0000-0000-0000-000000000006", + "id": "5", "metadata": {}, "source": [ - "## Step 1 — Clear previous demo data" + "## Step 1 \u2014 Clear previous demo data" ] }, { "cell_type": "code", "execution_count": null, - "id": "a1b2c3d4-0000-0000-0000-000000000007", + "id": "6", "metadata": {}, "outputs": [], "source": [ "import uuid\n", "\n", - "DEMO_TAG = os.environ.get(\"COORDINODE_DEMO_TAG\") or f\"seed_data_{uuid.uuid4().hex[:8]}\"\n", + "# In server mode (COORDINODE_ADDR) parallel runs may share a DB, so a unique\n", + "# UUID tag prevents collisions. In embedded mode each LocalClient has its own\n", + "# file-backed DB, so a stable tag lets reseeds cleanly replace prior data\n", + "# (the DETACH DELETE below is scoped to this tag).\n", + "DEMO_TAG = os.environ.get(\"COORDINODE_DEMO_TAG\") or (\n", + " f\"seed_data_{uuid.uuid4().hex[:8]}\"\n", + " if os.environ.get(\"COORDINODE_ADDR\")\n", + " else \"seed_data\"\n", + ")\n", "print(\"Using DEMO_TAG:\", DEMO_TAG)\n", "# Remove prior demo nodes and any attached relationships in one step to avoid\n", "# duplicate relationship matches during cleanup (undirected MATCH -[r]-() returns\n", - "# each edge twice — once per endpoint — causing duplicate-delete errors).\n", + "# each edge twice \u2014 once per endpoint \u2014 causing duplicate-delete errors).\n", "client.cypher(\n", " \"MATCH (n {demo: true, demo_tag: $tag}) DETACH DELETE n\",\n", " params={\"tag\": DEMO_TAG},\n", @@ -217,26 +236,26 @@ }, { "cell_type": "markdown", - "id": "a1b2c3d4-0000-0000-0000-000000000008", + "id": "7", "metadata": {}, "source": [ - "## Step 2 — Create nodes" + "## Step 2 \u2014 Create nodes" ] }, { "cell_type": "code", "execution_count": null, - "id": "a1b2c3d4-0000-0000-0000-000000000009", + "id": "8", "metadata": {}, "outputs": [], "source": [ - "# ── People ────────────────────────────────────────────────────────────────\n", + "# \u2500\u2500 People \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n", "people = [\n", " {\"name\": \"Alice Chen\", \"role\": \"ML Researcher\", \"org\": \"DeepMind\", \"field\": \"Reinforcement Learning\"},\n", " {\"name\": \"Bob Torres\", \"role\": \"Staff Engineer\", \"org\": \"Google\", \"field\": \"Distributed Systems\"},\n", " {\"name\": \"Carol Smith\", \"role\": \"Founder & CEO\", \"org\": \"Synthex\", \"field\": \"NLP\"},\n", " {\"name\": \"David Park\", \"role\": \"Research Scientist\", \"org\": \"OpenAI\", \"field\": \"LLMs\"},\n", - " {\"name\": \"Eva Müller\", \"role\": \"Systems Architect\", \"org\": \"Synthex\", \"field\": \"Graph Databases\"},\n", + " {\"name\": \"Eva M\u00fcller\", \"role\": \"Systems Architect\", \"org\": \"Synthex\", \"field\": \"Graph Databases\"},\n", " {\"name\": \"Frank Liu\", \"role\": \"Principal Engineer\", \"org\": \"Meta\", \"field\": \"Graph ML\"},\n", " {\"name\": \"Grace Okafor\", \"role\": \"PhD Researcher\", \"org\": \"MIT\", \"field\": \"Knowledge Graphs\"},\n", " {\"name\": \"Henry Rossi\", \"role\": \"CTO\", \"org\": \"Synthex\", \"field\": \"Databases\"},\n", @@ -253,7 +272,7 @@ "\n", "print(f\"Created {len(people)} people\")\n", "\n", - "# ── Companies ─────────────────────────────────────────────────────────────\n", + "# \u2500\u2500 Companies \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n", "companies = [\n", " {\"name\": \"Google\", \"industry\": \"Technology\", \"founded\": 1998, \"hq\": \"Mountain View\"},\n", " {\"name\": \"Meta\", \"industry\": \"Technology\", \"founded\": 2004, \"hq\": \"Menlo Park\"},\n", @@ -271,7 +290,7 @@ "\n", "print(f\"Created {len(companies)} companies\")\n", "\n", - "# ── Technologies ──────────────────────────────────────────────────────────\n", + "# \u2500\u2500 Technologies \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n", "technologies = [\n", " {\"name\": \"Transformer\", \"type\": \"Architecture\", \"year\": 2017},\n", " {\"name\": \"Graph Neural Network\", \"type\": \"Algorithm\", \"year\": 2009},\n", @@ -294,16 +313,16 @@ }, { "cell_type": "markdown", - "id": "a1b2c3d4-0000-0000-0000-000000000010", + "id": "9", "metadata": {}, "source": [ - "## Step 3 — Create relationships" + "## Step 3 \u2014 Create relationships" ] }, { "cell_type": "code", "execution_count": null, - "id": "a1b2c3d4-0000-0000-0000-000000000011", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -313,7 +332,7 @@ " (\"Bob Torres\", \"WORKS_AT\", \"Google\", {}),\n", " (\"Carol Smith\", \"WORKS_AT\", \"Synthex\", {\"since\": 2021}),\n", " (\"David Park\", \"WORKS_AT\", \"OpenAI\", {}),\n", - " (\"Eva Müller\", \"WORKS_AT\", \"Synthex\", {\"since\": 2022}),\n", + " (\"Eva M\u00fcller\", \"WORKS_AT\", \"Synthex\", {\"since\": 2022}),\n", " (\"Frank Liu\", \"WORKS_AT\", \"Meta\", {}),\n", " (\"Grace Okafor\", \"WORKS_AT\", \"MIT\", {}),\n", " (\"Henry Rossi\", \"WORKS_AT\", \"Synthex\", {\"since\": 2021}),\n", @@ -328,7 +347,7 @@ " (\"Carol Smith\", \"KNOWS\", \"Bob Torres\", {}),\n", " (\"Grace Okafor\", \"KNOWS\", \"Alice Chen\", {}),\n", " (\"Frank Liu\", \"KNOWS\", \"James Wright\", {}),\n", - " (\"Eva Müller\", \"KNOWS\", \"Grace Okafor\", {}),\n", + " (\"Eva M\u00fcller\", \"KNOWS\", \"Grace Okafor\", {}),\n", " # RESEARCHES / WORKS_ON\n", " (\"Alice Chen\", \"RESEARCHES\", \"Reinforcement Learning\", {\"since\": 2019}),\n", " (\"David Park\", \"RESEARCHES\", \"LLM\", {\"since\": 2020}),\n", @@ -383,16 +402,16 @@ }, { "cell_type": "markdown", - "id": "a1b2c3d4-0000-0000-0000-000000000012", + "id": "11", "metadata": {}, "source": [ - "## Step 4 — Verify" + "## Step 4 \u2014 Verify" ] }, { "cell_type": "code", "execution_count": null, - "id": "a1b2c3d4-0000-0000-0000-000000000013", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -420,7 +439,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a1b2c3d4-0000-0000-0000-000000000014", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -431,7 +450,7 @@ " params={\"co\": \"Synthex\", \"tag\": DEMO_TAG},\n", ")\n", "for r in rows:\n", - " print(f\" {r['name']} — {r['role']}\")\n", + " print(f\" {r['name']} \u2014 {r['role']}\")\n", "\n", "print(\"\\n=== What does Synthex use? ===\")\n", "rows = client.cypher(\n", @@ -447,10 +466,12 @@ " params={\"tech\": \"GraphRAG\", \"tag\": DEMO_TAG},\n", ")\n", "for r in rows:\n", - " print(f\" → {r['dependency']}\")\n", + " print(f\" \u2192 {r['dependency']}\")\n", "\n", - "print(\"\\n✓ Demo data seeded.\")\n", - "print(\"To query it from notebooks 01–03, connect them to the same CoordiNode server (COORDINODE_ADDR).\")\n", + "print(\"\\n\u2713 Demo data seeded.\")\n", + "print(\"To query it from notebooks 01\u201303:\")\n", + "print(f\" - Embedded mode: open them with the same COORDINODE_EMBEDDED_PATH (this run: {COORDINODE_EMBEDDED_PATH}) \u2014 they will see this seeded graph.\")\n", + "print(\" - Server mode: point them at the same running CoordiNode via COORDINODE_ADDR.\")\n", "client.close()" ] } diff --git a/demo/notebooks/01_llama_index_property_graph.ipynb b/demo/notebooks/01_llama_index_property_graph.ipynb index 4cb0b53..edf7b09 100644 --- a/demo/notebooks/01_llama_index_property_graph.ipynb +++ b/demo/notebooks/01_llama_index_property_graph.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000001", + "id": "0", "metadata": {}, "source": [ "# LlamaIndex + CoordiNode: PropertyGraphIndex\n", @@ -12,22 +12,22 @@ "Demonstrates `CoordinodePropertyGraphStore` as a backend for LlamaIndex `PropertyGraphIndex`.\n", "\n", "**What works right now:**\n", - "- `upsert_nodes` / `upsert_relations` — idempotent MERGE (safe to call multiple times)\n", - "- `get()` — look up nodes by ID or properties\n", - "- `get_triplets()` — all edges (wildcard) or filtered by relation type / entity name\n", - "- `get_rel_map()` — outgoing relations for a set of nodes (depth=1)\n", - "- `structured_query()` — arbitrary Cypher pass-through\n", - "- `delete()` — remove nodes by id or name\n", - "- `get_schema()` — live text schema of the graph\n", + "- `upsert_nodes` / `upsert_relations` \u2014 idempotent MERGE (safe to call multiple times)\n", + "- `get()` \u2014 look up nodes by ID or properties\n", + "- `get_triplets()` \u2014 all edges (wildcard) or filtered by relation type / entity name\n", + "- `get_rel_map()` \u2014 outgoing relations for a set of nodes (depth=1)\n", + "- `structured_query()` \u2014 arbitrary Cypher pass-through\n", + "- `delete()` \u2014 remove nodes by id or name\n", + "- `get_schema()` \u2014 live text schema of the graph\n", "\n", "**Environments:**\n", - "- **Google Colab** — uses `coordinode-embedded` (in-process Rust engine, no server needed). First run compiles from source (~5 min); subsequent runs use the pip cache.\n", - "- **Local / Docker Compose** — connects to a running CoordiNode server via gRPC." + "- **Google Colab** \u2014 uses `coordinode-embedded` (in-process Rust engine, no server needed). First run compiles from source (~5 min); subsequent runs use the pip cache.\n", + "- **Local / Docker Compose** \u2014 connects to a running CoordiNode server via gRPC." ] }, { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000002", + "id": "1", "metadata": {}, "source": [ "## Install dependencies" @@ -36,7 +36,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b2c3d4e5-0001-0000-0000-000000000003", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -47,9 +47,9 @@ "# Install coordinode-embedded in Colab only (requires Rust build).\n", "if IN_COLAB and not os.environ.get(\"COORDINODE_ADDR\"):\n", " # Install Rust toolchain via rustup (https://rustup.rs).\n", - " # Colab's apt packages ship rustc ≤1.75, which cannot build coordinode-embedded\n", - " # (requires Rust ≥1.80 for maturin/pyo3). apt-get is not a viable alternative here.\n", - " # Download the installer to a temp file and execute it explicitly — this avoids\n", + " # Colab's apt packages ship rustc \u22641.75, which cannot build coordinode-embedded\n", + " # (requires Rust \u22651.80 for maturin/pyo3). apt-get is not a viable alternative here.\n", + " # Download the installer to a temp file and execute it explicitly \u2014 this avoids\n", " # piping remote content directly into a shell while maintaining HTTPS/TLS security\n", " # through Python's default ssl context (cert-verified, TLS 1.2+).\n", " # SHA256 pinning of rustup-init is intentionally omitted: rustup.rs does not\n", @@ -62,8 +62,13 @@ " # Colab sessions, so there is no risk of unintentional execution in local\n", " # or server environments.\n", " # Security note: downloading rustup-init via HTTPS with cert verification and\n", - " # executing from a temp file (not piped to shell) is by design — this is the\n", + " # executing from a temp file (not piped to shell) is by design \u2014 this is the\n", " # rustup project's own recommended install method for automated environments.\n", + " # protoc is required by coordinode-raft build (prost-build). Skip if already present (faster reruns),\n", + " # otherwise refresh apt indexes first \u2014 Colab caches can go stale on long-lived runtimes.\n", + " if subprocess.run([\"which\", \"protoc\"], capture_output=True).returncode != 0:\n", + " subprocess.run([\"apt-get\", \"update\", \"-y\", \"-q\"], check=True, timeout=120)\n", + " subprocess.run([\"apt-get\", \"install\", \"-y\", \"-q\", \"protobuf-compiler\"], check=True, timeout=120)\n", " import ssl as _ssl, tempfile as _tmp, urllib.request as _ur\n", "\n", " _ctx = _ssl.create_default_context()\n", @@ -86,7 +91,7 @@ " \"pip\",\n", " \"install\",\n", " \"-q\",\n", - " \"git+https://github.com/structured-world/coordinode-python.git@8da94d694ecaabee6f8380147d02f08220061bfa#subdirectory=coordinode-embedded\",\n", + " \"git+https://github.com/structured-world/coordinode-python.git@50ddc08a89a21fca73be007cb22b57a0054225c3#subdirectory=coordinode-embedded\",\n", " ],\n", " check=True,\n", " timeout=600,\n", @@ -94,8 +99,15 @@ "\n", "# coordinode-embedded is pinned to a specific git commit because it requires a Rust\n", "# build (maturin/pyo3) and the embedded engine must match the Python SDK version.\n", - "# The remaining packages (coordinode, llama-index, etc.) are installed without pins:\n", - "# they are pure Python, release frequently, and pip resolves a compatible version.\n", + "# coordinode (SDK) is git-pinned in Colab via _coordinode_spec to keep the\n", + "# SDK/proto in sync with this PR; outside Colab it resolves from PyPI.\n", + "# Other packages (LangChain / llama-index / nest_asyncio) are unpinned \u2014 pure\n", + "# Python, release frequently, pip resolves a compatible version.\n", + "_coordinode_spec = (\n", + " \"git+https://github.com/structured-world/coordinode-python.git@50ddc08a89a21fca73be007cb22b57a0054225c3#subdirectory=coordinode\"\n", + " if IN_COLAB\n", + " else \"coordinode\"\n", + ")\n", "subprocess.run(\n", " [\n", " sys.executable,\n", @@ -103,7 +115,7 @@ " \"pip\",\n", " \"install\",\n", " \"-q\",\n", - " \"coordinode\",\n", + " _coordinode_spec,\n", " \"llama-index-graph-stores-coordinode\",\n", " \"llama-index-core\",\n", " \"nest_asyncio\",\n", @@ -121,7 +133,7 @@ }, { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000004", + "id": "3", "metadata": {}, "source": [ "## Adapter for embedded mode\n", @@ -134,16 +146,39 @@ { "cell_type": "code", "execution_count": null, - "id": "b2c3d4e5-0001-0000-0000-000000000005", + "id": "4", "metadata": {}, "outputs": [], "source": [ - "class _EmbeddedAdapter:\n \"\"\"Thin wrapper around LocalClient that adds CoordinodeClient-compatible methods.\"\"\"\n\n def __init__(self, local_client):\n self._lc = local_client\n\n def cypher(self, query, params=None):\n return self._lc.cypher(query, params or {})\n\n def get_schema_text(self):\n lbls = self._lc.cypher(\"MATCH (n) UNWIND labels(n) AS lbl RETURN DISTINCT lbl ORDER BY lbl\")\n rels = self._lc.cypher(\"MATCH ()-[r]->() RETURN DISTINCT type(r) AS t ORDER BY t\")\n lines = [\"Node labels:\"]\n for r in lbls:\n lines.append(f\" - {r['lbl']}\")\n lines.append(\"\\nEdge types:\")\n for r in rels:\n lines.append(f\" - {r['t']}\")\n return \"\\n\".join(lines)\n\n # Vector search not available in embedded mode — requires running CoordiNode server.\n\n def close(self):\n self._lc.close()\n" + "class _EmbeddedAdapter:\n", + " \"\"\"Thin wrapper around LocalClient that adds CoordinodeClient-compatible methods.\"\"\"\n", + "\n", + " def __init__(self, local_client):\n", + " self._lc = local_client\n", + "\n", + " def cypher(self, query, params=None):\n", + " return self._lc.cypher(query, params or {})\n", + "\n", + " def get_schema_text(self):\n", + " lbls = self._lc.cypher(\"MATCH (n) UNWIND labels(n) AS lbl RETURN DISTINCT lbl ORDER BY lbl\")\n", + " rels = self._lc.cypher(\"MATCH ()-[r]->() RETURN DISTINCT type(r) AS t ORDER BY t\")\n", + " lines = [\"Node labels:\"]\n", + " for r in lbls:\n", + " lines.append(f\" - {r['lbl']}\")\n", + " lines.append(\"\\nEdge types:\")\n", + " for r in rels:\n", + " lines.append(f\" - {r['t']}\")\n", + " return \"\\n\".join(lines)\n", + "\n", + " # Vector search not available in embedded mode \u2014 requires running CoordiNode server.\n", + "\n", + " def close(self):\n", + " self._lc.close()\n" ] }, { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000006", + "id": "5", "metadata": {}, "source": [ "## Connect to CoordiNode" @@ -152,65 +187,58 @@ { "cell_type": "code", "execution_count": null, - "id": "b2c3d4e5-0001-0000-0000-000000000007", + "id": "6", "metadata": {}, "outputs": [], "source": [ - "import os, socket\n", - "\n", - "\n", - "def _port_open(port):\n", - " try:\n", - " with socket.create_connection((\"127.0.0.1\", port), timeout=1):\n", - " return True\n", - " except OSError:\n", - " return False\n", + "import os, tempfile\n", "\n", + "# Persistent embedded DB path. Colab has /content which persists across cell\n", + "# reruns within a runtime session; locally fall back to the OS temp dir\n", + "# (portable across Linux/macOS/Windows). Override via COORDINODE_EMBEDDED_PATH.\n", + "COORDINODE_EMBEDDED_PATH = os.environ.get(\n", + " \"COORDINODE_EMBEDDED_PATH\",\n", + " \"/content/coordinode-demo.db\"\n", + " if os.path.isdir(\"/content\")\n", + " else os.path.join(tempfile.gettempdir(), \"coordinode-demo.db\"),\n", + ")\n", "\n", "if os.environ.get(\"COORDINODE_ADDR\"):\n", " COORDINODE_ADDR = os.environ[\"COORDINODE_ADDR\"]\n", " from coordinode import CoordinodeClient\n", "\n", - " client = CoordinodeClient(COORDINODE_ADDR)\n", - " if not client.health():\n", - " client.close()\n", - " raise RuntimeError(f\"CoordiNode at {COORDINODE_ADDR} is not serving health checks\")\n", + " _cc = CoordinodeClient(COORDINODE_ADDR)\n", + " if not _cc.health():\n", + " _cc.close()\n", + " raise RuntimeError(f\"Health check failed for {COORDINODE_ADDR}\")\n", " print(f\"Connected to {COORDINODE_ADDR}\")\n", + " client = _cc\n", "else:\n", + " # No explicit server \u2014 use the embedded in-process engine backed by a file\n", + " # so the graph persists across cell reruns and between sibling demo\n", + " # notebooks within the same runtime.\n", " try:\n", - " grpc_port = int(os.environ.get(\"COORDINODE_PORT\", \"7080\"))\n", - " except ValueError as exc:\n", - " raise RuntimeError(\"COORDINODE_PORT must be an integer\") from exc\n", - "\n", - " if _port_open(grpc_port):\n", - " COORDINODE_ADDR = f\"localhost:{grpc_port}\"\n", - " from coordinode import CoordinodeClient\n", - "\n", - " client = CoordinodeClient(COORDINODE_ADDR)\n", - " if not client.health():\n", - " client.close()\n", - " raise RuntimeError(f\"CoordiNode at {COORDINODE_ADDR} is not serving health checks\")\n", - " print(f\"Connected to {COORDINODE_ADDR}\")\n", - " else:\n", - " # No server available — use the embedded in-process engine.\n", - " # Works without Docker or any external service; data is in-memory.\n", - " try:\n", - " from coordinode_embedded import LocalClient\n", - " except ImportError as exc:\n", - " raise RuntimeError(\n", - " \"coordinode-embedded is not installed. \"\n", - " \"Run: pip install git+https://github.com/structured-world/coordinode-python.git@8da94d694ecaabee6f8380147d02f08220061bfa#subdirectory=coordinode-embedded\"\n", - " \" — or start a CoordiNode server and set COORDINODE_ADDR.\"\n", - " ) from exc\n", + " from coordinode_embedded import LocalClient\n", + " except ImportError as exc:\n", + " raise RuntimeError(\n", + " \"coordinode-embedded is not installed. \"\n", + " \"In Colab, rerun the install cell above. \"\n", + " \"Locally, install from source: \"\n", + " \"pip install 'git+https://github.com/structured-world/coordinode-python.git#subdirectory=coordinode-embedded' \"\n", + " \"(requires Rust toolchain, ~5 min build). \"\n", + " \"Alternatively start a CoordiNode server and set COORDINODE_ADDR.\"\n", + " ) from exc\n", "\n", - " _lc = LocalClient(\":memory:\")\n", - " client = _EmbeddedAdapter(_lc)\n", - " print(\"Using embedded LocalClient (in-process)\")\n" + " _lc = LocalClient(COORDINODE_EMBEDDED_PATH)\n", + " # Wrap in _EmbeddedAdapter so CoordinodeGraph/PropertyGraphStore can call\n", + " # get_schema_text() \u2014 LocalClient has .cypher() only.\n", + " client = _EmbeddedAdapter(_lc)\n", + " print(f\"Using embedded LocalClient at {COORDINODE_EMBEDDED_PATH}\")\n" ] }, { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000008", + "id": "7", "metadata": {}, "source": [ "## Create the property graph store\n", @@ -221,7 +249,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b2c3d4e5-0001-0000-0000-000000000009", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -235,7 +263,7 @@ }, { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000010", + "id": "9", "metadata": {}, "source": [ "## 1. Upsert nodes and relations\n", @@ -246,25 +274,44 @@ { "cell_type": "code", "execution_count": null, - "id": "b2c3d4e5-0001-0000-0000-000000000011", + "id": "10", "metadata": {}, "outputs": [], "source": [ - "import uuid\n\ntag = uuid.uuid4().hex\n\nnodes = [\n EntityNode(label=\"Person\", name=f\"Alice-{tag}\", properties={\"role\": \"researcher\", \"field\": \"AI\"}),\n EntityNode(label=\"Person\", name=f\"Bob-{tag}\", properties={\"role\": \"engineer\", \"field\": \"ML\"}),\n EntityNode(label=\"Topic\", name=f\"GraphRAG-{tag}\", properties={\"domain\": \"knowledge graphs\"}),\n]\nstore.upsert_nodes(nodes)\nprint(\"Upserted nodes:\", [n.name for n in nodes])\n\nalice, bob, graphrag = nodes\nrelations = [\n Relation(label=\"RESEARCHES\", source_id=alice.id, target_id=graphrag.id, properties={\"since\": 2023}),\n Relation(label=\"COLLABORATES\", source_id=alice.id, target_id=bob.id),\n Relation(label=\"IMPLEMENTS\", source_id=bob.id, target_id=graphrag.id),\n]\nstore.upsert_relations(relations)\nprint(\"Upserted relations:\", [r.label for r in relations])" + "import uuid\n", + "\n", + "tag = uuid.uuid4().hex\n", + "\n", + "nodes = [\n", + " EntityNode(label=\"Person\", name=f\"Alice-{tag}\", properties={\"role\": \"researcher\", \"field\": \"AI\"}),\n", + " EntityNode(label=\"Person\", name=f\"Bob-{tag}\", properties={\"role\": \"engineer\", \"field\": \"ML\"}),\n", + " EntityNode(label=\"Topic\", name=f\"GraphRAG-{tag}\", properties={\"domain\": \"knowledge graphs\"}),\n", + "]\n", + "store.upsert_nodes(nodes)\n", + "print(\"Upserted nodes:\", [n.name for n in nodes])\n", + "\n", + "alice, bob, graphrag = nodes\n", + "relations = [\n", + " Relation(label=\"RESEARCHES\", source_id=alice.id, target_id=graphrag.id, properties={\"since\": 2023}),\n", + " Relation(label=\"COLLABORATES\", source_id=alice.id, target_id=bob.id),\n", + " Relation(label=\"IMPLEMENTS\", source_id=bob.id, target_id=graphrag.id),\n", + "]\n", + "store.upsert_relations(relations)\n", + "print(\"Upserted relations:\", [r.label for r in relations])" ] }, { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000012", + "id": "11", "metadata": {}, "source": [ - "## 2. get_triplets — all edges from a node (wildcard)" + "## 2. get_triplets \u2014 all edges from a node (wildcard)" ] }, { "cell_type": "code", "execution_count": null, - "id": "b2c3d4e5-0001-0000-0000-000000000013", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -276,16 +323,16 @@ }, { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000014", + "id": "13", "metadata": {}, "source": [ - "## 3. get_rel_map — relations for a set of nodes" + "## 3. get_rel_map \u2014 relations for a set of nodes" ] }, { "cell_type": "code", "execution_count": null, - "id": "b2c3d4e5-0001-0000-0000-000000000015", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -298,16 +345,16 @@ }, { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000016", + "id": "15", "metadata": {}, "source": [ - "## 4. structured_query — arbitrary Cypher" + "## 4. structured_query \u2014 arbitrary Cypher" ] }, { "cell_type": "code", "execution_count": null, - "id": "b2c3d4e5-0001-0000-0000-000000000017", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -322,7 +369,7 @@ }, { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000018", + "id": "17", "metadata": {}, "source": [ "## 5. get_schema" @@ -331,7 +378,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b2c3d4e5-0001-0000-0000-000000000019", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -341,20 +388,20 @@ }, { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000020", + "id": "19", "metadata": {}, "source": [ - "## 6. Idempotency — double upsert must not duplicate edges" + "## 6. Idempotency \u2014 double upsert must not duplicate edges" ] }, { "cell_type": "code", "execution_count": null, - "id": "b2c3d4e5-0001-0000-0000-000000000021", + "id": "20", "metadata": {}, "outputs": [], "source": [ - "store.upsert_relations(relations) # second call — should still be exactly 1 edge\n", + "store.upsert_relations(relations) # second call \u2014 should still be exactly 1 edge\n", "rows = store.structured_query(\n", " \"MATCH (a {name: $src})-[r:RESEARCHES]->(b {name: $dst}) RETURN count(r) AS cnt\",\n", " param_map={\"src\": f\"Alice-{tag}\", \"dst\": f\"GraphRAG-{tag}\"},\n", @@ -364,7 +411,7 @@ }, { "cell_type": "markdown", - "id": "b2c3d4e5-0001-0000-0000-000000000022", + "id": "21", "metadata": {}, "source": [ "## Cleanup" @@ -373,14 +420,14 @@ { "cell_type": "code", "execution_count": null, - "id": "b2c3d4e5-0001-0000-0000-000000000023", + "id": "22", "metadata": {}, "outputs": [], "source": [ "store.delete(entity_names=[f\"Alice-{tag}\", f\"Bob-{tag}\", f\"GraphRAG-{tag}\"])\n", "print(\"Cleaned up\")\n", "store.close()\n", - "client.close() # injected client — owned by caller" + "client.close() # injected client \u2014 owned by caller" ] } ], diff --git a/demo/notebooks/02_langchain_graph_chain.ipynb b/demo/notebooks/02_langchain_graph_chain.ipynb index cd005ef..cf23cf1 100644 --- a/demo/notebooks/02_langchain_graph_chain.ipynb +++ b/demo/notebooks/02_langchain_graph_chain.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "c3d4e5f6-0002-0000-0000-000000000001", + "id": "0", "metadata": {}, "source": [ "# LangChain + CoordiNode: Graph Chain\n", @@ -12,19 +12,19 @@ "Demonstrates `CoordinodeGraph` as a Knowledge Graph backend for LangChain.\n", "\n", "**What works right now:**\n", - "- `graph.query()` — arbitrary Cypher pass-through\n", - "- `graph.schema` / `refresh_schema()` — live graph schema\n", - "- `add_graph_documents()` — add Nodes + Relationships from a LangChain `GraphDocument`\n", - "- `GraphCypherQAChain` — LLM generates Cypher from a natural-language question *(requires `OPENAI_API_KEY`)*\n", + "- `graph.query()` \u2014 arbitrary Cypher pass-through\n", + "- `graph.schema` / `refresh_schema()` \u2014 live graph schema\n", + "- `add_graph_documents()` \u2014 add Nodes + Relationships from a LangChain `GraphDocument`\n", + "- `GraphCypherQAChain` \u2014 LLM generates Cypher from a natural-language question *(requires `OPENAI_API_KEY`)*\n", "\n", "**Environments:**\n", - "- **Google Colab** — uses `coordinode-embedded` (in-process Rust engine, no server needed). First run compiles from source (~5 min); subsequent runs use the pip cache.\n", - "- **Local / Docker Compose** — connects to a running CoordiNode server via gRPC." + "- **Google Colab** \u2014 uses `coordinode-embedded` (in-process Rust engine, no server needed). First run compiles from source (~5 min); subsequent runs use the pip cache.\n", + "- **Local / Docker Compose** \u2014 connects to a running CoordiNode server via gRPC." ] }, { "cell_type": "markdown", - "id": "c3d4e5f6-0002-0000-0000-000000000002", + "id": "1", "metadata": {}, "source": [ "## Install dependencies" @@ -33,7 +33,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c3d4e5f6-0002-0000-0000-000000000003", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -44,9 +44,9 @@ "# Install coordinode-embedded in Colab only (requires Rust build).\n", "if IN_COLAB and not os.environ.get(\"COORDINODE_ADDR\"):\n", " # Install Rust toolchain via rustup (https://rustup.rs).\n", - " # Colab's apt packages ship rustc ≤1.75, which cannot build coordinode-embedded\n", - " # (requires Rust ≥1.80 for maturin/pyo3). apt-get is not a viable alternative here.\n", - " # Download the installer to a temp file and execute it explicitly — this avoids\n", + " # Colab's apt packages ship rustc \u22641.75, which cannot build coordinode-embedded\n", + " # (requires Rust \u22651.80 for maturin/pyo3). apt-get is not a viable alternative here.\n", + " # Download the installer to a temp file and execute it explicitly \u2014 this avoids\n", " # piping remote content directly into a shell while maintaining HTTPS/TLS security\n", " # through Python's default ssl context (cert-verified, TLS 1.2+).\n", " # SHA256 pinning of rustup-init is intentionally omitted: rustup.rs does not\n", @@ -59,8 +59,13 @@ " # Colab sessions, so there is no risk of unintentional execution in local\n", " # or server environments.\n", " # Security note: downloading rustup-init via HTTPS with cert verification and\n", - " # executing from a temp file (not piped to shell) is by design — this is the\n", + " # executing from a temp file (not piped to shell) is by design \u2014 this is the\n", " # rustup project's own recommended install method for automated environments.\n", + " # protoc is required by coordinode-raft build (prost-build). Skip if already present (faster reruns),\n", + " # otherwise refresh apt indexes first \u2014 Colab caches can go stale on long-lived runtimes.\n", + " if subprocess.run([\"which\", \"protoc\"], capture_output=True).returncode != 0:\n", + " subprocess.run([\"apt-get\", \"update\", \"-y\", \"-q\"], check=True, timeout=120)\n", + " subprocess.run([\"apt-get\", \"install\", \"-y\", \"-q\", \"protobuf-compiler\"], check=True, timeout=120)\n", " import ssl as _ssl, tempfile as _tmp, urllib.request as _ur\n", "\n", " _ctx = _ssl.create_default_context()\n", @@ -83,7 +88,7 @@ " \"pip\",\n", " \"install\",\n", " \"-q\",\n", - " \"git+https://github.com/structured-world/coordinode-python.git@8da94d694ecaabee6f8380147d02f08220061bfa#subdirectory=coordinode-embedded\",\n", + " \"git+https://github.com/structured-world/coordinode-python.git@50ddc08a89a21fca73be007cb22b57a0054225c3#subdirectory=coordinode-embedded\",\n", " ],\n", " check=True,\n", " timeout=600,\n", @@ -94,8 +99,15 @@ "# must match the Python SDK version.\n", "# - langchain-coordinode is pinned to the same commit so CoordinodeGraph(client=...)\n", "# is available; this parameter is not yet released to PyPI.\n", - "# The remaining packages (coordinode, LangChain, etc.) are installed without pins:\n", - "# they are pure Python, release frequently, and pip resolves a compatible version.\n", + "# coordinode (SDK) is git-pinned in Colab via _coordinode_spec to keep the\n", + "# SDK/proto in sync with this PR; outside Colab it resolves from PyPI.\n", + "# Other packages (LangChain / llama-index / nest_asyncio) are unpinned \u2014 pure\n", + "# Python, release frequently, pip resolves a compatible version.\n", + "_coordinode_spec = (\n", + " \"git+https://github.com/structured-world/coordinode-python.git@50ddc08a89a21fca73be007cb22b57a0054225c3#subdirectory=coordinode\"\n", + " if IN_COLAB\n", + " else \"coordinode\"\n", + ")\n", "subprocess.run(\n", " [\n", " sys.executable,\n", @@ -103,9 +115,9 @@ " \"pip\",\n", " \"install\",\n", " \"-q\",\n", - " \"coordinode\",\n", + " _coordinode_spec,\n", " \"langchain\",\n", - " \"git+https://github.com/structured-world/coordinode-python.git@8da94d694ecaabee6f8380147d02f08220061bfa#subdirectory=langchain-coordinode\",\n", + " \"git+https://github.com/structured-world/coordinode-python.git@50ddc08a89a21fca73be007cb22b57a0054225c3#subdirectory=langchain-coordinode\",\n", " \"langchain-community\",\n", " \"langchain-openai\",\n", " \"nest_asyncio\",\n", @@ -122,7 +134,7 @@ }, { "cell_type": "markdown", - "id": "c3d4e5f6-0002-0000-0000-000000000004", + "id": "3", "metadata": {}, "source": [ "## Adapter for embedded mode\n", @@ -135,16 +147,39 @@ { "cell_type": "code", "execution_count": null, - "id": "c3d4e5f6-0002-0000-0000-000000000005", + "id": "4", "metadata": {}, "outputs": [], "source": [ - "class _EmbeddedAdapter:\n \"\"\"Thin wrapper around LocalClient that adds CoordinodeClient-compatible methods.\"\"\"\n\n def __init__(self, local_client):\n self._lc = local_client\n\n def cypher(self, query, params=None):\n return self._lc.cypher(query, params or {})\n\n def get_schema_text(self):\n lbls = self._lc.cypher(\"MATCH (n) UNWIND labels(n) AS lbl RETURN DISTINCT lbl ORDER BY lbl\")\n rels = self._lc.cypher(\"MATCH ()-[r]->() RETURN DISTINCT type(r) AS t ORDER BY t\")\n lines = [\"Node labels:\"]\n for r in lbls:\n lines.append(f\" - {r['lbl']}\")\n lines.append(\"\\nEdge types:\")\n for r in rels:\n lines.append(f\" - {r['t']}\")\n return \"\\n\".join(lines)\n\n # Vector search not available in embedded mode — requires running CoordiNode server.\n\n def close(self):\n self._lc.close()\n" + "class _EmbeddedAdapter:\n", + " \"\"\"Thin wrapper around LocalClient that adds CoordinodeClient-compatible methods.\"\"\"\n", + "\n", + " def __init__(self, local_client):\n", + " self._lc = local_client\n", + "\n", + " def cypher(self, query, params=None):\n", + " return self._lc.cypher(query, params or {})\n", + "\n", + " def get_schema_text(self):\n", + " lbls = self._lc.cypher(\"MATCH (n) UNWIND labels(n) AS lbl RETURN DISTINCT lbl ORDER BY lbl\")\n", + " rels = self._lc.cypher(\"MATCH ()-[r]->() RETURN DISTINCT type(r) AS t ORDER BY t\")\n", + " lines = [\"Node labels:\"]\n", + " for r in lbls:\n", + " lines.append(f\" - {r['lbl']}\")\n", + " lines.append(\"\\nEdge types:\")\n", + " for r in rels:\n", + " lines.append(f\" - {r['t']}\")\n", + " return \"\\n\".join(lines)\n", + "\n", + " # Vector search not available in embedded mode \u2014 requires running CoordiNode server.\n", + "\n", + " def close(self):\n", + " self._lc.close()\n" ] }, { "cell_type": "markdown", - "id": "c3d4e5f6-0002-0000-0000-000000000006", + "id": "5", "metadata": {}, "source": [ "## Connect to CoordiNode" @@ -153,63 +188,58 @@ { "cell_type": "code", "execution_count": null, - "id": "c3d4e5f6-0002-0000-0000-000000000007", + "id": "6", "metadata": {}, "outputs": [], "source": [ - "import os, socket\n", - "\n", - "\n", - "def _port_open(port):\n", - " try:\n", - " with socket.create_connection((\"127.0.0.1\", port), timeout=1):\n", - " return True\n", - " except OSError:\n", - " return False\n", + "import os, tempfile\n", "\n", + "# Persistent embedded DB path. Colab has /content which persists across cell\n", + "# reruns within a runtime session; locally fall back to the OS temp dir\n", + "# (portable across Linux/macOS/Windows). Override via COORDINODE_EMBEDDED_PATH.\n", + "COORDINODE_EMBEDDED_PATH = os.environ.get(\n", + " \"COORDINODE_EMBEDDED_PATH\",\n", + " \"/content/coordinode-demo.db\"\n", + " if os.path.isdir(\"/content\")\n", + " else os.path.join(tempfile.gettempdir(), \"coordinode-demo.db\"),\n", + ")\n", "\n", "if os.environ.get(\"COORDINODE_ADDR\"):\n", " COORDINODE_ADDR = os.environ[\"COORDINODE_ADDR\"]\n", " from coordinode import CoordinodeClient\n", "\n", - " client = CoordinodeClient(COORDINODE_ADDR)\n", - " if not client.health():\n", + " _cc = CoordinodeClient(COORDINODE_ADDR)\n", + " if not _cc.health():\n", + " _cc.close()\n", " raise RuntimeError(f\"Health check failed for {COORDINODE_ADDR}\")\n", " print(f\"Connected to {COORDINODE_ADDR}\")\n", + " client = _cc\n", "else:\n", + " # No explicit server \u2014 use the embedded in-process engine backed by a file\n", + " # so the graph persists across cell reruns and between sibling demo\n", + " # notebooks within the same runtime.\n", " try:\n", - " grpc_port = int(os.environ.get(\"COORDINODE_PORT\", \"7080\"))\n", - " except ValueError as exc:\n", - " raise RuntimeError(\"COORDINODE_PORT must be an integer\") from exc\n", + " from coordinode_embedded import LocalClient\n", + " except ImportError as exc:\n", + " raise RuntimeError(\n", + " \"coordinode-embedded is not installed. \"\n", + " \"In Colab, rerun the install cell above. \"\n", + " \"Locally, install from source: \"\n", + " \"pip install 'git+https://github.com/structured-world/coordinode-python.git#subdirectory=coordinode-embedded' \"\n", + " \"(requires Rust toolchain, ~5 min build). \"\n", + " \"Alternatively start a CoordiNode server and set COORDINODE_ADDR.\"\n", + " ) from exc\n", "\n", - " if _port_open(grpc_port):\n", - " COORDINODE_ADDR = f\"localhost:{grpc_port}\"\n", - " from coordinode import CoordinodeClient\n", - "\n", - " client = CoordinodeClient(COORDINODE_ADDR)\n", - " if not client.health():\n", - " raise RuntimeError(f\"Health check failed for {COORDINODE_ADDR}\")\n", - " print(f\"Connected to {COORDINODE_ADDR}\")\n", - " else:\n", - " # No server available — use the embedded in-process engine.\n", - " # Works without Docker or any external service; data is in-memory.\n", - " try:\n", - " from coordinode_embedded import LocalClient\n", - " except ImportError as exc:\n", - " raise RuntimeError(\n", - " \"coordinode-embedded is not installed. \"\n", - " \"Run: pip install git+https://github.com/structured-world/coordinode-python.git@8da94d694ecaabee6f8380147d02f08220061bfa#subdirectory=coordinode-embedded\"\n", - " \" — or start a CoordiNode server and set COORDINODE_ADDR.\"\n", - " ) from exc\n", - "\n", - " _lc = LocalClient(\":memory:\")\n", - " client = _EmbeddedAdapter(_lc)\n", - " print(\"Using embedded LocalClient (in-process)\")" + " _lc = LocalClient(COORDINODE_EMBEDDED_PATH)\n", + " # Wrap in _EmbeddedAdapter so CoordinodeGraph/PropertyGraphStore can call\n", + " # get_schema_text() \u2014 LocalClient has .cypher() only.\n", + " client = _EmbeddedAdapter(_lc)\n", + " print(f\"Using embedded LocalClient at {COORDINODE_EMBEDDED_PATH}\")\n" ] }, { "cell_type": "markdown", - "id": "c3d4e5f6-0002-0000-0000-000000000008", + "id": "7", "metadata": {}, "source": [ "## Create the graph store\n", @@ -220,7 +250,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c3d4e5f6-0002-0000-0000-000000000009", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -236,7 +266,7 @@ }, { "cell_type": "markdown", - "id": "c3d4e5f6-0002-0000-0000-000000000010", + "id": "9", "metadata": {}, "source": [ "## 1. add_graph_documents\n", @@ -248,25 +278,39 @@ { "cell_type": "code", "execution_count": null, - "id": "c3d4e5f6-0002-0000-0000-000000000011", + "id": "10", "metadata": {}, "outputs": [], "source": [ - "tag = uuid.uuid4().hex\n\nnodes = [\n Node(id=f\"Turing-{tag}\", type=\"Scientist\", properties={\"born\": 1912, \"field\": \"computer science\"}),\n Node(id=f\"Shannon-{tag}\", type=\"Scientist\", properties={\"born\": 1916, \"field\": \"information theory\"}),\n Node(id=f\"Cryptography-{tag}\", type=\"Field\", properties={\"era\": \"modern\"}),\n]\nrels = [\n Relationship(source=nodes[0], target=nodes[2], type=\"FOUNDED\", properties={\"year\": 1936}),\n Relationship(source=nodes[1], target=nodes[2], type=\"CONTRIBUTED_TO\"),\n Relationship(source=nodes[0], target=nodes[1], type=\"INFLUENCED\"),\n]\ndoc = GraphDocument(nodes=nodes, relationships=rels, source=Document(page_content=\"Turing and Shannon\"))\ngraph.add_graph_documents([doc])\nprint(\"Documents added\")" + "tag = uuid.uuid4().hex\n", + "\n", + "nodes = [\n", + " Node(id=f\"Turing-{tag}\", type=\"Scientist\", properties={\"born\": 1912, \"field\": \"computer science\"}),\n", + " Node(id=f\"Shannon-{tag}\", type=\"Scientist\", properties={\"born\": 1916, \"field\": \"information theory\"}),\n", + " Node(id=f\"Cryptography-{tag}\", type=\"Field\", properties={\"era\": \"modern\"}),\n", + "]\n", + "rels = [\n", + " Relationship(source=nodes[0], target=nodes[2], type=\"FOUNDED\", properties={\"year\": 1936}),\n", + " Relationship(source=nodes[1], target=nodes[2], type=\"CONTRIBUTED_TO\"),\n", + " Relationship(source=nodes[0], target=nodes[1], type=\"INFLUENCED\"),\n", + "]\n", + "doc = GraphDocument(nodes=nodes, relationships=rels, source=Document(page_content=\"Turing and Shannon\"))\n", + "graph.add_graph_documents([doc])\n", + "print(\"Documents added\")" ] }, { "cell_type": "markdown", - "id": "c3d4e5f6-0002-0000-0000-000000000012", + "id": "11", "metadata": {}, "source": [ - "## 2. query — direct Cypher" + "## 2. query \u2014 direct Cypher" ] }, { "cell_type": "code", "execution_count": null, - "id": "c3d4e5f6-0002-0000-0000-000000000013", + "id": "12", "metadata": {}, "outputs": [], "source": [ @@ -276,23 +320,23 @@ " \" RETURN s.name AS scientist, type(r) AS relation, f.name AS field\",\n", " params={\"prefix\": f\"Turing-{tag}\"},\n", ")\n", - "print(\"Scientists → Fields:\")\n", + "print(\"Scientists \u2192 Fields:\")\n", "for r in rows:\n", " print(f\" {r['scientist']} --[{r['relation']}]--> {r['field']}\")" ] }, { "cell_type": "markdown", - "id": "c3d4e5f6-0002-0000-0000-000000000014", + "id": "13", "metadata": {}, "source": [ - "## 3. refresh_schema — structured_schema dict" + "## 3. refresh_schema \u2014 structured_schema dict" ] }, { "cell_type": "code", "execution_count": null, - "id": "c3d4e5f6-0002-0000-0000-000000000015", + "id": "14", "metadata": {}, "outputs": [], "source": [ @@ -303,23 +347,23 @@ }, { "cell_type": "markdown", - "id": "c3d4e5f6-0002-0000-0000-000000000016", + "id": "15", "metadata": {}, "source": [ "## 4. Idempotency check\n", "\n", - "`add_graph_documents` uses MERGE internally — adding the same document twice must not\n", + "`add_graph_documents` uses MERGE internally \u2014 adding the same document twice must not\n", "create duplicate edges." ] }, { "cell_type": "code", "execution_count": null, - "id": "c3d4e5f6-0002-0000-0000-000000000017", + "id": "16", "metadata": {}, "outputs": [], "source": [ - "graph.add_graph_documents([doc]) # second upsert — must not create a duplicate edge\n", + "graph.add_graph_documents([doc]) # second upsert \u2014 must not create a duplicate edge\n", "cnt = graph.query(\n", " \"MATCH (a {name: $src})-[r:FOUNDED]->(b {name: $dst}) RETURN count(r) AS cnt\",\n", " params={\"src\": f\"Turing-{tag}\", \"dst\": f\"Cryptography-{tag}\"},\n", @@ -329,10 +373,10 @@ }, { "cell_type": "markdown", - "id": "c3d4e5f6-0002-0000-0000-000000000018", + "id": "17", "metadata": {}, "source": [ - "## 5. GraphCypherQAChain — LLM-powered Cypher (optional)\n", + "## 5. GraphCypherQAChain \u2014 LLM-powered Cypher (optional)\n", "\n", "> **This section requires `OPENAI_API_KEY`.** Set it in your environment or via\n", "> `os.environ['OPENAI_API_KEY'] = 'sk-...'` before running.\n", @@ -342,7 +386,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c3d4e5f6-0002-0000-0000-000000000019", + "id": "18", "metadata": {}, "outputs": [], "source": [ @@ -366,7 +410,7 @@ }, { "cell_type": "markdown", - "id": "c3d4e5f6-0002-0000-0000-000000000020", + "id": "19", "metadata": {}, "source": [ "## Cleanup" @@ -375,11 +419,18 @@ { "cell_type": "code", "execution_count": null, - "id": "c3d4e5f6-0002-0000-0000-000000000021", + "id": "20", "metadata": {}, "outputs": [], "source": [ - "# DETACH DELETE atomically removes all edges then the node in one operation.\n# Two-step MATCH (n)-[r]-() / DELETE r / DELETE n is avoided because an\n# undirected MATCH returns each edge from both endpoints, so the second pass\n# fails with \"cannot delete node with connected edges\".\ngraph.query(\"MATCH (n) WHERE n.name ENDS WITH $tag DETACH DELETE n\", params={\"tag\": tag})\nprint(\"Cleaned up\")\ngraph.close()\nclient.close() # injected client — owned by caller" + "# DETACH DELETE atomically removes all edges then the node in one operation.\n", + "# Two-step MATCH (n)-[r]-() / DELETE r / DELETE n is avoided because an\n", + "# undirected MATCH returns each edge from both endpoints, so the second pass\n", + "# fails with \"cannot delete node with connected edges\".\n", + "graph.query(\"MATCH (n) WHERE n.name ENDS WITH $tag DETACH DELETE n\", params={\"tag\": tag})\n", + "print(\"Cleaned up\")\n", + "graph.close()\n", + "client.close() # injected client \u2014 owned by caller" ] } ], diff --git a/demo/notebooks/03_langgraph_agent.ipynb b/demo/notebooks/03_langgraph_agent.ipynb index 5eae0b8..6d70466 100644 --- a/demo/notebooks/03_langgraph_agent.ipynb +++ b/demo/notebooks/03_langgraph_agent.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "d4e5f6a7-0003-0000-0000-000000000001", + "id": "0", "metadata": {}, "source": [ "# LangGraph + CoordiNode: Agent with graph memory\n", @@ -10,22 +10,22 @@ "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/structured-world/coordinode-python/blob/main/demo/notebooks/03_langgraph_agent.ipynb)\n", "\n", "Demonstrates a LangGraph agent that uses CoordiNode as persistent **graph memory**:\n", - "- `save_fact` — store a subject → relation → object triple in the graph\n", - "- `query_facts` — run an arbitrary Cypher query against the graph\n", - "- `find_related` — traverse the graph from a given entity\n", - "- `list_all_facts` — dump every fact in the current session\n", + "- `save_fact` \u2014 store a subject \u2192 relation \u2192 object triple in the graph\n", + "- `query_facts` \u2014 run an arbitrary Cypher query against the graph\n", + "- `find_related` \u2014 traverse the graph from a given entity\n", + "- `list_all_facts` \u2014 dump every fact in the current session\n", "\n", - "**Works without OpenAI** — the mock demo section calls tools directly. \n", + "**Works without OpenAI** \u2014 the mock demo section calls tools directly. \n", "Set `OPENAI_API_KEY` to run the full `gpt-4o-mini` ReAct agent.\n", "\n", "**Environments:**\n", - "- **Google Colab** — uses `coordinode-embedded` (in-process Rust engine, no server needed). First run compiles from source (~5 min); subsequent runs use the pip cache.\n", - "- **Local / Docker Compose** — connects to a running CoordiNode server via gRPC." + "- **Google Colab** \u2014 uses `coordinode-embedded` (in-process Rust engine, no server needed). First run compiles from source (~5 min); subsequent runs use the pip cache.\n", + "- **Local / Docker Compose** \u2014 set `COORDINODE_ADDR=host:port` to connect to a running CoordiNode server via gRPC; otherwise uses embedded with a file-backed `COORDINODE_EMBEDDED_PATH`." ] }, { "cell_type": "markdown", - "id": "d4e5f6a7-0003-0000-0000-000000000002", + "id": "1", "metadata": {}, "source": [ "## Install dependencies" @@ -34,7 +34,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d4e5f6a7-0003-0000-0000-000000000003", + "id": "2", "metadata": {}, "outputs": [], "source": [ @@ -43,15 +43,19 @@ "IN_COLAB = \"google.colab\" in sys.modules\n", "_EMBEDDED_PIP_SPEC = (\n", " \"git+https://github.com/structured-world/coordinode-python.git\"\n", - " \"@8da94d694ecaabee6f8380147d02f08220061bfa#subdirectory=coordinode-embedded\"\n", + " \"@50ddc08a89a21fca73be007cb22b57a0054225c3#subdirectory=coordinode-embedded\"\n", + ")\n", + "_SDK_PIP_SPEC = (\n", + " \"git+https://github.com/structured-world/coordinode-python.git\"\n", + " \"@50ddc08a89a21fca73be007cb22b57a0054225c3#subdirectory=coordinode\"\n", ")\n", "\n", "# Install coordinode-embedded in Colab only (requires Rust build).\n", "if IN_COLAB and not os.environ.get(\"COORDINODE_ADDR\"):\n", " # Install Rust toolchain via rustup (https://rustup.rs).\n", - " # Colab's apt packages ship rustc ≤1.75, which cannot build coordinode-embedded\n", - " # (requires Rust ≥1.80 for maturin/pyo3). apt-get is not a viable alternative here.\n", - " # Download the installer to a temp file and execute it explicitly — this avoids\n", + " # Colab's apt packages ship rustc \u22641.75, which cannot build coordinode-embedded\n", + " # (requires Rust \u22651.80 for maturin/pyo3). apt-get is not a viable alternative here.\n", + " # Download the installer to a temp file and execute it explicitly \u2014 this avoids\n", " # piping remote content directly into a shell while maintaining HTTPS/TLS security\n", " # through Python's default ssl context (cert-verified, TLS 1.2+).\n", " # SHA256 pinning of rustup-init is intentionally omitted: rustup.rs does not\n", @@ -59,12 +63,17 @@ " # platform-specific rustup-init binaries), and pinning a hash here would break\n", " # silently on every rustup release. The HTTPS/TLS verification + temp-file\n", " # execution (not piped to shell) is the rustup team's recommended trust model.\n", - " # Skip embedded build if COORDINODE_ADDR is set — user has a gRPC server,\n", + " # Skip embedded build if COORDINODE_ADDR is set \u2014 user has a gRPC server,\n", " # no need to spend 5+ minutes building coordinode-embedded from source.\n", " # The `IN_COLAB` check already guards against local/server environments.\n", " # Security note: downloading rustup-init via HTTPS with cert verification and\n", - " # executing from a temp file (not piped to shell) is by design — this is the\n", + " # executing from a temp file (not piped to shell) is by design \u2014 this is the\n", " # rustup project's own recommended install method for automated environments.\n", + " # protoc is required by coordinode-raft build (prost-build). Skip if already present (faster reruns),\n", + " # otherwise refresh apt indexes first \u2014 Colab caches can go stale on long-lived runtimes.\n", + " if subprocess.run([\"which\", \"protoc\"], capture_output=True).returncode != 0:\n", + " subprocess.run([\"apt-get\", \"update\", \"-y\", \"-q\"], check=True, timeout=120)\n", + " subprocess.run([\"apt-get\", \"install\", \"-y\", \"-q\", \"protobuf-compiler\"], check=True, timeout=120)\n", " import ssl as _ssl, tempfile as _tmp, urllib.request as _ur\n", "\n", " _ctx = _ssl.create_default_context()\n", @@ -93,6 +102,7 @@ " timeout=600,\n", " )\n", "\n", + "_coordinode_spec = _SDK_PIP_SPEC if IN_COLAB else \"coordinode\"\n", "subprocess.run(\n", " [\n", " sys.executable,\n", @@ -100,7 +110,7 @@ " \"pip\",\n", " \"install\",\n", " \"-q\",\n", - " \"coordinode\",\n", + " _coordinode_spec,\n", " \"langchain-community\",\n", " \"langchain-openai\",\n", " \"langgraph\",\n", @@ -118,7 +128,7 @@ }, { "cell_type": "markdown", - "id": "d4e5f6a7-0003-0000-0000-000000000004", + "id": "3", "metadata": {}, "source": [ "## Connect to CoordiNode\n", @@ -130,78 +140,54 @@ { "cell_type": "code", "execution_count": null, - "id": "d4e5f6a7-0003-0000-0000-000000000005", + "id": "4", "metadata": {}, "outputs": [], "source": [ - "import os, socket\n", - "\n", - "\n", - "def _port_open(port):\n", - " try:\n", - " with socket.create_connection((\"127.0.0.1\", port), timeout=1):\n", - " return True\n", - " except OSError:\n", - " return False\n", - "\n", - "\n", - "_use_embedded = True\n", + "import os, tempfile\n", + "\n", + "# Persistent embedded DB path. Colab has /content which persists across cell\n", + "# reruns within a runtime session; locally fall back to the OS temp dir\n", + "# (portable across Linux/macOS/Windows). Override via COORDINODE_EMBEDDED_PATH.\n", + "COORDINODE_EMBEDDED_PATH = os.environ.get(\n", + " \"COORDINODE_EMBEDDED_PATH\",\n", + " \"/content/coordinode-demo.db\"\n", + " if os.path.isdir(\"/content\")\n", + " else os.path.join(tempfile.gettempdir(), \"coordinode-demo.db\"),\n", + ")\n", "\n", "if os.environ.get(\"COORDINODE_ADDR\"):\n", - " # Explicit address — fail hard if health check fails.\n", " COORDINODE_ADDR = os.environ[\"COORDINODE_ADDR\"]\n", " from coordinode import CoordinodeClient\n", "\n", " client = CoordinodeClient(COORDINODE_ADDR)\n", " if not client.health():\n", " client.close()\n", - " raise RuntimeError(f\"CoordiNode at {COORDINODE_ADDR} is not serving health checks\")\n", + " raise RuntimeError(f\"Health check failed for {COORDINODE_ADDR}\")\n", " print(f\"Connected to {COORDINODE_ADDR}\")\n", - " _use_embedded = False\n", "else:\n", - " try:\n", - " grpc_port = int(os.environ.get(\"COORDINODE_PORT\", \"7080\"))\n", - " except ValueError as exc:\n", - " raise RuntimeError(\"COORDINODE_PORT must be an integer\") from exc\n", - "\n", - " if _port_open(grpc_port):\n", - " COORDINODE_ADDR = f\"localhost:{grpc_port}\"\n", - " from coordinode import CoordinodeClient\n", - "\n", - " client = CoordinodeClient(COORDINODE_ADDR)\n", - " if client.health():\n", - " print(f\"Connected to {COORDINODE_ADDR}\")\n", - " _use_embedded = False\n", - " else:\n", - " # Port is open but not a CoordiNode server — fall through to embedded.\n", - " client.close()\n", - "\n", - "if _use_embedded:\n", - " # No server available — use the embedded in-process engine.\n", - " # Works without Docker or any external service; data is in-memory.\n", + " # No explicit server \u2014 use the embedded in-process engine backed by a file\n", + " # so the graph persists across cell reruns and between sibling demo\n", + " # notebooks within the same runtime.\n", " try:\n", " from coordinode_embedded import LocalClient\n", " except ImportError as exc:\n", - " # _EMBEDDED_PIP_SPEC is defined in the install cell; fall back to the\n", - " # pinned spec so this cell remains runnable if executed standalone.\n", - " _pip_spec = globals().get(\n", - " \"_EMBEDDED_PIP_SPEC\",\n", - " \"git+https://github.com/structured-world/coordinode-python.git\"\n", - " \"@8da94d694ecaabee6f8380147d02f08220061bfa#subdirectory=coordinode-embedded\",\n", - " )\n", " raise RuntimeError(\n", " \"coordinode-embedded is not installed. \"\n", - " f\"Run: pip install {_pip_spec}\"\n", - " \" — or start a CoordiNode server and set COORDINODE_ADDR.\"\n", + " \"In Colab, rerun the install cell above. \"\n", + " \"Locally, install from source: \"\n", + " \"pip install 'git+https://github.com/structured-world/coordinode-python.git#subdirectory=coordinode-embedded' \"\n", + " \"(requires Rust toolchain, ~5 min build). \"\n", + " \"Alternatively start a CoordiNode server and set COORDINODE_ADDR.\"\n", " ) from exc\n", "\n", - " client = LocalClient(\":memory:\")\n", - " print(\"Using embedded LocalClient (in-process)\")\n" + " client = LocalClient(COORDINODE_EMBEDDED_PATH)\n", + " print(f\"Using embedded LocalClient at {COORDINODE_EMBEDDED_PATH}\")\n" ] }, { "cell_type": "markdown", - "id": "d4e5f6a7-0003-0000-0000-000000000006", + "id": "5", "metadata": {}, "source": [ "## 1. Define LangChain tools\n", @@ -213,7 +199,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d4e5f6a7-0003-0000-0000-000000000007", + "id": "6", "metadata": {}, "outputs": [], "source": [ @@ -245,7 +231,7 @@ "\n", "@tool\n", "def save_fact(subject: str, relation: str, obj: str) -> str:\n", - " \"\"\"Save a fact (subject → relation → object) into the knowledge graph.\n", + " \"\"\"Save a fact (subject \u2192 relation \u2192 object) into the knowledge graph.\n", " Example: save_fact('Alice', 'WORKS_AT', 'Acme Corp')\"\"\"\n", " rel_type = relation.upper().replace(\" \", \"_\")\n", " # Validate rel_type before interpolating into Cypher to prevent injection.\n", @@ -278,7 +264,7 @@ " _LIMIT_AT_END_RE = re.compile(r\"\\bLIMIT\\s+(\\d+)\\s*;?\\s*$\", re.IGNORECASE | re.DOTALL)\n", " def _cap_limit(m):\n", " return f\"LIMIT {min(int(m.group(1)), 20)}\"\n", - " # Reject queries with parameters other than $sess — only {\"sess\": SESSION} is passed.\n", + " # Reject queries with parameters other than $sess \u2014 only {\"sess\": SESSION} is passed.\n", " extra_params = sorted(\n", " {m.group(1) for m in re.finditer(r\"\\$([A-Za-z_][A-Za-z0-9_]*)\", q)} - {\"sess\"}\n", " )\n", @@ -302,7 +288,7 @@ " safe_depth = max(1, min(int(depth), 3))\n", " # Note: session constraint is on both endpoints (n, m). Constraining\n", " # intermediate nodes via path variables (MATCH p=..., WHERE ALL(x IN nodes(p)...))\n", - " # is not yet supported by CoordiNode — planned for a future release.\n", + " # is not yet supported by CoordiNode \u2014 planned for a future release.\n", " # In practice, session isolation holds because all nodes are MERGE'd with\n", " # their session scope, so cross-session paths cannot form.\n", " rows = client.cypher(\n", @@ -335,10 +321,10 @@ }, { "cell_type": "markdown", - "id": "d4e5f6a7-0003-0000-0000-000000000008", + "id": "7", "metadata": {}, "source": [ - "## 2. Mock demo — no LLM required (direct tool calls)\n", + "## 2. Mock demo \u2014 no LLM required (direct tool calls)\n", "\n", "Shows the full graph memory workflow by calling the tools directly." ] @@ -346,7 +332,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d4e5f6a7-0003-0000-0000-000000000009", + "id": "8", "metadata": {}, "outputs": [], "source": [ @@ -381,10 +367,10 @@ }, { "cell_type": "markdown", - "id": "d4e5f6a7-0003-0000-0000-000000000010", + "id": "9", "metadata": {}, "source": [ - "## 3. LangGraph StateGraph — manual workflow\n", + "## 3. LangGraph StateGraph \u2014 manual workflow\n", "\n", "Shows how to wire CoordiNode tool calls into a LangGraph state machine without an LLM." ] @@ -392,7 +378,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d4e5f6a7-0003-0000-0000-000000000011", + "id": "10", "metadata": {}, "outputs": [], "source": [ @@ -440,10 +426,10 @@ }, { "cell_type": "markdown", - "id": "d4e5f6a7-0003-0000-0000-000000000012", + "id": "11", "metadata": {}, "source": [ - "## 4. LangGraph ReAct agent (optional — requires OPENAI_API_KEY)\n", + "## 4. LangGraph ReAct agent (optional \u2014 requires OPENAI_API_KEY)\n", "\n", "> Set `OPENAI_API_KEY` in your environment or via\n", "> `os.environ['OPENAI_API_KEY'] = 'sk-...'` before running.\n", @@ -453,12 +439,12 @@ { "cell_type": "code", "execution_count": null, - "id": "d4e5f6a7-0003-0000-0000-000000000013", + "id": "12", "metadata": {}, "outputs": [], "source": [ "if not os.environ.get(\"OPENAI_API_KEY\"):\n", - " print(\"OPENAI_API_KEY not set — skipping LLM agent. See section 2 for the mock demo.\")\n", + " print(\"OPENAI_API_KEY not set \u2014 skipping LLM agent. See section 2 for the mock demo.\")\n", "else:\n", " from langchain_openai import ChatOpenAI\n", " from langgraph.prebuilt import create_react_agent\n", @@ -483,7 +469,7 @@ }, { "cell_type": "markdown", - "id": "d4e5f6a7-0003-0000-0000-000000000014", + "id": "13", "metadata": {}, "source": [ "## Cleanup" @@ -492,7 +478,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d4e5f6a7-0003-0000-0000-000000000015", + "id": "14", "metadata": {}, "outputs": [], "source": [ diff --git a/docker-compose.yml b/docker-compose.yml index 795b849..a81972a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,7 +7,7 @@ version: "3.9" services: coordinode: - image: ghcr.io/structured-world/coordinode:0.3.17 + image: ghcr.io/structured-world/coordinode:0.4.1 container_name: coordinode ports: - "7080:7080" # gRPC diff --git a/proto b/proto index e1ab91d..eb472a4 160000 --- a/proto +++ b/proto @@ -1 +1 @@ -Subproject commit e1ab91d180c4b1f61106a09e4b56da6a9736ebdb +Subproject commit eb472a4c6a6069ee4167061442402ceffe42e0cc diff --git a/tests/integration/test_sdk.py b/tests/integration/test_sdk.py index 68f69eb..3cf00df 100644 --- a/tests/integration/test_sdk.py +++ b/tests/integration/test_sdk.py @@ -18,7 +18,6 @@ AsyncCoordinodeClient, CoordinodeClient, EdgeTypeInfo, - HybridResult, LabelInfo, TextIndexInfo, TextResult, @@ -696,43 +695,47 @@ def test_text_search_fuzzy(client): client.cypher(f"MATCH (n:{label} {{tag: $tag}}) DELETE n", params={"tag": tag}) -@_fts -def test_hybrid_text_vector_search_returns_results(client): - """hybrid_text_vector_search() returns HybridResult list with RRF scores.""" - label = f"FtsHybridTest_{uid()}" +def test_cypher_accepts_consistency_kwargs(client): + """cypher() wires read_concern / write_concern / read_preference / after_index into the request.""" + label = f"ConsistencyTest_{uid()}" tag = uid() - idx_name = f"idx_{label.lower()}" - vec = [float(i) / 16 for i in range(16)] - # Same node-as-int pattern: RETURN n → Value::Int(node_id) in CoordiNode executor. - rows = client.cypher( - f"CREATE (n:{label} {{tag: $tag, body: 'graph neural network embedding', embedding: $vec}}) RETURN n AS node_id", - params={"tag": tag, "vec": vec}, - ) - seed_id = rows[0]["node_id"] - idx_created = False try: - client.create_text_index(idx_name, label, "body") - idx_created = True - results = client.hybrid_text_vector_search( - label, - "graph neural", - vec, - limit=5, + client.cypher( + f"CREATE (n:{label} {{tag: $tag, v: 1}})", + params={"tag": tag}, + write_concern="majority", ) - assert isinstance(results, list) - if not results: - pytest.xfail("hybrid_text_vector_search returned no results — vector index not available on this server") - assert any(r.node_id == seed_id for r in results), ( - f"seeded node {seed_id} not found in hybrid_text_vector_search results: {results}" + rows = client.cypher( + f"MATCH (n:{label} {{tag: $tag}}) RETURN n.v AS v", + params={"tag": tag}, + read_concern="majority", + read_preference="primary", + after_index=0, ) - r = results[0] - assert isinstance(r, HybridResult) - assert isinstance(r.node_id, int) - assert isinstance(r.score, float) - assert r.score > 0 + assert rows and rows[0]["v"] == 1 finally: - try: - if idx_created: - client.drop_text_index(idx_name) - finally: - client.cypher(f"MATCH (n:{label} {{tag: $tag}}) DETACH DELETE n", params={"tag": tag}) + # DELETE is a no-op when no nodes match — safe to run unconditionally. + client.cypher(f"MATCH (n:{label} {{tag: $tag}}) DELETE n", params={"tag": tag}) + + +def test_cypher_rejects_invalid_consistency_values(client): + """Invalid consistency kwargs raise ValueError before the RPC.""" + with pytest.raises(ValueError, match="invalid read_concern"): + client.cypher("RETURN 1", read_concern="strong") + with pytest.raises(ValueError, match="invalid write_concern"): + client.cypher("RETURN 1", write_concern="w9") + with pytest.raises(ValueError, match="invalid read_preference"): + client.cypher("RETURN 1", read_preference="leader") + with pytest.raises(ValueError, match="after_index must be a non-negative integer"): + client.cypher("RETURN 1", after_index=-1) + # Causal reads (after_index > 0) require write_concern='majority'. + with pytest.raises(ValueError, match="after_index > 0 requires write_concern='majority'"): + client.cypher("RETURN 1", after_index=42) + with pytest.raises(ValueError, match="after_index > 0 requires write_concern='majority'"): + client.cypher("RETURN 1", after_index=42, write_concern="w1") + # Type validation runs before the causal-read check so bools/strings + # surface the non-negative-integer error rather than a misleading one. + with pytest.raises(ValueError, match="after_index must be a non-negative integer"): + client.cypher("RETURN 1", after_index=True) + with pytest.raises(ValueError, match="after_index must be a non-negative integer"): + client.cypher("RETURN 1", after_index="7") # type: ignore[arg-type] diff --git a/tests/unit/test_consistency_helpers.py b/tests/unit/test_consistency_helpers.py new file mode 100644 index 0000000..c987fc0 --- /dev/null +++ b/tests/unit/test_consistency_helpers.py @@ -0,0 +1,92 @@ +"""Unit tests for consistency-parameter helpers in coordinode.client.""" + +from __future__ import annotations + +import pytest + +# Proto stubs are generated by `make proto` and gitignored; skip the whole module +# when running on a fresh checkout that has not regenerated them yet. +pb = pytest.importorskip("coordinode._proto.coordinode.v1.replication.consistency_pb2") + +from coordinode.client import ( # noqa: E402 — guarded import above + _make_read_concern, + _make_read_preference, + _make_write_concern, +) + + +class TestReadConcern: + def test_level_only(self) -> None: + rc = _make_read_concern("majority", None) + assert rc.level == pb.READ_CONCERN_LEVEL_MAJORITY + assert rc.after_index == 0 + + def test_after_index_only(self) -> None: + rc = _make_read_concern(None, 42) + assert rc.after_index == 42 + + def test_level_and_after_index(self) -> None: + rc = _make_read_concern("linearizable", 7) + assert rc.level == pb.READ_CONCERN_LEVEL_LINEARIZABLE + assert rc.after_index == 7 + + def test_case_insensitive(self) -> None: + assert _make_read_concern("MAJORITY", None).level == pb.READ_CONCERN_LEVEL_MAJORITY + + def test_invalid_level_raises(self) -> None: + with pytest.raises(ValueError, match="invalid read_concern"): + _make_read_concern("strong", None) + + @pytest.mark.parametrize("bad", ["", " ", 5, True]) + def test_rejects_blank_or_non_string_level(self, bad: object) -> None: + with pytest.raises(ValueError, match="read_concern must be a non-empty string"): + _make_read_concern(bad, None) # type: ignore[arg-type] + + @pytest.mark.parametrize("bad", [True, False, -1, 1.5, "7"]) + def test_rejects_bool_negative_non_int_after_index(self, bad: object) -> None: + with pytest.raises(ValueError, match="after_index must be a non-negative integer"): + _make_read_concern(None, bad) # type: ignore[arg-type] + + +class TestWriteConcern: + @pytest.mark.parametrize( + ("level", "expected"), + [ + ("w0", pb.WRITE_CONCERN_LEVEL_W0), + ("w1", pb.WRITE_CONCERN_LEVEL_W1), + ("majority", pb.WRITE_CONCERN_LEVEL_MAJORITY), + ], + ) + def test_valid_levels(self, level: str, expected: int) -> None: + assert _make_write_concern(level).level == expected + + def test_invalid_raises(self) -> None: + with pytest.raises(ValueError, match="invalid write_concern"): + _make_write_concern("w9") + + @pytest.mark.parametrize("bad", ["", " ", None, 1]) + def test_rejects_blank_or_non_string(self, bad: object) -> None: + with pytest.raises(ValueError, match="write_concern must be a non-empty string"): + _make_write_concern(bad) # type: ignore[arg-type] + + +class TestReadPreference: + @pytest.mark.parametrize( + ("pref", "expected"), + [ + ("primary", pb.READ_PREFERENCE_PRIMARY), + ("secondary_preferred", pb.READ_PREFERENCE_SECONDARY_PREFERRED), + ("nearest", pb.READ_PREFERENCE_NEAREST), + ], + ) + def test_valid(self, pref: str, expected: int) -> None: + assert _make_read_preference(pref) == expected + + def test_invalid_raises(self) -> None: + with pytest.raises(ValueError, match="invalid read_preference"): + _make_read_preference("leader") + + @pytest.mark.parametrize("bad", ["", " ", None, 0]) + def test_rejects_blank_or_non_string(self, bad: object) -> None: + with pytest.raises(ValueError, match="read_preference must be a non-empty string"): + _make_read_preference(bad) # type: ignore[arg-type]