Skip to content

SDK Types

The SDK provides three canonical types for connector output.

EntityInput

Document-level metadata for items being indexed.

@dataclass
class EntityInput:
    """Document-level metadata for an item being indexed.

    The four required fields identify and label the item. Every optional
    field defaults to ``None`` except ``sensitivity``, which defaults to
    ``"normal"``.
    """

    # === REQUIRED ===
    source: str
    """Must match connector.source_name."""

    source_id: str
    """
    Stable identifier within source.
    CRITICAL: Must NOT change when content is edited.
    Good: page UUID, file path, URL
    Bad: content hash, random UUID
    """

    entity_type: str
    """
    Type classification.
    Standard: 'document', 'note', 'page', 'bookmark', 'email'
    Custom types allowed (e.g., 'jira_ticket').
    """

    title: str
    """Human-readable title. Indexed in FTS."""

    # === OPTIONAL ===
    uri: Optional[str] = None
    """
    Canonical link to original.
    Examples:
    - file:///path/to/file.md
    - obsidian://open?vault=notes&file=page
    - https://notion.so/page-id
    """

    mime_type: Optional[str] = None
    """MIME type: 'text/markdown', 'text/html', etc."""

    tags: Optional[List[str]] = None
    """List of tags/labels. Indexed for search."""

    metadata: Optional[Dict[str, Any]] = None
    """
    Source-specific fields stored as JSON.
    Examples:
    - {"database_id": "...", "properties": {...}}
    - {"from": "...", "thread_id": "..."}
    """

    sensitivity: str = "normal"
    """
    Access control: 'normal', 'sensitive', or 'secret'.
    Both 'sensitive' and 'secret' require the 'sensitive' scope for MCP access.
    """

    content_hash: Optional[str] = None
    """
    SHA256[:32] of full content for change detection.
    Use hoard.sdk.compute_content_hash()
    """

    created_at: Optional[datetime] = None
    """Original creation timestamp."""

    updated_at: Optional[datetime] = None
    """Last modification. Used for recency ranking."""

    connector_name: Optional[str] = None
    """Set automatically by Hoard."""

    connector_version: Optional[str] = None
    """Set automatically by Hoard."""

Example

# Build the metadata record for one document.
entity = EntityInput(
    source="my_source",
    source_id="doc-abc-123",  # stable: must not change when the content is edited
    entity_type="document",
    title="Project Notes",
    uri="file:///path/to/notes.md",
    mime_type="text/markdown",
    tags=["project", "planning"],
    metadata={"author": "Alice"},
    updated_at=datetime.now(),
    content_hash=compute_content_hash(content),  # used for change detection
)

ChunkInput

Text spans within an entity.

@dataclass
class ChunkInput:
    """A text span within an entity.

    Only ``content`` is required. ``chunk_type`` defaults to ``"semantic"``
    and both character offsets default to ``None``.
    """

    # === REQUIRED ===
    content: str
    """
    Chunk text.
    Target: 200-500 tokens, which is optimal for:
    - Embedding quality
    - Citation precision
    - Context efficiency
    """

    # === OPTIONAL ===
    chunk_type: str = "semantic"
    """How chunk was created. Default: 'semantic'"""

    char_offset_start: Optional[int] = None
    """Character offset in original content."""

    char_offset_end: Optional[int] = None
    """End character offset."""

Example

# Two consecutive chunks of the same entity.
chunks = [
    ChunkInput(
        content="The project aims to improve...",
        char_offset_start=0,
        char_offset_end=450,
    ),
    ChunkInput(
        content="Key milestones include...",
        # Starts at 400, before the previous chunk ends at 450,
        # so the two chunks overlap by 50 characters.
        char_offset_start=400,
        char_offset_end=850,
    ),
]

DiscoverResult

Result of discover() validation.

@dataclass
class DiscoverResult:
    """Result of discover() validation.

    Only ``success`` is required. ``message`` defaults to an empty string
    and the remaining fields default to ``None``.
    """

    success: bool
    """Whether connector is ready to sync."""

    message: str = ""
    """
    Human-readable status.
    On success: "Found 150 pages"
    On failure: "Export not found at /path"
    """

    entity_count_estimate: Optional[int] = None
    """Approximate entity count for progress indication."""

    source_info: Optional[Dict[str, Any]] = None
    """
    Source diagnostics.
    Examples:
    - {"export_date": "2026-02-01"}
    - {"api_version": "2.1"}
    """

Examples

Success:

return DiscoverResult(
success=True,
message="Found 150 pages in export",
entity_count_estimate=150,
source_info={"export_date": "2026-02-01"},
)

Failure:

return DiscoverResult(
success=False,
message="Export not found: /path/to/export.zip",
)

source_id Best Practices

Good source_ids

# Each alternative below reads the identifier from the source record itself,
# so it is intended to stay the same when the item's content is edited.
# Page UUID from export
source_id = page["id"] # "abc-123-def"
# File path
# NOTE(review): an absolute path changes if the file is moved or renamed —
# confirm that is acceptable for this source.
source_id = str(file_path.absolute()) # "/Users/me/notes.md"
# URL
source_id = bookmark["url"] # "https://example.com"
# Database record ID
source_id = f"{table}:{row_id}" # "tasks:42"

Bad source_ids

# Anti-patterns: none of these values is a stable identifier for an item.
# Content hash — changes on every edit!
# (A content hash belongs in EntityInput.content_hash, which exists for
# change detection.)
source_id = hashlib.sha256(content.encode()).hexdigest()
# Random UUID — creates duplicates!
source_id = str(uuid.uuid4())
# Timestamp — not stable!
source_id = str(datetime.now())

Next Steps