ConnectorV1 Interface
The ConnectorV1 class is the base interface all connectors must implement.
Full Interface
from abc import ABC, abstractmethod
from typing import Iterator, Tuple, List, Set, Optional
class ConnectorV1(ABC):
    """
    Base class for all Hoard connectors.

    Connectors must implement:
    - name, version, source_name (properties)
    - discover() (validation)
    - scan() (full sync)

    Connectors may implement:
    - capabilities (property)
    - cleanup() (post-sync cleanup)
    """

    # =========================================================================
    # REQUIRED PROPERTIES
    # =========================================================================

    @property
    @abstractmethod
    def name(self) -> str:
        """
        Unique connector identifier.

        Convention: lowercase, underscores (e.g., 'notion_export')
        """

    @property
    @abstractmethod
    def version(self) -> str:
        """
        Semantic version string.

        Format: MAJOR.MINOR.PATCH (e.g., '1.0.0')
        """

    @property
    @abstractmethod
    def source_name(self) -> str:
        """
        Source identifier used in entities.

        Groups entities from the same logical source.
        Convention: lowercase, no underscores (e.g., 'notion')
        """

    # =========================================================================
    # OPTIONAL PROPERTIES
    # =========================================================================

    @property
    def capabilities(self) -> Set[str]:
        """
        Declare connector capabilities.

        Default: {"scan"}
        """
        return {"scan"}

    # =========================================================================
    # REQUIRED METHODS
    # =========================================================================

    @abstractmethod
    def discover(self, config: dict) -> DiscoverResult:
        """
        Validate configuration and check source accessibility.

        Called before each sync. Should be fast.

        Args:
            config: Connector configuration mapping.

        Returns:
            A DiscoverResult reporting whether the source is usable.
        """

    @abstractmethod
    def scan(self, config: dict) -> Iterator[Tuple[EntityInput, List[ChunkInput]]]:
        """
        Full scan of the source.

        Yields all entities and their chunks.

        Args:
            config: Connector configuration mapping.

        Yields:
            (entity, chunks) pairs, one per entity found in the source.
        """

    # =========================================================================
    # OPTIONAL METHODS
    # =========================================================================

    def cleanup(self) -> None:
        """Called after sync. Release resources."""


# Required Properties
name
Unique connector identifier used for:
- Configuration: connectors.{name}.enabled
- Provenance tracking
- Status display
@property
def name(self) -> str:
    """Return this connector's unique identifier."""
    return "notion_export"


# version
Semantic version stored with every entity for provenance.
@property
def version(self) -> str:
    """Return this connector's semantic version (MAJOR.MINOR.PATCH)."""
    return "1.0.0"


# source_name
Source identifier for entities. Multiple connectors can share a source_name if they index the same source differently.
@property
def source_name(self) -> str:
    """Return the source identifier attached to this connector's entities."""
    return "notion"  # Both notion_export and notion_live use this


# Required Methods
discover()
Validates configuration and checks source accessibility.
def discover(self, config: dict) -> DiscoverResult:
    """
    Validate the export configuration and estimate how many pages exist.

    Args:
        config: Connector configuration; must contain "export_path".

    Returns:
        A failed DiscoverResult when "export_path" is missing/empty or is
        not an existing directory; otherwise a successful one carrying
        the number of HTML pages found.
    """
    raw_path = config.get("export_path")
    # An empty default would become Path("."), which always exists, so a
    # missing field has to be rejected explicitly.
    if not raw_path:
        return DiscoverResult(
            success=False,
            message="Missing required config field: export_path",
        )

    path = Path(raw_path)
    if not path.is_dir():
        return DiscoverResult(
            success=False,
            message=f"Export not found: {path}",
        )

    # Walk recursively, like scan(), so the estimate matches what a sync
    # will actually visit; count lazily instead of building a list.
    page_count = sum(1 for _ in path.rglob("*.html"))
    return DiscoverResult(
        success=True,
        message=f"Found {page_count} pages",
        entity_count_estimate=page_count,
    )


# Should check:
- Required config fields present
- Paths exist and are readable
- API credentials valid (for live connectors)
Should NOT:
- Download large amounts of data
- Make many API calls
- Take more than a few seconds
scan()
Full scan yielding all entities and chunks.
def scan(self, config: dict) -> Iterator[Tuple[EntityInput, List[ChunkInput]]]:
    """
    Walk the export directory and yield one entity per HTML page.

    Args:
        config: Connector configuration; "export_path" is required.

    Yields:
        (entity, chunks) for every HTML file that could be read and
        converted. Files that fail are logged and skipped.

    Raises:
        KeyError: If "export_path" is absent from config.
    """
    export_path = Path(config["export_path"])

    for html_file in export_path.rglob("*.html"):
        try:
            # Explicit encoding so decoding does not depend on the
            # platform's default locale.
            content = html_file.read_text(encoding="utf-8")
            text = html_to_text(content)

            entity = EntityInput(
                source=self.source_name,
                source_id=extract_page_id(html_file),
                entity_type="page",
                title=extract_title(content),
                content_hash=compute_content_hash(text),
            )

            chunks = [
                ChunkInput(
                    content=c.text,
                    char_offset_start=c.start,
                    char_offset_end=c.end,
                )
                for c in chunk_plain_text(text)
            ]
        except Exception as e:
            # Deliberately broad: one unreadable page must not abort the
            # whole sync.
            logger.warning("Skipping %s: %s", html_file, e)
        else:
            # Yield outside the try body so an exception thrown into the
            # generator at this point is not swallowed as a "skip".
            yield entity, chunks


# Optional Properties
capabilities
Declare what the connector can do:
@property
def capabilities(self) -> Set[str]:
    """Return the set of capability names this connector supports."""
    return {"scan"}


# Default is {"scan"}.
Optional Methods
cleanup()
Release resources after sync:
def cleanup(self) -> None:
    """
    Remove the connector's temporary directory, if one was created.

    Safe to call when no temporary directory was ever set; removal
    errors are ignored.
    """
    # getattr: the attribute may never have been assigned if the sync
    # did not get far enough to create a temp dir.
    temp_dir = getattr(self, "temp_dir", None)
    if temp_dir:
        shutil.rmtree(temp_dir, ignore_errors=True)


# Lifecycle
1. LOAD
   ├── Read manifest.yaml
   ├── Validate config schema
   └── Import entry_point
2. DISCOVER
   ├── connector.discover(config)
   └── Return success/failure
3. SYNC
   ├── connector.scan(config)
   ├── Core ingests each (entity, chunks)
   └── Core tombstones unseen entities
4. CLEANUP
   └── connector.cleanup()