An Airbyte source connector for extracting data from Webflow CMS collections
npx @tessl/cli install tessl/pypi-airbyte-source-webflow@0.1.0

An Airbyte source connector that extracts data from Webflow CMS collections. This connector enables data extraction from Webflow, a content management system for hosting websites, by dynamically discovering available collections and creating schemas based on Webflow's API field definitions.
Installation: pip install airbyte-source-webflow or poetry add airbyte-source-webflow
CLI command: source-webflow (requires installation first)
Import: from source_webflow import SourceWebflow

For running the connector:
from source_webflow.run import run

from source_webflow import SourceWebflow
import logging
# Configuration for the connector
config = {
"api_key": "your_webflow_api_token",
"site_id": "your_webflow_site_id",
"accept_version": "1.0.0" # Optional, no default in spec
}
# Create source instance
source = SourceWebflow()
# Check connection
logger = logging.getLogger(__name__)
is_connected, error = source.check_connection(logger, config)
if is_connected:
# Get available streams (collections)
streams = source.streams(config)
for stream in streams:
print(f"Available collection: {stream.name}")
else:
print(f"Connection failed: {error}")Command-line usage (Airbyte protocol):
# Install the connector first
pip install airbyte-source-webflow
# Run the connector with Airbyte protocol
source-webflow check --config config.json
source-webflow discover --config config.json
source-webflow read --config config.json --catalog catalog.json

Note: This connector is designed for use within the Airbyte platform but can be run standalone for testing and development purposes.
The connector follows Airbyte's CDK (Connector Development Kit) patterns and implements a dynamic discovery approach:
The connector performs full-refresh synchronization, downloading all available data from selected collections on each run, as Webflow data volumes are typically small and incremental sync is not supported.
Main source connector class with configuration validation, connection testing, and stream discovery functionality.
class SourceWebflow(AbstractSource):
def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, any]: ...
def streams(self, config: Mapping[str, Any]) -> List[Stream]: ...
@staticmethod
def get_authenticator(config): ...Dynamic stream classes for handling Webflow collections, schemas, and data extraction with automatic pagination and type conversion.
class CollectionContents(WebflowStream):
    """Stream class for a Webflow collection.

    Constructed from a site id plus the id and name of one collection.
    """

    def __init__(self, site_id: str = None, collection_id: str = None, collection_name: str = None, **kwargs): ...

    def get_json_schema(self) -> Mapping[str, Any]:
        """Return this stream's JSON schema as a mapping."""
class CollectionsList(WebflowStream):
    """Stream class constructed from a site id.

    NOTE(review): presumably lists the collections of that site; confirm
    against the implementation.
    """

    def __init__(self, site_id: str = None, **kwargs): ...
class CollectionSchema(WebflowStream):
def __init__(self, collection_id: str = None, **kwargs): ...Token-based authentication with Webflow API version headers and configuration schema validation.
class WebflowTokenAuthenticator(WebflowAuthMixin, TokenAuthenticator):
    """Token authenticator that combines ``WebflowAuthMixin`` with ``TokenAuthenticator``."""

    ...
class WebflowAuthMixin:
def __init__(self, *, accept_version_header: str = "accept-version", accept_version: str, **kwargs): ...
def get_auth_header(self) -> Mapping[str, Any]: ...Configuration parameters required by the connector:
# Required configuration parameters
config = {
"api_key": str, # Webflow API token (required, secret)
"site_id": str, # Webflow site identifier (required)
"accept_version": str # API version (optional, no default)
}Complete JSON schema specification for the connector configuration:
# Connector configuration schema, mirroring spec.yaml
SPEC = {
    "documentationUrl": "https://docs.airbyte.com/integrations/sources/webflow",
    "connectionSpecification": {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "title": "Webflow Spec",
        "type": "object",
        # Only the token and the site id are mandatory.
        "required": ["api_key", "site_id"],
        "additionalProperties": True,
        "properties": {
            "site_id": {
                "title": "Site id",
                "type": "string",
                "description": "The id of the Webflow site you are requesting data from. See https://developers.webflow.com/#sites",
                "example": "a relatively long hex sequence",
                "order": 0,
            },
            "api_key": {
                "title": "API token",
                "type": "string",
                "description": "The API token for authenticating to Webflow. See https://university.webflow.com/lesson/intro-to-the-webflow-api",
                "example": "a very long hex sequence",
                "order": 1,
                "airbyte_secret": True,
            },
            "accept_version": {
                "title": "Accept Version",
                "type": "string",
                "description": "The version of the Webflow API to use. See https://developers.webflow.com/#versioning",
                "example": "1.0.0",
                "order": 2,
            },
        },
    },
}

# Webflow API version constant
WEBFLOW_ACCEPT_VERSION = "1.0.0" # Default API version used by connectordef run():
"""Main entry point for the connector CLI."""Utilities for converting Webflow field types to Airbyte-compatible JSON schema types.
class WebflowToAirbyteMapping:
    """Utility class for mapping Webflow field types to JSON schema types."""

    # Keys are Webflow field type names; values are JSON schema fragments.
    # Every fragment lists "null" first, so each mapped field is nullable.
    webflow_to_airbyte_mapping = {
        "Bool": {"type": ["null", "boolean"]},
        "Date": {"type": ["null", "string"], "format": "date-time"},
        "Email": {"type": ["null", "string"]},
        "ImageRef": {"type": ["null", "object"], "additionalProperties": True},
        "ItemRef": {"type": ["null", "string"]},
        "ItemRefSet": {"type": ["null", "array"]},
        "Link": {"type": ["null", "string"]},
        "Number": {"type": ["null", "number"]},
        "Option": {"type": ["null", "string"]},
        "PlainText": {"type": ["null", "string"]},
        "RichText": {"type": ["null", "string"]},
        "User": {"type": ["null", "string"]},
        "Video": {"type": ["null", "string"]},
        "FileRef": {"type": ["null", "object"]},
    }

# External types from airbyte_cdk
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
import logging
import requests
# Airbyte CDK base classes
class AbstractSource:
    """Base class for Airbyte source connectors."""

    # Interface stubs: the bodies are intentionally empty.
    def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Any]: ...

    def streams(self, config: Mapping[str, Any]) -> List['Stream']: ...
class Stream:
    """Base class for data streams."""

    # Interface stub: the body is intentionally empty.
    def read_records(self, sync_mode: str) -> Iterable[Mapping]: ...
class HttpStream(Stream):
    """Base class for HTTP-based streams."""

    # Declared but not assigned here.
    url_base: str

    # Interface stubs: the bodies are intentionally empty.
    def request_params(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> MutableMapping[str, Any]: ...

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: ...

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: ...
class TokenAuthenticator:
    """Base token authenticator from airbyte_cdk."""

    # Interface stubs: the bodies are intentionally empty.
    def __init__(self, token: str, **kwargs): ...

    def get_auth_header(self) -> Mapping[str, Any]: ...
# Shape of the connector configuration: option name -> expected Python type
ConfigSpec = {
    "api_key": str,  # Webflow API token (required, secret)
    "site_id": str,  # Webflow site identifier (required)
    "accept_version": str,  # API version (optional, no default)
}