Airbyte source connector for extracting data from Microsoft OneDrive cloud storage with OAuth authentication and file-based streaming capabilities.
npx @tessl/cli install tessl/pypi-source-microsoft-onedrive@0.2.0
An Airbyte source connector that enables data extraction and synchronization from Microsoft OneDrive cloud storage. Built on the Airbyte CDK file-based framework with OAuth 2.0 authentication integration, automated file discovery, and comprehensive configuration management for enterprise data integration workflows.
pip install source-microsoft-onedrive
from source_microsoft_onedrive import SourceMicrosoftOneDrive
For CLI usage:
from source_microsoft_onedrive.run import run
Internal imports (for advanced usage):
from source_microsoft_onedrive.spec import SourceMicrosoftOneDriveSpec
from source_microsoft_onedrive.stream_reader import SourceMicrosoftOneDriveStreamReader, SourceMicrosoftOneDriveClient
from source_microsoft_onedrive import SourceMicrosoftOneDrive
from airbyte_cdk import launch
# Configuration with OAuth credentials
config = {
"credentials": {
"auth_type": "Client",
"tenant_id": "your-tenant-id",
"client_id": "your-client-id",
"client_secret": "your-client-secret",
"refresh_token": "your-refresh-token"
},
"drive_name": "OneDrive",
"search_scope": "ALL",
"folder_path": ".",
"streams": [{
"name": "files",
"globs": ["*.csv", "*.json"],
"validation_policy": "Emit Record",
"format": {"filetype": "csv"}
}]
}
# Initialize and run connector
source = SourceMicrosoftOneDrive(None, config, None)
launch(source, ["read", "--config", "config.json", "--catalog", "catalog.json"])
# Install via poetry
poetry install
# Run connector commands
source-microsoft-onedrive spec
source-microsoft-onedrive check --config config.json
source-microsoft-onedrive discover --config config.json
source-microsoft-onedrive read --config config.json --catalog catalog.json
The connector is built on Airbyte's file-based framework with these key components:
The connector supports both OAuth (user delegation) and service principal authentication, can search across accessible drives and shared items, handles nested folder structures, and integrates with smart-open for efficient file reading across various formats.
Core Airbyte source connector functionality including specification generation, configuration validation, stream discovery, and data reading with OAuth authentication support.
class SourceMicrosoftOneDrive(FileBasedSource):
def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional[Mapping[str, Any]], state: Optional[TState]): ...
def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification: ...
Comprehensive configuration models supporting OAuth and service authentication with validation, schema generation, and documentation URL management.
class SourceMicrosoftOneDriveSpec(AbstractFileBasedSpec, BaseModel):
credentials: Union[OAuthCredentials, ServiceCredentials]
drive_name: Optional[str]
search_scope: str
folder_path: str
@classmethod
def documentation_url(cls) -> str: ...
@classmethod
def schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]: ...
File discovery, enumeration, and reading capabilities across OneDrive drives and shared items with glob pattern matching and metadata extraction.
class SourceMicrosoftOneDriveStreamReader(AbstractFileBasedStreamReader):
def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: logging.Logger) -> Iterable[RemoteFile]: ...
def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger) -> IOBase: ...
def get_all_files(self): ...
Microsoft Graph API authentication using MSAL with support for OAuth refresh tokens and service principal credentials.
class SourceMicrosoftOneDriveClient:
def __init__(self, config: SourceMicrosoftOneDriveSpec): ...
@property
def client(self): ...
def _get_access_token(self): ...
def run():
"""Main CLI entry point that processes command-line arguments and launches the connector."""
from typing import Any, Dict, List, Mapping, Optional, Union, Iterable
from datetime import datetime
from io import IOBase
# Airbyte CDK imports
from airbyte_cdk import ConfiguredAirbyteCatalog, ConnectorSpecification, TState
from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
# Pydantic for configuration models
from pydantic import BaseModel, Field
# Microsoft authentication
from msal import ConfidentialClientApplication
from office365.graph_client import GraphClient
# Additional imports for error handling and web requests
from airbyte_cdk import AirbyteTracedException, FailureType
import requests
import smart_open
import logging