Airbyte source connector for extracting data from Microsoft OneDrive cloud storage with OAuth authentication and file-based streaming capabilities.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
An Airbyte source connector that enables data extraction and synchronization from Microsoft OneDrive cloud storage. Built on the Airbyte CDK file-based framework with OAuth 2.0 authentication integration, automated file discovery, and comprehensive configuration management for enterprise data integration workflows.
pip install source-microsoft-onedrive
from source_microsoft_onedrive import SourceMicrosoftOneDrive
For CLI usage:
from source_microsoft_onedrive.run import run
Internal imports (for advanced usage):
from source_microsoft_onedrive.spec import SourceMicrosoftOneDriveSpec
from source_microsoft_onedrive.stream_reader import SourceMicrosoftOneDriveStreamReader, SourceMicrosoftOneDriveClient
from source_microsoft_onedrive import SourceMicrosoftOneDrive
from airbyte_cdk import launch
# Configuration with OAuth credentials
config = {
"credentials": {
"auth_type": "Client",
"tenant_id": "your-tenant-id",
"client_id": "your-client-id",
"client_secret": "your-client-secret",
"refresh_token": "your-refresh-token"
},
"drive_name": "OneDrive",
"search_scope": "ALL",
"folder_path": ".",
"streams": [{
"name": "files",
"globs": ["*.csv", "*.json"],
"validation_policy": "Emit Record",
"format": {"filetype": "csv"}
}]
}
# Initialize and run connector
source = SourceMicrosoftOneDrive(None, config, None)
launch(source, ["read", "--config", "config.json", "--catalog", "catalog.json"])
# Install via poetry
poetry install
# Run connector commands
source-microsoft-onedrive spec
source-microsoft-onedrive check --config config.json
source-microsoft-onedrive discover --config config.json
source-microsoft-onedrive read --config config.json --catalog catalog.json
The connector is built on Airbyte's file-based framework with these key components:
The connector supports both OAuth (user delegation) and service principal authentication, can search across accessible drives and shared items, handles nested folder structures, and integrates with smart-open for efficient file reading across various formats.
Core Airbyte source connector functionality including specification generation, configuration validation, stream discovery, and data reading with OAuth authentication support.
class SourceMicrosoftOneDrive(FileBasedSource):
    def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional[Mapping[str, Any]], state: Optional[TState]): ...
    def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification: ...
Comprehensive configuration models supporting OAuth and service authentication with validation, schema generation, and documentation URL management.
class SourceMicrosoftOneDriveSpec(AbstractFileBasedSpec, BaseModel):
    credentials: Union[OAuthCredentials, ServiceCredentials]
    drive_name: Optional[str]
    search_scope: str
    folder_path: str
    @classmethod
    def documentation_url(cls) -> str: ...
    @classmethod
    def schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]: ...
File discovery, enumeration, and reading capabilities across OneDrive drives and shared items with glob pattern matching and metadata extraction.
class SourceMicrosoftOneDriveStreamReader(AbstractFileBasedStreamReader):
    def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: logging.Logger) -> Iterable[RemoteFile]: ...
    def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger) -> IOBase: ...
    def get_all_files(self): ...
Microsoft Graph API authentication using MSAL with support for OAuth refresh tokens and service principal credentials.
class SourceMicrosoftOneDriveClient:
    def __init__(self, config: SourceMicrosoftOneDriveSpec): ...
    @property
    def client(self): ...
    def _get_access_token(self): ...
def run():
    """Main CLI entry point that processes command-line arguments and launches the connector."""
from typing import Any, Dict, List, Mapping, Optional, Union, Iterable
from datetime import datetime
from io import IOBase
# Airbyte CDK imports
from airbyte_cdk import ConfiguredAirbyteCatalog, ConnectorSpecification, TState
from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
from airbyte_cdk.sources.file_based.stream.cursor.default_file_based_cursor import DefaultFileBasedCursor
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
# Pydantic for configuration models
from pydantic import BaseModel, Field
# Microsoft authentication
from msal import ConfidentialClientApplication
from office365.graph_client import GraphClient
# Additional imports for error handling and web requests
from airbyte_cdk import AirbyteTracedException, FailureType
import requests
import smart_open
import logging
Install with Tessl CLI
npx tessl i tessl/pypi-source-microsoft-onedrive