CtrlK
BlogDocsLog inGet started
Tessl Logo

jobe-skills/ebook-library

Use when the user asks about a Calibre ebook library or book collection: find books, search metadata or full text in EPUB/AZW3 files, or locate book paths via Calibre databases.

100

1.35x

Quality

100%

Does it follow best practices?

Impact

100%

1.35x

Average score across 3 eval scenarios

Overview
Skills
Evals
Files

search_content.pyscripts/

#!/usr/bin/env python3
"""
Search book content using Calibre's full-text-search.db.

Note: Calibre's FTS5 tables use a custom tokenizer that isn't available outside
Calibre, so we query the underlying books_text table directly with LIKE.
"""
import sqlite3
import json
import argparse
import os
import sys

from calibre_utils import build_excerpt, choose_preferred_format, normalize_format


def _load_book_metadata(meta_conn, book_id):
    meta_cur = meta_conn.cursor()
    meta_cur.execute(
        """
        SELECT b.title, group_concat(a.name, ', ')
        FROM books b
        LEFT JOIN books_authors_link bal ON bal.book = b.id
        LEFT JOIN authors a ON a.id = bal.author
        WHERE b.id = ?
        GROUP BY b.id
        """,
        (book_id,),
    )
    meta_row = meta_cur.fetchone()
    if not meta_row:
        return None
    return {"title": meta_row[0], "authors": meta_row[1]}


def _search_single_book(fts_conn, meta_conn, book_id, query, limit, context_chars, format_filter):
    metadata = _load_book_metadata(meta_conn, book_id)
    if not metadata:
        print(json.dumps({"error": f"Book {book_id} not found"}))
        return 1

    fts_cur = fts_conn.cursor()
    if format_filter:
        fts_cur.execute(
            """
            SELECT format, searchable_text
            FROM books_text
            WHERE book = ? AND upper(format) = ?
            ORDER BY format
            """,
            (book_id, normalize_format(format_filter)),
        )
    else:
        fts_cur.execute(
            """
            SELECT format, searchable_text
            FROM books_text
            WHERE book = ?
            ORDER BY format
            """,
            (book_id,),
        )

    rows = fts_cur.fetchall()
    if not rows:
        print(json.dumps({"error": f"No text found for book {book_id}"}))
        return 1

    selected_format = choose_preferred_format([row[0] for row in rows], format_filter)
    searchable_text = next(text for fmt, text in rows if fmt == selected_format)
    lower_text = searchable_text.lower()
    lower_query = query.lower()

    results = []
    start = 0
    while len(results) < limit:
        pos = lower_text.find(lower_query, start)
        if pos == -1:
            break
        results.append(
            {
                "book_id": book_id,
                "title": metadata["title"],
                "authors": metadata["authors"],
                "format": selected_format,
                "snippet": build_excerpt(searchable_text, pos, context_chars),
            }
        )
        start = pos + 1

    print(json.dumps(results, indent=2, ensure_ascii=False))
    return 0


def search_content(
    fts_db,
    metadata_db,
    query,
    limit=20,
    context_chars=300,
    book_id=None,
    format_filter=None,
):
    if not os.path.exists(fts_db):
        print(json.dumps({"error": f"FTS DB not found: {fts_db}"}))
        return 2
    if not os.path.exists(metadata_db):
        print(json.dumps({"error": f"Metadata DB not found: {metadata_db}"}))
        return 2
    if not query:
        print(json.dumps({"error": "Query must not be empty"}))
        return 1

    fts_conn = sqlite3.connect(fts_db)
    meta_conn = sqlite3.connect(metadata_db)

    try:
        if book_id is not None:
            return _search_single_book(
                fts_conn,
                meta_conn,
                book_id,
                query,
                limit,
                context_chars,
                format_filter,
            )

        pattern = f"%{query}%"
        fts_cur = fts_conn.cursor()

        if format_filter:
            fts_cur.execute("""
                SELECT book, format, instr(lower(searchable_text), lower(?)) as pos
                FROM books_text
                WHERE lower(searchable_text) LIKE lower(?) AND upper(format) = ?
                ORDER BY book
                LIMIT ?
            """, (query, pattern, normalize_format(format_filter), limit * 3))
        else:
            fts_cur.execute("""
                SELECT DISTINCT book, format, instr(lower(searchable_text), lower(?)) as pos
                FROM books_text
                WHERE lower(searchable_text) LIKE lower(?)
                ORDER BY book,
                    CASE upper(format)
                        WHEN 'EPUB' THEN 0
                        WHEN 'AZW3' THEN 1
                        WHEN 'KFX' THEN 2
                        WHEN 'MOBI' THEN 3
                        WHEN 'PDF' THEN 4
                        ELSE 5
                    END,
                    format
                LIMIT ?
            """, (query, pattern, limit * 3))

        seen_books = set()
        results = []

        for row in fts_cur.fetchall():
            bid, fmt, pos = row
            if bid in seen_books:
                continue
            seen_books.add(bid)

            # Get book metadata
            metadata = _load_book_metadata(meta_conn, bid)
            title = metadata["title"] if metadata else f"Book {bid}"
            authors = metadata["authors"] if metadata else None

            # Get context snippet centered on the match
            start = max(1, pos - context_chars // 2)
            fts_cur.execute("""
                SELECT substr(searchable_text, ?, ?)
                FROM books_text WHERE book = ? AND format = ?
            """, (start, context_chars, bid, fmt))
            snippet_row = fts_cur.fetchone()
            snippet = snippet_row[0].strip() if snippet_row and snippet_row[0] else ""
            
            results.append({
                "book_id": bid,
                "title": title,
                "authors": authors,
                "format": fmt,
                "snippet": snippet
            })
            
            if len(results) >= limit:
                break
        
        print(json.dumps(results, indent=2, ensure_ascii=False))
        return 0
        
    except Exception as e:
        print(json.dumps({"error": str(e)}))
        return 3

if __name__ == '__main__':
    p = argparse.ArgumentParser(description='Search book content via Calibre FTS')
    p.add_argument('--fts-db', required=True, help='Path to full-text-search.db')
    p.add_argument('--metadata-db', required=True, help='Path to metadata.db')
    p.add_argument('--query', required=True, help='Search term')
    p.add_argument('--book-id', type=int, help='Limit search to a specific book ID (faster)')
    p.add_argument('--format', help='Prefer or restrict to a specific format (for example EPUB)')
    p.add_argument('--limit', type=int, default=10, help='Max results (default: 10)')
    p.add_argument('--context', type=int, default=300, help='Context chars around match (default: 300)')
    args = p.parse_args()
    sys.exit(
        search_content(
            args.fts_db,
            args.metadata_db,
            args.query,
            args.limit,
            args.context,
            args.book_id,
            args.format,
        )
    )

Install with Tessl CLI

npx tessl i jobe-skills/ebook-library

SKILL.md

tile.json