tessl install tessl/golang-cloud-google-com--go--bigquery@1.72.0

Google Cloud BigQuery client library providing comprehensive Go APIs for querying, loading data, managing datasets and tables, streaming inserts, and accessing BigQuery's ecosystem of services including Storage, Analytics Hub, Data Transfer, and Migration APIs.
This document covers using the BigQuery Storage Read API for high-performance parallel reads of large datasets.
The BigQuery Storage Read API provides fast, parallel access to BigQuery table data using Apache Arrow or Avro formats. It's optimized for reading large amounts of data and can be significantly faster than the traditional query API.
func (c *Client) EnableStorageReadClient(ctx context.Context, opts ...option.ClientOption) error

Enable the Storage Read API for the client:
client, err := bigquery.NewClient(ctx, projectID)
if err != nil {
return err
}
err = client.EnableStorageReadClient(ctx)
if err != nil {
return err
}

Important limitations when the Storage Read API is enabled:

- PageInfo().Token pagination is not supported
- RowIterator.StartIndex is not supported
- Calling EnableStorageReadClient() twice returns an error

Once enabled, table reads and query results automatically use the Storage Read API:
// Table read uses Storage API
it := table.Read(ctx)
for {
var row []bigquery.Value
err := it.Next(&row)
if err == iterator.Done {
break
}
if err != nil {
return err
}
// Process row
}
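// Note: with the Storage Read API enabled, it.PageInfo().Token and
// it.StartIndex are not supported for this iterator (see the limitations
// listed above); iterate with Next until iterator.Done instead.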
// Query results use Storage API
q := client.Query("SELECT * FROM dataset.large_table")
it, err := q.Read(ctx)

Results can also be consumed as raw Arrow record batches through the ArrowIterator interface:

type ArrowIterator interface {
Next() (*ArrowRecordBatch, error)
Schema() Schema
SerializedArrowSchema() []byte
}

type ArrowRecordBatch struct {
Data []byte
Schema []byte
PartitionID string
}

Get Arrow format data:
it, err := q.Read(ctx)
if err != nil {
return err
}
arrowIt, err := it.ArrowIterator()
if err != nil {
return err
}
for {
batch, err := arrowIt.Next()
if err == iterator.Done {
break
}
if err != nil {
return err
}
// Process Arrow batch
fmt.Printf("Partition: %s, Size: %d bytes\n",
batch.PartitionID, len(batch.Data))
}

func NewArrowIteratorReader(it ArrowIterator) io.Reader

Convert to io.Reader for use with Arrow libraries:
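// NewArrowIteratorReader adapts the iterator to an io.Reader intended for
// Arrow IPC readers; a fuller end-to-end sketch appears at the end of this
// document.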
arrowIt, err := it.ArrowIterator()
if err != nil {
return err
}
reader := bigquery.NewArrowIteratorReader(arrowIt)
// Use with Arrow libraries
// import "github.com/apache/arrow/go/v10/arrow/ipc"
// arrowReader, err := ipc.NewReader(reader)

The cloud.google.com/go/bigquery/storage/apiv1 package provides direct access to the Storage Read API.
import "cloud.google.com/go/bigquery/storage/apiv1"
import "cloud.google.com/go/bigquery/storage/apiv1/storagepb"
client, err := storage.NewBigQueryReadClient(ctx)
if err != nil {
return err
}
defer client.Close()

req := &storagepb.CreateReadSessionRequest{
Parent: "projects/my-project",
ReadSession: &storagepb.ReadSession{
Table: "projects/my-project/datasets/my_dataset/tables/my_table",
DataFormat: storagepb.DataFormat_ARROW,
ReadOptions: &storagepb.ReadSession_TableReadOptions{
SelectedFields: []string{"field1", "field2", "field3"},
RowRestriction: "field1 > 100",
},
},
MaxStreamCount: 10,
}
session, err := client.CreateReadSession(ctx, req)
if err != nil {
return err
}

var wg sync.WaitGroup
for _, stream := range session.Streams {
wg.Add(1)
go func(streamName string) {
defer wg.Done()
req := &storagepb.ReadRowsRequest{
ReadStream: streamName,
}
streamReader, err := client.ReadRows(ctx, req)
if err != nil {
return
}
for {
resp, err := streamReader.Recv()
if err == io.EOF {
break
}
if err != nil {
return
}
// Each ReadRowsResponse carries one serialized Arrow record batch; decode it
// together with the serialized schema from session.GetArrowSchema().
arrowRecordBatch := resp.GetArrowRecordBatch()
_ = arrowRecordBatch // ... process or hand off to a decoder here
}
}(stream.Name)
}
wg.Wait()

The Storage Read API is best for:

- Reading large tables or large query results where throughput matters
- Consuming results in parallel across multiple streams
- Working with results as Arrow record batches

Use the traditional query API for:

- Small result sets, where a read session adds unnecessary overhead
- Workloads that need PageInfo().Token pagination or RowIterator.StartIndex

A complete example using the high-level client:
package main
import (
"context"
"fmt"
"log"
"cloud.google.com/go/bigquery"
"google.golang.org/api/iterator"
)
func main() {
ctx := context.Background()
// Create BigQuery client
client, err := bigquery.NewClient(ctx, "my-project")
if err != nil {
log.Fatal(err)
}
defer client.Close()
// Enable Storage Read API
err = client.EnableStorageReadClient(ctx)
if err != nil {
log.Fatal(err)
}
// Read large table using Storage API
dataset := client.Dataset("my_dataset")
table := dataset.Table("large_table")
it := table.Read(ctx)
// Option 1: Read as regular rows
count := 0
for {
var row []bigquery.Value
err := it.Next(&row)
if err == iterator.Done {
break
}
if err != nil {
log.Fatal(err)
}
count++
if count%1000000 == 0 {
fmt.Printf("Processed %d rows\n", count)
}
}
fmt.Printf("Total rows: %d\n", count)
// Option 2: Read as Arrow batches
it2 := table.Read(ctx)
arrowIt, err := it2.ArrowIterator()
if err != nil {
log.Fatal(err)
}
batchCount := 0
for {
batch, err := arrowIt.Next()
if err == iterator.Done {
break
}
if err != nil {
log.Fatal(err)
}
batchCount++
fmt.Printf("Arrow batch %d: %d bytes from partition %s\n",
batchCount, len(batch.Data), batch.PartitionID)
}
}
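For reference, the following sketch expands the commented-out ipc.NewReader lines above into a runnable helper using the Apache Arrow Go library. It is illustrative rather than part of the BigQuery client's documentation: the function name decodeArrow and the query text are hypothetical, the import path github.com/apache/arrow/go/v10/arrow/ipc mirrors the earlier comment and should match whichever Arrow release your module depends on, and it assumes EnableStorageReadClient has already been called on the client.

// import "github.com/apache/arrow/go/v10/arrow/ipc"

// decodeArrow is a hypothetical helper: it runs a query and decodes the
// resulting Arrow record batches with the Arrow IPC reader.
func decodeArrow(ctx context.Context, client *bigquery.Client, sql string) error {
	it, err := client.Query(sql).Read(ctx)
	if err != nil {
		return err
	}
	arrowIt, err := it.ArrowIterator()
	if err != nil {
		return err
	}
	// NewArrowIteratorReader exposes the Arrow stream as an io.Reader that
	// ipc.NewReader can parse.
	rdr, err := ipc.NewReader(bigquery.NewArrowIteratorReader(arrowIt))
	if err != nil {
		return err
	}
	defer rdr.Release()
	for rdr.Next() {
		rec := rdr.Record()
		fmt.Printf("batch: %d rows, %d columns\n", rec.NumRows(), rec.NumCols())
	}
	return rdr.Err()
}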