Query Execution

This document covers executing SQL queries in BigQuery, including parameterized queries, query configuration, result iteration, and query statistics.

Overview

BigQuery supports Standard SQL and Legacy SQL for querying data. Queries can be executed synchronously (waiting for results) or asynchronously (starting a job and checking status later).
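
Standard SQL is the default in this client. As a minimal sketch (assuming an existing client), Legacy SQL can be selected explicitly; note that it uses the bracketed [project:dataset.table] table syntax:

q := client.Query("SELECT word FROM [bigquery-public-data:samples.shakespeare] LIMIT 10")
q.UseLegacySQL = true // without this, the query is parsed as Standard SQL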

Creating Queries

Basic Query Creation

func (c *Client) Query(q string) *Query

Create a query:

q := client.Query(`
    SELECT name, COUNT(*) as count
    FROM ` + "`bigquery-public-data.usa_names.usa_1910_2013`" + `
    WHERE year > 2000
    GROUP BY name
    ORDER BY count DESC
    LIMIT 10
`)

Query Type

type Query struct {
    JobIDConfig
    QueryConfig
}
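
Because JobIDConfig and QueryConfig are embedded, their fields are promoted onto Query and can be set directly:

q := client.Query("SELECT 1")
q.JobID = "example-job-id" // promoted from JobIDConfig
q.DisableQueryCache = true // promoted from QueryConfig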

Query Configuration

QueryConfig Type

type QueryConfig struct {
    // The query to execute
    Q string

    // Destination table for query results
    Dst *Table

    // CreateDisposition specifies the circumstances under which the destination table will be created
    CreateDisposition TableCreateDisposition

    // WriteDisposition specifies how existing data in the destination table is treated
    WriteDisposition TableWriteDisposition

    // DisableQueryCache prevents results from being cached
    DisableQueryCache bool

    // DisableFlattenedResults prevents the creation of flattened results
    DisableFlattenedResults bool

    // AllowLargeResults allows large result sets (when writing to a destination table)
    AllowLargeResults bool

    // Priority sets the priority of the query
    Priority QueryPriority

    // MaxBillingTier limits the billing tier for the query
    MaxBillingTier int

    // MaxBytesBilled limits bytes billed for the query
    MaxBytesBilled int64

    // UseStandardSQL causes the query to use Standard SQL
    UseStandardSQL bool

    // UseLegacySQL causes the query to use Legacy SQL
    UseLegacySQL bool

    // DryRun causes the query to be validated and cost-estimated without being run
    DryRun bool

    // Parameters for parameterized queries
    Parameters []QueryParameter

    // TimePartitioning for destination table
    TimePartitioning *TimePartitioning

    // RangePartitioning for destination table
    RangePartitioning *RangePartitioning

    // Clustering for destination table
    Clustering *Clustering

    // DestinationEncryptionConfig for destination table
    DestinationEncryptionConfig *EncryptionConfig

    // SchemaUpdateOptions allows the destination table's schema to be
    // modified (e.g. "ALLOW_FIELD_ADDITION", "ALLOW_FIELD_RELAXATION")
    SchemaUpdateOptions []string

    // Labels for the query job
    Labels map[string]string

    // DefaultProjectID and DefaultDatasetID qualify table names that
    // don't specify a project or dataset
    DefaultProjectID string
    DefaultDatasetID string

    // MaximumBytesBilled is deprecated, use MaxBytesBilled
    MaximumBytesBilled int64

    // ConnectionProperties are key/value settings applied to the query,
    // such as session_id
    ConnectionProperties []*ConnectionProperty

    // CreateSession enables session creation
    CreateSession bool

    // JobTimeout sets a timeout for the query job
    JobTimeout time.Duration

    // Reservation for the query
    Reservation string

    // MaxSlots limits slot usage
    MaxSlots int32
}

Setting Query Options

q := client.Query(`SELECT * FROM dataset.table WHERE id = @id`)
q.Parameters = []bigquery.QueryParameter{
    {Name: "id", Value: 12345},
}
q.UseLegacySQL = false
q.MaxBytesBilled = 1000000000 // 1 GB limit
q.Priority = bigquery.InteractivePriority
q.Labels = map[string]string{
    "team": "analytics",
}

Executing Queries

Synchronous Execution

func (q *Query) Read(ctx context.Context) (*RowIterator, error)

Execute a query and wait for results:

it, err := q.Read(ctx)
if err != nil {
    return err
}

for {
    var values []bigquery.Value
    err := it.Next(&values)
    if err == iterator.Done {
        break
    }
    if err != nil {
        return err
    }
    fmt.Println(values)
}

Note: Read() uses the optimized jobs.query API for queries with small result sets. For large results, it automatically falls back to creating a job.

Asynchronous Execution

func (q *Query) Run(ctx context.Context) (*Job, error)

Start a query as a background job:

job, err := q.Run(ctx)
if err != nil {
    return err
}

// Save job ID for later retrieval
jobID := job.ID()
fmt.Printf("Query job ID: %s\n", jobID)

// Wait for job completion
status, err := job.Wait(ctx)
if err != nil {
    return err
}

if err := status.Err(); err != nil {
    return err
}

// Read results
it, err := job.Read(ctx)
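
A saved job ID can be used later, even from another process, to re-attach to the job. A sketch using the client's JobFromID (use JobFromIDLocation for jobs outside the client's default location):

job, err := client.JobFromID(ctx, jobID)
if err != nil {
    return err
}
status, err := job.Wait(ctx)
if err != nil {
    return err
}
fmt.Printf("Job state: %v\n", status.State)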

Parameterized Queries

Query Parameters

type QueryParameter struct {
    Name  string
    Value interface{}
}

Named Parameters

q := client.Query(`
    SELECT *
    FROM dataset.users
    WHERE age > @min_age
      AND country = @country
      AND created_at > @start_date
`)

q.Parameters = []bigquery.QueryParameter{
    {Name: "min_age", Value: 18},
    {Name: "country", Value: "US"},
    {Name: "start_date", Value: time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)},
}

Positional Parameters

q := client.Query(`
    SELECT *
    FROM dataset.users
    WHERE age > ?
      AND country = ?
`)

q.Parameters = []bigquery.QueryParameter{
    {Value: 18},
    {Value: "US"},
}

Array Parameters

q := client.Query(`
    SELECT *
    FROM dataset.products
    WHERE category IN UNNEST(@categories)
`)

q.Parameters = []bigquery.QueryParameter{
    {
        Name:  "categories",
        Value: []string{"electronics", "books", "toys"},
    },
}

Struct Parameters

type DateRange struct {
    Start time.Time `bigquery:"start"`
    End   time.Time `bigquery:"end"`
}

q := client.Query(`
    SELECT *
    FROM dataset.events
    WHERE timestamp BETWEEN @range.start AND @range.end
`)

q.Parameters = []bigquery.QueryParameter{
    {
        Name: "range",
        Value: DateRange{
            Start: time.Date(2023, 1, 1, 0, 0, 0, 0, time.UTC),
            End:   time.Date(2023, 12, 31, 23, 59, 59, 0, time.UTC),
        },
    },
}

Result Iteration

RowIterator Type

type RowIterator struct {
    // Schema is the schema of the table. Available after a call to Next.
    Schema Schema

    // TotalRows is the total number of rows in the result set.
    // Available after a call to Next.
    TotalRows uint64

    // StartIndex is the zero-based index of the first row to return.
    // Set it before the first call to Next to skip ahead in the results.
    StartIndex uint64
}
func (it *RowIterator) Next(dst interface{}) error
func (it *RowIterator) PageInfo() *iterator.PageInfo
func (it *RowIterator) ArrowIterator() ArrowIterator

Iterating with Value Slices

it, err := q.Read(ctx)
if err != nil {
    return err
}

for {
    var values []bigquery.Value
    err := it.Next(&values)
    if err == iterator.Done {
        break
    }
    if err != nil {
        return err
    }

    // Access values by index
    name := values[0].(string)
    count := values[1].(int64)
    fmt.Printf("%s: %d\n", name, count)
}

Iterating with Structs

type Result struct {
    Name  string
    Count int64
}

it, err := q.Read(ctx)
if err != nil {
    return err
}

for {
    var result Result
    err := it.Next(&result)
    if err == iterator.Done {
        break
    }
    if err != nil {
        return err
    }
    fmt.Printf("%s: %d\n", result.Name, result.Count)
}

Iterating with Maps

it, err := q.Read(ctx)
if err != nil {
    return err
}

for {
    var row map[string]bigquery.Value
    err := it.Next(&row)
    if err == iterator.Done {
        break
    }
    if err != nil {
        return err
    }
    fmt.Printf("Row: %+v\n", row)
}

Getting Schema and Total Rows

it, err := q.Read(ctx)
if err != nil {
    return err
}

// Call Next once to populate schema and total rows
var firstRow []bigquery.Value
if err := it.Next(&firstRow); err != nil && err != iterator.Done {
    return err
}

fmt.Printf("Schema: %v\n", it.Schema)
fmt.Printf("Total rows: %d\n", it.TotalRows)

Pagination

it, err := q.Read(ctx)
if err != nil {
    return err
}

pageInfo := it.PageInfo()
pageInfo.MaxSize = 1000 // rows per page; set before the first call to Next

// Read the first page
for i := 0; i < pageInfo.MaxSize; i++ {
    var row []bigquery.Value
    err := it.Next(&row)
    if err == iterator.Done {
        break
    }
    if err != nil {
        return err
    }
    // Process row
}

// pageInfo.Token now holds the token for the next page
token := pageInfo.Token
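
To pick up where the previous page left off, set the saved token on a fresh iterator before its first Next call (a sketch following the google.golang.org/api/iterator pagination contract, reusing the job handle from Run):

it2, err := job.Read(ctx)
if err != nil {
    return err
}
it2.PageInfo().Token = token // resume at the saved page
it2.PageInfo().MaxSize = 1000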

Arrow Format Iteration

Arrow Iterator

type ArrowIterator interface {
    Next() (*ArrowRecordBatch, error)
    Schema() Schema
    SerializedArrowSchema() []byte
}
type ArrowRecordBatch struct {
    Data        []byte
    Schema      []byte
    PartitionID string
}

Get Arrow format results (experimental):

it, err := q.Read(ctx)
if err != nil {
    return err
}

arrowIt := it.ArrowIterator()
for {
    batch, err := arrowIt.Next()
    if err == iterator.Done {
        break
    }
    if err != nil {
        return err
    }

    // Process Arrow batch
    fmt.Printf("Batch size: %d bytes\n", len(batch.Data))
}

Arrow IO Reader

func NewArrowIteratorReader(it ArrowIterator) io.Reader

Convert Arrow iterator to io.Reader:

arrowIt := it.ArrowIterator()
reader := bigquery.NewArrowIteratorReader(arrowIt)

// The reader yields the serialized Arrow IPC stream
// data, err := io.ReadAll(reader)
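
For instance, the stream can be decoded with the Apache Arrow Go library's IPC reader (a sketch; the arrow module's import path and major version vary by release):

import (
    "fmt"
    "io"

    "github.com/apache/arrow/go/v15/arrow/ipc"
)

// printBatches reads an Arrow IPC stream and reports each record batch.
func printBatches(r io.Reader) error {
    rdr, err := ipc.NewReader(r)
    if err != nil {
        return err
    }
    defer rdr.Release()
    for rdr.Next() {
        fmt.Printf("record batch with %d rows\n", rdr.Record().NumRows())
    }
    return rdr.Err()
}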

Query Priority

type QueryPriority string

const (
    InteractivePriority QueryPriority = "INTERACTIVE"
    BatchPriority       QueryPriority = "BATCH"
)

Set query priority:

q := client.Query("SELECT * FROM dataset.large_table")
q.Priority = bigquery.BatchPriority // Lower priority, more economical

Destination Tables

Writing to a Table

dataset := client.Dataset("my_dataset")
destTable := dataset.Table("query_results")

q := client.Query("SELECT * FROM source_table WHERE condition")
q.Dst = destTable
q.WriteDisposition = bigquery.WriteTruncate

job, err := q.Run(ctx)
if err != nil {
    return err
}

status, err := job.Wait(ctx)
if err != nil {
    return err
}
if err := status.Err(); err != nil {
    return err
}

Table Disposition

type TableCreateDisposition string

const (
    CreateIfNeeded TableCreateDisposition = "CREATE_IF_NEEDED"
    CreateNever    TableCreateDisposition = "CREATE_NEVER"
)
type TableWriteDisposition string

const (
    WriteEmpty    TableWriteDisposition = "WRITE_EMPTY"
    WriteTruncate TableWriteDisposition = "WRITE_TRUNCATE"
    WriteAppend   TableWriteDisposition = "WRITE_APPEND"
)
q.CreateDisposition = bigquery.CreateIfNeeded
q.WriteDisposition = bigquery.WriteAppend

Query Statistics

Getting Query Statistics

job, err := q.Run(ctx)
if err != nil {
    return err
}

status, err := job.Wait(ctx)
if err != nil {
    return err
}

stats := job.LastStatus().Statistics

queryStats := stats.Details.(*bigquery.QueryStatistics)
fmt.Printf("Total bytes processed: %d\n", queryStats.TotalBytesProcessed)
fmt.Printf("Total bytes billed: %d\n", queryStats.TotalBytesBilled)
fmt.Printf("Cache hit: %v\n", queryStats.CacheHit)
fmt.Printf("Total slot ms: %d\n", queryStats.TotalSlotMs)

QueryStatistics Type

type QueryStatistics struct {
    TotalBytesProcessed             int64
    TotalBytesBilled                int64
    BillingTier                     int64
    CacheHit                        bool
    DDLTargetTable                  *Table
    DDLOperationPerformed           string
    DDLTargetRoutine                *Routine
    DDLTargetRowAccessPolicy        *RowAccessPolicyReference
    DDLTargetDataset                *DatasetReference
    DDLAffectedRowAccessPolicyCount int64
    StatementType                   string
    TotalPartitionsProcessed        int64
    TotalSlotMs                     int64
    ReferencedTables                []*Table
    ReferencedRoutines              []*Routine
    Schema                          Schema
    NumDMLAffectedRows              int64
    DMLStats                        *DMLStatistics
    Timeline                        []*QueryTimelineIteration
    QueryPlan                       []*ExplainQueryStage
    UndeclaredQueryParameters       []QueryParameter
    SearchStatistics                *SearchStatistics
    PerformanceInsights             *PerformanceInsights
    MaterializedViewStatistics      *MaterializedViewStatistics
    MetadataCacheStatistics         *MetadataCacheStatistics
    BiEngineStatistics              *BIEngineStatistics
    DclTargetTable                  *Table
    DclTargetView                   *Table
    DclTargetDataset                *DatasetReference
    ExportDataStatistics            *ExportDataStatistics
    ExternalServiceCosts            []*ExternalServiceCost
    TransactionInfo                 *TransactionInfo
}

DML Statistics

type DMLStatistics struct {
    InsertedRowCount int64
    DeletedRowCount  int64
    UpdatedRowCount  int64
}

Get DML statistics:

q := client.Query("DELETE FROM dataset.table WHERE condition")
job, err := q.Run(ctx)
if err != nil {
    return err
}

status, err := job.Wait(ctx)
if err != nil {
    return err
}

queryStats := job.LastStatus().Statistics.Details.(*bigquery.QueryStatistics)
if queryStats.DMLStats != nil {
    fmt.Printf("Deleted rows: %d\n", queryStats.DMLStats.DeletedRowCount)
}

Query Plan

type ExplainQueryStage struct {
    CompletedParallelInputs   int64
    ComputeAvg                time.Duration
    ComputeMax                time.Duration
    ComputeRatioAvg           float64
    ComputeRatioMax           float64
    EndTime                   time.Time
    ID                        int64
    InputStages               []int64
    Name                      string
    ParallelInputs            int64
    ReadAvg                   time.Duration
    ReadMax                   time.Duration
    ReadRatioAvg              float64
    ReadRatioMax              float64
    RecordsRead               int64
    RecordsWritten            int64
    ShuffleOutputBytes        int64
    ShuffleOutputBytesSpilled int64
    StartTime                 time.Time
    Status                    string
    Steps                     []*ExplainQueryStep
    WaitAvg                   time.Duration
    WaitMax                   time.Duration
    WaitRatioAvg              float64
    WaitRatioMax              float64
    WriteAvg                  time.Duration
    WriteMax                  time.Duration
    WriteRatioAvg             float64
    WriteRatioMax             float64
}
type ExplainQueryStep struct {
    Kind     string
    Substeps []string
}

Get query execution plan:

queryStats := job.LastStatus().Statistics.Details.(*bigquery.QueryStatistics)
for _, stage := range queryStats.QueryPlan {
    fmt.Printf("Stage %d: %s\n", stage.ID, stage.Name)
    fmt.Printf("  Records read: %d\n", stage.RecordsRead)
    fmt.Printf("  Records written: %d\n", stage.RecordsWritten)
    for _, step := range stage.Steps {
        fmt.Printf("  Step: %s\n", step.Kind)
    }
}

Dry Run Queries

Execute a dry run to estimate costs without running the query:

q := client.Query("SELECT * FROM large_dataset.large_table")
q.DryRun = true

job, err := q.Run(ctx)
if err != nil {
    return err
}

queryStats := job.LastStatus().Statistics.Details.(*bigquery.QueryStatistics)
fmt.Printf("This query will process %d bytes\n", queryStats.TotalBytesProcessed)

// Rough estimate assuming $5 per TB on-demand pricing (rates vary by region and edition)
estimatedCostUSD := float64(queryStats.TotalBytesProcessed) / 1e12 * 5.0
fmt.Printf("Estimated cost: $%.2f\n", estimatedCostUSD)

Performance Insights

Performance insights provide detailed information about query performance issues and optimization opportunities.

PerformanceInsights Type

type PerformanceInsights struct {
    AvgPreviousExecution               time.Duration
    StagePerformanceStandaloneInsights []*StagePerformanceStandaloneInsight
    StagePerformanceChangeInsights     []*StagePerformanceChangeInsight
}
type StagePerformanceStandaloneInsight struct {
    StageID                  int64
    BIEngineReasons          []*BIEngineReason
    HighCardinalityJoins     []*HighCardinalityJoin
    SlotContention           bool
    InsufficientShuffleQuota bool
    PartitionSkew            *PartitionSkew
}
type StagePerformanceChangeInsight struct {
    StageID         int64
    InputDataChange *InputDataChange
}

Performance Insight Details

type BIEngineReason struct {
    Code    string
    Message string
}
type HighCardinalityJoin struct {
    LeftRows   int64
    RightRows  int64
    OutputRows int64
    StepIndex  int64
}
type PartitionSkew struct {
    SkewSources []*SkewSource
}
type SkewSource struct {
    StageID int64
}
type InputDataChange struct {
    RecordsReadDiffPercentage float64
}

Using Performance Insights

Performance insights are available in query statistics to help diagnose and optimize query performance:

q := client.Query("SELECT * FROM large_table JOIN other_table USING (id)")
job, err := q.Run(ctx)
if err != nil {
    return err
}

status, err := job.Wait(ctx)
if err != nil {
    return err
}

queryStats := status.Statistics.Details.(*bigquery.QueryStatistics)
if queryStats.PerformanceInsights != nil {
    insights := queryStats.PerformanceInsights

    fmt.Printf("Average previous execution: %s\n", insights.AvgPreviousExecution)

    // Check for standalone performance issues
    for _, standalone := range insights.StagePerformanceStandaloneInsights {
        fmt.Printf("\nStage %d performance issues:\n", standalone.StageID)

        if standalone.SlotContention {
            fmt.Println("  - Slot contention detected")
        }

        if standalone.InsufficientShuffleQuota {
            fmt.Println("  - Insufficient shuffle quota")
        }

        for _, join := range standalone.HighCardinalityJoins {
            fmt.Printf("  - High cardinality join: %d left × %d right = %d output rows\n",
                join.LeftRows, join.RightRows, join.OutputRows)
        }

        if standalone.PartitionSkew != nil {
            fmt.Println("  - Partition skew detected")
        }

        for _, reason := range standalone.BIEngineReasons {
            fmt.Printf("  - BI Engine disabled: %s - %s\n", reason.Code, reason.Message)
        }
    }

    // Check for performance changes compared to previous runs
    for _, change := range insights.StagePerformanceChangeInsights {
        if change.InputDataChange != nil {
            fmt.Printf("Stage %d: Input data changed by %.2f%%\n",
                change.StageID, change.InputDataChange.RecordsReadDiffPercentage)
        }
    }
}

Default Dataset

Set a default dataset for unqualified table names:

q := client.Query("SELECT * FROM users") // Unqualified table name
q.DefaultProjectID = "my-project"
q.DefaultDatasetID = "my_dataset"
// Query will use `my-project.my_dataset.users`

Job ID Configuration

JobIDConfig Type

type JobIDConfig struct {
    JobID    string
    Location string

    // AddJobIDSuffix causes a job ID suffix to be added
    AddJobIDSuffix bool
}

Specify custom job ID:

q := client.Query("SELECT 1")
q.JobID = "my-custom-job-id"
q.Location = "US"

job, err := q.Run(ctx)

Add random suffix to job ID:

q.JobID = "my-job"
q.AddJobIDSuffix = true // Results in "my-job-<random-suffix>"

Sessions

Create a session for multi-statement transactions:

q := client.Query("BEGIN TRANSACTION")
q.CreateSession = true

job, err := q.Run(ctx)
if err != nil {
    return err
}

// The session ID is reported in the job statistics once the job has run
if _, err := job.Wait(ctx); err != nil {
    return err
}
sessionID := job.LastStatus().Statistics.SessionInfo.SessionID

// Use the session in subsequent queries
q2 := client.Query("INSERT INTO table VALUES (1, 'test')")
q2.ConnectionProperties = []*bigquery.ConnectionProperty{
    {Key: "session_id", Value: sessionID},
}
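
The transaction can then be committed (or rolled back) in the same session:

q3 := client.Query("COMMIT TRANSACTION")
q3.ConnectionProperties = []*bigquery.ConnectionProperty{
    {Key: "session_id", Value: sessionID},
}
job3, err := q3.Run(ctx)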

Complete Example

package main

import (
    "context"
    "fmt"
    "log"

    "cloud.google.com/go/bigquery"
    "cloud.google.com/go/civil"
    "google.golang.org/api/iterator"
)

type SalesData struct {
    Date    civil.Date // BigQuery DATE values load into civil.Date, not string
    Product string
    Revenue float64
    Units   int64
}

func main() {
    ctx := context.Background()
    client, err := bigquery.NewClient(ctx, "my-project")
    if err != nil {
        log.Fatal(err)
    }
    defer client.Close()

    // Create parameterized query
    q := client.Query(`
        SELECT
            DATE(timestamp) as date,
            product_name as product,
            SUM(amount) as revenue,
            COUNT(*) as units
        FROM ` + "`my-project.sales.transactions`" + `
        WHERE DATE(timestamp) BETWEEN @start_date AND @end_date
          AND region = @region
        GROUP BY date, product
        ORDER BY revenue DESC
        LIMIT @limit
    `)

    // Set parameters (the DATE comparison requires civil.Date values, not strings)
    q.Parameters = []bigquery.QueryParameter{
        {Name: "start_date", Value: civil.Date{Year: 2023, Month: 1, Day: 1}},
        {Name: "end_date", Value: civil.Date{Year: 2023, Month: 12, Day: 31}},
        {Name: "region", Value: "US"},
        {Name: "limit", Value: 100},
    }

    // Configure query
    q.Priority = bigquery.InteractivePriority
    q.MaxBytesBilled = 10 * 1024 * 1024 * 1024 // 10 GB
    q.Labels = map[string]string{
        "team":   "analytics",
        "report": "sales-summary",
    }

    // Execute query
    it, err := q.Read(ctx)
    if err != nil {
        log.Fatal(err)
    }

    // Process results

    for {
        var sales SalesData
        err := it.Next(&sales)
        if err == iterator.Done {
            break
        }
        if err != nil {
            log.Fatal(err)
        }

        fmt.Printf("%s | %s | $%.2f | %d units\n",
            sales.Date, sales.Product, sales.Revenue, sales.Units)
    }
    // TotalRows is populated once Next has been called at least once
    fmt.Printf("Total rows: %d\n", it.TotalRows)
}