From 1664444970ec8ad0506cc840879a45497ef2a1c7 Mon Sep 17 00:00:00 2001 From: Mikhail Kot Date: Fri, 12 Jun 2026 11:50:18 +0100 Subject: [PATCH 1/5] initial Signed-off-by: Mikhail Kot --- .github/workflows/duckdb-r2.yml | 197 ++++++++++++++++++++++++++++++++ vortex-duckdb/build.rs | 5 +- 2 files changed, 201 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/duckdb-r2.yml diff --git a/.github/workflows/duckdb-r2.yml b/.github/workflows/duckdb-r2.yml new file mode 100644 index 00000000000..9d04abfabe2 --- /dev/null +++ b/.github/workflows/duckdb-r2.yml @@ -0,0 +1,197 @@ +name: DuckDB R2 builds + +# Mirror release Duckdb library files to R2 if they were not present. +# Build from source Duckdb library files if build.rs contains a commit, then +# upload to R2 if this commit was not present (testing pre-release builds). +# +# Launched only on PRs from maintainers which change build.rs inside +# "duckdb-build" environment +on: + pull_request: + paths: + - "vortex-duckdb/build.rs" + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: false + +permissions: + contents: read + +env: + PUBLIC_BASE_URL: "https://ci-builds.vortex.dev" + R2_BUCKET: "duckdb-builds" + R2_ENDPOINT_URL: "https://52bdeab5651e1584747feefd051fd566.r2.cloudflarestorage.com" + +jobs: + check: + name: "Resolve version and check R2" + runs-on: ubuntu-latest + timeout-minutes: 10 + outputs: + version: ${{ steps.resolve.outputs.version }} + ref_dir: ${{ steps.resolve.outputs.ref_dir }} + release: ${{ steps.resolve.outputs.release }} + matrix: ${{ steps.resolve.outputs.matrix }} + any_missing: ${{ steps.resolve.outputs.any_missing }} + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 + - name: Resolve version and check R2 + id: resolve + run: | + set -Eeuo pipefail + + version=$(grep -oP 'DEFAULT_DUCKDB_VERSION:\s*&str\s*=\s*"\K[^"]+' \ + vortex-duckdb/build.rs) + + # Same as in 
vortex-duckdb/build.rs: >=2 dot-separated numeric + # components is a tagged release (ref dir "vX.Y.Z"), anything + # else is a commit. + ref="${version#v}" + if [[ "$ref" =~ ^[0-9]+(\.[0-9]+)+$ ]]; then + release=true + ref_dir="v$ref" + else + release=false + ref_dir="$ref" + fi + + echo "DuckDB version: $version (release=$release, ref_dir=$ref_dir)" + + entries=() + for archive in \ + libduckdb-linux-amd64.zip \ + libduckdb-linux-arm64.zip \ + libduckdb-osx-universal.zip; do + + url="${PUBLIC_BASE_URL}/${ref_dir}/${archive}" + code=$(curl -o /dev/null -s -w '%{http_code}' --head "$url" || echo 000) + if [ "$code" = "200" ]; then + echo "present in R2: $archive" + continue + fi + + echo "missing in R2 (HTTP $code): $archive" + case "$archive" in + *linux-amd64*) runner="ubuntu-latest"; os="linux"; arch="amd64" ;; + *linux-arm64*) runner="ubuntu-24.04-arm"; os="linux"; arch="arm64" ;; + *osx-universal*) runner="macos-14"; os="osx"; arch="universal" ;; + esac + entries+=("$(jq -nc \ + --arg archive "$archive" \ + --arg runner "$runner" \ + --arg os "$os" \ + --arg arch "$arch" \ + '{archive: $archive, runner: $runner, os: $os, arch: $arch}')") + done + + if [ "${#entries[@]}" -eq 0 ]; then + matrix='{"include":[]}' + any_missing=false + else + include=$(printf '%s\n' "${entries[@]}" | jq -sc '.') + matrix=$(jq -nc --argjson include "$include" '{include: $include}') + any_missing=true + fi + + { + echo "version=$version" + echo "ref_dir=$ref_dir" + echo "release=$release" + echo "matrix=$matrix" + echo "any_missing=$any_missing" + } >> "$GITHUB_OUTPUT" + + build-and-upload: + name: "Build and upload ${{ matrix.archive }}" + needs: check + if: >- + needs.check.outputs.any_missing == 'true' && + github.repository == 'vortex-data/vortex' && + github.event.pull_request.head.repo.full_name == github.repository && + contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.pull_request.author_association) + environment: duckdb-build + timeout-minutes: 120 + 
strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.check.outputs.matrix) }} + runs-on: ${{ matrix.runner }} + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 + + - name: Install build dependencies (Linux) + if: needs.check.outputs.release != 'true' && runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y ninja-build libcurl4-openssl-dev zip unzip + + - name: Install build dependencies (macOS) + if: needs.check.outputs.release != 'true' && runner.os == 'macOS' + run: brew install ninja + + - name: Prepare ${{ matrix.archive }} + env: + ARCHIVE: ${{ matrix.archive }} + REF_DIR: ${{ needs.check.outputs.ref_dir }} + RELEASE: ${{ needs.check.outputs.release }} + PLATFORM_OS: ${{ matrix.os }} + run: | + set -Eeuo pipefail + + if [ "$RELEASE" = "true" ]; then + echo "Mirroring DuckDB release ${REF_DIR}/${ARCHIVE}" + curl -fSL --retry 3 -o "$ARCHIVE" \ + "https://github.com/duckdb/duckdb/releases/download/${REF_DIR}/${ARCHIVE}" + else + echo "Building DuckDB commit ${REF_DIR} from source" + + curl -fSL --retry 3 -o duckdb-src.zip \ + "https://github.com/duckdb/duckdb/archive/${REF_DIR}.zip" + unzip -q duckdb-src.zip + + src_dir="duckdb-${REF_DIR}" + extra="" + if [ "$PLATFORM_OS" = "osx" ]; then + extra="OSX_BUILD_UNIVERSAL=1" + fi + + # Same as in build.rs + make -C "$src_dir" \ + GEN=ninja \ + DISABLE_SANITIZER=1 \ + THREADSAN=0 \ + BUILD_SHELL=false \ + BUILD_UNITTESTS=false \ + ENABLE_UNITTEST_CPP_TESTS=false \ + BUILD_EXTENSIONS="parquet;jemalloc;httpfs;tpch;tpcds" \ + $extra + + lib_dir="${src_dir}/build/release/src" + stage="stage" + rm -rf "$stage" + mkdir -p "$stage" + + cp -a "${lib_dir}/libduckdb.so" "$stage/" 2>/dev/null || true + cp -a "${lib_dir}/libduckdb.dylib" "$stage/" 2>/dev/null || true + cp -a "${lib_dir}/libduckdb_static.a" "$stage/" + cp -a "${src_dir}/src/include/duckdb.h" "$stage/" 2>/dev/null || true + cp -a "${src_dir}/src/include/duckdb.hpp" "$stage/" 2>/dev/null || true + + ( cd 
"$stage" && zip -r "../${ARCHIVE}" . ) + fi + + ls -la "$ARCHIVE" + + - name: Upload to R2 + env: + AWS_ACCESS_KEY_ID: ${{ secrets.DUCKDB_R2_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DUCKDB_R2_SECRET_ACCESS_KEY }} + AWS_REGION: "us-east-1" + AWS_ENDPOINT_URL: ${{ env.R2_ENDPOINT_URL }} + run: | + set -Eeuo pipefail + python3 scripts/s3-upload.py \ + --bucket "$R2_BUCKET" \ + --key "${{ needs.check.outputs.ref_dir }}/${{ matrix.archive }}" \ + --body "${{ matrix.archive }}" \ + --checksum-algorithm CRC32 diff --git a/vortex-duckdb/build.rs b/vortex-duckdb/build.rs index ce3ace44123..d330fea3127 100644 --- a/vortex-duckdb/build.rs +++ b/vortex-duckdb/build.rs @@ -17,7 +17,10 @@ use std::process::exit; use bindgen::Abi; use bindgen::callbacks::ParseCallbacks; -const DUCKDB_RELEASES_URL: &str = "https://github.com/duckdb/duckdb/releases/download"; +// You can replace this URL with https://github.com/duckdb/duckdb/releases/download +// We want our own infrastructure for testing pre-release builds +const DUCKDB_RELEASES_URL: &str = "https://ci-builds.vortex.dev"; + const DUCKDB_SOURCE_RELEASE_URL: &str = "https://github.com/duckdb/duckdb/archive/refs/tags"; const DUCKDB_SOURCE_COMMIT_URL: &str = "https://github.com/duckdb/duckdb/archive"; const DEFAULT_DUCKDB_VERSION: &str = "1.5.3"; From 84065dc3777d1bfcd1269f7e5d323bcdcfdfbf70 Mon Sep 17 00:00:00 2001 From: Mikhail Kot Date: Thu, 18 Jun 2026 10:44:46 +0100 Subject: [PATCH 2/5] fix --- .github/workflows/duckdb-r2.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/duckdb-r2.yml b/.github/workflows/duckdb-r2.yml index 9d04abfabe2..666bdbb6042 100644 --- a/.github/workflows/duckdb-r2.yml +++ b/.github/workflows/duckdb-r2.yml @@ -56,7 +56,7 @@ jobs: ref_dir="$ref" fi - echo "DuckDB version: $version (release=$release, ref_dir=$ref_dir)" + echo "DuckDB $version release=$release" entries=() for archive in \ @@ -94,6 +94,11 @@ jobs: any_missing=true fi + echo
"any_missing=$any_missing" + echo "repository=${{ github.repository }}" + echo "head_repo_name=${{ github.event.pull_request.head.repo.full_name }}" + echo "author_association=${{ github.event.pull_request.author_association }}" + { echo "version=$version" echo "ref_dir=$ref_dir" From d3fc1da645bff27f3a79d1d7ae80a0533b88c450 Mon Sep 17 00:00:00 2001 From: Mikhail Kot Date: Thu, 18 Jun 2026 11:44:47 +0100 Subject: [PATCH 3/5] better --- .github/workflows/ci.yml | 206 ++++++++++++++++++++++++++++++++ .github/workflows/duckdb-r2.yml | 202 ------------------------------- 2 files changed, 206 insertions(+), 202 deletions(-) delete mode 100644 .github/workflows/duckdb-r2.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bf9c0a785b6..00c629cb8c6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,6 +25,208 @@ env: NIGHTLY_TOOLCHAIN: nightly-2026-02-05 jobs: + # Mirror release Duckdb library files to R2 if they were not present. + # Build from source Duckdb library files if build.rs contains a commit, then + # upload to R2 if this commit was not present (testing pre-release builds). + # + # Launched only on PRs from maintainers which change build.rs inside + # "duckdb-build" environment + duckdb-check: + name: "Resolve DuckDB version and check R2" + # Launch only on PRs. On develop the version is already in R2 (from merged + # PR) and build.rs is unchanged. 
+ if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + timeout-minutes: 10 + env: + PUBLIC_BASE_URL: "https://ci-builds.vortex.dev" + outputs: + version: ${{ steps.resolve.outputs.version }} + ref_dir: ${{ steps.resolve.outputs.ref_dir }} + release: ${{ steps.resolve.outputs.release }} + matrix: ${{ steps.resolve.outputs.matrix }} + any_missing: ${{ steps.resolve.outputs.any_missing }} + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 + - name: Resolve version and check R2 + id: resolve + run: | + set -Eeuo pipefail + + version=$(grep -oP 'DEFAULT_DUCKDB_VERSION:\s*&str\s*=\s*"\K[^"]+' \ + vortex-duckdb/build.rs) + + # Same as in vortex-duckdb/build.rs: >=2 dot-separated numeric + # components is a tagged release (ref dir "vX.Y.Z"), anything + # else is a commit. + ref="${version#v}" + if [[ "$ref" =~ ^[0-9]+(\.[0-9]+)+$ ]]; then + release=true + ref_dir="v$ref" + else + release=false + ref_dir="$ref" + fi + + echo "DuckDB $version release=$release" + + entries=() + for archive in \ + libduckdb-linux-amd64.zip \ + libduckdb-linux-arm64.zip \ + libduckdb-osx-universal.zip; do + + url="${PUBLIC_BASE_URL}/${ref_dir}/${archive}" + code=$(curl -o /dev/null -s -w '%{http_code}' --head "$url" || echo 000) + if [ "$code" = "200" ]; then + echo "present in R2: $archive" + continue + fi + + echo "missing in R2 (HTTP $code): $archive" + case "$archive" in + *linux-amd64*) runner="ubuntu-latest"; os="linux"; arch="amd64" ;; + *linux-arm64*) runner="ubuntu-24.04-arm"; os="linux"; arch="arm64" ;; + *osx-universal*) runner="macos-14"; os="osx"; arch="universal" ;; + esac + entries+=("$(jq -nc \ + --arg archive "$archive" \ + --arg runner "$runner" \ + --arg os "$os" \ + --arg arch "$arch" \ + '{archive: $archive, runner: $runner, os: $os, arch: $arch}')") + done + + if [ "${#entries[@]}" -eq 0 ]; then + matrix='{"include":[]}' + any_missing=false + else + include=$(printf '%s\n' "${entries[@]}" | jq -sc '.') + matrix=$(jq -nc 
--argjson include "$include" '{include: $include}') + any_missing=true + fi + + echo "any_missing=$any_missing" + + { + echo "version=$version" + echo "ref_dir=$ref_dir" + echo "release=$release" + echo "matrix=$matrix" + echo "any_missing=$any_missing" + } >> "$GITHUB_OUTPUT" + + duckdb-mirror: + name: "Mirror DuckDB ${{ matrix.archive }} to R2" + needs: duckdb-check + if: >- + needs.duckdb-check.outputs.any_missing == 'true' && + github.repository == 'vortex-data/vortex' && + github.event.pull_request.head.repo.full_name == github.repository + environment: duckdb-build + timeout-minutes: 120 + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.duckdb-check.outputs.matrix) }} + runs-on: ${{ matrix.runner }} + env: + R2_BUCKET: "duckdb-builds" + R2_ENDPOINT_URL: "https://52bdeab5651e1584747feefd051fd566.r2.cloudflarestorage.com" + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 + + - name: Install build dependencies (Linux) + if: needs.duckdb-check.outputs.release != 'true' && runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get install -y ninja-build libcurl4-openssl-dev zip unzip + + - name: Install build dependencies (macOS) + if: needs.duckdb-check.outputs.release != 'true' && runner.os == 'macOS' + run: brew install ninja + + - name: Prepare ${{ matrix.archive }} + env: + ARCHIVE: ${{ matrix.archive }} + REF_DIR: ${{ needs.duckdb-check.outputs.ref_dir }} + RELEASE: ${{ needs.duckdb-check.outputs.release }} + PLATFORM_OS: ${{ matrix.os }} + run: | + set -Eeuo pipefail + + if [ "$RELEASE" = "true" ]; then + echo "Mirroring DuckDB release ${REF_DIR}/${ARCHIVE}" + curl -fSL --retry 3 -o "$ARCHIVE" \ + "https://github.com/duckdb/duckdb/releases/download/${REF_DIR}/${ARCHIVE}" + else + echo "Building DuckDB commit ${REF_DIR} from source" + + curl -fSL --retry 3 -o duckdb-src.zip \ + "https://github.com/duckdb/duckdb/archive/${REF_DIR}.zip" + unzip -q duckdb-src.zip + + src_dir="duckdb-${REF_DIR}" + extra="" + if [ 
"$PLATFORM_OS" = "osx" ]; then + extra="OSX_BUILD_UNIVERSAL=1" + fi + + # Same as in build.rs + make -C "$src_dir" \ + GEN=ninja \ + DISABLE_SANITIZER=1 \ + THREADSAN=0 \ + BUILD_SHELL=false \ + BUILD_UNITTESTS=false \ + ENABLE_UNITTEST_CPP_TESTS=false \ + BUILD_EXTENSIONS="parquet;jemalloc;httpfs;tpch;tpcds" \ + $extra + + lib_dir="${src_dir}/build/release/src" + stage="stage" + rm -rf "$stage" + mkdir -p "$stage" + + cp -a "${lib_dir}/libduckdb.so" "$stage/" 2>/dev/null || true + cp -a "${lib_dir}/libduckdb.dylib" "$stage/" 2>/dev/null || true + cp -a "${lib_dir}/libduckdb_static.a" "$stage/" + cp -a "${src_dir}/src/include/duckdb.h" "$stage/" 2>/dev/null || true + cp -a "${src_dir}/src/include/duckdb.hpp" "$stage/" 2>/dev/null || true + + ( cd "$stage" && zip -r "../${ARCHIVE}" . ) + fi + + ls -la "$ARCHIVE" + + - name: Upload to R2 + env: + AWS_ACCESS_KEY_ID: ${{ secrets.DUCKDB_R2_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DUCKDB_R2_SECRET_ACCESS_KEY }} + AWS_REGION: "us-east-1" + AWS_ENDPOINT_URL: ${{ env.R2_ENDPOINT_URL }} + run: | + set -Eeuo pipefail + python3 scripts/s3-upload.py \ + --bucket "$R2_BUCKET" \ + --key "${{ needs.duckdb-check.outputs.ref_dir }}/${{ matrix.archive }}" \ + --body "${{ matrix.archive }}" \ + --checksum-algorithm CRC32 + + duckdb-ready: + name: "DuckDB libraries available in R2" + needs: [duckdb-check, duckdb-mirror] + if: ${{ !cancelled() }} + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Verify DuckDB mirror pipeline + if: >- + needs.duckdb-check.result == 'failure' || + needs.duckdb-mirror.result == 'failure' + run: | + echo "DuckDB check or mirror failed" + exit 1 + lint-toml: runs-on: ubuntu-latest timeout-minutes: 10 @@ -115,6 +317,7 @@ jobs: rust-docs: name: "Rust (docs)" + needs: duckdb-ready timeout-minutes: 30 runs-on: >- ${{ github.repository == 'vortex-data/vortex' @@ -204,6 +407,7 @@ jobs: rust-lint: name: "Rust (lint)" + needs: duckdb-ready timeout-minutes: 30 runs-on: >- ${{ 
github.repository == 'vortex-data/vortex' @@ -301,6 +505,7 @@ jobs: rust-test-other: name: "Rust tests (${{ matrix.os }})" + needs: duckdb-ready timeout-minutes: 30 strategy: fail-fast: false @@ -422,6 +627,7 @@ jobs: sqllogic-test: name: "SQL logic tests" + needs: duckdb-ready runs-on: >- ${{ github.repository == 'vortex-data/vortex' && format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=sql-logic-test', github.run_id) diff --git a/.github/workflows/duckdb-r2.yml b/.github/workflows/duckdb-r2.yml deleted file mode 100644 index 666bdbb6042..00000000000 --- a/.github/workflows/duckdb-r2.yml +++ /dev/null @@ -1,202 +0,0 @@ -name: DuckDB R2 builds - -# Mirror release Duckdb library files to R2 if they were not present. -# Build from source Duckdb library files if build.rs contains a commit, then -# upload to R2 if this commit was not present (testing pre-release builds). -# -# Launched only on PRs from maintainers which change build.rs inside -# "duckdb-build" environment -on: - pull_request: - paths: - - "vortex-duckdb/build.rs" - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: false - -permissions: - contents: read - -env: - PUBLIC_BASE_URL: "https://ci-builds.vortex.dev" - R2_BUCKET: "duckdb-builds" - R2_ENDPOINT_URL: "https://52bdeab5651e1584747feefd051fd566.r2.cloudflarestorage.com" - -jobs: - check: - name: "Resolve version and check R2" - runs-on: ubuntu-latest - timeout-minutes: 10 - outputs: - version: ${{ steps.resolve.outputs.version }} - ref_dir: ${{ steps.resolve.outputs.ref_dir }} - release: ${{ steps.resolve.outputs.release }} - matrix: ${{ steps.resolve.outputs.matrix }} - any_missing: ${{ steps.resolve.outputs.any_missing }} - steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 - - name: Resolve version and check R2 - id: resolve - run: | - set -Eeuo pipefail - - version=$(grep -oP 'DEFAULT_DUCKDB_VERSION:\s*&str\s*=\s*"\K[^"]+' 
\ - vortex-duckdb/build.rs) - - # Same as in vortex-duckdb/build.rs: >=2 dot-separated numeric - # components is a tagged release (ref dir "vX.Y.Z"), anything - # else is a commit. - ref="${version#v}" - if [[ "$ref" =~ ^[0-9]+(\.[0-9]+)+$ ]]; then - release=true - ref_dir="v$ref" - else - release=false - ref_dir="$ref" - fi - - echo "DuckDB $version release=$release" - - entries=() - for archive in \ - libduckdb-linux-amd64.zip \ - libduckdb-linux-arm64.zip \ - libduckdb-osx-universal.zip; do - - url="${PUBLIC_BASE_URL}/${ref_dir}/${archive}" - code=$(curl -o /dev/null -s -w '%{http_code}' --head "$url" || echo 000) - if [ "$code" = "200" ]; then - echo "present in R2: $archive" - continue - fi - - echo "missing in R2 (HTTP $code): $archive" - case "$archive" in - *linux-amd64*) runner="ubuntu-latest"; os="linux"; arch="amd64" ;; - *linux-arm64*) runner="ubuntu-24.04-arm"; os="linux"; arch="arm64" ;; - *osx-universal*) runner="macos-14"; os="osx"; arch="universal" ;; - esac - entries+=("$(jq -nc \ - --arg archive "$archive" \ - --arg runner "$runner" \ - --arg os "$os" \ - --arg arch "$arch" \ - '{archive: $archive, runner: $runner, os: $os, arch: $arch}')") - done - - if [ "${#entries[@]}" -eq 0 ]; then - matrix='{"include":[]}' - any_missing=false - else - include=$(printf '%s\n' "${entries[@]}" | jq -sc '.') - matrix=$(jq -nc --argjson include "$include" '{include: $include}') - any_missing=true - fi - - echo "any_missing=$any_missing" - echo "repository=${{ github.repository }}" - echo "head_repo_name=${{ github.event.pull_request.head.repo.full_name }}" - echo "author_association=${{ github.event.pull_request.author_association }}" - - { - echo "version=$version" - echo "ref_dir=$ref_dir" - echo "release=$release" - echo "matrix=$matrix" - echo "any_missing=$any_missing" - } >> "$GITHUB_OUTPUT" - - build-and-upload: - name: "Build and upload ${{ matrix.archive }}" - needs: check - if: >- - needs.check.outputs.any_missing == 'true' && - github.repository == 
'vortex-data/vortex' && - github.event.pull_request.head.repo.full_name == github.repository && - contains(fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'), github.event.pull_request.author_association) - environment: duckdb-build - timeout-minutes: 120 - strategy: - fail-fast: false - matrix: ${{ fromJSON(needs.check.outputs.matrix) }} - runs-on: ${{ matrix.runner }} - steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 - - - name: Install build dependencies (Linux) - if: needs.check.outputs.release != 'true' && runner.os == 'Linux' - run: | - sudo apt-get update - sudo apt-get install -y ninja-build libcurl4-openssl-dev zip unzip - - - name: Install build dependencies (macOS) - if: needs.check.outputs.release != 'true' && runner.os == 'macOS' - run: brew install ninja - - - name: Prepare ${{ matrix.archive }} - env: - ARCHIVE: ${{ matrix.archive }} - REF_DIR: ${{ needs.check.outputs.ref_dir }} - RELEASE: ${{ needs.check.outputs.release }} - PLATFORM_OS: ${{ matrix.os }} - run: | - set -Eeuo pipefail - - if [ "$RELEASE" = "true" ]; then - echo "Mirroring DuckDB release ${REF_DIR}/${ARCHIVE}" - curl -fSL --retry 3 -o "$ARCHIVE" \ - "https://github.com/duckdb/duckdb/releases/download/${REF_DIR}/${ARCHIVE}" - else - echo "Building DuckDB commit ${REF_DIR} from source" - - curl -fSL --retry 3 -o duckdb-src.zip \ - "https://github.com/duckdb/duckdb/archive/${REF_DIR}.zip" - unzip -q duckdb-src.zip - - src_dir="duckdb-${REF_DIR}" - extra="" - if [ "$PLATFORM_OS" = "osx" ]; then - extra="OSX_BUILD_UNIVERSAL=1" - fi - - # Same as in build.rs - make -C "$src_dir" \ - GEN=ninja \ - DISABLE_SANITIZER=1 \ - THREADSAN=0 \ - BUILD_SHELL=false \ - BUILD_UNITTESTS=false \ - ENABLE_UNITTEST_CPP_TESTS=false \ - BUILD_EXTENSIONS="parquet;jemalloc;httpfs;tpch;tpcds" \ - $extra - - lib_dir="${src_dir}/build/release/src" - stage="stage" - rm -rf "$stage" - mkdir -p "$stage" - - cp -a "${lib_dir}/libduckdb.so" "$stage/" 2>/dev/null || true - cp -a 
"${lib_dir}/libduckdb.dylib" "$stage/" 2>/dev/null || true - cp -a "${lib_dir}/libduckdb_static.a" "$stage/" - cp -a "${src_dir}/src/include/duckdb.h" "$stage/" 2>/dev/null || true - cp -a "${src_dir}/src/include/duckdb.hpp" "$stage/" 2>/dev/null || true - - ( cd "$stage" && zip -r "../${ARCHIVE}" . ) - fi - - ls -la "$ARCHIVE" - - - name: Upload to R2 - env: - AWS_ACCESS_KEY_ID: ${{ secrets.DUCKDB_R2_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.DUCKDB_R2_SECRET_ACCESS_KEY }} - AWS_REGION: "us-east-1" - AWS_ENDPOINT_URL: ${{ env.R2_ENDPOINT_URL }} - run: | - set -Eeuo pipefail - python3 scripts/s3-upload.py \ - --bucket "$R2_BUCKET" \ - --key "${{ needs.check.outputs.ref_dir }}/${{ matrix.archive }}" \ - --body "${{ matrix.archive }}" \ - --checksum-algorithm CRC32 From 30e146cebcbd430c053fc266f1b9b9063c9fb5b1 Mon Sep 17 00:00:00 2001 From: Mikhail Kot Date: Thu, 18 Jun 2026 12:08:35 +0100 Subject: [PATCH 4/5] better --- .github/workflows/ci.yml | 199 +----------------------- .github/workflows/duckdb-r2.yml | 192 +++++++++++++++++++++++ .github/workflows/rust-instrumented.yml | 20 +++ 3 files changed, 220 insertions(+), 191 deletions(-) create mode 100644 .github/workflows/duckdb-r2.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 00c629cb8c6..07e6bb708a7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,206 +25,23 @@ env: NIGHTLY_TOOLCHAIN: nightly-2026-02-05 jobs: - # Mirror release Duckdb library files to R2 if they were not present. - # Build from source Duckdb library files if build.rs contains a commit, then - # upload to R2 if this commit was not present (testing pre-release builds). - # - # Launched only on PRs from maintainers which change build.rs inside - # "duckdb-build" environment - duckdb-check: - name: "Resolve DuckDB version and check R2" - # Launch only on PRs. On develop the version is already in R2 (from merged - # PR) and build.rs is unchanged. 
- if: github.event_name == 'pull_request' - runs-on: ubuntu-latest - timeout-minutes: 10 - env: - PUBLIC_BASE_URL: "https://ci-builds.vortex.dev" - outputs: - version: ${{ steps.resolve.outputs.version }} - ref_dir: ${{ steps.resolve.outputs.ref_dir }} - release: ${{ steps.resolve.outputs.release }} - matrix: ${{ steps.resolve.outputs.matrix }} - any_missing: ${{ steps.resolve.outputs.any_missing }} - steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 - - name: Resolve version and check R2 - id: resolve - run: | - set -Eeuo pipefail - - version=$(grep -oP 'DEFAULT_DUCKDB_VERSION:\s*&str\s*=\s*"\K[^"]+' \ - vortex-duckdb/build.rs) - - # Same as in vortex-duckdb/build.rs: >=2 dot-separated numeric - # components is a tagged release (ref dir "vX.Y.Z"), anything - # else is a commit. - ref="${version#v}" - if [[ "$ref" =~ ^[0-9]+(\.[0-9]+)+$ ]]; then - release=true - ref_dir="v$ref" - else - release=false - ref_dir="$ref" - fi - - echo "DuckDB $version release=$release" - - entries=() - for archive in \ - libduckdb-linux-amd64.zip \ - libduckdb-linux-arm64.zip \ - libduckdb-osx-universal.zip; do - - url="${PUBLIC_BASE_URL}/${ref_dir}/${archive}" - code=$(curl -o /dev/null -s -w '%{http_code}' --head "$url" || echo 000) - if [ "$code" = "200" ]; then - echo "present in R2: $archive" - continue - fi - - echo "missing in R2 (HTTP $code): $archive" - case "$archive" in - *linux-amd64*) runner="ubuntu-latest"; os="linux"; arch="amd64" ;; - *linux-arm64*) runner="ubuntu-24.04-arm"; os="linux"; arch="arm64" ;; - *osx-universal*) runner="macos-14"; os="osx"; arch="universal" ;; - esac - entries+=("$(jq -nc \ - --arg archive "$archive" \ - --arg runner "$runner" \ - --arg os "$os" \ - --arg arch "$arch" \ - '{archive: $archive, runner: $runner, os: $os, arch: $arch}')") - done - - if [ "${#entries[@]}" -eq 0 ]; then - matrix='{"include":[]}' - any_missing=false - else - include=$(printf '%s\n' "${entries[@]}" | jq -sc '.') - matrix=$(jq -nc 
--argjson include "$include" '{include: $include}') - any_missing=true - fi - - echo "any_missing=$any_missing" - - { - echo "version=$version" - echo "ref_dir=$ref_dir" - echo "release=$release" - echo "matrix=$matrix" - echo "any_missing=$any_missing" - } >> "$GITHUB_OUTPUT" - duckdb-mirror: - name: "Mirror DuckDB ${{ matrix.archive }} to R2" - needs: duckdb-check - if: >- - needs.duckdb-check.outputs.any_missing == 'true' && - github.repository == 'vortex-data/vortex' && - github.event.pull_request.head.repo.full_name == github.repository - environment: duckdb-build - timeout-minutes: 120 - strategy: - fail-fast: false - matrix: ${{ fromJSON(needs.duckdb-check.outputs.matrix) }} - runs-on: ${{ matrix.runner }} - env: - R2_BUCKET: "duckdb-builds" - R2_ENDPOINT_URL: "https://52bdeab5651e1584747feefd051fd566.r2.cloudflarestorage.com" - steps: - - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 - - - name: Install build dependencies (Linux) - if: needs.duckdb-check.outputs.release != 'true' && runner.os == 'Linux' - run: | - sudo apt-get update - sudo apt-get install -y ninja-build libcurl4-openssl-dev zip unzip - - - name: Install build dependencies (macOS) - if: needs.duckdb-check.outputs.release != 'true' && runner.os == 'macOS' - run: brew install ninja - - - name: Prepare ${{ matrix.archive }} - env: - ARCHIVE: ${{ matrix.archive }} - REF_DIR: ${{ needs.duckdb-check.outputs.ref_dir }} - RELEASE: ${{ needs.duckdb-check.outputs.release }} - PLATFORM_OS: ${{ matrix.os }} - run: | - set -Eeuo pipefail - - if [ "$RELEASE" = "true" ]; then - echo "Mirroring DuckDB release ${REF_DIR}/${ARCHIVE}" - curl -fSL --retry 3 -o "$ARCHIVE" \ - "https://github.com/duckdb/duckdb/releases/download/${REF_DIR}/${ARCHIVE}" - else - echo "Building DuckDB commit ${REF_DIR} from source" - - curl -fSL --retry 3 -o duckdb-src.zip \ - "https://github.com/duckdb/duckdb/archive/${REF_DIR}.zip" - unzip -q duckdb-src.zip - - src_dir="duckdb-${REF_DIR}" - extra="" - if [ 
"$PLATFORM_OS" = "osx" ]; then - extra="OSX_BUILD_UNIVERSAL=1" - fi - - # Same as in build.rs - make -C "$src_dir" \ - GEN=ninja \ - DISABLE_SANITIZER=1 \ - THREADSAN=0 \ - BUILD_SHELL=false \ - BUILD_UNITTESTS=false \ - ENABLE_UNITTEST_CPP_TESTS=false \ - BUILD_EXTENSIONS="parquet;jemalloc;httpfs;tpch;tpcds" \ - $extra - - lib_dir="${src_dir}/build/release/src" - stage="stage" - rm -rf "$stage" - mkdir -p "$stage" - - cp -a "${lib_dir}/libduckdb.so" "$stage/" 2>/dev/null || true - cp -a "${lib_dir}/libduckdb.dylib" "$stage/" 2>/dev/null || true - cp -a "${lib_dir}/libduckdb_static.a" "$stage/" - cp -a "${src_dir}/src/include/duckdb.h" "$stage/" 2>/dev/null || true - cp -a "${src_dir}/src/include/duckdb.hpp" "$stage/" 2>/dev/null || true - - ( cd "$stage" && zip -r "../${ARCHIVE}" . ) - fi - - ls -la "$ARCHIVE" - - - name: Upload to R2 - env: - AWS_ACCESS_KEY_ID: ${{ secrets.DUCKDB_R2_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.DUCKDB_R2_SECRET_ACCESS_KEY }} - AWS_REGION: "us-east-1" - AWS_ENDPOINT_URL: ${{ env.R2_ENDPOINT_URL }} - run: | - set -Eeuo pipefail - python3 scripts/s3-upload.py \ - --bucket "$R2_BUCKET" \ - --key "${{ needs.duckdb-check.outputs.ref_dir }}/${{ matrix.archive }}" \ - --body "${{ matrix.archive }}" \ - --checksum-algorithm CRC32 + name: "Mirror DuckDB to R2" + if: github.event_name == 'pull_request' + uses: ./.github/workflows/duckdb-r2.yml + secrets: inherit duckdb-ready: name: "DuckDB libraries available in R2" - needs: [duckdb-check, duckdb-mirror] + needs: duckdb-mirror if: ${{ !cancelled() }} runs-on: ubuntu-latest timeout-minutes: 5 steps: - - name: Verify DuckDB mirror pipeline - if: >- - needs.duckdb-check.result == 'failure' || - needs.duckdb-mirror.result == 'failure' + - name: Verify DuckDB mirror + if: ${{ needs.duckdb-mirror.result == 'failure' }} run: | - echo "DuckDB check or mirror failed" + echo "DuckDB mirror failed; downstream builds would 404" exit 1 lint-toml: diff --git a/.github/workflows/duckdb-r2.yml 
b/.github/workflows/duckdb-r2.yml new file mode 100644 index 00000000000..4368ae807df --- /dev/null +++ b/.github/workflows/duckdb-r2.yml @@ -0,0 +1,192 @@ +name: DuckDB R2 mirror + +# Mirror DuckDB libraries referenced by vortex-duckdb/build.rs to R2 when they +# are not present yet. Download tagged archives or build commits from source. +on: + workflow_call: { } + +concurrency: + group: duckdb-r2-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: false + +permissions: + contents: read + +env: + PUBLIC_BASE_URL: "https://ci-builds.vortex.dev" + R2_BUCKET: "duckdb-builds" + R2_ENDPOINT_URL: "https://52bdeab5651e1584747feefd051fd566.r2.cloudflarestorage.com" + +jobs: + check: + name: "Resolve DuckDB version and check R2" + runs-on: ubuntu-latest + timeout-minutes: 10 + outputs: + version: ${{ steps.resolve.outputs.version }} + ref_dir: ${{ steps.resolve.outputs.ref_dir }} + release: ${{ steps.resolve.outputs.release }} + matrix: ${{ steps.resolve.outputs.matrix }} + any_missing: ${{ steps.resolve.outputs.any_missing }} + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6 + - name: Resolve version and check R2 + id: resolve + run: | + set -Eeuo pipefail + + version=$(grep -oP 'DEFAULT_DUCKDB_VERSION:\s*&str\s*=\s*"\K[^"]+' \ + vortex-duckdb/build.rs) + + # Same as in vortex-duckdb/build.rs: >=2 dot-separated numeric + # components is a tagged release (ref dir "vX.Y.Z"), anything + # else is a commit. 
+          ref="${version#v}"
+          if [[ "$ref" =~ ^[0-9]+(\.[0-9]+)+$ ]]; then
+            release=true
+            ref_dir="v$ref"
+          else
+            release=false
+            ref_dir="$ref"
+          fi
+
+          echo "DuckDB $version release=$release"
+
+          entries=()
+          for archive in \
+            libduckdb-linux-amd64.zip \
+            libduckdb-linux-arm64.zip \
+            libduckdb-osx-universal.zip; do
+
+            url="${PUBLIC_BASE_URL}/${ref_dir}/${archive}"
+            code=$(curl -o /dev/null -s -w '%{http_code}' --head "$url" || echo 000)
+            if [ "$code" = "200" ]; then
+              echo "present in R2: $archive"
+              continue
+            fi
+
+            echo "missing in R2 (HTTP $code): $archive"
+            case "$archive" in
+              *linux-amd64*) runner="ubuntu-latest"; os="linux"; arch="amd64" ;;
+              *linux-arm64*) runner="ubuntu-24.04-arm"; os="linux"; arch="arm64" ;;
+              *osx-universal*) runner="macos-14"; os="osx"; arch="universal" ;;
+            esac
+            entries+=("$(jq -nc \
+              --arg archive "$archive" \
+              --arg runner "$runner" \
+              --arg os "$os" \
+              --arg arch "$arch" \
+              '{archive: $archive, runner: $runner, os: $os, arch: $arch}')")
+          done
+
+          if [ "${#entries[@]}" -eq 0 ]; then
+            matrix='{"include":[]}'
+            any_missing=false
+          else
+            include=$(printf '%s\n' "${entries[@]}" | jq -sc '.')
+            matrix=$(jq -nc --argjson include "$include" '{include: $include}')
+            any_missing=true
+          fi
+
+          echo "any_missing=$any_missing"
+
+          {
+            echo "version=$version"
+            echo "ref_dir=$ref_dir"
+            echo "release=$release"
+            echo "matrix=$matrix"
+            echo "any_missing=$any_missing"
+          } >> "$GITHUB_OUTPUT"
+
+  # One matrix job per archive that the check job reported as missing from R2.
+  # Tagged releases are mirrored from the upstream GitHub release assets;
+  # anything else is treated as a commit and built from source. The resulting
+  # archive is uploaded to "<ref_dir>/<archive>" in the R2 bucket.
+  #
+  # The job only runs in vortex-data/vortex and only for PRs whose head branch
+  # lives in that same repository, because the upload step uses the R2
+  # credentials.
+  mirror:
+    name: "Mirror DuckDB ${{ matrix.archive }} to R2"
+    needs: check
+    if: >-
+      needs.check.outputs.any_missing == 'true' &&
+      github.repository == 'vortex-data/vortex' &&
+      github.event.pull_request.head.repo.full_name == github.repository
+    environment: duckdb-build
+    timeout-minutes: 120
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJSON(needs.check.outputs.matrix) }}
+    runs-on: ${{ matrix.runner }}
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6
+
+      # Build tooling is only needed when building a commit from source.
+      - name: Install build dependencies (Linux)
+        if: needs.check.outputs.release != 'true' && runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ninja-build libcurl4-openssl-dev zip unzip
+
+      - name: Install build dependencies (macOS)
+        if: needs.check.outputs.release != 'true' && runner.os == 'macOS'
+        run: brew install ninja
+
+      # Produces "$ARCHIVE" in the workspace, either by downloading the
+      # upstream release asset or by building and zipping the libraries.
+      - name: Prepare ${{ matrix.archive }}
+        env:
+          ARCHIVE: ${{ matrix.archive }}
+          REF_DIR: ${{ needs.check.outputs.ref_dir }}
+          RELEASE: ${{ needs.check.outputs.release }}
+          PLATFORM_OS: ${{ matrix.os }}
+        run: |
+          set -Eeuo pipefail
+
+          if [ "$RELEASE" = "true" ]; then
+            echo "Mirroring DuckDB release ${REF_DIR}/${ARCHIVE}"
+            curl -fSL --retry 3 -o "$ARCHIVE" \
+              "https://github.com/duckdb/duckdb/releases/download/${REF_DIR}/${ARCHIVE}"
+          else
+            echo "Building DuckDB commit ${REF_DIR} from source"
+
+            curl -fSL --retry 3 -o duckdb-src.zip \
+              "https://github.com/duckdb/duckdb/archive/${REF_DIR}.zip"
+            unzip -q duckdb-src.zip
+
+            src_dir="duckdb-${REF_DIR}"
+            extra=""
+            if [ "$PLATFORM_OS" = "osx" ]; then
+              extra="OSX_BUILD_UNIVERSAL=1"
+            fi
+
+            # Same as in build.rs
+            # $extra is intentionally unquoted so that an empty value expands
+            # to no argument at all.
+            make -C "$src_dir" \
+              GEN=ninja \
+              DISABLE_SANITIZER=1 \
+              THREADSAN=0 \
+              BUILD_SHELL=false \
+              BUILD_UNITTESTS=false \
+              ENABLE_UNITTEST_CPP_TESTS=false \
+              BUILD_EXTENSIONS="parquet;jemalloc;httpfs;tpch;tpcds" \
+              $extra
+
+            lib_dir="${src_dir}/build/release/src"
+            stage="stage"
+            rm -rf "$stage"
+            mkdir -p "$stage"
+
+            # Only one of the two shared libraries exists on a given platform,
+            # so each copy is best-effort on its own.
+            cp -a "${lib_dir}/libduckdb.so" "$stage/" 2>/dev/null || true
+            cp -a "${lib_dir}/libduckdb.dylib" "$stage/" 2>/dev/null || true
+            cp -a "${lib_dir}/libduckdb_static.a" "$stage/"
+            cp -a "${src_dir}/src/include/duckdb.h" "$stage/" 2>/dev/null || true
+            cp -a "${src_dir}/src/include/duckdb.hpp" "$stage/" 2>/dev/null || true
+
+            # Archives already present in R2 are skipped by the check job, so
+            # an archive uploaded without any shared library would not be
+            # rebuilt. Fail here instead of uploading it.
+            if [ ! -e "$stage/libduckdb.so" ] && [ ! -e "$stage/libduckdb.dylib" ]; then
+              echo "error: no libduckdb.so or libduckdb.dylib found in ${lib_dir}" >&2
+              exit 1
+            fi
+
+            ( cd "$stage" && zip -r "../${ARCHIVE}" . )
+          fi
+
+          ls -la "$ARCHIVE"
+
+      - name: Upload to R2
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.DUCKDB_R2_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.DUCKDB_R2_SECRET_ACCESS_KEY }}
+          AWS_REGION: "us-east-1"
+          AWS_ENDPOINT_URL: ${{ env.R2_ENDPOINT_URL }}
+          # ref_dir is derived from a string in vortex-duckdb/build.rs. Pass it
+          # (and the archive name) through the environment so the values are
+          # never expanded into the script text of a step holding credentials.
+          ARCHIVE: ${{ matrix.archive }}
+          REF_DIR: ${{ needs.check.outputs.ref_dir }}
+        run: |
+          set -Eeuo pipefail
+          python3 scripts/s3-upload.py \
+            --bucket "$R2_BUCKET" \
+            --key "${REF_DIR}/${ARCHIVE}" \
+            --body "$ARCHIVE" \
+            --checksum-algorithm CRC32
diff --git a/.github/workflows/rust-instrumented.yml b/.github/workflows/rust-instrumented.yml
index e4dc48ee8b7..b72b943895c 100644
--- a/.github/workflows/rust-instrumented.yml
+++ b/.github/workflows/rust-instrumented.yml
@@ -22,8 +22,28 @@ env:
   NIGHTLY_TOOLCHAIN: nightly-2026-02-05
 
 jobs:
+  duckdb-mirror:
+    name: "Mirror DuckDB to R2"
+    if: github.event_name == 'pull_request'
+    uses: ./.github/workflows/duckdb-r2.yml
+    secrets: inherit
+
+  duckdb-ready:
+    name: "DuckDB libraries available in R2"
+    needs: duckdb-mirror
+    if: ${{ !cancelled() }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Verify DuckDB mirror
+        if: ${{ needs.duckdb-mirror.result == 'failure' }}
+        run: |
+          echo "DuckDB mirror failed"
+          exit 1
+
   rust-coverage:
     name: "Rust tests (coverage) (${{ matrix.suite }})"
+    needs: duckdb-ready
     timeout-minutes: 30
     permissions:
       id-token: write
From fdcb459050deccc6b66942702aa548a33489d8de Mon Sep 17 00:00:00 2001
From: Mikhail Kot
Date: Thu, 18 Jun 2026 14:39:00 +0100
Subject: [PATCH 5/5] test commit build from source

---
 vortex-duckdb/build.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vortex-duckdb/build.rs b/vortex-duckdb/build.rs
index d330fea3127..22ec8a7a746 100644
--- a/vortex-duckdb/build.rs
+++ b/vortex-duckdb/build.rs
@@ -23,7 +23,7 @@ const DUCKDB_RELEASES_URL: &str = "https://ci-builds.vortex.dev";
 const DUCKDB_SOURCE_RELEASE_URL: &str = "https://github.com/duckdb/duckdb/archive/refs/tags";
 const DUCKDB_SOURCE_COMMIT_URL: &str = "https://github.com/duckdb/duckdb/archive";
 
-const DEFAULT_DUCKDB_VERSION: &str = "1.5.3";
+const DEFAULT_DUCKDB_VERSION: &str = "08e34c447bae34eaee3723cac61f2878b6bdf787";
 
 const BUILD_ARTIFACTS: [&str; 3] = ["libduckdb.dylib", "libduckdb.so", "libduckdb_static.a"];