Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions lib/sea/SeaArrowIpc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,19 @@ export function patchIpcBytes(ipcBytes: Buffer): Buffer {
function arrowTypeToTTypeId(field: Field<DataType>): TTypeId {
const typeName = field.metadata.get(DATABRICKS_TYPE_NAME)?.toUpperCase();

// `intervals_as_string` (set by the SEA backend for Thrift parity)
// renders INTERVAL columns as physical Arrow `Utf8` while the kernel
// keeps the `INTERVAL …` type_name metadata. The Thrift driver reports
// such string-rendered intervals as STRING (type 7), so honour the
// physical type here rather than the semantic metadata — otherwise the
// SEA path would report INTERVAL (20/21) and diverge from Thrift on a
// column whose values are already identical strings. Native interval
// encodings (the kernel default) are Duration / MonthInterval, never
// Utf8, so this guard is inert unless `intervals_as_string` is on.
if (typeName?.startsWith('INTERVAL') && DataType.isUtf8(field.type)) {
return TTypeId.STRING_TYPE;
}

switch (typeName) {
case 'BOOLEAN':
return TTypeId.BOOLEAN_TYPE;
Expand Down
25 changes: 25 additions & 0 deletions lib/sea/SeaAuth.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,24 @@ export interface SeaSessionDefaults {
catalog?: string;
schema?: string;
sessionConf?: Record<string, string>;
/**
* Render `INTERVAL` / `DURATION` result columns as strings
* (kernel `ResultConfig.intervals_as_string`). The kernel default is
* native Arrow `month_interval` / `duration[us]`, but the NodeJS
* Thrift driver surfaces intervals as strings, so the SEA path sets this
* to `true` to keep its result shape a byte-compatible drop-in for the
* Thrift backend. Omitting it falls back to the kernel's native types.
*/
intervalsAsString?: boolean;
/**
* Render complex (`ARRAY` / `MAP` / `STRUCT` / `VARIANT`) result
* columns as JSON strings (kernel `ResultConfig.complex_types_as_json`).
* Left unset on the SEA path: native Arrow nested types already decode
* identically to the Thrift backend through the shared Arrow converter,
* so forcing JSON here would *introduce* a divergence rather than
* remove one.
*/
complexTypesAsJson?: boolean;
}

export type SeaNativeConnectionOptions = SeaSessionDefaults &
Expand Down Expand Up @@ -161,6 +179,13 @@ export function buildSeaConnectionOptions(options: ConnectionOptions): SeaNative
const base = {
hostName: options.host,
httpPath: prependSlash(options.path),
// Match the NodeJS Thrift driver, which surfaces INTERVAL columns as
// strings. The kernel defaults to native Arrow interval/duration
// types; forcing the string rendering here keeps the SEA path a
// byte-compatible drop-in. Complex types are intentionally left at
// the kernel default (native Arrow) — they already decode identically
// to Thrift via the shared Arrow converter.
intervalsAsString: true,
};

const oauth = options as {
Expand Down
13 changes: 12 additions & 1 deletion lib/sea/SeaErrorMapping.ts
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,18 @@ export function mapKernelErrorToJsError(kErr: KernelErrorShape): ErrorWithSqlSta
error = new ParameterError(message);
break;

case 'SqlError':
// A server-side SQL execution failure (the statement reached an
// ERROR state on the warehouse — bad SQL, PERMISSION_DENIED,
// SCHEMA_ALREADY_EXISTS, …). The Thrift backend surfaces exactly
// this situation as an `OperationStateError(ERROR)` after polling
// the operation status, so we mirror that class here for
// drop-in parity (both extend HiveDriverError, so existing
// `catch (HiveDriverError)` callers are unaffected).
error = new OperationStateError(OperationStateErrorCode.Error);
error.message = message;
break;

// All remaining kernel ErrorCode variants map to the base driver error class.
// M0 intentionally does not introduce new error classes; M1 may add nuance.
case 'NotFound':
Expand All @@ -156,7 +168,6 @@ export function mapKernelErrorToJsError(kErr: KernelErrorShape): ErrorWithSqlSta
case 'Internal':
case 'InvalidStatementHandle':
case 'NetworkError':
case 'SqlError':
error = new HiveDriverError(message);
break;

Expand Down
114 changes: 114 additions & 0 deletions lib/sea/SeaInputValidation.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright (c) 2026 Databricks, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import Int64 from 'node-int64';
import { DBSQLParameter, DBSQLParameterValue } from '../DBSQLParameter';
import ParameterError from '../errors/ParameterError';

/**
 * Normalise a metadata argument so that "not provided" has a single
 * representation: `undefined`.
 *
 * The Thrift backend forwards `""` to the server, which reads it as
 * "unspecified" (match-all / session default), while the kernel's
 * `Identifier` / `LikePattern` reject an empty string with `InvalidArgument`.
 * Collapsing `""` (and `null`) to `undefined` before the napi boundary makes
 * the kernel see an omitted argument, keeping the SEA metadata surface
 * behaviourally aligned with Thrift.
 *
 * @param value - the caller-supplied argument
 * @returns `undefined` for `null`, `undefined`, or `""`; otherwise `value` unchanged
 */
export function emptyToUndefined(value: string | undefined | null): string | undefined {
  if (value === null || value === undefined || value.length === 0) {
    return undefined;
  }
  return value;
}

/**
 * Count the `?` parameter markers in a SQL string.
 *
 * A marker only counts when it appears in ordinary SQL text. Markers inside
 * single- or double-quoted string literals, backtick-quoted identifiers,
 * `--` line comments, and C-style block comments are skipped. Inside a
 * quoted string a doubled quote character is an escaped quote and does not
 * terminate the literal. The scan is intended to mirror the kernel's
 * `statement::params::count_parameter_markers` state machine, so that the
 * JS-side arity check agrees with what the kernel binds.
 *
 * @param sql - the statement text to scan
 * @returns the number of bindable `?` markers
 */
export function countParameterMarkers(sql: string): number {
  type Mode = 'normal' | 'single' | 'double' | 'backtick' | 'line' | 'block';
  let mode: Mode = 'normal';
  let markers = 0;

  for (let pos = 0; pos < sql.length; pos += 1) {
    const ch = sql.charAt(pos);
    // `charAt` yields '' past the end, so the look-ahead never matches there.
    const peek = sql.charAt(pos + 1);

    if (mode === 'normal') {
      if (ch === '?') {
        markers += 1;
      } else if (ch === "'") {
        mode = 'single';
      } else if (ch === '"') {
        mode = 'double';
      } else if (ch === '`') {
        mode = 'backtick';
      } else if (ch === '-' && peek === '-') {
        mode = 'line';
        pos += 1; // consume the second '-'
      } else if (ch === '/' && peek === '*') {
        mode = 'block';
        pos += 1; // consume the '*'
      }
    } else if (mode === 'single' || mode === 'double') {
      const quote = mode === 'single' ? "'" : '"';
      if (ch === quote) {
        if (peek === quote) {
          pos += 1; // doubled quote: escaped, stay inside the literal
        } else {
          mode = 'normal';
        }
      }
    } else if (mode === 'backtick') {
      if (ch === '`') {
        mode = 'normal';
      }
    } else if (mode === 'line') {
      if (ch === '\n') {
        mode = 'normal';
      }
    } else if (ch === '*' && peek === '/') {
      // Only 'block' remains: the comment closes here.
      mode = 'normal';
      pos += 1; // consume the '/'
    }
  }

  return markers;
}

/**
 * Fail fast, at bind time, on a parameter value that cannot be bound as a
 * scalar.
 *
 * Arrays and plain objects stringify to garbage (e.g. `[1,2,3]` → `"1,2,3"`)
 * that the server fails to coerce: on the Thrift path the operation never
 * returns to FINISHED (a DoS hazard), and on SEA it surfaces an opaque server
 * error. Rejecting them up front mirrors the kernel's compound-type
 * rejection. `DBSQLParameter` instances, `null` / `undefined`, `Int64`,
 * `Date`, and JS primitives pass through untouched.
 *
 * @param value - the candidate parameter value
 * @param label - prefix for the error message, identifying the rejected parameter
 * @throws ParameterError when `value` is an array, or an object that is
 *   neither a `Date` nor an `Int64`
 */
export function assertBindableValue(value: DBSQLParameter | DBSQLParameterValue, label: string): void {
  // Already-wrapped parameters and SQL NULL need no further inspection.
  if (value instanceof DBSQLParameter || value === null || value === undefined) {
    return;
  }

  if (Array.isArray(value)) {
    throw new ParameterError(
      `${label} is an array; compound types (ARRAY/MAP/STRUCT) are not bindable as a parameter value`,
    );
  }

  // Primitives and the two supported object wrappers are bindable.
  if (typeof value !== 'object' || value instanceof Date || value instanceof Int64) {
    return;
  }

  throw new ParameterError(
    `${label} is an object; only scalar values (string/number/bigint/boolean), Date, and Int64 are bindable`,
  );
}
44 changes: 41 additions & 3 deletions lib/sea/SeaNativeLoader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,51 @@ export interface SeaNativeStatement {
* napi-rs emits `string | undefined | null` for every Rust `Option<String>`
* parameter — both `undefined` and `null` are accepted at the call site.
*/
/**
 * A single positional bound parameter in the napi `{ sqlType, value? }`
 * shape the kernel's param codec (`parse_typed_value`) accepts. `sqlType`
 * is the Databricks SQL type name (`INT`, `STRING`, `TIMESTAMP`, … and the
 * parenthesised `DECIMAL(p,s)`); a missing `value` is SQL NULL. Built by
 * `SeaPositionalParams.buildSeaPositionalParams`.
 */
export interface SeaNativeTypedValueInput {
/** Databricks SQL type name; decimals use the parenthesised `DECIMAL(p,s)` form. */
sqlType: string;
/** Parameter value as a string; leave it out to bind SQL NULL. */
value?: string;
}

/**
 * A single named bound parameter — a `SeaNativeTypedValueInput` plus its
 * `:name`. Maps to the kernel's `param_named`. Built by
 * `SeaPositionalParams.buildSeaNamedParams`.
 */
export interface SeaNativeNamedTypedValueInput {
/**
 * Name of the `:name` placeholder this value binds to.
 * NOTE(review): presumably given without the leading `:` — confirm against the kernel.
 */
name: string;
/** Databricks SQL type name, with the same meaning as `SeaNativeTypedValueInput.sqlType`. */
sqlType: string;
/** Parameter value as a string; leave it out to bind SQL NULL. */
value?: string;
}

/**
 * Per-statement options accepted by the napi `executeStatement`. Matches
 * the kernel `ExecuteOptions`. All fields optional; an omitted/empty
 * object is the no-options fast path.
 */
export interface SeaNativeExecuteOptions {
/** Statement-scoped configuration key/value pairs. NOTE(review): exact semantics are defined by the kernel — confirm. */
statementConf?: Record<string, string>;
/** Query tags as key/value pairs. */
queryTags?: Record<string, string>;
/** Row limit for the statement. NOTE(review): whether this truncates or errors is not visible here — confirm. */
rowLimit?: number;
/** Query timeout; presumably in seconds, going by the `Secs` suffix — confirm. */
queryTimeoutSecs?: number;
/** Values bound to positional `?` placeholders, in placeholder order. */
positionalParams?: Array<SeaNativeTypedValueInput>;
/** Values bound to named `:name` placeholders. */
namedParams?: Array<SeaNativeNamedTypedValueInput>;
}

export interface SeaNativeConnection {
/**
* Execute a SQL statement. Catalog / schema / sessionConf are
* session-level — set on `openSession`, applied to every statement
* executed on the resulting `Connection`. No per-statement options.
* session-level — set on `openSession`. Per-statement options (bound
* positional parameters, row limit, query timeout, tags) ride on the
* optional `options` argument.
*/
executeStatement(sql: string): Promise<SeaNativeStatement>;
executeStatement(sql: string, options?: SeaNativeExecuteOptions): Promise<SeaNativeStatement>;

// ── Metadata methods ──────────────────────────────────────────────────
/** All catalogs visible to the session. */
Expand Down
99 changes: 99 additions & 0 deletions lib/sea/SeaPositionalParams.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright (c) 2026 Databricks, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { DBSQLParameter, DBSQLParameterValue } from '../DBSQLParameter';
import { SeaNativeTypedValueInput, SeaNativeNamedTypedValueInput } from './SeaNativeLoader';
import { assertBindableValue } from './SeaInputValidation';

/**
 * Compute the `precision,scale` pair used to build the SEA `DECIMAL(p,s)`
 * type name from a decimal value's string form.
 *
 * The kernel param codec rejects a bare `"DECIMAL"`; it needs the
 * parenthesised form so the caller's fractional digits are preserved.
 * Precision is the total number of digit characters in `v` (at least 1, at
 * most the Databricks maximum of 38); scale is the number of digit
 * characters after the first `.`, capped at the precision.
 * `"99.99"` ⇒ `"4,2"`; `"-123"` ⇒ `"3,0"`.
 *
 * @param v - the decimal value as a string
 * @returns the string `"<precision>,<scale>"`
 */
function decimalPrecisionScale(v: string): string {
  // Strip every non-digit and measure what is left.
  const countDigits = (text: string): number => text.replace(/\D/g, '').length;

  const pointAt = v.indexOf('.');
  const fractionalDigits = pointAt === -1 ? 0 : countDigits(v.slice(pointAt + 1));
  const totalDigits = countDigits(v);

  const precision = Math.min(38, Math.max(1, totalDigits));
  const scale = Math.min(fractionalDigits, precision);
  return `${precision},${scale}`;
}

/**
 * Translate a `DBSQLParameter | DBSQLParameterValue` into the napi
 * `TypedValueInput` (`{ sqlType, value? }`) consumed by the kernel's
 * positional-param codec (`parse_typed_value`).
 *
 * Type inference and value stringification are delegated to
 * `DBSQLParameter.toSparkParameter` — the same path the Thrift backend
 * uses — and the resulting type name is then adjusted for the codec:
 *  - `DECIMAL` becomes `DECIMAL(p,s)` (the parenthesised form is required);
 *  - any type starting with `INTERVAL` becomes plain `INTERVAL` (the codec's
 *    single interval type name);
 *  - a parameter with no string value becomes `{ sqlType: 'VOID' }` with no
 *    `value`, which `parse_typed_value` maps to SQL NULL.
 *
 * @param value - a wrapped parameter or a raw parameter value
 * @returns the typed value in napi shape
 */
function toTypedValueInput(value: DBSQLParameter | DBSQLParameterValue): SeaNativeTypedValueInput {
  const wrapped = value instanceof DBSQLParameter ? value : new DBSQLParameter({ value });
  const sparkParam = wrapped.toSparkParameter();
  const text = sparkParam.value?.stringValue;

  // NULL: toSparkParameter carries no value for null/undefined, and `VOID`
  // ignores whatever type was declared.
  if (text == null) {
    return { sqlType: 'VOID' };
  }

  const declaredType = sparkParam.type ?? 'STRING';
  const normalized = declaredType.toUpperCase();

  if (normalized === 'DECIMAL') {
    return { sqlType: `DECIMAL(${decimalPrecisionScale(text)})`, value: text };
  }
  if (normalized.startsWith('INTERVAL')) {
    return { sqlType: 'INTERVAL', value: text };
  }
  return { sqlType: declaredType, value: text };
}

/**
 * Build the napi `positionalParams` array (bound to 1-based `?`
 * placeholders) from the public `ordinalParameters` option.
 *
 * Each entry is first checked with `assertBindableValue` and then converted
 * with `toTypedValueInput`.
 *
 * @param ordinalParameters - caller-supplied positional values, in placeholder order
 * @returns the converted array, or `undefined` when nothing was supplied so
 *   the caller can keep the minimal no-options call shape
 * @throws ParameterError (via `assertBindableValue`) when an entry is not bindable
 */
export function buildSeaPositionalParams(
  ordinalParameters?: Array<DBSQLParameter | DBSQLParameterValue>,
): Array<SeaNativeTypedValueInput> | undefined {
  if (ordinalParameters === undefined) {
    return undefined;
  }
  if (ordinalParameters.length === 0) {
    return undefined;
  }

  return ordinalParameters.map((candidate, index) => {
    const label = `ordinalParameters[${index}]`;
    assertBindableValue(candidate, label);
    return toTypedValueInput(candidate);
  });
}

/**
 * Build the napi `namedParams` array (bound to `:name` placeholders) from
 * the public `namedParameters` option (`Record<name, value>`).
 *
 * Every value is checked with `assertBindableValue`, converted through the
 * same `toTypedValueInput` mapping used for positional parameters
 * (DECIMAL → DECIMAL(p,s), NULL → VOID, …), and paired with its key as
 * `name`.
 *
 * @param namedParameters - caller-supplied values keyed by parameter name
 * @returns the converted array, or `undefined` when nothing was supplied
 * @throws ParameterError (via `assertBindableValue`) when a value is not bindable
 */
export function buildSeaNamedParams(
  namedParameters?: Record<string, DBSQLParameter | DBSQLParameterValue>,
): Array<SeaNativeNamedTypedValueInput> | undefined {
  if (namedParameters === undefined) {
    return undefined;
  }
  // Local const keeps the narrowed (non-undefined) type inside the callback.
  const source = namedParameters;
  const names = Object.keys(source);
  if (names.length === 0) {
    return undefined;
  }

  return names.map((name) => {
    const candidate = source[name];
    assertBindableValue(candidate, `namedParameters[${name}]`);
    return { name, ...toTypedValueInput(candidate) };
  });
}
Loading
Loading