add duckdb-ui-client & other ts pkgs (#10)

* add duckdb-ui-client & other ts pkgs

* workflow fixes

* fix working dir

* no sparse checkout; specify package.json path

* path to pnpm-lock.yaml

* add check & build test

* workflow step descriptions

* use comments & names

* one more naming tweak
commit 0edb52054a (parent d6cc9eeea4)
Author: Jeff Raymakers
Date: 2025-06-13 09:06:55 -07:00

133 changed files with 11112 additions and 4 deletions

File: package.json

@@ -0,0 +1,38 @@
{
"name": "@duckdb/data-reader",
"version": "0.0.1",
"description": "Utilities for representing and reading tabular data returned by DuckDB",
"type": "module",
"main": "./out/index.js",
"module": "./out/index.js",
"types": "./out/index.d.ts",
"scripts": {
"preinstall": "pnpm build:src",
"build": "tsc -b src test",
"build:src": "tsc -b src",
"build:test": "tsc -b test",
"build:watch": "tsc -b src test --watch",
"check": "pnpm format:check && pnpm lint",
"clean": "rimraf out",
"format:check": "prettier . --ignore-path $(find-up .prettierignore) --check",
"format:write": "prettier . --ignore-path $(find-up .prettierignore) --write",
"lint": "pnpm eslint src test",
"test": "vitest run",
"test:watch": "vitest"
},
"dependencies": {
"@duckdb/data-types": "workspace:*",
"@duckdb/data-values": "workspace:*"
},
"devDependencies": {
"@eslint/js": "^9.24.0",
"eslint": "^9.24.0",
"find-up-cli": "^6.0.0",
"prettier": "^3.5.3",
"rimraf": "^6.0.1",
"typescript": "^5.8.3",
"typescript-eslint": "^8.30.1",
"vite": "^6.2.6",
"vitest": "^3.1.1"
}
}

File: src/AsyncDuckDBDataBatchIterator.ts

@@ -0,0 +1,11 @@
import { DuckDBData } from './DuckDBData.js';
export type DuckDBDataBatchIteratorResult = IteratorResult<
DuckDBData,
DuckDBData | undefined
>;
export type AsyncDuckDBDataBatchIterator = AsyncIterator<
DuckDBData,
DuckDBData | undefined
>;
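
A minimal sketch of an adapter that exposes an array of in-memory batches through this interface. The helper name batchIteratorFromArray is an assumption for illustration, not part of this commit:

import {
  AsyncDuckDBDataBatchIterator,
  DuckDBData,
  DuckDBDataBatchIteratorResult,
} from '@duckdb/data-reader';

// Assumed helper: wraps prepared batches so they can drive a DuckDBDataReader.
export function batchIteratorFromArray(
  batches: readonly DuckDBData[],
): AsyncDuckDBDataBatchIterator {
  let index = 0;
  return {
    async next(): Promise<DuckDBDataBatchIteratorResult> {
      if (index >= batches.length) {
        // Nothing left to yield; signal completion with no return value.
        return { done: true, value: undefined };
      }
      const value = batches[index++];
      if (index >= batches.length) {
        // Deliver the final batch together with the done signal.
        return { done: true, value };
      }
      return { done: false, value };
    },
  };
}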

File: src/ColumnFilteredDuckDBData.ts

@@ -0,0 +1,55 @@
import { DuckDBType } from '@duckdb/data-types';
import { DuckDBValue } from '@duckdb/data-values';
import { DuckDBData } from './DuckDBData.js';
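/**
 * Wraps another DuckDBData, exposing only the columns marked visible in the
 * given mask while preserving their original order.
 */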
export class ColumnFilteredDuckDBData extends DuckDBData {
private readonly inputColumnIndexForOutputColumnIndex: readonly number[];
constructor(
private data: DuckDBData,
columnVisibility: readonly boolean[],
) {
super();
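// Build a map from each output column index to the index of the next visible input column.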
const inputColumnIndexForOutputColumnIndex: number[] = [];
const inputColumnCount = data.columnCount;
let inputIndex = 0;
while (inputIndex < inputColumnCount) {
while (inputIndex < inputColumnCount && !columnVisibility[inputIndex]) {
inputIndex++;
}
if (inputIndex < inputColumnCount) {
inputColumnIndexForOutputColumnIndex.push(inputIndex++);
}
}
this.inputColumnIndexForOutputColumnIndex =
inputColumnIndexForOutputColumnIndex;
}
get columnCount() {
return this.inputColumnIndexForOutputColumnIndex.length;
}
get rowCount() {
return this.data.rowCount;
}
columnName(columnIndex: number): string {
return this.data.columnName(
this.inputColumnIndexForOutputColumnIndex[columnIndex],
);
}
columnType(columnIndex: number): DuckDBType {
return this.data.columnType(
this.inputColumnIndexForOutputColumnIndex[columnIndex],
);
}
value(columnIndex: number, rowIndex: number): DuckDBValue {
return this.data.value(
this.inputColumnIndexForOutputColumnIndex[columnIndex],
rowIndex,
);
}
}
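
A short usage sketch; the column names and values are invented for illustration:

import { INTEGER, VARCHAR } from '@duckdb/data-types';
import {
  ColumnFilteredDuckDBData,
  MemoryDuckDBData,
} from '@duckdb/data-reader';

// Three input columns; the visibility mask hides the middle one.
const data = new MemoryDuckDBData(
  [
    { name: 'id', type: INTEGER },
    { name: 'internal', type: VARCHAR },
    { name: 'label', type: VARCHAR },
  ],
  [
    [1, 2], // id (values are column-major)
    ['a', 'b'], // internal
    ['x', 'y'], // label
  ],
);
const filtered = new ColumnFilteredDuckDBData(data, [true, false, true]);
console.log(filtered.columnCount); // 2
console.log(filtered.columnName(1)); // 'label'
console.log(filtered.value(1, 0)); // 'x'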

File: src/DuckDBData.ts

@@ -0,0 +1,114 @@
import { DuckDBType } from '@duckdb/data-types';
import { DuckDBValue } from '@duckdb/data-values';
import { DuckDBRow } from './DuckDBRow.js';
/**
* A two-dimensional table of data along with column metadata.
*
* May represent either a partial or full result set, or a batch of rows read from a result stream.
*/
export abstract class DuckDBData {
/**
* Number of columns.
*
* May be zero until the first part of the result is read. Will not change after the initial read.
*/
abstract get columnCount(): number;
/**
* Current number of rows.
*
* For a partial result set, this may change as more rows are read.
* For a full result, or a batch, this will not change.
*/
abstract get rowCount(): number;
/**
* Returns the name of the column at the given index (starting at zero).
*
* Note that duplicate column names are possible.
*/
abstract columnName(columnIndex: number): string;
/**
* Returns the type of the column at the given index (starting at zero).
*/
abstract columnType(columnIndex: number): DuckDBType;
/**
* Returns the value for the given column and row. Both are zero-indexed.
*/
abstract value(columnIndex: number, rowIndex: number): DuckDBValue;
/**
* Returns the single value, assuming exactly one column and row. Throws otherwise.
*/
singleValue(): DuckDBValue {
const { columnCount, rowCount } = this;
if (columnCount === 0) {
throw Error('no column data');
}
if (rowCount === 0) {
throw Error('no rows');
}
if (columnCount > 1) {
throw Error('more than one column');
}
if (rowCount > 1) {
throw Error('more than one row');
}
return this.value(0, 0);
}
/**
* Returns the column names as an array.
*/
columnNames(): readonly string[] {
const { columnCount } = this;
const outputColumnNames: string[] = [];
for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
outputColumnNames.push(this.columnName(columnIndex));
}
return outputColumnNames;
}
/**
* Returns the column names as an array, deduplicated following DuckDB's "Auto-Increment Duplicate Column Names"
* behavior.
*/
deduplicatedColumnNames(): readonly string[] {
const { columnCount } = this;
const outputColumnNames: string[] = [];
const columnNameCount: { [columnName: string]: number } = {};
for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
const inputColumnName = this.columnName(columnIndex);
const nameCount = (columnNameCount[inputColumnName] || 0) + 1;
columnNameCount[inputColumnName] = nameCount;
if (nameCount > 1) {
outputColumnNames.push(`${inputColumnName}:${nameCount - 1}`);
} else {
outputColumnNames.push(inputColumnName);
}
}
return outputColumnNames;
}
/**
* Returns the data as an array of row objects, keyed by column names.
*
* The column names are deduplicated following DuckDB's "Auto-Increment Duplicate Column Names" behavior.
*/
toRows(): readonly DuckDBRow[] {
const { rowCount, columnCount } = this;
const outputColumnNames = this.deduplicatedColumnNames();
const outputRows: DuckDBRow[] = [];
for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
const row: { [columnName: string]: DuckDBValue } = {};
for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
row[outputColumnNames[columnIndex]] = this.value(columnIndex, rowIndex);
}
outputRows.push(row);
}
return outputRows;
}
}
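
As a sketch of the deduplication and row conversion, using MemoryDuckDBData from this same package with invented values:

import { INTEGER } from '@duckdb/data-types';
import { MemoryDuckDBData } from '@duckdb/data-reader';

// Two columns share the name 'n'; deduplication renames the second to 'n:1'.
const data = new MemoryDuckDBData(
  [
    { name: 'n', type: INTEGER },
    { name: 'n', type: INTEGER },
  ],
  [
    [1, 2], // first 'n' column
    [3, 4], // second 'n' column
  ],
);
console.log(data.deduplicatedColumnNames()); // ['n', 'n:1']
console.log(data.toRows()); // [{ n: 1, 'n:1': 3 }, { n: 2, 'n:1': 4 }]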

File: src/DuckDBDataReader.ts

@@ -0,0 +1,179 @@
import { DuckDBType } from '@duckdb/data-types';
import { DuckDBValue } from '@duckdb/data-values';
import { AsyncDuckDBDataBatchIterator } from './AsyncDuckDBDataBatchIterator.js';
import { DuckDBData } from './DuckDBData.js';
// Stores information about a run of similarly-sized batches.
interface BatchSizeRun {
batchCount: number;
batchSize: number;
rowCount: number; // Always equal to batchCount * batchSize. Precalculated for efficiency.
}
/**
* A result set that can be read incrementally.
*
* Represents either a partial or full result.
* For full results, the `done` property will be true.
* To read more rows into a partial result, use the `readUntil` or `readAll` methods.
*/
export class DuckDBDataReader extends DuckDBData {
private readonly iterator: AsyncDuckDBDataBatchIterator;
private iteratorDone: boolean = false;
private totalRowsRead: number = 0;
private readonly batches: DuckDBData[] = [];
// Stores the sizes of the batches using run-length encoding to make lookup efficient.
// Since batches before the last should be a consistent size, this array is not expected to grow beyond length 2.
// (One run for the N-1 batches of consistent size, plus one run for the differently-sized last batch, if any.)
private readonly batchSizeRuns: BatchSizeRun[] = [];
constructor(iterator: AsyncDuckDBDataBatchIterator) {
super();
this.iterator = iterator;
}
/**
* Number of columns.
*
* Will be zero until the first part of the result is read. Will not change after the initial read.
*/
public get columnCount(): number {
if (this.batches.length === 0) {
return 0;
}
return this.batches[0].columnCount;
}
/**
* Current number of rows.
*
* For a partial result set, with `done` false, this may change as more rows are read.
* For a full result, with `done` true, this will not change.
*/
public get rowCount(): number {
return this.totalRowsRead;
}
/**
* Returns the name of the column at the given index (starting at zero).
*
* Note that duplicate column names are possible.
*
* Will throw an error if no part of the result has been read yet.
*/
public columnName(columnIndex: number): string {
if (this.batches.length === 0) {
throw Error('no column data');
}
return this.batches[0].columnName(columnIndex);
}
/**
* Returns the type of the column at the given index (starting at zero).
*
* Will throw an error if no part of the result has been read yet.
*/
public columnType(columnIndex: number): DuckDBType {
if (this.batches.length === 0) {
throw Error('no column data');
}
return this.batches[0].columnType(columnIndex);
}
/**
* Returns the value for the given column and row. Both are zero-indexed.
*
* Will throw an error if `rowIndex` is not less than the current `rowCount`.
*/
public value(columnIndex: number, rowIndex: number): DuckDBValue {
if (this.totalRowsRead === 0) {
throw Error('no data');
}
let batchIndex = 0;
let currentRowIndex = rowIndex;
// Find which run of batches our row is in.
// Since batchSizeRuns shouldn't ever be longer than 2, this should be O(1).
for (const run of this.batchSizeRuns) {
if (currentRowIndex < run.rowCount) {
// The row we're looking for is in this run.
// Calculate the batch index and the row index in that batch.
batchIndex += Math.floor(currentRowIndex / run.batchSize);
const rowIndexInBatch = currentRowIndex % run.batchSize;
const batch = this.batches[batchIndex];
return batch.value(columnIndex, rowIndexInBatch);
}
// The row we're looking for is not in this run.
// Update our counts for this run and move to the next one.
batchIndex += run.batchCount;
currentRowIndex -= run.rowCount;
}
// We didn't find our row. It must have been out of range.
throw Error(
`Row index ${rowIndex} requested, but only ${this.totalRowsRead} rows have been read so far.`,
);
}
/**
* Returns true if all rows have been read.
*/
public get done(): boolean {
return this.iteratorDone;
}
/**
* Read all rows.
*/
public async readAll(): Promise<void> {
return this.read();
}
/**
* Read rows until at least the given target row count has been met.
*
* Note that the resulting row count could be greater than the target, since rows are read in batches, typically of 2048 rows each.
*/
public async readUntil(targetRowCount: number): Promise<void> {
return this.read(targetRowCount);
}
private async read(targetRowCount?: number): Promise<void> {
while (
!(
this.iteratorDone ||
(targetRowCount !== undefined && this.totalRowsRead >= targetRowCount)
)
) {
const { value, done } = await this.iterator.next();
if (value) {
this.updateBatchSizeRuns(value);
this.batches.push(value);
this.totalRowsRead += value.rowCount;
}
if (done) {
this.iteratorDone = done;
}
}
}
private updateBatchSizeRuns(batch: DuckDBData) {
if (this.batchSizeRuns.length > 0) {
const lastRun = this.batchSizeRuns[this.batchSizeRuns.length - 1];
if (lastRun.batchSize === batch.rowCount) {
// If the new batch is the same size as the last one, just update our last run.
lastRun.batchCount += 1;
lastRun.rowCount += lastRun.batchSize;
return;
}
}
// If this is our first batch, or it's a different size, create a new run.
this.batchSizeRuns.push({
batchCount: 1,
batchSize: batch.rowCount,
rowCount: batch.rowCount,
});
}
}
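
A sketch of incremental reading. It assumes the hypothetical batchIteratorFromArray helper sketched earlier; the batch contents are invented:

import { INTEGER } from '@duckdb/data-types';
import { DuckDBDataReader, MemoryDuckDBData } from '@duckdb/data-reader';

const columns = [{ name: 'n', type: INTEGER }];
const batches = [
  new MemoryDuckDBData(columns, [[1, 2, 3]]),
  new MemoryDuckDBData(columns, [[4, 5, 6]]),
  new MemoryDuckDBData(columns, [[7]]),
];
const reader = new DuckDBDataReader(batchIteratorFromArray(batches));

// Rows arrive in whole batches, so the row count can overshoot the target.
await reader.readUntil(4);
console.log(reader.rowCount); // 6 (two batches of 3)
console.log(reader.value(0, 4)); // 5

await reader.readAll();
console.log(reader.done); // true
console.log(reader.rowCount); // 7
// Internally the batch sizes are now run-length encoded as two runs:
// { batchCount: 2, batchSize: 3, rowCount: 6 } and { batchCount: 1, batchSize: 1, rowCount: 1 }.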

File: src/DuckDBRow.ts

@@ -0,0 +1,5 @@
import { DuckDBValue } from '@duckdb/data-values';
export interface DuckDBRow {
readonly [columnName: string]: DuckDBValue;
}

File: src/MemoryDuckDBData.ts

@@ -0,0 +1,32 @@
import { DuckDBType } from '@duckdb/data-types';
import { DuckDBValue } from '@duckdb/data-values';
import { DuckDBData } from './DuckDBData.js';
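/**
 * A DuckDBData backed by in-memory arrays.
 *
 * Values are stored in column-major order: `values[columnIndex][rowIndex]`.
 */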
export class MemoryDuckDBData extends DuckDBData {
constructor(
private columns: { name: string; type: DuckDBType }[],
private values: DuckDBValue[][],
) {
super();
}
get columnCount() {
return this.columns.length;
}
get rowCount() {
return this.values.length > 0 ? this.values[0].length : 0;
}
columnName(columnIndex: number): string {
return this.columns[columnIndex].name;
}
columnType(columnIndex: number): DuckDBType {
return this.columns[columnIndex].type;
}
value(columnIndex: number, rowIndex: number): DuckDBValue {
return this.values[columnIndex][rowIndex];
}
}

File: src/index.ts

@@ -0,0 +1,6 @@
export * from './AsyncDuckDBDataBatchIterator.js';
export * from './ColumnFilteredDuckDBData.js';
export * from './DuckDBData.js';
export * from './DuckDBDataReader.js';
export * from './DuckDBRow.js';
export * from './MemoryDuckDBData.js';

File: src/tsconfig.json

@@ -0,0 +1,6 @@
{
"extends": "../../../tsconfig.library.json",
"compilerOptions": {
"outDir": "../out"
}
}

File: test/DuckDBDataReader.test.ts

@@ -0,0 +1,177 @@
import { DuckDBType, INTEGER, VARCHAR } from '@duckdb/data-types';
import { DuckDBValue } from '@duckdb/data-values';
import { expect, suite, test } from 'vitest';
import {
AsyncDuckDBDataBatchIterator,
DuckDBData,
DuckDBDataReader,
MemoryDuckDBData,
} from '../src';
const ITERATOR_DONE = Object.freeze({ done: true, value: undefined });
class TestAsyncDuckDBDataBatchIterator implements AsyncDuckDBDataBatchIterator {
private batches: readonly DuckDBData[];
private nextBatchIndex: number | null;
constructor(batches: readonly DuckDBData[]) {
this.batches = batches;
this.nextBatchIndex = this.batches.length > 0 ? 0 : null;
}
async next(): Promise<IteratorResult<DuckDBData, undefined>> {
if (this.nextBatchIndex == null) {
return ITERATOR_DONE;
}
const nextBatch = this.batches[this.nextBatchIndex++];
if (this.nextBatchIndex >= this.batches.length) {
this.nextBatchIndex = null;
}
return {
done: this.nextBatchIndex == null,
value: nextBatch,
} as IteratorResult<DuckDBData, undefined>;
}
async return(): Promise<IteratorResult<DuckDBData, undefined>> {
return ITERATOR_DONE;
}
async throw(_err: Error): Promise<IteratorResult<DuckDBData, undefined>> {
return ITERATOR_DONE;
}
[Symbol.asyncIterator](): AsyncDuckDBDataBatchIterator {
return this;
}
}
function expectColumns(
data: DuckDBData,
columns: { name: string; type: DuckDBType }[],
) {
expect(data.columnCount).toBe(columns.length);
for (let columnIndex = 0; columnIndex < columns.length; columnIndex++) {
const column = columns[columnIndex];
expect(data.columnName(columnIndex)).toBe(column.name);
expect(data.columnType(columnIndex)).toStrictEqual(column.type);
}
}
function expectValues(data: DuckDBData, values: DuckDBValue[][]) {
for (let columnIndex = 0; columnIndex < values.length; columnIndex++) {
const column = values[columnIndex];
for (let rowIndex = 0; rowIndex < column.length; rowIndex++) {
expect(data.value(columnIndex, rowIndex)).toBe(column[rowIndex]);
}
}
}
suite('DuckDBDataReader', () => {
test('should work for an empty batch list', async () => {
const batches: DuckDBData[] = [];
const iterator = new TestAsyncDuckDBDataBatchIterator(batches);
const reader = new DuckDBDataReader(iterator);
expect(reader.done).toBe(false);
expect(reader.columnCount).toBe(0);
expect(reader.rowCount).toBe(0);
await reader.readAll();
expect(reader.done).toBe(true);
expect(reader.columnCount).toBe(0);
expect(reader.rowCount).toBe(0);
});
test('should work for a single batch', async () => {
const columns = [
{ name: 'num', type: INTEGER },
{ name: 'str', type: VARCHAR },
];
const values = [
[2, 3, 5],
['z', 'y', 'x'],
];
const batches: DuckDBData[] = [new MemoryDuckDBData(columns, values)];
const iterator = new TestAsyncDuckDBDataBatchIterator(batches);
const reader = new DuckDBDataReader(iterator);
expect(reader.done).toBe(false);
expect(reader.columnCount).toBe(0);
expect(reader.rowCount).toBe(0);
await reader.readAll();
expect(reader.done).toBe(true);
expectColumns(reader, columns);
expect(reader.rowCount).toBe(3);
expectValues(reader, values);
});
test('should work for multiple batches', async () => {
const columns = [
{ name: 'num', type: INTEGER },
{ name: 'str', type: VARCHAR },
];
const values = [
[12, 13, 15, 22, 23, 25, 32, 33, 35],
['z1', 'y1', 'x1', 'z2', 'y2', 'x2', 'z3', 'y3', 'x3'],
];
const batches: DuckDBData[] = [
new MemoryDuckDBData(columns, [
values[0].slice(0, 3),
values[1].slice(0, 3),
]),
new MemoryDuckDBData(columns, [
values[0].slice(3, 6),
values[1].slice(3, 6),
]),
new MemoryDuckDBData(columns, [
values[0].slice(6, 9),
values[1].slice(6, 9),
]),
];
const iterator = new TestAsyncDuckDBDataBatchIterator(batches);
const reader = new DuckDBDataReader(iterator);
expect(reader.done).toBe(false);
expect(reader.columnCount).toBe(0);
expect(reader.rowCount).toBe(0);
await reader.readAll();
expect(reader.done).toBe(true);
expectColumns(reader, columns);
expect(reader.rowCount).toBe(9);
expectValues(reader, values);
});
test('should work for partial reads of multiple batches', async () => {
const columns = [
{ name: 'num', type: INTEGER },
{ name: 'str', type: VARCHAR },
];
const values = [
[12, 13, 15, 22, 23, 25, 32, 33],
['z1', 'y1', 'x1', 'z2', 'y2', 'x2', 'z3', 'y3'],
];
const batches: DuckDBData[] = [
new MemoryDuckDBData(columns, [
values[0].slice(0, 3),
values[1].slice(0, 3),
]),
new MemoryDuckDBData(columns, [
values[0].slice(3, 6),
values[1].slice(3, 6),
]),
new MemoryDuckDBData(columns, [
values[0].slice(6, 8),
values[1].slice(6, 8),
]),
];
const iterator = new TestAsyncDuckDBDataBatchIterator(batches);
const reader = new DuckDBDataReader(iterator);
expect(reader.done).toBe(false);
expect(reader.columnCount).toBe(0);
expect(reader.rowCount).toBe(0);
await reader.readUntil(5);
expect(reader.done).toBe(false);
expectColumns(reader, columns);
expect(reader.rowCount).toBe(6);
expectValues(reader, [values[0].slice(0, 6), values[1].slice(0, 6)]);
await reader.readUntil(10);
expect(reader.done).toBe(true);
expect(reader.rowCount).toBe(8);
expectValues(reader, values);
});
});

File: test/tsconfig.json

@@ -0,0 +1,6 @@
{
"extends": "../../../tsconfig.test.json",
"references": [
{ "path": "../src" }
]
}