add duckdb-ui-client & other ts pkgs (#10)

* add duckdb-ui-client & other ts pkgs

* workflow fixes

* fix working dir

* no sparse checkout; specify package.json path

* path to pnpm-lock.yaml

* add check & build test

* workflow step descriptions

* use comments & names

* one more naming tweak
commit 0edb52054a (parent d6cc9eeea4)
Author: Jeff Raymakers
Date: 2025-06-13 09:06:55 -07:00

133 changed files with 11112 additions and 4 deletions

File: package.json

@@ -0,0 +1,38 @@
{
"name": "@duckdb/data-reader",
"version": "0.0.1",
"description": "Utilities for representing and reading tabular data returned by DuckDB",
"type": "module",
"main": "./out/index.js",
"module": "./out/index.js",
"types": "./out/index.d.ts",
"scripts": {
"preinstall": "pnpm build:src",
"build": "tsc -b src test",
"build:src": "tsc -b src",
"build:test": "tsc -b test",
"build:watch": "tsc -b src test --watch",
"check": "pnpm format:check && pnpm lint",
"clean": "rimraf out",
"format:check": "prettier . --ignore-path $(find-up .prettierignore) --check",
"format:write": "prettier . --ignore-path $(find-up .prettierignore) --write",
"lint": "pnpm eslint src test",
"test": "vitest run",
"test:watch": "vitest"
},
"dependencies": {
"@duckdb/data-types": "workspace:*",
"@duckdb/data-values": "workspace:*"
},
"devDependencies": {
"@eslint/js": "^9.24.0",
"eslint": "^9.24.0",
"find-up-cli": "^6.0.0",
"prettier": "^3.5.3",
"rimraf": "^6.0.1",
"typescript": "^5.8.3",
"typescript-eslint": "^8.30.1",
"vite": "^6.2.6",
"vitest": "^3.1.1"
}
}

File: src/AsyncDuckDBDataBatchIterator.ts

@@ -0,0 +1,11 @@
import { DuckDBData } from './DuckDBData.js';
export type DuckDBDataBatchIteratorResult = IteratorResult<
DuckDBData,
DuckDBData | undefined
>;
export type AsyncDuckDBDataBatchIterator = AsyncIterator<
DuckDBData,
DuckDBData | undefined
>;
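
A minimal sketch of an adapter that exposes an array of in-memory batches through this interface. The helper name batchIteratorFromArray is an assumption for illustration, not part of this commit:

import {
  AsyncDuckDBDataBatchIterator,
  DuckDBData,
  DuckDBDataBatchIteratorResult,
} from '@duckdb/data-reader';

// Assumed helper: wraps prepared batches so they can drive a DuckDBDataReader.
export function batchIteratorFromArray(
  batches: readonly DuckDBData[],
): AsyncDuckDBDataBatchIterator {
  let index = 0;
  return {
    async next(): Promise<DuckDBDataBatchIteratorResult> {
      if (index >= batches.length) {
        // Nothing left to yield; signal completion with no return value.
        return { done: true, value: undefined };
      }
      const value = batches[index++];
      if (index >= batches.length) {
        // Deliver the final batch together with the done signal.
        return { done: true, value };
      }
      return { done: false, value };
    },
  };
}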

File: src/ColumnFilteredDuckDBData.ts

@@ -0,0 +1,55 @@
import { DuckDBType } from '@duckdb/data-types';
import { DuckDBValue } from '@duckdb/data-values';
import { DuckDBData } from './DuckDBData.js';
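/**
 * Wraps another DuckDBData, exposing only the columns marked visible in the
 * given mask while preserving their original order.
 */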
export class ColumnFilteredDuckDBData extends DuckDBData {
private readonly inputColumnIndexForOutputColumnIndex: readonly number[];
constructor(
private data: DuckDBData,
columnVisibility: readonly boolean[],
) {
super();
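// Build a map from each output column index to the index of the next visible input column.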
const inputColumnIndexForOutputColumnIndex: number[] = [];
const inputColumnCount = data.columnCount;
let inputIndex = 0;
while (inputIndex < inputColumnCount) {
while (inputIndex < inputColumnCount && !columnVisibility[inputIndex]) {
inputIndex++;
}
if (inputIndex < inputColumnCount) {
inputColumnIndexForOutputColumnIndex.push(inputIndex++);
}
}
this.inputColumnIndexForOutputColumnIndex =
inputColumnIndexForOutputColumnIndex;
}
get columnCount() {
return this.inputColumnIndexForOutputColumnIndex.length;
}
get rowCount() {
return this.data.rowCount;
}
columnName(columnIndex: number): string {
return this.data.columnName(
this.inputColumnIndexForOutputColumnIndex[columnIndex],
);
}
columnType(columnIndex: number): DuckDBType {
return this.data.columnType(
this.inputColumnIndexForOutputColumnIndex[columnIndex],
);
}
value(columnIndex: number, rowIndex: number): DuckDBValue {
return this.data.value(
this.inputColumnIndexForOutputColumnIndex[columnIndex],
rowIndex,
);
}
}
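
A short usage sketch; the column names and values are invented for illustration:

import { INTEGER, VARCHAR } from '@duckdb/data-types';
import {
  ColumnFilteredDuckDBData,
  MemoryDuckDBData,
} from '@duckdb/data-reader';

// Three input columns; the visibility mask hides the middle one.
const data = new MemoryDuckDBData(
  [
    { name: 'id', type: INTEGER },
    { name: 'internal', type: VARCHAR },
    { name: 'label', type: VARCHAR },
  ],
  [
    [1, 2], // id (values are column-major)
    ['a', 'b'], // internal
    ['x', 'y'], // label
  ],
);
const filtered = new ColumnFilteredDuckDBData(data, [true, false, true]);
console.log(filtered.columnCount); // 2
console.log(filtered.columnName(1)); // 'label'
console.log(filtered.value(1, 0)); // 'x'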

File: src/DuckDBData.ts

@@ -0,0 +1,114 @@
import { DuckDBType } from '@duckdb/data-types';
import { DuckDBValue } from '@duckdb/data-values';
import { DuckDBRow } from './DuckDBRow.js';
/**
* A two-dimensional table of data along with column metadata.
*
* May represent either a partial or full result set, or a batch of rows read from a result stream.
*/
export abstract class DuckDBData {
/**
* Number of columns.
*
* May be zero until the first part of the result is read. Will not change after the initial read.
*/
abstract get columnCount(): number;
/**
* Current number of rows.
*
* For a partial result set, this may change as more rows are read.
* For a full result, or a batch, this will not change.
*/
abstract get rowCount(): number;
/**
* Returns the name of the column at the given index (starting at zero).
*
* Note that duplicate column names are possible.
*/
abstract columnName(columnIndex: number): string;
/**
* Returns the type of the column at the given index (starting at zero).
*/
abstract columnType(columnIndex: number): DuckDBType;
/**
* Returns the value for the given column and row. Both are zero-indexed.
*/
abstract value(columnIndex: number, rowIndex: number): DuckDBValue;
/**
* Returns the single value, assuming exactly one column and row. Throws otherwise.
*/
singleValue(): DuckDBValue {
const { columnCount, rowCount } = this;
if (columnCount === 0) {
throw Error('no column data');
}
if (rowCount === 0) {
throw Error('no rows');
}
if (columnCount > 1) {
throw Error('more than one column');
}
if (rowCount > 1) {
throw Error('more than one row');
}
return this.value(0, 0);
}
/**
* Returns the column names as an array.
*/
columnNames(): readonly string[] {
const { columnCount } = this;
const outputColumnNames: string[] = [];
for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
outputColumnNames.push(this.columnName(columnIndex));
}
return outputColumnNames;
}
/**
* Returns the column names as an array, deduplicated following DuckDB's "Auto-Increment Duplicate Column Names"
* behavior.
*/
deduplicatedColumnNames(): readonly string[] {
const { columnCount } = this;
const outputColumnNames: string[] = [];
const columnNameCount: { [columnName: string]: number } = {};
for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
const inputColumnName = this.columnName(columnIndex);
const nameCount = (columnNameCount[inputColumnName] || 0) + 1;
columnNameCount[inputColumnName] = nameCount;
if (nameCount > 1) {
outputColumnNames.push(`${inputColumnName}:${nameCount - 1}`);
} else {
outputColumnNames.push(inputColumnName);
}
}
return outputColumnNames;
}
/**
* Returns the data as an array of row objects, keyed by column names.
*
* The column names are deduplicated following DuckDB's "Auto-Increment Duplicate Column Names" behavior.
*/
toRows(): readonly DuckDBRow[] {
const { rowCount, columnCount } = this;
const outputColumnNames = this.deduplicatedColumnNames();
const outputRows: DuckDBRow[] = [];
for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
const row: { [columnName: string]: DuckDBValue } = {};
for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
row[outputColumnNames[columnIndex]] = this.value(columnIndex, rowIndex);
}
outputRows.push(row);
}
return outputRows;
}
}
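
As a sketch of the deduplication and row conversion, using MemoryDuckDBData from this same package with invented values:

import { INTEGER } from '@duckdb/data-types';
import { MemoryDuckDBData } from '@duckdb/data-reader';

// Two columns share the name 'n'; deduplication renames the second to 'n:1'.
const data = new MemoryDuckDBData(
  [
    { name: 'n', type: INTEGER },
    { name: 'n', type: INTEGER },
  ],
  [
    [1, 2], // first 'n' column
    [3, 4], // second 'n' column
  ],
);
console.log(data.deduplicatedColumnNames()); // ['n', 'n:1']
console.log(data.toRows()); // [{ n: 1, 'n:1': 3 }, { n: 2, 'n:1': 4 }]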

File: src/DuckDBDataReader.ts

@@ -0,0 +1,179 @@
import { DuckDBType } from '@duckdb/data-types';
import { DuckDBValue } from '@duckdb/data-values';
import { AsyncDuckDBDataBatchIterator } from './AsyncDuckDBDataBatchIterator.js';
import { DuckDBData } from './DuckDBData.js';
// Stores information about a run of similarly-sized batches.
interface BatchSizeRun {
batchCount: number;
batchSize: number;
rowCount: number; // Always equal to batchCount * batchSize. Precalculated for efficiency.
}
/**
* A result set that can be read incrementally.
*
* Represents either a partial or full result.
* For full results, the `done` property will be true.
* To read more rows into a partial result, use the `readUntil` or `readAll` methods.
*/
export class DuckDBDataReader extends DuckDBData {
private readonly iterator: AsyncDuckDBDataBatchIterator;
private iteratorDone: boolean = false;
private totalRowsRead: number = 0;
private readonly batches: DuckDBData[] = [];
// Stores the sizes of the batches using run-length encoding to make lookup efficient.
// Since batches before the last should be a consistent size, this array is not expected to grow beyond length 2.
// (One run for the N-1 batches of consistent size, plus one run for the differently-sized last batch, if any.)
private readonly batchSizeRuns: BatchSizeRun[] = [];
constructor(iterator: AsyncDuckDBDataBatchIterator) {
super();
this.iterator = iterator;
}
/**
* Number of columns.
*
* Will be zero until the first part of the result is read. Will not change after the initial read.
*/
public get columnCount(): number {
if (this.batches.length === 0) {
return 0;
}
return this.batches[0].columnCount;
}
/**
* Current number of rows.
*
* For a partial result set, with `done` false, this may change as more rows are read.
* For a full result, with `done` true, this will not change.
*/
public get rowCount(): number {
return this.totalRowsRead;
}
/**
* Returns the name of the column at the given index (starting at zero).
*
* Note that duplicate column names are possible.
*
* Will throw an error if no part of the result has been read yet.
*/
public columnName(columnIndex: number): string {
if (this.batches.length === 0) {
throw Error('no column data');
}
return this.batches[0].columnName(columnIndex);
}
/**
* Returns the type of the column at the given index (starting at zero).
*
* Will throw an error if no part of the result has been read yet.
*/
public columnType(columnIndex: number): DuckDBType {
if (this.batches.length === 0) {
throw Error('no column data');
}
return this.batches[0].columnType(columnIndex);
}
/**
* Returns the value for the given column and row. Both are zero-indexed.
*
* Will throw an error if `rowIndex` is not less than the current `rowCount`.
*/
public value(columnIndex: number, rowIndex: number): DuckDBValue {
if (this.totalRowsRead === 0) {
throw Error('no data');
}
let batchIndex = 0;
let currentRowIndex = rowIndex;
// Find which run of batches our row is in.
// Since batchSizeRuns shouldn't ever be longer than 2, this should be O(1).
for (const run of this.batchSizeRuns) {
if (currentRowIndex < run.rowCount) {
// The row we're looking for is in this run.
// Calculate the batch index and the row index in that batch.
batchIndex += Math.floor(currentRowIndex / run.batchSize);
const rowIndexInBatch = currentRowIndex % run.batchSize;
const batch = this.batches[batchIndex];
return batch.value(columnIndex, rowIndexInBatch);
}
// The row we're looking for is not in this run.
// Update our counts for this run and move to the next one.
batchIndex += run.batchCount;
currentRowIndex -= run.rowCount;
}
// We didn't find our row. It must have been out of range.
throw Error(
`Row index ${rowIndex} requested, but only ${this.totalRowsRead} rows have been read so far.`,
);
}
/**
* Returns true if all rows have been read.
*/
public get done(): boolean {
return this.iteratorDone;
}
/**
* Read all rows.
*/
public async readAll(): Promise<void> {
return this.read();
}
/**
* Read rows until at least the given target row count has been met.
*
* Note that the resulting row count could be greater than the target, since rows are read in batches, typically of 2048 rows each.
*/
public async readUntil(targetRowCount: number): Promise<void> {
return this.read(targetRowCount);
}
private async read(targetRowCount?: number): Promise<void> {
while (
!(
this.iteratorDone ||
(targetRowCount !== undefined && this.totalRowsRead >= targetRowCount)
)
) {
const { value, done } = await this.iterator.next();
if (value) {
this.updateBatchSizeRuns(value);
this.batches.push(value);
this.totalRowsRead += value.rowCount;
}
if (done) {
this.iteratorDone = done;
}
}
}
private updateBatchSizeRuns(batch: DuckDBData) {
if (this.batchSizeRuns.length > 0) {
const lastRun = this.batchSizeRuns[this.batchSizeRuns.length - 1];
if (lastRun.batchSize === batch.rowCount) {
// If the new batch is the same size as the last one, just update our last run.
lastRun.batchCount += 1;
lastRun.rowCount += lastRun.batchSize;
return;
}
}
// If this is our first batch, or it's a different size, create a new run.
this.batchSizeRuns.push({
batchCount: 1,
batchSize: batch.rowCount,
rowCount: batch.rowCount,
});
}
}
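
A sketch of incremental reading. It assumes the hypothetical batchIteratorFromArray helper sketched earlier; the batch contents are invented:

import { INTEGER } from '@duckdb/data-types';
import { DuckDBDataReader, MemoryDuckDBData } from '@duckdb/data-reader';

const columns = [{ name: 'n', type: INTEGER }];
const batches = [
  new MemoryDuckDBData(columns, [[1, 2, 3]]),
  new MemoryDuckDBData(columns, [[4, 5, 6]]),
  new MemoryDuckDBData(columns, [[7]]),
];
const reader = new DuckDBDataReader(batchIteratorFromArray(batches));

// Rows arrive in whole batches, so the row count can overshoot the target.
await reader.readUntil(4);
console.log(reader.rowCount); // 6 (two batches of 3)
console.log(reader.value(0, 4)); // 5

await reader.readAll();
console.log(reader.done); // true
console.log(reader.rowCount); // 7
// Internally the batch sizes are now run-length encoded as two runs:
// { batchCount: 2, batchSize: 3, rowCount: 6 } and { batchCount: 1, batchSize: 1, rowCount: 1 }.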

File: src/DuckDBRow.ts

@@ -0,0 +1,5 @@
import { DuckDBValue } from '@duckdb/data-values';
export interface DuckDBRow {
readonly [columnName: string]: DuckDBValue;
}

File: src/MemoryDuckDBData.ts

@@ -0,0 +1,32 @@
import { DuckDBType } from '@duckdb/data-types';
import { DuckDBValue } from '@duckdb/data-values';
import { DuckDBData } from './DuckDBData.js';
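/**
 * A DuckDBData backed by in-memory arrays.
 *
 * Values are stored in column-major order: `values[columnIndex][rowIndex]`.
 */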
export class MemoryDuckDBData extends DuckDBData {
constructor(
private columns: { name: string; type: DuckDBType }[],
private values: DuckDBValue[][],
) {
super();
}
get columnCount() {
return this.columns.length;
}
get rowCount() {
return this.values.length > 0 ? this.values[0].length : 0;
}
columnName(columnIndex: number): string {
return this.columns[columnIndex].name;
}
columnType(columnIndex: number): DuckDBType {
return this.columns[columnIndex].type;
}
value(columnIndex: number, rowIndex: number): DuckDBValue {
return this.values[columnIndex][rowIndex];
}
}

File: src/index.ts

@@ -0,0 +1,6 @@
export * from './AsyncDuckDBDataBatchIterator.js';
export * from './ColumnFilteredDuckDBData.js';
export * from './DuckDBData.js';
export * from './DuckDBDataReader.js';
export * from './DuckDBRow.js';
export * from './MemoryDuckDBData.js';

File: src/tsconfig.json

@@ -0,0 +1,6 @@
{
"extends": "../../../tsconfig.library.json",
"compilerOptions": {
"outDir": "../out"
}
}

File: test/DuckDBDataReader.test.ts

@@ -0,0 +1,177 @@
import { DuckDBType, INTEGER, VARCHAR } from '@duckdb/data-types';
import { DuckDBValue } from '@duckdb/data-values';
import { expect, suite, test } from 'vitest';
import {
AsyncDuckDBDataBatchIterator,
DuckDBData,
DuckDBDataReader,
MemoryDuckDBData,
} from '../src';
const ITERATOR_DONE = Object.freeze({ done: true, value: undefined });
class TestAsyncDuckDBDataBatchIterator implements AsyncDuckDBDataBatchIterator {
private batches: readonly DuckDBData[];
private nextBatchIndex: number | null;
constructor(batches: readonly DuckDBData[]) {
this.batches = batches;
this.nextBatchIndex = this.batches.length > 0 ? 0 : null;
}
async next(): Promise<IteratorResult<DuckDBData, undefined>> {
if (this.nextBatchIndex == null) {
return ITERATOR_DONE;
}
const nextBatch = this.batches[this.nextBatchIndex++];
if (this.nextBatchIndex >= this.batches.length) {
this.nextBatchIndex = null;
}
return {
done: this.nextBatchIndex == null,
value: nextBatch,
} as IteratorResult<DuckDBData, undefined>;
}
async return(): Promise<IteratorResult<DuckDBData, undefined>> {
return ITERATOR_DONE;
}
async throw(_err: Error): Promise<IteratorResult<DuckDBData, undefined>> {
return ITERATOR_DONE;
}
[Symbol.asyncIterator](): AsyncDuckDBDataBatchIterator {
return this;
}
}
function expectColumns(
data: DuckDBData,
columns: { name: string; type: DuckDBType }[],
) {
expect(data.columnCount).toBe(columns.length);
for (let columnIndex = 0; columnIndex < columns.length; columnIndex++) {
const column = columns[columnIndex];
expect(data.columnName(columnIndex)).toBe(column.name);
expect(data.columnType(columnIndex)).toStrictEqual(column.type);
}
}
function expectValues(data: DuckDBData, values: DuckDBValue[][]) {
for (let columnIndex = 0; columnIndex < values.length; columnIndex++) {
const column = values[columnIndex];
for (let rowIndex = 0; rowIndex < column.length; rowIndex++) {
expect(data.value(columnIndex, rowIndex)).toBe(column[rowIndex]);
}
}
}
suite('DuckDBDataReader', () => {
test('should work for an empty batch list', async () => {
const batches: DuckDBData[] = [];
const iterator = new TestAsyncDuckDBDataBatchIterator(batches);
const reader = new DuckDBDataReader(iterator);
expect(reader.done).toBe(false);
expect(reader.columnCount).toBe(0);
expect(reader.rowCount).toBe(0);
await reader.readAll();
expect(reader.done).toBe(true);
expect(reader.columnCount).toBe(0);
expect(reader.rowCount).toBe(0);
});
test('should work for a single batch', async () => {
const columns = [
{ name: 'num', type: INTEGER },
{ name: 'str', type: VARCHAR },
];
const values = [
[2, 3, 5],
['z', 'y', 'x'],
];
const batches: DuckDBData[] = [new MemoryDuckDBData(columns, values)];
const iterator = new TestAsyncDuckDBDataBatchIterator(batches);
const reader = new DuckDBDataReader(iterator);
expect(reader.done).toBe(false);
expect(reader.columnCount).toBe(0);
expect(reader.rowCount).toBe(0);
await reader.readAll();
expect(reader.done).toBe(true);
expectColumns(reader, columns);
expect(reader.rowCount).toBe(3);
expectValues(reader, values);
});
test('should work for multiple batches', async () => {
const columns = [
{ name: 'num', type: INTEGER },
{ name: 'str', type: VARCHAR },
];
const values = [
[12, 13, 15, 22, 23, 25, 32, 33, 35],
['z1', 'y1', 'x1', 'z2', 'y2', 'x2', 'z3', 'y3', 'x3'],
];
const batches: DuckDBData[] = [
new MemoryDuckDBData(columns, [
values[0].slice(0, 3),
values[1].slice(0, 3),
]),
new MemoryDuckDBData(columns, [
values[0].slice(3, 6),
values[1].slice(3, 6),
]),
new MemoryDuckDBData(columns, [
values[0].slice(6, 9),
values[1].slice(6, 9),
]),
];
const iterator = new TestAsyncDuckDBDataBatchIterator(batches);
const reader = new DuckDBDataReader(iterator);
expect(reader.done).toBe(false);
expect(reader.columnCount).toBe(0);
expect(reader.rowCount).toBe(0);
await reader.readAll();
expect(reader.done).toBe(true);
expectColumns(reader, columns);
expect(reader.rowCount).toBe(9);
expectValues(reader, values);
});
test('should work for partial reads of multiple batches', async () => {
const columns = [
{ name: 'num', type: INTEGER },
{ name: 'str', type: VARCHAR },
];
const values = [
[12, 13, 15, 22, 23, 25, 32, 33],
['z1', 'y1', 'x1', 'z2', 'y2', 'x2', 'z3', 'y3'],
];
const batches: DuckDBData[] = [
new MemoryDuckDBData(columns, [
values[0].slice(0, 3),
values[1].slice(0, 3),
]),
new MemoryDuckDBData(columns, [
values[0].slice(3, 6),
values[1].slice(3, 6),
]),
new MemoryDuckDBData(columns, [
values[0].slice(6, 8),
values[1].slice(6, 8),
]),
];
const iterator = new TestAsyncDuckDBDataBatchIterator(batches);
const reader = new DuckDBDataReader(iterator);
expect(reader.done).toBe(false);
expect(reader.columnCount).toBe(0);
expect(reader.rowCount).toBe(0);
await reader.readUntil(5);
expect(reader.done).toBe(false);
expectColumns(reader, columns);
expect(reader.rowCount).toBe(6);
expectValues(reader, [values[0].slice(0, 6), values[1].slice(0, 6)]);
await reader.readUntil(10);
expect(reader.done).toBe(true);
expect(reader.rowCount).toBe(8);
expectValues(reader, values);
});
});

File: test/tsconfig.json

@@ -0,0 +1,6 @@
{
"extends": "../../../tsconfig.test.json",
"references": [
{ "path": "../src" }
]
}