add duckdb-ui-client & other ts pkgs (#10)
* add duckdb-ui-client & other ts pkgs * workflow fixes * fix working dir * no sparse checkout; specify package.json path * path to pnpm-lock.yaml * add check & build test * workflow step descriptions * use comments & names * one more naming tweak
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
import { DuckDBData } from './DuckDBData.js';
|
||||
|
||||
/**
 * Result of a single step of a batch iterator.
 *
 * A yielded value is always a `DuckDBData` batch; the final (return) value is
 * either a `DuckDBData` batch or `undefined`.
 */
export type DuckDBDataBatchIteratorResult = IteratorResult<DuckDBData, DuckDBData | undefined>;
|
||||
|
||||
/**
 * Asynchronous iterator over `DuckDBData` batches.
 *
 * Each yielded value is a `DuckDBData` batch; the iterator's return value is
 * either a final `DuckDBData` batch or `undefined`.
 */
export type AsyncDuckDBDataBatchIterator = AsyncIterator<DuckDBData, DuckDBData | undefined>;
|
||||
55
ts/pkgs/duckdb-data-reader/src/ColumnFilteredDuckDBData.ts
Normal file
55
ts/pkgs/duckdb-data-reader/src/ColumnFilteredDuckDBData.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
import { DuckDBType } from '@duckdb/data-types';
|
||||
import { DuckDBValue } from '@duckdb/data-values';
|
||||
import { DuckDBData } from './DuckDBData.js';
|
||||
|
||||
/**
 * A view over another `DuckDBData` that exposes only the columns marked visible.
 *
 * Output columns keep the relative order of the input columns; hidden columns
 * are skipped. Row count and values are delegated to the wrapped data.
 */
export class ColumnFilteredDuckDBData extends DuckDBData {
  /** Snapshot of the visibility flags, indexed by input column index. */
  private readonly columnVisibility: readonly boolean[];

  /**
   * Cached mapping from output column index to input column index.
   *
   * Stays undefined while the wrapped data reports zero columns, because
   * `DuckDBData.columnCount` may be zero until the first part of a result has
   * been read. Once the wrapped data reports columns, the mapping is fixed.
   */
  private inputColumnIndexForOutputColumnIndex: readonly number[] | undefined;

  /**
   * @param data The data to filter.
   * @param columnVisibility One flag per input column; a truthy entry keeps the
   *   column. Input columns with no entry are treated as hidden.
   */
  constructor(
    private readonly data: DuckDBData,
    columnVisibility: readonly boolean[],
  ) {
    super();
    // Copy so later mutation of the caller's array cannot change this view.
    this.columnVisibility = [...columnVisibility];
    // Resolve eagerly when the wrapped data already knows its columns.
    this.resolveColumnMapping();
  }

  /** Number of visible columns. */
  get columnCount(): number {
    return this.resolveColumnMapping().length;
  }

  /** Number of rows; identical to the wrapped data's row count. */
  get rowCount(): number {
    return this.data.rowCount;
  }

  /** Returns the name of the visible column at the given (zero-based) output index. */
  columnName(columnIndex: number): string {
    return this.data.columnName(this.inputColumnIndex(columnIndex));
  }

  /** Returns the type of the visible column at the given (zero-based) output index. */
  columnType(columnIndex: number): DuckDBType {
    return this.data.columnType(this.inputColumnIndex(columnIndex));
  }

  /** Returns the value at the given visible column and row. Both are zero-indexed. */
  value(columnIndex: number, rowIndex: number): DuckDBValue {
    return this.data.value(this.inputColumnIndex(columnIndex), rowIndex);
  }

  /** Returns the output-to-input column mapping, computing and caching it when possible. */
  private resolveColumnMapping(): readonly number[] {
    if (this.inputColumnIndexForOutputColumnIndex !== undefined) {
      return this.inputColumnIndexForOutputColumnIndex;
    }
    const inputColumnCount = this.data.columnCount;
    const mapping: number[] = [];
    for (let inputIndex = 0; inputIndex < inputColumnCount; inputIndex++) {
      if (this.columnVisibility[inputIndex]) {
        mapping.push(inputIndex);
      }
    }
    // Only cache once the wrapped data actually has columns; a zero count may
    // just mean nothing has been read yet.
    if (inputColumnCount > 0) {
      this.inputColumnIndexForOutputColumnIndex = mapping;
    }
    return mapping;
  }

  /** Translates an output column index to an input column index, throwing if it is out of range. */
  private inputColumnIndex(columnIndex: number): number {
    const mapping = this.resolveColumnMapping();
    const inputIndex = mapping[columnIndex];
    if (inputIndex === undefined) {
      throw Error(
        `Column index ${columnIndex} is out of range; ${mapping.length} columns are visible.`,
      );
    }
    return inputIndex;
  }
}
|
||||
114
ts/pkgs/duckdb-data-reader/src/DuckDBData.ts
Normal file
114
ts/pkgs/duckdb-data-reader/src/DuckDBData.ts
Normal file
@@ -0,0 +1,114 @@
|
||||
import { DuckDBType } from '@duckdb/data-types';
|
||||
import { DuckDBValue } from '@duckdb/data-values';
|
||||
import { DuckDBRow } from './DuckDBRow.js';
|
||||
|
||||
/**
 * A two-dimensional table of data along with column metadata.
 *
 * May represent either a partial or full result set, or a batch of rows read from a result stream.
 */
export abstract class DuckDBData {
  /**
   * Number of columns.
   *
   * May be zero until the first part of the result is read. Will not change after the initial read.
   */
  abstract get columnCount(): number;

  /**
   * Current number of rows.
   *
   * For a partial result set, this may change as more rows are read.
   * For a full result, or a batch, this will not change.
   */
  abstract get rowCount(): number;

  /**
   * Returns the name of column at the given index (starting at zero).
   *
   * Note that duplicate column names are possible.
   */
  abstract columnName(columnIndex: number): string;

  /**
   * Returns the type of the column at the given index (starting at zero).
   */
  abstract columnType(columnIndex: number): DuckDBType;

  /**
   * Returns the value for the given column and row. Both are zero-indexed.
   */
  abstract value(columnIndex: number, rowIndex: number): DuckDBValue;

  /**
   * Returns the single value, assuming exactly one column and row.
   *
   * @throws Error if there are no columns, no rows, more than one column, or more than one row.
   */
  singleValue(): DuckDBValue {
    const { columnCount, rowCount } = this;
    if (columnCount === 0) {
      throw Error('no column data');
    }
    if (rowCount === 0) {
      throw Error('no rows');
    }
    if (columnCount > 1) {
      throw Error('more than one column');
    }
    if (rowCount > 1) {
      throw Error('more than one row');
    }
    return this.value(0, 0);
  }

  /**
   * Returns the column names as an array, in column order. Duplicates are preserved.
   */
  columnNames(): readonly string[] {
    const { columnCount } = this;
    const outputColumnNames: string[] = [];
    for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
      outputColumnNames.push(this.columnName(columnIndex));
    }
    return outputColumnNames;
  }

  /**
   * Returns the column names as an array, deduplicated following DuckDB's "Auto-Increment Duplicate Column Names"
   * behavior.
   *
   * The first occurrence of a name is kept as-is; the Nth repeat (N >= 1) becomes `name:N`.
   */
  deduplicatedColumnNames(): readonly string[] {
    const { columnCount } = this;
    const outputColumnNames: string[] = [];
    // A Map (rather than a plain object) is used so that column names such as
    // "constructor", "toString" or "__proto__" are counted like any other name
    // instead of colliding with members inherited from Object.prototype.
    const columnNameCount = new Map<string, number>();
    for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
      const inputColumnName = this.columnName(columnIndex);
      const nameCount = (columnNameCount.get(inputColumnName) || 0) + 1;
      columnNameCount.set(inputColumnName, nameCount);
      if (nameCount > 1) {
        outputColumnNames.push(`${inputColumnName}:${nameCount - 1}`);
      } else {
        outputColumnNames.push(inputColumnName);
      }
    }
    return outputColumnNames;
  }

  /**
   * Returns the data as an array of row objects, keyed by column names.
   *
   * The column names are deduplicated following DuckDB's "Auto-Increment Duplicate Column Names" behavior.
   */
  toRows(): readonly DuckDBRow[] {
    const { rowCount, columnCount } = this;
    const outputColumnNames = this.deduplicatedColumnNames();
    const outputRows: DuckDBRow[] = [];
    for (let rowIndex = 0; rowIndex < rowCount; rowIndex++) {
      const row: { [columnName: string]: DuckDBValue } = {};
      for (let columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        // Define the property explicitly rather than assigning it: plain
        // assignment to a key named "__proto__" would invoke the inherited
        // prototype setter instead of creating an own property on the row.
        Object.defineProperty(row, outputColumnNames[columnIndex], {
          value: this.value(columnIndex, rowIndex),
          writable: true,
          enumerable: true,
          configurable: true,
        });
      }
      outputRows.push(row);
    }
    return outputRows;
  }
}
|
||||
179
ts/pkgs/duckdb-data-reader/src/DuckDBDataReader.ts
Normal file
179
ts/pkgs/duckdb-data-reader/src/DuckDBDataReader.ts
Normal file
@@ -0,0 +1,179 @@
|
||||
import { DuckDBType } from '@duckdb/data-types';
|
||||
import { DuckDBValue } from '@duckdb/data-values';
|
||||
import { AsyncDuckDBDataBatchIterator } from './AsyncDuckDBDataBatchIterator.js';
|
||||
import { DuckDBData } from './DuckDBData.js';
|
||||
|
||||
/**
 * Describes a run of consecutive batches that share the same row count.
 */
interface BatchSizeRun {
  /** Number of batches in this run. */
  batchCount: number;

  /** Number of rows in each batch of this run. */
  batchSize: number;

  /** Always equal to batchCount * batchSize. Precalculated for efficiency. */
  rowCount: number;
}
|
||||
|
||||
/**
 * A result set that can be read incrementally.
 *
 * Represents either a partial or full result.
 * For full results, the `done` property will be true.
 * To read more rows into a partial result, use the `readUntil` or `readAll` methods.
 */
export class DuckDBDataReader extends DuckDBData {
  /** Source of batches; advanced by `readAll` / `readUntil`. */
  private readonly iterator: AsyncDuckDBDataBatchIterator;

  /** Set once the iterator has reported completion. */
  private iteratorDone = false;

  /** Sum of the row counts of all batches read so far. */
  private totalRowsRead = 0;

  /** Batches read so far, in the order the iterator produced them. */
  private readonly batches: DuckDBData[] = [];

  // Stores the sizes of the batches using run-length encoding to make lookup efficient.
  // Since batches before the last should be a consistent size, this array is not expected to grow beyond length 2.
  // (One run for the N-1 batches of consistent size, plus one run for the differently-size last batch, if any.)
  private readonly batchSizeRuns: BatchSizeRun[] = [];

  /**
   * @param iterator Iterator that supplies the batches making up the result.
   */
  constructor(iterator: AsyncDuckDBDataBatchIterator) {
    super();
    this.iterator = iterator;
  }

  /**
   * Number of columns.
   *
   * Will be zero until the first part of the result is read. Will not change after the initial read.
   */
  public get columnCount(): number {
    if (this.batches.length === 0) {
      return 0;
    }
    return this.batches[0].columnCount;
  }

  /**
   * Current number of rows.
   *
   * For a partial result set, with `done` false, this may change as more rows are read.
   * For a full result, with `done` true, this will not change.
   */
  public get rowCount(): number {
    return this.totalRowsRead;
  }

  /**
   * Returns the name of column at the given index (starting at zero).
   *
   * Note that duplicate column names are possible.
   *
   * @throws Error if no part of the result has been read yet.
   */
  public columnName(columnIndex: number): string {
    if (this.batches.length === 0) {
      throw Error('no column data');
    }
    return this.batches[0].columnName(columnIndex);
  }

  /**
   * Returns the type of the column at the given index (starting at zero).
   *
   * @throws Error if no part of the result has been read yet.
   */
  public columnType(columnIndex: number): DuckDBType {
    if (this.batches.length === 0) {
      throw Error('no column data');
    }
    return this.batches[0].columnType(columnIndex);
  }

  /**
   * Returns the value for the given column and row. Both are zero-indexed.
   *
   * @throws Error if no rows have been read yet, or if `rowIndex` is negative
   *   or not less than the current `rowCount`.
   */
  public value(columnIndex: number, rowIndex: number): DuckDBValue {
    if (this.totalRowsRead === 0) {
      throw Error('no data');
    }
    // A negative index would otherwise satisfy the "is in this run" test below
    // and produce a negative batch index, so only search for non-negative indexes.
    if (rowIndex >= 0) {
      let batchIndex = 0;
      let currentRowIndex = rowIndex;
      // Find which run of batches our row is in.
      // Since batchSizeRuns shouldn't ever be longer than 2, this should be O(1).
      for (const run of this.batchSizeRuns) {
        if (currentRowIndex < run.rowCount) {
          // The row we're looking for is in this run.
          // Calculate the batch index and the row index in that batch.
          batchIndex += Math.floor(currentRowIndex / run.batchSize);
          const rowIndexInBatch = currentRowIndex % run.batchSize;
          const batch = this.batches[batchIndex];
          return batch.value(columnIndex, rowIndexInBatch);
        }
        // The row we're looking for is not in this run.
        // Update our counts for this run and move to the next one.
        batchIndex += run.batchCount;
        currentRowIndex -= run.rowCount;
      }
    }
    // We didn't find our row. It must have been out of range.
    throw Error(
      `Row index ${rowIndex} requested, but only ${this.totalRowsRead} row have been read so far.`,
    );
  }

  /**
   * Returns true if all rows have been read.
   */
  public get done(): boolean {
    return this.iteratorDone;
  }

  /**
   * Read all rows.
   */
  public async readAll(): Promise<void> {
    return this.read();
  }

  /**
   * Read rows until at least the given target row count has been met.
   *
   * Note that the resulting row count could be greater than the target, since rows are read in batches, typically of 2048 rows each.
   */
  public async readUntil(targetRowCount: number): Promise<void> {
    return this.read(targetRowCount);
  }

  /**
   * Pulls batches from the iterator until it is done or, when a target is given,
   * until at least `targetRowCount` rows have been read.
   */
  private async read(targetRowCount?: number): Promise<void> {
    while (
      !(
        this.iteratorDone ||
        (targetRowCount !== undefined && this.totalRowsRead >= targetRowCount)
      )
    ) {
      const { value, done } = await this.iterator.next();
      // The final iterator result may carry a last batch as well as `done`,
      // so the value is recorded before the done flag is applied.
      if (value) {
        this.updateBatchSizeRuns(value);
        this.batches.push(value);
        this.totalRowsRead += value.rowCount;
      }
      if (done) {
        this.iteratorDone = done;
      }
    }
  }

  /**
   * Records the size of a newly read batch in the run-length encoded size list.
   */
  private updateBatchSizeRuns(batch: DuckDBData) {
    if (this.batchSizeRuns.length > 0) {
      const lastRun = this.batchSizeRuns[this.batchSizeRuns.length - 1];
      if (lastRun.batchSize === batch.rowCount) {
        // If the new batch is the same size as the last one, just update our last run.
        lastRun.batchCount += 1;
        lastRun.rowCount += lastRun.batchSize;
        return;
      }
    }
    // If this is our first batch, or it's a different size, create a new run.
    this.batchSizeRuns.push({
      batchCount: 1,
      batchSize: batch.rowCount,
      rowCount: batch.rowCount,
    });
  }
}
|
||||
5
ts/pkgs/duckdb-data-reader/src/DuckDBRow.ts
Normal file
5
ts/pkgs/duckdb-data-reader/src/DuckDBRow.ts
Normal file
@@ -0,0 +1,5 @@
|
||||
import { DuckDBValue } from '@duckdb/data-values';
|
||||
|
||||
/**
 * A single row of data: a read-only mapping from column name to that column's value.
 */
export interface DuckDBRow {
  /** Value of the column with the given name. */
  readonly [columnName: string]: DuckDBValue;
}
|
||||
32
ts/pkgs/duckdb-data-reader/src/MemoryDuckDBData.ts
Normal file
32
ts/pkgs/duckdb-data-reader/src/MemoryDuckDBData.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
import { DuckDBType } from '@duckdb/data-types';
|
||||
import { DuckDBValue } from '@duckdb/data-values';
|
||||
import { DuckDBData } from './DuckDBData.js';
|
||||
|
||||
/**
 * `DuckDBData` backed by in-memory arrays.
 *
 * `columns` supplies each column's name and type. `values` is indexed by column
 * first and row second, i.e. `values[columnIndex][rowIndex]`.
 */
export class MemoryDuckDBData extends DuckDBData {
  constructor(
    private columns: { name: string; type: DuckDBType }[],
    private values: DuckDBValue[][],
  ) {
    super();
  }

  /** Number of column descriptors supplied. */
  get columnCount(): number {
    const { columns } = this;
    return columns.length;
  }

  /** Length of the first column's value array, or zero when there are no value arrays. */
  get rowCount(): number {
    const { values } = this;
    if (values.length > 0) {
      return values[0].length;
    }
    return 0;
  }

  /** Returns the name of the column at the given (zero-based) index. */
  columnName(columnIndex: number): string {
    const column = this.columns[columnIndex];
    return column.name;
  }

  /** Returns the type of the column at the given (zero-based) index. */
  columnType(columnIndex: number): DuckDBType {
    const column = this.columns[columnIndex];
    return column.type;
  }

  /** Returns the value at the given column and row. Both are zero-indexed. */
  value(columnIndex: number, rowIndex: number): DuckDBValue {
    const columnValues = this.values[columnIndex];
    return columnValues[rowIndex];
  }
}
|
||||
6
ts/pkgs/duckdb-data-reader/src/index.ts
Normal file
6
ts/pkgs/duckdb-data-reader/src/index.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
export * from './AsyncDuckDBDataBatchIterator.js';
|
||||
export * from './ColumnFilteredDuckDBData.js';
|
||||
export * from './DuckDBData.js';
|
||||
export * from './DuckDBDataReader.js';
|
||||
export * from './DuckDBRow.js';
|
||||
export * from './MemoryDuckDBData.js';
|
||||
6
ts/pkgs/duckdb-data-reader/src/tsconfig.json
Normal file
6
ts/pkgs/duckdb-data-reader/src/tsconfig.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"extends": "../../../tsconfig.library.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "../out"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user