diff --git a/src/table.js b/src/table.js index f83a088..e7a876d 100644 --- a/src/table.js +++ b/src/table.js @@ -627,14 +627,13 @@ export function getSchema(source) { return {schema, inferred: false}; } -// This function applies table cell operations to an in-memory table (array of -// objects); it should be equivalent to the corresponding SQL query. TODO Use -// DuckDBClient for data arrays, too, and then we wouldn’t need our own __table -// function to do table operations on in-memory data? -export function __table(source, operations) { +// This function infers a schema from the source data, if one doesn't already +// exist, and merges type assertions into that schema. If the schema was +// inferred or if there are type assertions, it then coerces the rows in the +// source data to the types specified in the schema. +function applyTypes(source, operations) { const input = source; let {schema, inferred} = getSchema(source); - // Combine column types from schema with user-selected types in operations const types = new Map(schema.map(({name, type}) => [name, type])); if (operations.types) { for (const {name, type} of operations.types) { @@ -650,6 +649,66 @@ export function __table(source, operations) { // operations.types, above. source = source.map(d => coerceRow(d, types, schema)); } + return {source, schema}; +} + +function applyNames(source, operations) { + if (!operations.names) return source; + const overridesByName = new Map(operations.names.map((n) => [n.column, n])); + return source.map((d) => + Object.fromEntries(Object.keys(d).map((k) => { + const override = overridesByName.get(k); + return [override?.name ?? k, d[k]]; + })) + ); +} + +// This function applies table cell operations to an in-memory table (array of +// objects); it should be equivalent to the corresponding SQL query. TODO Use +// DuckDBClient for data arrays, too, and then we wouldn’t need our own __table +// function to do table operations on in-memory data? +export function __table(source, operations) { + const errors = new Map(); + const input = source; + const typed = applyTypes(source, operations); + source = typed.source; + let schema = typed.schema; + if (operations.derive) { + // Derived columns may depend on coerced values from the original data source, + // so we must evaluate derivations after the initial inference and coercion + // step. + const derivedSource = []; + operations.derive.map(({name, value}) => { + let columnErrors = []; + // Derived column formulas may reference renamed columns, so we must + // compute derivations on the renamed source. However, we don't modify the + // source itself with renamed names until after the other operations are + // applied, because operations like filter and sort reference original + // column names. + // TODO Allow derived columns to reference other derived columns. + applyNames(source, operations).map((row, index, rows) => { + let resolved; + try { + resolved = value(row, index, rows); + } catch (error) { + columnErrors.push({index, error}); + resolved = undefined; + } + if (derivedSource[index]) { + derivedSource[index] = {...derivedSource[index], [name]: resolved}; + } else { + derivedSource.push({[name]: resolved}); + } + }); + if (columnErrors.length) errors.set(name, columnErrors); + }); + // Since derived columns are untyped by default, we do a pass of type + // inference and coercion after computing the derived values. + const typedDerived = applyTypes(derivedSource, operations); + // Merge derived source and schema with the source dataset. + source = source.map((row, i) => ({...row, ...typedDerived.source[i]})); + schema = [...schema, ...typedDerived.schema]; + } for (const {type, operands} of operations.filter) { const [{value: column}] = operands; const values = operands.slice(1).map(({value}) => value); @@ -750,6 +809,8 @@ export function __table(source, operations) { if (from > 0 || to < Infinity) { source = source.slice(Math.max(0, from), Math.max(0, to)); } + // Preserve the schema for all columns. + let fullSchema = schema.slice(); if (operations.select.columns) { if (schema) { const schemaByName = new Map(schema.map((s) => [s.name, s])); @@ -767,16 +828,19 @@ export function __table(source, operations) { return ({...s, ...(override ? {name: override.name} : null)}); }); } - source = source.map((d) => - Object.fromEntries(Object.keys(d).map((k) => { - const override = overridesByName.get(k); - return [override?.name ?? k, d[k]]; - })) - ); + if (fullSchema) { + fullSchema = fullSchema.map((s) => { + const override = overridesByName.get(s.name); + return ({...s, ...(override ? {name: override.name} : null)}); + }); + } + source = applyNames(source, operations); } if (source !== input) { if (schema) source.schema = schema; } + source.fullSchema = fullSchema; + source.errors = errors; return source; } diff --git a/test/table-test.js b/test/table-test.js index 0a79525..ea4e471 100644 --- a/test/table-test.js +++ b/test/table-test.js @@ -504,6 +504,8 @@ describe("__table", () => { }; const expectedEmpty = [{}, {}, {}]; expectedEmpty.schema = []; + expectedEmpty.fullSchema = source.schema; + expectedEmpty.errors = new Map(); assert.deepStrictEqual( __table(source, operationsEmptyColumns), expectedEmpty @@ -514,6 +516,8 @@ describe("__table", () => { }; const expectedSelected = [{a: 1}, {a: 2}, {a: 3}]; expectedSelected.schema = [{name: "a", type: "integer", inferred: "integer"}]; + expectedSelected.fullSchema = source.schema; + expectedSelected.errors = new Map(); assert.deepStrictEqual( __table(source, operationsSelectedColumns), expectedSelected @@ -546,6 +550,8 @@ describe("__table", () => { }; const expectedEq = [{a: 1, b: 2, c: 3}]; expectedEq.schema = source.schema; + expectedEq.fullSchema = source.schema; + expectedEq.errors = new Map(); assert.deepStrictEqual(__table(source, operationsEquals), expectedEq); const operationsComparison = { ...EMPTY_TABLE_DATA.operations, @@ -568,6 +574,8 @@ describe("__table", () => { }; const expectedLtGt = [{a: 2, b: 4, c: 6}]; expectedLtGt.schema = source.schema; + expectedLtGt.fullSchema = source.schema; + expectedLtGt.errors = new Map(); assert.deepStrictEqual(__table(source, operationsComparison), expectedLtGt); }); @@ -586,6 +594,8 @@ describe("__table", () => { }; const expectedEq = [{a: 1, b: 2, c: 3}]; expectedEq.schema = source.schema; + expectedEq.fullSchema = source.schema; + expectedEq.errors = new Map(); assert.deepStrictEqual(__table(source, operationsEquals), expectedEq); const operationsComparison = { ...EMPTY_TABLE_DATA.operations, @@ -608,6 +618,8 @@ describe("__table", () => { }; const expectedLteGte = [{a: 2, b: 4, c: 6}]; expectedLteGte.schema = source.schema; + expectedLteGte.fullSchema = source.schema; + expectedLteGte.errors = new Map(); assert.deepStrictEqual( __table(source, operationsComparison), expectedLteGte @@ -634,6 +646,8 @@ describe("__table", () => { ]; const expected = [{a: new Date("2021-01-02")}]; expected.schema = [{name: "a", type: "date", inferred: "date"}]; + expected.fullSchema = expected.schema; + expected.errors = new Map(); assert.deepStrictEqual(__table(source, operationsEquals), expected); }); @@ -648,6 +662,8 @@ describe("__table", () => { {a: 1, b: 2, c: 3} ]; expectedDesc.schema = source.schema; + expectedDesc.fullSchema = source.schema; + expectedDesc.errors = new Map(); assert.deepStrictEqual(__table(source, operationsDesc), expectedDesc); const operationsAsc = { ...EMPTY_TABLE_DATA.operations, @@ -659,6 +675,8 @@ describe("__table", () => { {a: 3, b: 6, c: 9} ]; expectedAsc.schema = source.schema; + expectedAsc.fullSchema = source.schema; + expectedAsc.errors = new Map(); assert.deepStrictEqual(__table(source, operationsAsc), expectedAsc); const sourceExtended = [...source, {a: 1, b: 3, c: 3}, {a: 1, b: 5, c: 3}]; const operationsMulti = { @@ -676,6 +694,8 @@ describe("__table", () => { {a: 1, b: 2, c: 3} ]; expectedExtended.schema = source.schema; + expectedExtended.fullSchema = source.schema; + expectedExtended.errors = new Map(); assert.deepStrictEqual( __table(sourceExtended, operationsMulti), expectedExtended @@ -694,6 +714,8 @@ describe("__table", () => { {a: 20}, {a: 10}, {a: 5}, {a: 1}, {a: NaN}, {a: NaN}, {a: NaN}, {a: NaN} ]; expectedDesc.schema = [{name: "a", type: "number", inferred: "number"}]; + expectedDesc.fullSchema = expectedDesc.schema; + expectedDesc.errors = new Map(); assert.deepStrictEqual( __table(sourceWithMissing, operationsDesc), expectedDesc @@ -706,6 +728,8 @@ describe("__table", () => { {a: 1}, {a: 5}, {a: 10}, {a: 20}, {a: NaN}, {a: NaN}, {a: NaN}, {a: NaN} ]; expectedAsc.schema = [{name: "a", type: "number", inferred: "number"}]; + expectedAsc.fullSchema = expectedAsc.schema; + expectedAsc.errors = new Map(); assert.deepStrictEqual( __table(sourceWithMissing, operationsAsc), expectedAsc @@ -723,6 +747,8 @@ describe("__table", () => { {a: 1, b: 2, c: 3} ]; sorted.schema = source.schema; + sorted.fullSchema = source.schema; + sorted.errors = new Map(); assert.deepStrictEqual(__table(source, operations), sorted); const originalOrder = [ {a: 1, b: 2, c: 3}, @@ -743,6 +769,8 @@ describe("__table", () => { {a: 3, b: 6, c: 9} ]; expectedToNull.schema = source.schema; + expectedToNull.fullSchema = source.schema; + expectedToNull.errors = new Map(); assert.deepStrictEqual(__table(source, operationsToNull), expectedToNull); const operationsFromNull = { ...EMPTY_TABLE_DATA.operations, @@ -750,6 +778,8 @@ describe("__table", () => { }; const expectedFromNull = [{a: 1, b: 2, c: 3}]; expectedFromNull.schema = source.schema; + expectedFromNull.fullSchema = source.schema; + expectedFromNull.errors = new Map(); assert.deepStrictEqual( __table(source, operationsFromNull), expectedFromNull @@ -760,6 +790,8 @@ describe("__table", () => { }; const expectedSlice = [{a: 2, b: 4, c: 6}]; expectedSlice.schema = source.schema; + expectedSlice.fullSchema = source.schema; + expectedSlice.errors = new Map(); assert.deepStrictEqual(__table(source, operations), expectedSlice); }); @@ -794,18 +826,16 @@ describe("__table", () => { {nameA: 2, b: 4, c: 6}, {nameA: 3, b: 6, c: 9} ]; - expected.schema = [ + const schema = [ {name: "nameA", type: "integer", inferred: "integer"}, {name: "b", type: "integer", inferred: "integer"}, {name: "c", type: "integer", inferred: "integer"} ]; + expected.schema = schema; + expected.fullSchema = schema; + expected.errors = new Map(); assert.deepStrictEqual(__table(source, operations), expected); source.columns = ["a", "b", "c"]; - assert.deepStrictEqual(__table(source, operations).schema, [ - {name: "nameA", type: "integer", inferred: "integer"}, - {name: "b", type: "integer", inferred: "integer"}, - {name: "c", type: "integer", inferred: "integer"} - ]); }); it("__table type assertions", () => { @@ -823,13 +853,59 @@ describe("__table", () => { {name: "b", type: "integer", inferred: "integer"}, {name: "c", type: "integer", inferred: "integer"} ]; + expected.fullSchema = expected.schema; + expected.errors = new Map(); assert.deepStrictEqual(__table(source, operations), expected); source.columns = ["a", "b", "c"]; - assert.deepStrictEqual(__table(source, operations).schema, [ - {name: "a", type: "string", inferred: "integer"}, + }); + + it("__table derived columns", () => { + const operations = { + ...EMPTY_TABLE_DATA.operations, + derive: [{name: "d", value: (row) => row.a ** 2}] + }; + const expected = [ + {a: 1, b: 2, c: 3, d: 1}, + {a: 2, b: 4, c: 6, d: 4}, + {a: 3, b: 6, c: 9, d: 9} + ]; + expected.schema = [ + {name: "a", type: "integer", inferred: "integer"}, {name: "b", type: "integer", inferred: "integer"}, - {name: "c", type: "integer", inferred: "integer"} - ]); + {name: "c", type: "integer", inferred: "integer"}, + {name: "d", type: "integer", inferred: "integer"} + ]; + expected.fullSchema = expected.schema; + expected.errors = new Map(); + assert.deepStrictEqual(__table(source, operations), expected); + }); + + it("__table derived columns with errors", () => { + const functionWithError = (row) => row.a.b.c; + const operations = { + ...EMPTY_TABLE_DATA.operations, + derive: [{name: "d", value: functionWithError}] + }; + let error; + try { + functionWithError(source[0]); + } catch (e) { + error = e; + } + const expected = [ + {a: 1, b: 2, c: 3, d: undefined}, + {a: 2, b: 4, c: 6, d: undefined}, + {a: 3, b: 6, c: 9, d: undefined} + ]; + expected.schema = [ + {name: "a", type: "integer", inferred: "integer"}, + {name: "b", type: "integer", inferred: "integer"}, + {name: "c", type: "integer", inferred: "integer"}, + {name: "d", type: "other", inferred: "other"} + ]; + expected.fullSchema = expected.schema; + expected.errors = new Map([["d", [{index: 0, error}, {index: 1, error}, {index: 2, error}]]]); + assert.deepStrictEqual(__table(source, operations), expected); }); });