Skip to content

Commit 990c240

Browse files
committed
use arrow types for schema
1 parent 6bf7aea commit 990c240

File tree

2 files changed

+77
-68
lines changed

2 files changed

+77
-68
lines changed

src/arrow.mjs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/**
 * Describes every column of an Arrow table.
 *
 * @param {object} table - object exposing `schema.fields`, an array of Arrow fields.
 * @returns {Array<object>} one schema entry per field, in field order.
 */
export function getArrowTableSchema(table) {
  const {fields} = table.schema;
  return fields.map((field) => getArrowFieldSchema(field));
}
4+
5+
/**
 * Builds the schema entry for a single Arrow field.
 *
 * @param {object} field - Arrow field providing `name`, `type` and `nullable`.
 * @returns {{name: *, type: string, nullable: *, databaseType: string}}
 *   `type` is the generic type name derived from the Arrow type;
 *   `databaseType` is the Arrow type converted to a string.
 */
function getArrowFieldSchema(field) {
  const {name, type: arrowType, nullable} = field;
  const type = getArrowType(arrowType);
  const databaseType = String(arrowType);
  return {name, type, nullable, databaseType};
}
13+
14+
// https://github.com/apache/arrow/blob/89f9a0948961f6e94f1ef5e4f310b707d22a3c11/js/src/enum.ts#L140-L141
// Generic type name for each Arrow typeId that has a dedicated mapping; any
// typeId not listed here (including Interval = 11 and Map = 17) is "other".
const arrowTypeNames = new Map([
  [2, "integer"], // Int
  [3, "number"], // Float
  [7, "number"], // Decimal
  [4, "buffer"], // Binary
  [15, "buffer"], // FixedSizeBinary
  [5, "string"], // Utf8
  [6, "boolean"], // Bool
  [8, "date"], // Date
  [9, "date"], // Time
  [10, "date"], // Timestamp
  [12, "array"], // List
  [16, "array"], // FixedSizeList
  [13, "object"], // Struct
  [14, "object"] // Union
]);

/**
 * Translates an Arrow data type into a generic type name.
 *
 * @param {{typeId: number}} type - Arrow data type; only `typeId` is read.
 * @returns {string} one of "integer", "number", "buffer", "string",
 *   "boolean", "date", "array", "object" or "other".
 */
function getArrowType(type) {
  const typeName = arrowTypeNames.get(type.typeId);
  return typeName === undefined ? "other" : typeName;
}

src/duckdb.mjs

Lines changed: 33 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import {getArrowTableSchema} from "./arrow.mjs";
12
import {arrow9 as arrow, duckdb} from "./dependencies.mjs";
23
import {FileAttachment} from "./fileAttachment.mjs";
34

@@ -42,22 +43,17 @@ export class DuckDBClient {
4243

4344
async queryStream(query, params) {
4445
const connection = await this._db.connect();
45-
let reader, schema, batch;
46+
let reader, batch;
4647
try {
4748
reader = await connection.send(query, params);
4849
batch = await reader.next();
4950
if (batch.done) throw new Error("missing first batch");
50-
schema = batch.value.schema;
5151
} catch (error) {
5252
await connection.close();
5353
throw error;
5454
}
5555
return {
56-
schema: schema.fields.map(({name, type}) => ({
57-
name,
58-
type: getType(String(type)),
59-
databaseType: String(type)
60-
})),
56+
schema: getArrowTableSchema(batch.value),
6157
async *readRows() {
6258
try {
6359
while (!batch.done) {
@@ -113,13 +109,12 @@ export class DuckDBClient {
113109

114110
async describeColumns({table} = {}) {
115111
const columns = await this.query(`DESCRIBE ${table}`);
116-
return columns.map(({column_name, column_type}) => {
117-
return {
118-
name: column_name,
119-
type: getType(column_type),
120-
databaseType: column_type
121-
};
122-
});
112+
return columns.map(({column_name, column_type, null: nullable}) => ({
113+
name: column_name,
114+
type: getDuckDBType(column_type),
115+
nullable: nullable !== "NO",
116+
databaseType: column_type
117+
}));
123118
}
124119

125120
static async of(sources = {}, config = {}) {
@@ -227,67 +222,37 @@ async function loadArrow() {
227222
return await import(`${cdn}${arrow.resolve()}`);
228223
}
229224

230-
function getType(type) {
231-
switch (type.toLowerCase()) {
232-
case "bigint":
233-
case "int8":
234-
case "long":
225+
// https://duckdb.org/docs/sql/data_types/overview
226+
/**
 * Maps a DuckDB column type name (the `column_type` value of a `DESCRIBE`
 * row) to a generic type name.
 *
 * @param {string} type - DuckDB type name in upper case, e.g. "INTEGER" or
 *   "DECIMAL(18,3)". Matching is case-sensitive.
 * @returns {string} one of "bigint", "number", "integer", "boolean", "date",
 *   "string" or "other".
 */
function getDuckDBType(type) {
  switch (type) {
    case "BIGINT":
    case "HUGEINT":
    case "UBIGINT":
      return "bigint";
    case "DOUBLE":
    case "FLOAT": // canonical DuckDB name for 4-byte floats; REAL is an alias
    case "REAL":
      return "number";
    case "INTEGER":
    case "SMALLINT":
    case "TINYINT":
    case "USMALLINT":
    case "UINTEGER":
    case "UTINYINT":
      return "integer";
    case "BOOLEAN":
      return "boolean";
    case "DATE":
    case "TIMESTAMP":
    case "TIMESTAMP WITH TIME ZONE":
      return "date";
    case "VARCHAR":
    case "UUID":
      return "string";
    default:
      // BLOB, INTERVAL and TIME have no dedicated mapping and fall through to
      // "other" below.
      // Parameterized decimals such as DECIMAL(18,3) hold fractional values,
      // so they are "number" (the Arrow Decimal type is mapped the same way).
      if (/^DECIMAL\(/.test(type)) return "number";
      return "other";
  }
}

0 commit comments

Comments
 (0)