From a8f79980de88403a680b7e7a22a323aaf68f10f6 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Fri, 3 Sep 2021 16:29:18 -0700 Subject: [PATCH 01/34] XLSX support with ExcelJS --- src/dependencies.js | 1 + src/fileAttachment.js | 7 ++- src/xlsx.js | 100 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 src/xlsx.js diff --git a/src/dependencies.js b/src/dependencies.js index ae845fc3..e721f673 100644 --- a/src/dependencies.js +++ b/src/dependencies.js @@ -16,3 +16,4 @@ export const vegaliteApi = dependency("vega-lite-api", "5.0.0", "build/vega-lite export const arrow = dependency("apache-arrow", "4.0.1", "Arrow.es2015.min.js"); export const arquero = dependency("arquero", "4.8.4", "dist/arquero.min.js"); export const topojson = dependency("topojson-client", "3.1.0", "dist/topojson-client.min.js"); +export const exceljs = dependency("exceljs", "4.3.0", "dist/exceljs.min.js"); diff --git a/src/fileAttachment.js b/src/fileAttachment.js index ac2c4d44..d185dded 100644 --- a/src/fileAttachment.js +++ b/src/fileAttachment.js @@ -1,7 +1,8 @@ import {autoType, csvParse, csvParseRows, tsvParse, tsvParseRows} from "d3-dsv"; import {require as requireDefault} from "d3-require"; -import {arrow, jszip} from "./dependencies.js"; +import {arrow, jszip, exceljs} from "./dependencies.js"; import {SQLiteDatabaseClient} from "./sqlite.js"; +import {ExcelWorkbook} from "./xlsx.js"; async function remote_fetch(file) { const response = await fetch(await file.url()); @@ -70,6 +71,10 @@ class AbstractFile { async html() { return this.xml("text/html"); } + async xlsx() { + const [ExcelJS, buffer] = await Promise.all([requireDefault(exceljs.resolve()), this.arrayBuffer()]); + return new ExcelWorkbook(await new ExcelJS.Workbook().xlsx.load(buffer)); + } } class FileAttachment extends AbstractFile { diff --git a/src/xlsx.js b/src/xlsx.js new file mode 100644 index 00000000..8075eff2 --- /dev/null +++ b/src/xlsx.js @@ -0,0 +1,100 @@ +export class ExcelWorkbook { + constructor(workbook) { + Object.defineProperty(this, "_", {value: workbook}); + } + sheetNames() { + return this._.worksheets.map(sheet => sheet.name); + } + sheet(name, options) { + name = (typeof name === "number") ? name = this.sheetNames()[name] : name + ""; + const sheet = this._.getWorksheet(name); + if (!sheet) throw new Error(`Sheet not found: ${name}`); + return extract(sheet, options); + } +} + +function extract(sheet, options = {}) { + const { range, headers = false } = options; + let [[c0, r0], [c1, r1]] = parseRange(range, sheet); + const empty = {}; + const output = new Array(r1 - r0).fill(empty); + + const headerRow = headers && sheet._rows[r0++]; + const seen = new Set(); + const names = []; + function name(n) { + if (!names[n]) { + let name = (headerRow ? valueOf(headerRow._cells[n]) : AA(n)) || AA(n); + while (seen.has(name)) name += "_"; + seen.add((names[n] = name)); + } + return names[n]; + } + + for (let r = r0; r < r1; r++) { + const _row = sheet._rows[r]; + if (!_row || !_row.hasValues) continue; + const row = (output[r - r0] = {}); + for (let c = c0; c < c1; c++) { + const value = valueOf(_row._cells[c]); + if (value) row[name(c)] = value; + } + } + + output.columns = names.filter(() => true); + return output; +} + +function valueOf(cell) { + if (!cell) return; + const { value } = cell; + if (value && typeof value === "object") { + if (value.formula) return value.result; + if (value.richText) return value.richText.map((d) => d.text).join(""); + if (value.text && value.hyperlink) + return `${value.text}`; + } + return value; +} + +function parseRange(specifier = {}, { columnCount, rowCount }) { + if (typeof specifier === "string") { + const [ + [c0 = 0, r0 = 0] = [], + [c1 = columnCount, r1 = rowCount] = [] + ] = specifier.split(":").map(NN); + return [ + [c0, r0], + [c1, r1] + ]; + } else if (typeof specifier === "object") { + const { + start: [c0 = 0, r0 = 0] = [], + end: [c1 = columnCount, r1 = rowCount] = [] + } = specifier; + return [ + [c0, r0], + [c1, r1] + ]; + } +} + +function AA(x) { + let s = ""; + x++; + do { + s = String.fromCharCode(64 + (x % 26 || 26)) + s; + } while ((x = Math.floor((x - 1) / 26))); + return s; +} + +function NN(s = "") { + const [, sc, sr] = s.match(/^([a-zA-Z]+)?(\d+)?$/); + let c = undefined; + if (sc) { + c = 0; + for (let i = 0; i < sc.length; i++) + c += Math.pow(26, sc.length - i - 1) * (sc.charCodeAt(i) - 64); + } + return [c && c - 1, sr && +sr - 1]; +} From 38fceabc8493a9e169d49ce27555d4c4b3b67774 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Fri, 3 Sep 2021 16:37:41 -0700 Subject: [PATCH 02/34] Prettier --- src/xlsx.js | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index 8075eff2..039c3da8 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -3,18 +3,19 @@ export class ExcelWorkbook { Object.defineProperty(this, "_", {value: workbook}); } sheetNames() { - return this._.worksheets.map(sheet => sheet.name); + return this._.worksheets.map((sheet) => sheet.name); } sheet(name, options) { - name = (typeof name === "number") ? name = this.sheetNames()[name] : name + ""; - const sheet = this._.getWorksheet(name); + const sheet = this._.getWorksheet( + typeof name === "number" ? this.sheetNames()[name] : name + "" + ); if (!sheet) throw new Error(`Sheet not found: ${name}`); return extract(sheet, options); } } function extract(sheet, options = {}) { - const { range, headers = false } = options; + const {range, headers = false} = options; let [[c0, r0], [c1, r1]] = parseRange(range, sheet); const empty = {}; const output = new Array(r1 - r0).fill(empty); @@ -47,7 +48,7 @@ function extract(sheet, options = {}) { function valueOf(cell) { if (!cell) return; - const { value } = cell; + const {value} = cell; if (value && typeof value === "object") { if (value.formula) return value.result; if (value.richText) return value.richText.map((d) => d.text).join(""); @@ -57,24 +58,22 @@ function valueOf(cell) { return value; } -function parseRange(specifier = {}, { columnCount, rowCount }) { +function parseRange(specifier = {}, {columnCount, rowCount}) { if (typeof specifier === "string") { - const [ - [c0 = 0, r0 = 0] = [], - [c1 = columnCount, r1 = rowCount] = [] - ] = specifier.split(":").map(NN); + const [[c0 = 0, r0 = 0] = [], [c1 = columnCount, r1 = rowCount] = []] = + specifier.split(":").map(NN); return [ [c0, r0], - [c1, r1] + [c1, r1], ]; } else if (typeof specifier === "object") { const { start: [c0 = 0, r0 = 0] = [], - end: [c1 = columnCount, r1 = rowCount] = [] + end: [c1 = columnCount, r1 = rowCount] = [], } = specifier; return [ [c0, r0], - [c1, r1] + [c1, r1], ]; } } From 922644606239b3bef5d6955f495da5ca66909ac3 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Sat, 4 Sep 2021 14:29:38 -0700 Subject: [PATCH 03/34] Change range option to nested arrays General code clean up --- src/xlsx.js | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index 039c3da8..920a5dd3 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -5,20 +5,18 @@ export class ExcelWorkbook { sheetNames() { return this._.worksheets.map((sheet) => sheet.name); } - sheet(name, options) { + sheet(name, {range, headers = false} = {}) { const sheet = this._.getWorksheet( typeof name === "number" ? this.sheetNames()[name] : name + "" ); if (!sheet) throw new Error(`Sheet not found: ${name}`); - return extract(sheet, options); + return extract(sheet, {range, headers}); } } -function extract(sheet, options = {}) { - const {range, headers = false} = options; +function extract(sheet, {range, headers}) { let [[c0, r0], [c1, r1]] = parseRange(range, sheet); - const empty = {}; - const output = new Array(r1 - r0).fill(empty); + const output = new Array(r1 - r0).fill({}); const headerRow = headers && sheet._rows[r0++]; const seen = new Set(); @@ -58,7 +56,7 @@ function valueOf(cell) { return value; } -function parseRange(specifier = {}, {columnCount, rowCount}) { +function parseRange(specifier = [], {columnCount, rowCount}) { if (typeof specifier === "string") { const [[c0 = 0, r0 = 0] = [], [c1 = columnCount, r1 = rowCount] = []] = specifier.split(":").map(NN); @@ -67,10 +65,8 @@ function parseRange(specifier = {}, {columnCount, rowCount}) { [c1, r1], ]; } else if (typeof specifier === "object") { - const { - start: [c0 = 0, r0 = 0] = [], - end: [c1 = columnCount, r1 = rowCount] = [], - } = specifier; + const [[c0 = 0, r0 = 0] = [], [c1 = columnCount, r1 = rowCount] = []] = + specifier; return [ [c0, r0], [c1, r1], @@ -78,13 +74,13 @@ function parseRange(specifier = {}, {columnCount, rowCount}) { } } -function AA(x) { - let s = ""; - x++; +function AA(c) { + let sc = ""; + c++; do { - s = String.fromCharCode(64 + (x % 26 || 26)) + s; - } while ((x = Math.floor((x - 1) / 26))); - return s; + sc = String.fromCharCode(64 + (c % 26 || 26)) + sc; + } while ((c = Math.floor((c - 1) / 26))); + return sc; } function NN(s = "") { From 77159f0220876ee704646cb51d3b0e894a81308a Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Sat, 4 Sep 2021 22:23:53 -0700 Subject: [PATCH 04/34] Tests and bug fixes --- src/xlsx.js | 21 ++++--- test/xlsx-test.js | 156 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+), 9 deletions(-) create mode 100644 test/xlsx-test.js diff --git a/src/xlsx.js b/src/xlsx.js index 920a5dd3..fd3a53e2 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -16,8 +16,6 @@ export class ExcelWorkbook { function extract(sheet, {range, headers}) { let [[c0, r0], [c1, r1]] = parseRange(range, sheet); - const output = new Array(r1 - r0).fill({}); - const headerRow = headers && sheet._rows[r0++]; const seen = new Set(); const names = []; @@ -30,13 +28,14 @@ function extract(sheet, {range, headers}) { return names[n]; } - for (let r = r0; r < r1; r++) { + const output = new Array(r1 - r0 + 1).fill({}); + for (let r = r0; r <= r1; r++) { const _row = sheet._rows[r]; if (!_row || !_row.hasValues) continue; const row = (output[r - r0] = {}); - for (let c = c0; c < c1; c++) { + for (let c = c0; c <= c1; c++) { const value = valueOf(_row._cells[c]); - if (value) row[name(c)] = value; + if (value !== null && value !== undefined) row[name(c)] = value; } } @@ -58,15 +57,19 @@ function valueOf(cell) { function parseRange(specifier = [], {columnCount, rowCount}) { if (typeof specifier === "string") { - const [[c0 = 0, r0 = 0] = [], [c1 = columnCount, r1 = rowCount] = []] = - specifier.split(":").map(NN); + const [ + [c0 = 0, r0 = 0] = [], + [c1 = columnCount - 1, r1 = rowCount - 1] = [], + ] = specifier.split(":").map(NN); return [ [c0, r0], [c1, r1], ]; } else if (typeof specifier === "object") { - const [[c0 = 0, r0 = 0] = [], [c1 = columnCount, r1 = rowCount] = []] = - specifier; + const [ + [c0 = 0, r0 = 0] = [], + [c1 = columnCount - 1, r1 = rowCount - 1] = [], + ] = specifier; return [ [c0, r0], [c1, r1], diff --git a/test/xlsx-test.js b/test/xlsx-test.js new file mode 100644 index 00000000..940cea98 --- /dev/null +++ b/test/xlsx-test.js @@ -0,0 +1,156 @@ +import {test} from "tap"; +import {ExcelWorkbook} from "../src/xlsx.js"; + +function mockWorkbook(contents) { + return { + worksheets: Object.keys(contents).map((name) => ({name})), + getWorksheet(name) { + const _rows = contents[name]; + return { + _rows: _rows.map((row) => ({ + _cells: row.map((cell) => ({value: cell})), + hasValues: !!row.length, + })), + rowCount: _rows.length, + columnCount: Math.max(..._rows.map((r) => r.length)), + }; + }, + }; +} + +test("FileAttachment.xlsx reads sheet names", (t) => { + const workbook = new ExcelWorkbook(mockWorkbook({Sheet1: []})); + t.same(workbook.sheetNames(), ["Sheet1"]); + t.end(); +}); + +test("FileAttachment.xlsx reads sheets", (t) => { + const workbook = new ExcelWorkbook( + mockWorkbook({ + Sheet1: [ + ["one", "two", "three"], + [1, 2, 3], + ], + }) + ); + t.same(workbook.sheet(0), [ + {A: "one", B: "two", C: "three"}, + {A: 1, B: 2, C: 3}, + ]); + t.end(); +}); + +test("FileAttachment.xlsx reads sheets with different types", (t) => { + const workbook = new ExcelWorkbook( + mockWorkbook({ + Sheet1: [ + ["one", {richText: [{text: "two"}, {text: "three"}]}], + [ + {text: "link", hyperlink: "https://example.com"}, + 2, + {formula: "=B2*5", result: 10}, + ], + ], + }) + ); + t.same(workbook.sheet(0), [ + {A: "one", B: "twothree"}, + {A: `link`, B: 2, C: 10}, + ]); + t.end(); +}); + +test("FileAttachment.xlsx reads sheets with headers", (t) => { + const workbook = new ExcelWorkbook( + mockWorkbook({ + Sheet1: [ + ["one", "one", "one", "two"], + [1, null, 3, 4], + [5, 6, 7, 8], + ], + }) + ); + t.same(workbook.sheet(0, {headers: true}), [ + {one: 1, one_: 3, two: 4}, + {one: 5, one__: 6, one_: 7, two: 8}, + ]); + t.end(); +}); + +test("FileAttachment.xlsx reads sheet ranges", (t) => { + const workbook = new ExcelWorkbook( + mockWorkbook({ + Sheet1: [ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + [10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29], + [30, 31, 32, 33, 34, 35, 36, 37, 38, 39], + ], + }) + ); + + // undefined + // "" + // [] + const entireSheet = [ + {A: 0, B: 1, C: 2, D: 3, E: 4, F: 5, G: 6, H: 7, I: 8, J: 9}, + {A: 10, B: 11, C: 12, D: 13, E: 14, F: 15, G: 16, H: 17, I: 18, J: 19}, + {A: 20, B: 21, C: 22, D: 23, E: 24, F: 25, G: 26, H: 27, I: 28, J: 29}, + {A: 30, B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39}, + ]; + t.same(workbook.sheet(0), entireSheet); + t.same(workbook.sheet(0, {range: ""}), entireSheet); + t.same(workbook.sheet(0, {range: []}), entireSheet); + + // "B2:C3" + // [[1,1],[2,2]] + t.same(workbook.sheet(0, {range: "B2:C3"}), [ + {B: 11, C: 12}, + {B: 21, C: 22}, + ]); + t.same( + workbook.sheet(0, { + range: [ + [1, 1], + [2, 2], + ], + }), + [ + {B: 11, C: 12}, + {B: 21, C: 22}, + ] + ); + + // ":C3" + // [,[2,2]] + t.same(workbook.sheet(0, {range: ":C3"}), [ + {A: 0, B: 1, C: 2}, + {A: 10, B: 11, C: 12}, + {A: 20, B: 21, C: 22}, + ]); + t.same(workbook.sheet(0, {range: [undefined, [2, 2]]}), [ + {A: 0, B: 1, C: 2}, + {A: 10, B: 11, C: 12}, + {A: 20, B: 21, C: 22}, + ]); + + // "B2" + // [[1,1]] + t.same(workbook.sheet(0, {range: "B2"}), [ + {B: 11, C: 12, D: 13, E: 14, F: 15, G: 16, H: 17, I: 18, J: 19}, + {B: 21, C: 22, D: 23, E: 24, F: 25, G: 26, H: 27, I: 28, J: 29}, + {B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39}, + ]); + t.same(workbook.sheet(0, {range: [[1, 1]]}), [ + {B: 11, C: 12, D: 13, E: 14, F: 15, G: 16, H: 17, I: 18, J: 19}, + {B: 21, C: 22, D: 23, E: 24, F: 25, G: 26, H: 27, I: 28, J: 29}, + {B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39}, + ]); + + // "2" + // [[,1]] + t.same(workbook.sheet(0, {range: "2"}), entireSheet.slice(1)); + t.same(workbook.sheet(0, {range: [[undefined, 1]]}), entireSheet.slice(1)); + + t.end(); +}); From d8904d0e6a28546aa0f03cdcfd927fada8fa638d Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Sun, 5 Sep 2021 13:36:00 -0700 Subject: [PATCH 05/34] Respect header row order when resolving conflicts --- src/xlsx.js | 3 ++- test/xlsx-test.js | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index fd3a53e2..cc6528da 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -16,9 +16,9 @@ export class ExcelWorkbook { function extract(sheet, {range, headers}) { let [[c0, r0], [c1, r1]] = parseRange(range, sheet); - const headerRow = headers && sheet._rows[r0++]; const seen = new Set(); const names = []; + const headerRow = headers && sheet._rows[r0++]; function name(n) { if (!names[n]) { let name = (headerRow ? valueOf(headerRow._cells[n]) : AA(n)) || AA(n); @@ -27,6 +27,7 @@ function extract(sheet, {range, headers}) { } return names[n]; } + if (headerRow) for (let c = c0; c <= c1; c++) name(c); const output = new Array(r1 - r0 + 1).fill({}); for (let r = r0; r <= r1; r++) { diff --git a/test/xlsx-test.js b/test/xlsx-test.js index 940cea98..60a622ba 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -64,15 +64,15 @@ test("FileAttachment.xlsx reads sheets with headers", (t) => { const workbook = new ExcelWorkbook( mockWorkbook({ Sheet1: [ - ["one", "one", "one", "two"], - [1, null, 3, 4], - [5, 6, 7, 8], + [null, "one", "one", "two", "A"], + [ 1, null, 3, 4, 5], + [ 6, 7, 8, 9, 10], ], }) ); t.same(workbook.sheet(0, {headers: true}), [ - {one: 1, one_: 3, two: 4}, - {one: 5, one__: 6, one_: 7, two: 8}, + {A: 1, one_: 3, two: 4, A_: 5}, + {A: 6, one: 7, one_: 8, two: 9, A_: 10}, ]); t.end(); }); From ee0dfbf2d57e7328d7c9e04a6c53bb401b5e24ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Tue, 7 Sep 2021 19:58:20 +0200 Subject: [PATCH 06/34] Fil/xlsx (#249) * document xlsx (minimalist, we'll work on the notebook first) * fix coverage reporter (avoids a crash on my computer; solution found at https://github.com/tapjs/node-tap/issues/624) * unknown sheet name * simplify rows naming * NN is always called on string (cell specifier such as "AA99") * test name * more range specifiers --- README.md | 4 ++++ package.json | 2 +- src/xlsx.js | 34 ++++++++++++++---------------- test/xlsx-test.js | 53 +++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 72 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index a5ffdffe..5a2387ee 100644 --- a/README.md +++ b/README.md @@ -379,6 +379,10 @@ Returns a promise to the file loads as a [SQLite database client](https://observ const db = await FileAttachment("chinook.db").sqlite(); ``` +# *attachment*.xlsx() [<>](https://github.com/observablehq/stdlib/blob/master/src/xlsx.js "Source") + +Returns a promise to the file loaded as an [ExcelWorkbook](https://observablehq.com/@observablehq/excelworkbook). + # FileAttachments(resolve) [<>](https://github.com/observablehq/stdlib/blob/master/src/fileAttachment.js "Source") *Note: this function is not part of the Observable standard library (in notebooks), but is provided by this module as a means for defining custom file attachment implementations when working directly with the Observable runtime.* diff --git a/package.json b/package.json index f7de5be5..e92ee190 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,7 @@ "url": "https://github.com/observablehq/stdlib.git" }, "scripts": { - "test": "tap 'test/**/*-test.js'", + "test": "tap 'test/**/*-test.js' --reporter classic", "prepublishOnly": "rollup -c", "postpublish": "git push && git push --tags" }, diff --git a/src/xlsx.js b/src/xlsx.js index cc6528da..688232f3 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -6,28 +6,24 @@ export class ExcelWorkbook { return this._.worksheets.map((sheet) => sheet.name); } sheet(name, {range, headers = false} = {}) { - const sheet = this._.getWorksheet( - typeof name === "number" ? this.sheetNames()[name] : name + "" - ); - if (!sheet) throw new Error(`Sheet not found: ${name}`); + const names = this.sheetNames(); + const sname = typeof name === "number" ? names[name] : names.includes(name + "") ? name + "" : null; + if (sname == null) throw new Error(`Sheet not found: ${name}`); + const sheet = this._.getWorksheet(sname); return extract(sheet, {range, headers}); } } function extract(sheet, {range, headers}) { let [[c0, r0], [c1, r1]] = parseRange(range, sheet); - const seen = new Set(); - const names = []; const headerRow = headers && sheet._rows[r0++]; - function name(n) { - if (!names[n]) { - let name = (headerRow ? valueOf(headerRow._cells[n]) : AA(n)) || AA(n); - while (seen.has(name)) name += "_"; - seen.add((names[n] = name)); - } - return names[n]; + let names = new Set(); + for (let n = c0; n <= c1; n++) { + let name = (headerRow ? valueOf(headerRow._cells[n]) : null) || AA(n); + while (names.has(name)) name += "_"; + names.add(name); } - if (headerRow) for (let c = c0; c <= c1; c++) name(c); + names = new Array(c0).concat(Array.from(names)); const output = new Array(r1 - r0 + 1).fill({}); for (let r = r0; r <= r1; r++) { @@ -36,7 +32,7 @@ function extract(sheet, {range, headers}) { const row = (output[r - r0] = {}); for (let c = c0; c <= c1; c++) { const value = valueOf(_row._cells[c]); - if (value !== null && value !== undefined) row[name(c)] = value; + if (value != null) row[names[c]] = value; } } @@ -75,6 +71,8 @@ function parseRange(specifier = [], {columnCount, rowCount}) { [c0, r0], [c1, r1], ]; + } else { + throw new Error(`Unknown range specifier`); } } @@ -87,13 +85,13 @@ function AA(c) { return sc; } -function NN(s = "") { - const [, sc, sr] = s.match(/^([a-zA-Z]+)?(\d+)?$/); +function NN(s) { + const [, sc, sr] = s.match(/^([A-Z]*)(\d*)$/i); let c = undefined; if (sc) { c = 0; for (let i = 0; i < sc.length; i++) c += Math.pow(26, sc.length - i - 1) * (sc.charCodeAt(i) - 64); } - return [c && c - 1, sr && +sr - 1]; + return [c ? c - 1 : undefined, sr ? +sr - 1 : undefined]; } diff --git a/test/xlsx-test.js b/test/xlsx-test.js index 60a622ba..40ef8999 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -24,6 +24,12 @@ test("FileAttachment.xlsx reads sheet names", (t) => { t.end(); }); +test("FileAttachment.xlsx sheet(name) throws on unknown sheet name", (t) => { + const workbook = new ExcelWorkbook(mockWorkbook({Sheet1: []})); + t.throws(() => workbook.sheet("bad")); + t.end(); +}); + test("FileAttachment.xlsx reads sheets", (t) => { const workbook = new ExcelWorkbook( mockWorkbook({ @@ -37,6 +43,10 @@ test("FileAttachment.xlsx reads sheets", (t) => { {A: "one", B: "two", C: "three"}, {A: 1, B: 2, C: 3}, ]); + t.same(workbook.sheet("Sheet1"), [ + {A: "one", B: "two", C: "three"}, + {A: 1, B: 2, C: 3}, + ]); t.end(); }); @@ -44,18 +54,20 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { const workbook = new ExcelWorkbook( mockWorkbook({ Sheet1: [ - ["one", {richText: [{text: "two"}, {text: "three"}]}], + ["one", null, {richText: [{text: "two"}, {text: "three"}]}, undefined], [ {text: "link", hyperlink: "https://example.com"}, 2, {formula: "=B2*5", result: 10}, ], + [], ], }) ); t.same(workbook.sheet(0), [ - {A: "one", B: "twothree"}, + {A: "one", C: "twothree"}, {A: `link`, B: 2, C: 10}, + {}, ]); t.end(); }); @@ -152,5 +164,42 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { t.same(workbook.sheet(0, {range: "2"}), entireSheet.slice(1)); t.same(workbook.sheet(0, {range: [[undefined, 1]]}), entireSheet.slice(1)); + // ":I" + // [,[1,]] + const sheetJ = [ + { I: 8, J: 9 }, + { I: 18, J: 19 }, + { I: 28, J: 29 }, + { I: 38, J: 39 } + ]; + t.same(workbook.sheet(0, {range: "I"}), sheetJ); + t.same(workbook.sheet(0, {range: [[8, undefined], undefined]}), sheetJ); + t.end(); +}); + +test("FileAttachment.xlsx throws on unknown range specifier", (t) => { + const workbook = new ExcelWorkbook(mockWorkbook({ Sheet1: [] })); + t.throws(() => workbook.sheet(0, {range: 0})); + t.end(); +}); + +test("FileAttachment.xlsx derives column names such as A AA AAA…", (t) => { + const l0 = 26 * 26 * 26 + 26 * 26 + 26; + const workbook = new ExcelWorkbook( + mockWorkbook({ + Sheet1: [ + Array.from({length: l0}).fill(1), + ], + }) + ); + t.same(workbook.sheet(0, {headers: false}).columns.filter(d => d.match(/^A*$/)), ["A", "AA", "AAA"]); + const workbook1 = new ExcelWorkbook( + mockWorkbook({ + Sheet1: [ + Array.from({length: l0 + 1}).fill(1), + ], + }) + ); + t.same(workbook1.sheet(0, {headers: false}).columns.filter(d => d.match(/^A*$/)), ["A", "AA", "AAA", "AAAA"]); t.end(); }); From fd177b06265f8d73d890f0fee6c6fc2eec0b4f45 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Sun, 5 Sep 2021 13:53:54 -0700 Subject: [PATCH 07/34] Column only range test case --- test/xlsx-test.js | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/test/xlsx-test.js b/test/xlsx-test.js index 40ef8999..ec09b060 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -77,8 +77,8 @@ test("FileAttachment.xlsx reads sheets with headers", (t) => { mockWorkbook({ Sheet1: [ [null, "one", "one", "two", "A"], - [ 1, null, 3, 4, 5], - [ 6, 7, 8, 9, 10], + [1, null, 3, 4, 5], + [6, 7, 8, 9, 10], ], }) ); @@ -159,6 +159,21 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { {B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39}, ]); + // "H" + // [[7]] + t.same(workbook.sheet(0, {range: "H"}), [ + {H: 7, I: 8, J: 9}, + {H: 17, I: 18, J: 19}, + {H: 27, I: 28, J: 29}, + {H: 37, I: 38, J: 39}, + ]); + t.same(workbook.sheet(0, {range: [[7]]}), [ + {H: 7, I: 8, J: 9}, + {H: 17, I: 18, J: 19}, + {H: 27, I: 28, J: 29}, + {H: 37, I: 38, J: 39}, + ]); + // "2" // [[,1]] t.same(workbook.sheet(0, {range: "2"}), entireSheet.slice(1)); From f6ddcff37ac935afd72bd67903d0922e6ce244a8 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Tue, 7 Sep 2021 16:51:00 -0700 Subject: [PATCH 08/34] sheetNames is enumerable --- src/xlsx.js | 12 +++++++----- test/xlsx-test.js | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index 688232f3..e8fe016c 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -1,13 +1,15 @@ export class ExcelWorkbook { constructor(workbook) { Object.defineProperty(this, "_", {value: workbook}); - } - sheetNames() { - return this._.worksheets.map((sheet) => sheet.name); + this.sheetNames = this._.worksheets.map((sheet) => sheet.name); } sheet(name, {range, headers = false} = {}) { - const names = this.sheetNames(); - const sname = typeof name === "number" ? names[name] : names.includes(name + "") ? name + "" : null; + const sname = + typeof name === "number" + ? this.sheetNames[name] + : this.sheetNames.includes(name + "") + ? name + "" + : null; if (sname == null) throw new Error(`Sheet not found: ${name}`); const sheet = this._.getWorksheet(sname); return extract(sheet, {range, headers}); diff --git a/test/xlsx-test.js b/test/xlsx-test.js index ec09b060..1ebf0f4f 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -20,7 +20,7 @@ function mockWorkbook(contents) { test("FileAttachment.xlsx reads sheet names", (t) => { const workbook = new ExcelWorkbook(mockWorkbook({Sheet1: []})); - t.same(workbook.sheetNames(), ["Sheet1"]); + t.same(workbook.sheetNames, ["Sheet1"]); t.end(); }); From 9b9eab673fa637038bae3d06f6b7f83cd4cd5cc2 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Wed, 8 Sep 2021 16:01:44 -0700 Subject: [PATCH 09/34] One more test to check for empty columns Prettier + use default/base tap reporter --- package.json | 2 +- test/xlsx-test.js | 32 +++++++++++++++++++------------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/package.json b/package.json index e92ee190..10448e0b 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,7 @@ "url": "https://github.com/observablehq/stdlib.git" }, "scripts": { - "test": "tap 'test/**/*-test.js' --reporter classic", + "test": "tap 'test/**/*-test.js' --reporter base", "prepublishOnly": "rollup -c", "postpublish": "git push && git push --tags" }, diff --git a/test/xlsx-test.js b/test/xlsx-test.js index 1ebf0f4f..e964ed7c 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -182,18 +182,22 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { // ":I" // [,[1,]] const sheetJ = [ - { I: 8, J: 9 }, - { I: 18, J: 19 }, - { I: 28, J: 29 }, - { I: 38, J: 39 } + {I: 8, J: 9}, + {I: 18, J: 19}, + {I: 28, J: 29}, + {I: 38, J: 39}, ]; t.same(workbook.sheet(0, {range: "I"}), sheetJ); t.same(workbook.sheet(0, {range: [[8, undefined], undefined]}), sheetJ); + + // ":ZZ" (doesn't cause extra column fields) + t.same(workbook.sheet(0, {range: ":ZZ"}), entireSheet); + t.end(); }); test("FileAttachment.xlsx throws on unknown range specifier", (t) => { - const workbook = new ExcelWorkbook(mockWorkbook({ Sheet1: [] })); + const workbook = new ExcelWorkbook(mockWorkbook({Sheet1: []})); t.throws(() => workbook.sheet(0, {range: 0})); t.end(); }); @@ -202,19 +206,21 @@ test("FileAttachment.xlsx derives column names such as A AA AAA…", (t) => { const l0 = 26 * 26 * 26 + 26 * 26 + 26; const workbook = new ExcelWorkbook( mockWorkbook({ - Sheet1: [ - Array.from({length: l0}).fill(1), - ], + Sheet1: [Array.from({length: l0}).fill(1)], }) ); - t.same(workbook.sheet(0, {headers: false}).columns.filter(d => d.match(/^A*$/)), ["A", "AA", "AAA"]); + t.same( + workbook.sheet(0, {headers: false}).columns.filter((d) => d.match(/^A*$/)), + ["A", "AA", "AAA"] + ); const workbook1 = new ExcelWorkbook( mockWorkbook({ - Sheet1: [ - Array.from({length: l0 + 1}).fill(1), - ], + Sheet1: [Array.from({length: l0 + 1}).fill(1)], }) ); - t.same(workbook1.sheet(0, {headers: false}).columns.filter(d => d.match(/^A*$/)), ["A", "AA", "AAA", "AAAA"]); + t.same( + workbook1.sheet(0, {headers: false}).columns.filter((d) => d.match(/^A*$/)), + ["A", "AA", "AAA", "AAAA"] + ); t.end(); }); From a8450868304f025d7a3540b9a8c6099908ee00b2 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Thu, 9 Sep 2021 08:23:22 -0700 Subject: [PATCH 10/34] Add Node 16 to the test matrix --- .github/workflows/nodejs.yml | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml index 10ca6b7f..2a2dcb19 100644 --- a/.github/workflows/nodejs.yml +++ b/.github/workflows/nodejs.yml @@ -4,23 +4,22 @@ on: [push] jobs: build: - strategy: matrix: os: [ubuntu-latest] - node-version: [12.x, 14.x] + node-version: [12.x, 14.x, 16.x] runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v1 - - name: Use Node.js ${{ matrix.node-version }} - uses: actions/setup-node@v1 - with: - node-version: ${{ matrix.node-version }} - - name: yarn install and test - run: | - yarn install --frozen-lockfile - yarn test - env: - CI: true + - uses: actions/checkout@v1 + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v1 + with: + node-version: ${{ matrix.node-version }} + - name: yarn install and test + run: | + yarn install --frozen-lockfile + yarn test + env: + CI: true From f30b626c0e4034ebc53ec7a0092512c77d5f1110 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Thu, 9 Sep 2021 08:25:18 -0700 Subject: [PATCH 11/34] Revert reporter to classic for Node 16 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 10448e0b..e92ee190 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,7 @@ "url": "https://github.com/observablehq/stdlib.git" }, "scripts": { - "test": "tap 'test/**/*-test.js' --reporter base", + "test": "tap 'test/**/*-test.js' --reporter classic", "prepublishOnly": "rollup -c", "postpublish": "git push && git push --tags" }, From e421983baf203f5f0c8299fd0cc4da3cc5ad3c98 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Thu, 9 Sep 2021 08:26:37 -0700 Subject: [PATCH 12/34] Don't fail matrix quickly in actions --- .github/workflows/nodejs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml index 2a2dcb19..0e12497a 100644 --- a/.github/workflows/nodejs.yml +++ b/.github/workflows/nodejs.yml @@ -5,6 +5,7 @@ on: [push] jobs: build: strategy: + fail-fast: false matrix: os: [ubuntu-latest] node-version: [12.x, 14.x, 16.x] From e8b0153d78d00fd6868672d0b8fe9401d5d3597c Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Thu, 9 Sep 2021 09:08:48 -0700 Subject: [PATCH 13/34] More coverage. --- src/xlsx.js | 9 ++++++--- test/xlsx-test.js | 35 ++++++++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index e8fe016c..7acf0ce9 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -48,8 +48,11 @@ function valueOf(cell) { if (value && typeof value === "object") { if (value.formula) return value.result; if (value.richText) return value.richText.map((d) => d.text).join(""); - if (value.text && value.hyperlink) - return `${value.text}`; + if (value.text) + return value.hyperlink + ? `${value.text}` + : value.text; + return ""; } return value; } @@ -57,7 +60,7 @@ function valueOf(cell) { function parseRange(specifier = [], {columnCount, rowCount}) { if (typeof specifier === "string") { const [ - [c0 = 0, r0 = 0] = [], + [c0 = 0, r0 = 0], [c1 = columnCount - 1, r1 = rowCount - 1] = [], ] = specifier.split(":").map(NN); return [ diff --git a/test/xlsx-test.js b/test/xlsx-test.js index e964ed7c..85902da7 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -1,19 +1,19 @@ import {test} from "tap"; import {ExcelWorkbook} from "../src/xlsx.js"; -function mockWorkbook(contents) { +function mockWorkbook(contents, overrides = {}) { return { worksheets: Object.keys(contents).map((name) => ({name})), getWorksheet(name) { const _rows = contents[name]; - return { + return Object.assign({ _rows: _rows.map((row) => ({ _cells: row.map((cell) => ({value: cell})), hasValues: !!row.length, })), rowCount: _rows.length, columnCount: Math.max(..._rows.map((r) => r.length)), - }; + }, overrides); }, }; } @@ -56,17 +56,20 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { Sheet1: [ ["one", null, {richText: [{text: "two"}, {text: "three"}]}, undefined], [ + {text: "plain text"}, {text: "link", hyperlink: "https://example.com"}, 2, {formula: "=B2*5", result: 10}, ], + [ {/* empty object */} ], [], ], }) ); t.same(workbook.sheet(0), [ {A: "one", C: "twothree"}, - {A: `link`, B: 2, C: 10}, + {A: "plain text", B: `link`, C: 2, D: 10}, + {A: ""}, {}, ]); t.end(); @@ -81,11 +84,19 @@ test("FileAttachment.xlsx reads sheets with headers", (t) => { [6, 7, 8, 9, 10], ], }) + // }, { columnCount: 10 }) ); t.same(workbook.sheet(0, {headers: true}), [ {A: 1, one_: 3, two: 4, A_: 5}, {A: 6, one: 7, one_: 8, two: 9, A_: 10}, ]); + t.same(workbook.sheet(0, {headers: true}).columns, [ + "A", + "one", + "one_", + "two", + "A_", + ]); t.end(); }); @@ -113,6 +124,7 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { t.same(workbook.sheet(0), entireSheet); t.same(workbook.sheet(0, {range: ""}), entireSheet); t.same(workbook.sheet(0, {range: []}), entireSheet); + t.same(workbook.sheet(0, {range: []}).columns, Object.keys(entireSheet[0])); // "B2:C3" // [[1,1],[2,2]] @@ -174,11 +186,6 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { {H: 37, I: 38, J: 39}, ]); - // "2" - // [[,1]] - t.same(workbook.sheet(0, {range: "2"}), entireSheet.slice(1)); - t.same(workbook.sheet(0, {range: [[undefined, 1]]}), entireSheet.slice(1)); - // ":I" // [,[1,]] const sheetJ = [ @@ -193,6 +200,16 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { // ":ZZ" (doesn't cause extra column fields) t.same(workbook.sheet(0, {range: ":ZZ"}), entireSheet); + // "2" + // [[,1]] + t.same(workbook.sheet(0, {range: "2"}), entireSheet.slice(1)); + t.same(workbook.sheet(0, {range: [[undefined, 1]]}), entireSheet.slice(1)); + + // ":2" + // [[,],[,1]] + t.same(workbook.sheet(0, {range: ":2"}), entireSheet.slice(0, 2)); + t.same(workbook.sheet(0, {range: [[], [undefined, 1]]}), entireSheet.slice(0, 2)); + t.end(); }); From e7c82d48ab8fc5112e7260f85e0fdd9c6655d3c4 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Thu, 9 Sep 2021 09:12:00 -0700 Subject: [PATCH 14/34] Example of .xlsx in README --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5a2387ee..e28f42ad 100644 --- a/README.md +++ b/README.md @@ -381,10 +381,15 @@ const db = await FileAttachment("chinook.db").sqlite(); # *attachment*.xlsx() [<>](https://github.com/observablehq/stdlib/blob/master/src/xlsx.js "Source") -Returns a promise to the file loaded as an [ExcelWorkbook](https://observablehq.com/@observablehq/excelworkbook). +Returns a promise to the file loaded as an [ExcelWorkbook](https://observablehq.com/@observablehq/excel-workbook). # FileAttachments(resolve) [<>](https://github.com/observablehq/stdlib/blob/master/src/fileAttachment.js "Source") +```js +const workbook = await FileAttachment("profit-and-loss.xlsx").xlsx(); +const sheet = workbook.sheet("Sheet1", { range: "B4:AF234", headers: true }); +``` + *Note: this function is not part of the Observable standard library (in notebooks), but is provided by this module as a means for defining custom file attachment implementations when working directly with the Observable runtime.* Returns a [*FileAttachment*](#FileAttachment) function given the specified *resolve* function. The *resolve* function is an async function that takes a *name* and returns a URL at which the file of that name can be loaded. For example: From 1440400697634b235761376970be5a3e9c70eca7 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Thu, 9 Sep 2021 09:24:11 -0700 Subject: [PATCH 15/34] Remove Excel from Workbook naming --- README.md | 2 +- src/fileAttachment.js | 4 ++-- src/xlsx.js | 4 ++-- test/xlsx-test.js | 20 ++++++++++---------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index e28f42ad..d70c091c 100644 --- a/README.md +++ b/README.md @@ -381,7 +381,7 @@ const db = await FileAttachment("chinook.db").sqlite(); # *attachment*.xlsx() [<>](https://github.com/observablehq/stdlib/blob/master/src/xlsx.js "Source") -Returns a promise to the file loaded as an [ExcelWorkbook](https://observablehq.com/@observablehq/excel-workbook). +Returns a promise to the file loaded as a [Workbook](https://observablehq.com/@observablehq/xlsx-workbook). # FileAttachments(resolve) [<>](https://github.com/observablehq/stdlib/blob/master/src/fileAttachment.js "Source") diff --git a/src/fileAttachment.js b/src/fileAttachment.js index d185dded..90d0d03e 100644 --- a/src/fileAttachment.js +++ b/src/fileAttachment.js @@ -2,7 +2,7 @@ import {autoType, csvParse, csvParseRows, tsvParse, tsvParseRows} from "d3-dsv"; import {require as requireDefault} from "d3-require"; import {arrow, jszip, exceljs} from "./dependencies.js"; import {SQLiteDatabaseClient} from "./sqlite.js"; -import {ExcelWorkbook} from "./xlsx.js"; +import {Workbook} from "./xlsx.js"; async function remote_fetch(file) { const response = await fetch(await file.url()); @@ -73,7 +73,7 @@ class AbstractFile { } async xlsx() { const [ExcelJS, buffer] = await Promise.all([requireDefault(exceljs.resolve()), this.arrayBuffer()]); - return new ExcelWorkbook(await new ExcelJS.Workbook().xlsx.load(buffer)); + return new Workbook(await new ExcelJS.Workbook().xlsx.load(buffer)); } } diff --git a/src/xlsx.js b/src/xlsx.js index 7acf0ce9..0fdcb34f 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -1,4 +1,4 @@ -export class ExcelWorkbook { +export class Workbook { constructor(workbook) { Object.defineProperty(this, "_", {value: workbook}); this.sheetNames = this._.worksheets.map((sheet) => sheet.name); @@ -50,7 +50,7 @@ function valueOf(cell) { if (value.richText) return value.richText.map((d) => d.text).join(""); if (value.text) return value.hyperlink - ? `${value.text}` + ? `${value.text.replace(/` : value.text; return ""; } diff --git a/test/xlsx-test.js b/test/xlsx-test.js index 85902da7..ecd48b1e 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -1,5 +1,5 @@ import {test} from "tap"; -import {ExcelWorkbook} from "../src/xlsx.js"; +import {Workbook} from "../src/xlsx.js"; function mockWorkbook(contents, overrides = {}) { return { @@ -19,19 +19,19 @@ function mockWorkbook(contents, overrides = {}) { } test("FileAttachment.xlsx reads sheet names", (t) => { - const workbook = new ExcelWorkbook(mockWorkbook({Sheet1: []})); + const workbook = new Workbook(mockWorkbook({Sheet1: []})); t.same(workbook.sheetNames, ["Sheet1"]); t.end(); }); test("FileAttachment.xlsx sheet(name) throws on unknown sheet name", (t) => { - const workbook = new ExcelWorkbook(mockWorkbook({Sheet1: []})); + const workbook = new Workbook(mockWorkbook({Sheet1: []})); t.throws(() => workbook.sheet("bad")); t.end(); }); test("FileAttachment.xlsx reads sheets", (t) => { - const workbook = new ExcelWorkbook( + const workbook = new Workbook( mockWorkbook({ Sheet1: [ ["one", "two", "three"], @@ -51,7 +51,7 @@ test("FileAttachment.xlsx reads sheets", (t) => { }); test("FileAttachment.xlsx reads sheets with different types", (t) => { - const workbook = new ExcelWorkbook( + const workbook = new Workbook( mockWorkbook({ Sheet1: [ ["one", null, {richText: [{text: "two"}, {text: "three"}]}, undefined], @@ -76,7 +76,7 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { }); test("FileAttachment.xlsx reads sheets with headers", (t) => { - const workbook = new ExcelWorkbook( + const workbook = new Workbook( mockWorkbook({ Sheet1: [ [null, "one", "one", "two", "A"], @@ -101,7 +101,7 @@ test("FileAttachment.xlsx reads sheets with headers", (t) => { }); test("FileAttachment.xlsx reads sheet ranges", (t) => { - const workbook = new ExcelWorkbook( + const workbook = new Workbook( mockWorkbook({ Sheet1: [ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], @@ -214,14 +214,14 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { }); test("FileAttachment.xlsx throws on unknown range specifier", (t) => { - const workbook = new ExcelWorkbook(mockWorkbook({Sheet1: []})); + const workbook = new Workbook(mockWorkbook({Sheet1: []})); t.throws(() => workbook.sheet(0, {range: 0})); t.end(); }); test("FileAttachment.xlsx derives column names such as A AA AAA…", (t) => { const l0 = 26 * 26 * 26 + 26 * 26 + 26; - const workbook = new ExcelWorkbook( + const workbook = new Workbook( mockWorkbook({ Sheet1: [Array.from({length: l0}).fill(1)], }) @@ -230,7 +230,7 @@ test("FileAttachment.xlsx derives column names such as A AA AAA…", (t) => { workbook.sheet(0, {headers: false}).columns.filter((d) => d.match(/^A*$/)), ["A", "AA", "AAA"] ); - const workbook1 = new ExcelWorkbook( + const workbook1 = new Workbook( mockWorkbook({ Sheet1: [Array.from({length: l0 + 1}).fill(1)], }) From d444ebec69e9c230d8aaaa1ed15f5610e0166e2b Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Thu, 9 Sep 2021 13:10:56 -0700 Subject: [PATCH 16/34] Fix dates --- src/xlsx.js | 2 +- test/xlsx-test.js | 35 +++++++++++++++++++++-------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index 0fdcb34f..43b658c1 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -52,7 +52,7 @@ function valueOf(cell) { return value.hyperlink ? `${value.text.replace(/` : value.text; - return ""; + return value; } return value; } diff --git a/test/xlsx-test.js b/test/xlsx-test.js index ecd48b1e..0a9f8781 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -6,14 +6,17 @@ function mockWorkbook(contents, overrides = {}) { worksheets: Object.keys(contents).map((name) => ({name})), getWorksheet(name) { const _rows = contents[name]; - return Object.assign({ - _rows: _rows.map((row) => ({ - _cells: row.map((cell) => ({value: cell})), - hasValues: !!row.length, - })), - rowCount: _rows.length, - columnCount: Math.max(..._rows.map((r) => r.length)), - }, overrides); + return Object.assign( + { + _rows: _rows.map((row) => ({ + _cells: row.map((cell) => ({value: cell})), + hasValues: !!row.length, + })), + rowCount: _rows.length, + columnCount: Math.max(..._rows.map((r) => r.length)), + }, + overrides + ); }, }; } @@ -61,7 +64,7 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { 2, {formula: "=B2*5", result: 10}, ], - [ {/* empty object */} ], + [{}, new Date(Date.UTC(2020, 0, 1))], [], ], }) @@ -69,7 +72,7 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { t.same(workbook.sheet(0), [ {A: "one", C: "twothree"}, {A: "plain text", B: `link`, C: 2, D: 10}, - {A: ""}, + {A: {}, B: new Date(Date.UTC(2020, 0, 1))}, {}, ]); t.end(); @@ -79,15 +82,15 @@ test("FileAttachment.xlsx reads sheets with headers", (t) => { const workbook = new Workbook( mockWorkbook({ Sheet1: [ - [null, "one", "one", "two", "A"], - [1, null, 3, 4, 5], + [null, "one", "one", "two", "A", "0"], + [1, null, 3, 4, 5, "zero"], [6, 7, 8, 9, 10], ], }) // }, { columnCount: 10 }) ); t.same(workbook.sheet(0, {headers: true}), [ - {A: 1, one_: 3, two: 4, A_: 5}, + {A: 1, one_: 3, two: 4, A_: 5, 0: "zero"}, {A: 6, one: 7, one_: 8, two: 9, A_: 10}, ]); t.same(workbook.sheet(0, {headers: true}).columns, [ @@ -96,6 +99,7 @@ test("FileAttachment.xlsx reads sheets with headers", (t) => { "one_", "two", "A_", + "0", ]); t.end(); }); @@ -208,7 +212,10 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { // ":2" // [[,],[,1]] t.same(workbook.sheet(0, {range: ":2"}), entireSheet.slice(0, 2)); - t.same(workbook.sheet(0, {range: [[], [undefined, 1]]}), entireSheet.slice(0, 2)); + t.same( + workbook.sheet(0, {range: [[], [undefined, 1]]}), + entireSheet.slice(0, 2) + ); t.end(); }); From 410f4c9250b06decefac3ebd2049ba8ec165e4b1 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Thu, 9 Sep 2021 16:47:20 -0700 Subject: [PATCH 17/34] Fix for sharedFormula --- src/xlsx.js | 14 ++++++++------ test/xlsx-test.js | 9 ++++++++- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index 43b658c1..9e880169 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -45,12 +45,16 @@ function extract(sheet, {range, headers}) { function valueOf(cell) { if (!cell) return; const {value} = cell; + if (value && value instanceof Date) return value; if (value && typeof value === "object") { - if (value.formula) return value.result; + if (value.formula || value.sharedFormula) return value.result; if (value.richText) return value.richText.map((d) => d.text).join(""); if (value.text) return value.hyperlink - ? `${value.text.replace(/` + ? `${value.text.replace( + /` : value.text; return value; } @@ -59,10 +63,8 @@ function valueOf(cell) { function parseRange(specifier = [], {columnCount, rowCount}) { if (typeof specifier === "string") { - const [ - [c0 = 0, r0 = 0], - [c1 = columnCount - 1, r1 = rowCount - 1] = [], - ] = specifier.split(":").map(NN); + const [[c0 = 0, r0 = 0], [c1 = columnCount - 1, r1 = rowCount - 1] = []] = + specifier.split(":").map(NN); return [ [c0, r0], [c1, r1], diff --git a/test/xlsx-test.js b/test/xlsx-test.js index 0a9f8781..ed150b03 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -63,6 +63,7 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { {text: "link", hyperlink: "https://example.com"}, 2, {formula: "=B2*5", result: 10}, + {sharedFormula: "=B2*6", result: 12}, ], [{}, new Date(Date.UTC(2020, 0, 1))], [], @@ -71,7 +72,13 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { ); t.same(workbook.sheet(0), [ {A: "one", C: "twothree"}, - {A: "plain text", B: `link`, C: 2, D: 10}, + { + A: "plain text", + B: `link`, + C: 2, + D: 10, + E: 12, + }, {A: {}, B: new Date(Date.UTC(2020, 0, 1))}, {}, ]); From 57cb0e0cd80d89a7c21c85dece74a5bd014b877d Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Fri, 10 Sep 2021 09:53:00 -0700 Subject: [PATCH 18/34] Coerce errors to NaN --- src/xlsx.js | 3 ++- test/xlsx-test.js | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/xlsx.js b/src/xlsx.js index 9e880169..8747e57a 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -47,7 +47,8 @@ function valueOf(cell) { const {value} = cell; if (value && value instanceof Date) return value; if (value && typeof value === "object") { - if (value.formula || value.sharedFormula) return value.result; + if (value.formula || value.sharedFormula) + return value.result && value.result.error ? NaN : value.result; if (value.richText) return value.richText.map((d) => d.text).join(""); if (value.text) return value.hyperlink diff --git a/test/xlsx-test.js b/test/xlsx-test.js index ed150b03..6dc0a966 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -64,6 +64,7 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { 2, {formula: "=B2*5", result: 10}, {sharedFormula: "=B2*6", result: 12}, + {sharedFormula: "=Z2*6", result: {error: "#REF!"}}, ], [{}, new Date(Date.UTC(2020, 0, 1))], [], @@ -78,6 +79,7 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { C: 2, D: 10, E: 12, + F: NaN, }, {A: {}, B: new Date(Date.UTC(2020, 0, 1))}, {}, From e2976b190c8452b428bf922368bc5f02cfc517c6 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Fri, 10 Sep 2021 10:09:59 -0700 Subject: [PATCH 19/34] Properly escape html --- src/xlsx.js | 22 +++++++++++++++++++--- test/xlsx-test.js | 5 ++--- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index 8747e57a..a2ddc612 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -52,9 +52,8 @@ function valueOf(cell) { if (value.richText) return value.richText.map((d) => d.text).join(""); if (value.text) return value.hyperlink - ? `${value.text.replace( - /${escapeHTML( + value.text )}` : value.text; return value; @@ -62,6 +61,23 @@ function valueOf(cell) { return value; } +function escapeHTML(string) { + return string.replace(/[&<>"']/g, (m) => { + switch (m) { + case "&": + return "&"; + case "<": + return "<"; + case ">": + return ">"; + case '"': + return """; + case "'": + return "'"; + } + }); +} + function parseRange(specifier = [], {columnCount, rowCount}) { if (typeof specifier === "string") { const [[c0 = 0, r0 = 0], [c1 = columnCount - 1, r1 = rowCount - 1] = []] = diff --git a/test/xlsx-test.js b/test/xlsx-test.js index 6dc0a966..7914229d 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -60,7 +60,7 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { ["one", null, {richText: [{text: "two"}, {text: "three"}]}, undefined], [ {text: "plain text"}, - {text: "link", hyperlink: "https://example.com"}, + {text: `link&"'?`, hyperlink: 'https://example.com?q="'}, 2, {formula: "=B2*5", result: 10}, {sharedFormula: "=B2*6", result: 12}, @@ -75,7 +75,7 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { {A: "one", C: "twothree"}, { A: "plain text", - B: `link`, + B: `link&</a>"'?`, C: 2, D: 10, E: 12, @@ -96,7 +96,6 @@ test("FileAttachment.xlsx reads sheets with headers", (t) => { [6, 7, 8, 9, 10], ], }) - // }, { columnCount: 10 }) ); t.same(workbook.sheet(0, {headers: true}), [ {A: 1, one_: 3, two: 4, A_: 5, 0: "zero"}, From b97b9f6c7f2e26d88307974db27629b38ff81526 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Fri, 10 Sep 2021 10:25:15 -0700 Subject: [PATCH 20/34] Make sheetNames read-only --- src/xlsx.js | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index a2ddc612..1704a22c 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -1,7 +1,13 @@ export class Workbook { constructor(workbook) { - Object.defineProperty(this, "_", {value: workbook}); - this.sheetNames = this._.worksheets.map((sheet) => sheet.name); + Object.defineProperties(this, { + _: {value: workbook}, + sheetNames: { + value: workbook.worksheets.map((sheet) => sheet.name), + writable: false, + enumerable: true, + }, + }); } sheet(name, {range, headers = false} = {}) { const sname = From e5eb8d6339180ac433dde343a2cabeea350181fc Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Fri, 10 Sep 2021 14:23:38 -0700 Subject: [PATCH 21/34] Require colons in range specifiers --- src/xlsx.js | 5 +++-- test/xlsx-test.js | 55 +++++++++++++++++++++++++---------------------- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index 1704a22c..d9a747c7 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -86,7 +86,8 @@ function escapeHTML(string) { function parseRange(specifier = [], {columnCount, rowCount}) { if (typeof specifier === "string") { - const [[c0 = 0, r0 = 0], [c1 = columnCount - 1, r1 = rowCount - 1] = []] = + if (!specifier.includes(":")) throw new Error("Malformed range specifier"); + const [[c0 = 0, r0 = 0], [c1 = columnCount - 1, r1 = rowCount - 1]] = specifier.split(":").map(NN); return [ [c0, r0], @@ -102,7 +103,7 @@ function parseRange(specifier = [], {columnCount, rowCount}) { [c1, r1], ]; } else { - throw new Error(`Unknown range specifier`); + throw new Error("Unknown range specifier"); } } diff --git a/test/xlsx-test.js b/test/xlsx-test.js index 7914229d..a90b2454 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -124,8 +124,11 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { }) ); - // undefined // "" + t.throws(() => t.same(workbook.sheet(0, {range: ""}), entireSheet)); + + // undefined + // ":" // [] const entireSheet = [ {A: 0, B: 1, C: 2, D: 3, E: 4, F: 5, G: 6, H: 7, I: 8, J: 9}, @@ -134,7 +137,7 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { {A: 30, B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39}, ]; t.same(workbook.sheet(0), entireSheet); - t.same(workbook.sheet(0, {range: ""}), entireSheet); + t.same(workbook.sheet(0, {range: ":"}), entireSheet); t.same(workbook.sheet(0, {range: []}), entireSheet); t.same(workbook.sheet(0, {range: []}).columns, Object.keys(entireSheet[0])); @@ -170,9 +173,9 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { {A: 20, B: 21, C: 22}, ]); - // "B2" + // "B2:" // [[1,1]] - t.same(workbook.sheet(0, {range: "B2"}), [ + t.same(workbook.sheet(0, {range: "B2:"}), [ {B: 11, C: 12, D: 13, E: 14, F: 15, G: 16, H: 17, I: 18, J: 19}, {B: 21, C: 22, D: 23, E: 24, F: 25, G: 26, H: 27, I: 28, J: 29}, {B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39}, @@ -183,38 +186,38 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { {B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39}, ]); - // "H" + // "H:" // [[7]] - t.same(workbook.sheet(0, {range: "H"}), [ - {H: 7, I: 8, J: 9}, - {H: 17, I: 18, J: 19}, - {H: 27, I: 28, J: 29}, - {H: 37, I: 38, J: 39}, - ]); - t.same(workbook.sheet(0, {range: [[7]]}), [ + const sheetH = [ {H: 7, I: 8, J: 9}, {H: 17, I: 18, J: 19}, {H: 27, I: 28, J: 29}, {H: 37, I: 38, J: 39}, - ]); + ]; + t.same(workbook.sheet(0, {range: "H:"}), sheetH); + t.same(workbook.sheet(0, {range: [[7]]}), sheetH); - // ":I" - // [,[1,]] - const sheetJ = [ - {I: 8, J: 9}, - {I: 18, J: 19}, - {I: 28, J: 29}, - {I: 38, J: 39}, + // ":C" + // [,[,2]] + const sheetC = [ + {A: 0, B: 1, C: 2}, + {A: 10, B: 11, C: 12}, + {A: 20, B: 21, C: 22}, + {A: 30, B: 31, C: 32}, ]; - t.same(workbook.sheet(0, {range: "I"}), sheetJ); - t.same(workbook.sheet(0, {range: [[8, undefined], undefined]}), sheetJ); + t.same(workbook.sheet(0, {range: ":C"}), sheetC); + t.same(workbook.sheet(0, {range: [undefined, [2]]}), sheetC); - // ":ZZ" (doesn't cause extra column fields) - t.same(workbook.sheet(0, {range: ":ZZ"}), entireSheet); + // ":Z" + t.same(workbook.sheet(0, {range: ":Z"}), entireSheet); + t.same( + workbook.sheet(0, {range: ":Z"}).columns, + "ABCDEFGHIJKLMNOPQRSTUVWXYZ".split("") + ); - // "2" + // "2:" // [[,1]] - t.same(workbook.sheet(0, {range: "2"}), entireSheet.slice(1)); + t.same(workbook.sheet(0, {range: "2:"}), entireSheet.slice(1)); t.same(workbook.sheet(0, {range: [[undefined, 1]]}), entireSheet.slice(1)); // ":2" From 81433c66ef84db1fc750abce529ddc7fa0c68948 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Fri, 10 Sep 2021 15:27:27 -0700 Subject: [PATCH 22/34] Include row numbers --- src/xlsx.js | 20 +++++++++++--------- test/xlsx-test.js | 8 ++++++-- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index d9a747c7..96a27561 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -25,7 +25,7 @@ export class Workbook { function extract(sheet, {range, headers}) { let [[c0, r0], [c1, r1]] = parseRange(range, sheet); const headerRow = headers && sheet._rows[r0++]; - let names = new Set(); + let names = new Set(["#"]); for (let n = c0; n <= c1; n++) { let name = (headerRow ? valueOf(headerRow._cells[n]) : null) || AA(n); while (names.has(name)) name += "_"; @@ -33,18 +33,20 @@ function extract(sheet, {range, headers}) { } names = new Array(c0).concat(Array.from(names)); - const output = new Array(r1 - r0 + 1).fill({}); + const output = new Array(r1 - r0 + 1); for (let r = r0; r <= r1; r++) { + const row = (output[r - r0] = Object.defineProperty({}, "#", { + value: r + 1, + })); const _row = sheet._rows[r]; - if (!_row || !_row.hasValues) continue; - const row = (output[r - r0] = {}); - for (let c = c0; c <= c1; c++) { - const value = valueOf(_row._cells[c]); - if (value != null) row[names[c]] = value; - } + if (_row && _row.hasValues) + for (let c = c0; c <= c1; c++) { + const value = valueOf(_row._cells[c]); + if (value != null) row[names[c + 1]] = value; + } } - output.columns = names.filter(() => true); + output.columns = names.filter(() => true); // Filter sparse columns return output; } diff --git a/test/xlsx-test.js b/test/xlsx-test.js index a90b2454..610ed609 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -102,6 +102,7 @@ test("FileAttachment.xlsx reads sheets with headers", (t) => { {A: 6, one: 7, one_: 8, two: 9, A_: 10}, ]); t.same(workbook.sheet(0, {headers: true}).columns, [ + "#", "A", "one", "one_", @@ -139,7 +140,10 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { t.same(workbook.sheet(0), entireSheet); t.same(workbook.sheet(0, {range: ":"}), entireSheet); t.same(workbook.sheet(0, {range: []}), entireSheet); - t.same(workbook.sheet(0, {range: []}).columns, Object.keys(entireSheet[0])); + t.same(workbook.sheet(0, {range: []}).columns, [ + "#", + ...Object.keys(entireSheet[0]), + ]); // "B2:C3" // [[1,1],[2,2]] @@ -212,7 +216,7 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { t.same(workbook.sheet(0, {range: ":Z"}), entireSheet); t.same( workbook.sheet(0, {range: ":Z"}).columns, - "ABCDEFGHIJKLMNOPQRSTUVWXYZ".split("") + "#ABCDEFGHIJKLMNOPQRSTUVWXYZ".split("") ); // "2:" From 2f26284140ebbb40737defbb6aa6aa9de7789ee1 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Sat, 11 Sep 2021 17:32:29 -0700 Subject: [PATCH 23/34] Use only string form ranges --- src/xlsx.js | 31 +++++++++------------------ test/xlsx-test.js | 53 +++++------------------------------------------ 2 files changed, 15 insertions(+), 69 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index 96a27561..df128fd4 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -86,27 +86,16 @@ function escapeHTML(string) { }); } -function parseRange(specifier = [], {columnCount, rowCount}) { - if (typeof specifier === "string") { - if (!specifier.includes(":")) throw new Error("Malformed range specifier"); - const [[c0 = 0, r0 = 0], [c1 = columnCount - 1, r1 = rowCount - 1]] = - specifier.split(":").map(NN); - return [ - [c0, r0], - [c1, r1], - ]; - } else if (typeof specifier === "object") { - const [ - [c0 = 0, r0 = 0] = [], - [c1 = columnCount - 1, r1 = rowCount - 1] = [], - ] = specifier; - return [ - [c0, r0], - [c1, r1], - ]; - } else { - throw new Error("Unknown range specifier"); - } +function parseRange(specifier = ":", {columnCount, rowCount}) { + if (typeof specifier !== "string") throw new Error("Unknown range specifier"); + if (!specifier.match(/^[A-Z]*[0-9]*:[A-Z]*[0-9]*$/)) + throw new Error("Malformed range specifier"); + const [[c0 = 0, r0 = 0], [c1 = columnCount - 1, r1 = rowCount - 1]] = + specifier.split(":").map(NN); + return [ + [c0, r0], + [c1, r1], + ]; } function AA(c) { diff --git a/test/xlsx-test.js b/test/xlsx-test.js index 610ed609..ca1f61e7 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -130,7 +130,6 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { // undefined // ":" - // [] const entireSheet = [ {A: 0, B: 1, C: 2, D: 3, E: 4, F: 5, G: 6, H: 7, I: 8, J: 9}, {A: 10, B: 11, C: 12, D: 13, E: 14, F: 15, G: 16, H: 17, I: 18, J: 19}, @@ -139,78 +138,43 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { ]; t.same(workbook.sheet(0), entireSheet); t.same(workbook.sheet(0, {range: ":"}), entireSheet); - t.same(workbook.sheet(0, {range: []}), entireSheet); - t.same(workbook.sheet(0, {range: []}).columns, [ - "#", - ...Object.keys(entireSheet[0]), - ]); + t.same(workbook.sheet(0).columns, ["#", ...Object.keys(entireSheet[0])]); // "B2:C3" - // [[1,1],[2,2]] t.same(workbook.sheet(0, {range: "B2:C3"}), [ {B: 11, C: 12}, {B: 21, C: 22}, ]); - t.same( - workbook.sheet(0, { - range: [ - [1, 1], - [2, 2], - ], - }), - [ - {B: 11, C: 12}, - {B: 21, C: 22}, - ] - ); // ":C3" - // [,[2,2]] t.same(workbook.sheet(0, {range: ":C3"}), [ {A: 0, B: 1, C: 2}, {A: 10, B: 11, C: 12}, {A: 20, B: 21, C: 22}, ]); - t.same(workbook.sheet(0, {range: [undefined, [2, 2]]}), [ - {A: 0, B: 1, C: 2}, - {A: 10, B: 11, C: 12}, - {A: 20, B: 21, C: 22}, - ]); // "B2:" - // [[1,1]] t.same(workbook.sheet(0, {range: "B2:"}), [ {B: 11, C: 12, D: 13, E: 14, F: 15, G: 16, H: 17, I: 18, J: 19}, {B: 21, C: 22, D: 23, E: 24, F: 25, G: 26, H: 27, I: 28, J: 29}, {B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39}, ]); - t.same(workbook.sheet(0, {range: [[1, 1]]}), [ - {B: 11, C: 12, D: 13, E: 14, F: 15, G: 16, H: 17, I: 18, J: 19}, - {B: 21, C: 22, D: 23, E: 24, F: 25, G: 26, H: 27, I: 28, J: 29}, - {B: 31, C: 32, D: 33, E: 34, F: 35, G: 36, H: 37, I: 38, J: 39}, - ]); // "H:" - // [[7]] - const sheetH = [ + t.same(workbook.sheet(0, {range: "H:"}), [ {H: 7, I: 8, J: 9}, {H: 17, I: 18, J: 19}, {H: 27, I: 28, J: 29}, {H: 37, I: 38, J: 39}, - ]; - t.same(workbook.sheet(0, {range: "H:"}), sheetH); - t.same(workbook.sheet(0, {range: [[7]]}), sheetH); + ]); // ":C" - // [,[,2]] - const sheetC = [ + t.same(workbook.sheet(0, {range: ":C"}), [ {A: 0, B: 1, C: 2}, {A: 10, B: 11, C: 12}, {A: 20, B: 21, C: 22}, {A: 30, B: 31, C: 32}, - ]; - t.same(workbook.sheet(0, {range: ":C"}), sheetC); - t.same(workbook.sheet(0, {range: [undefined, [2]]}), sheetC); + ]); // ":Z" t.same(workbook.sheet(0, {range: ":Z"}), entireSheet); @@ -220,17 +184,10 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { ); // "2:" - // [[,1]] t.same(workbook.sheet(0, {range: "2:"}), entireSheet.slice(1)); - t.same(workbook.sheet(0, {range: [[undefined, 1]]}), entireSheet.slice(1)); // ":2" - // [[,],[,1]] t.same(workbook.sheet(0, {range: ":2"}), entireSheet.slice(0, 2)); - t.same( - workbook.sheet(0, {range: [[], [undefined, 1]]}), - entireSheet.slice(0, 2) - ); t.end(); }); From 66a539cd32485c93ac9540eb9fb9ab89436cc61f Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Sat, 11 Sep 2021 17:34:19 -0700 Subject: [PATCH 24/34] Coerce range specifiers to strings --- src/xlsx.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xlsx.js b/src/xlsx.js index df128fd4..8ea3ba90 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -87,7 +87,7 @@ function escapeHTML(string) { } function parseRange(specifier = ":", {columnCount, rowCount}) { - if (typeof specifier !== "string") throw new Error("Unknown range specifier"); + specifier += ""; if (!specifier.match(/^[A-Z]*[0-9]*:[A-Z]*[0-9]*$/)) throw new Error("Malformed range specifier"); const [[c0 = 0, r0 = 0], [c1 = columnCount - 1, r1 = rowCount - 1]] = From fcb6eb7798744605b16d6cae05f5e6e79b846e22 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Mon, 13 Sep 2021 16:19:57 -0700 Subject: [PATCH 25/34] Update README.md Co-authored-by: Mike Bostock --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8d02e353..e0b83e75 100644 --- a/README.md +++ b/README.md @@ -385,7 +385,7 @@ Returns a promise to the file loaded as a [Workbook](https://observablehq.com/@o ```js const workbook = await FileAttachment("profit-and-loss.xlsx").xlsx(); -const sheet = workbook.sheet("Sheet1", { range: "B4:AF234", headers: true }); +const sheet = workbook.sheet("Sheet1", {range: "B4:AF234", headers: true}); ``` # *attachment*.xml() [<>](https://github.com/observablehq/stdlib/blob/master/src/fileAttachment.js "Source") From 162d55e5d2e07aab3a7df5083205e15b1323f526 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Mon, 13 Sep 2021 16:24:09 -0700 Subject: [PATCH 26/34] Apply suggestions from code review Co-authored-by: Mike Bostock --- README.md | 2 +- src/xlsx.js | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e0b83e75..ae8b1eea 100644 --- a/README.md +++ b/README.md @@ -381,7 +381,7 @@ const db = await FileAttachment("chinook.db").sqlite(); # *attachment*.xlsx() [<>](https://github.com/observablehq/stdlib/blob/master/src/xlsx.js "Source") -Returns a promise to the file loaded as a [Workbook](https://observablehq.com/@observablehq/xlsx-workbook). +Returns a promise to the file loaded as a [Workbook](https://observablehq.com/@observablehq/xlsx). ```js const workbook = await FileAttachment("profit-and-loss.xlsx").xlsx(); diff --git a/src/xlsx.js b/src/xlsx.js index 8ea3ba90..630a5f95 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -2,19 +2,15 @@ export class Workbook { constructor(workbook) { Object.defineProperties(this, { _: {value: workbook}, - sheetNames: { - value: workbook.worksheets.map((sheet) => sheet.name), - writable: false, - enumerable: true, - }, + sheetNames: {value: workbook.worksheets.map(s => s.name), enumerable: true} }); } sheet(name, {range, headers = false} = {}) { const sname = typeof name === "number" ? this.sheetNames[name] - : this.sheetNames.includes(name + "") - ? name + "" + : this.sheetNames.includes(name += "") + ? name : null; if (sname == null) throw new Error(`Sheet not found: ${name}`); const sheet = this._.getWorksheet(sname); From 9c9e91ba6c69e7edbfb1b5807163035295971d4f Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Tue, 14 Sep 2021 09:59:48 -0700 Subject: [PATCH 27/34] Simplify hyperlinks --- src/xlsx.js | 23 ++--------------------- test/xlsx-test.js | 2 +- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index 630a5f95..3156577e 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -55,33 +55,14 @@ function valueOf(cell) { return value.result && value.result.error ? NaN : value.result; if (value.richText) return value.richText.map((d) => d.text).join(""); if (value.text) - return value.hyperlink - ? `${escapeHTML( - value.text - )}` + return value.hyperlink && value.hyperlink !== value.text + ? `${value.hyperlink} ${value.text}` : value.text; return value; } return value; } -function escapeHTML(string) { - return string.replace(/[&<>"']/g, (m) => { - switch (m) { - case "&": - return "&"; - case "<": - return "<"; - case ">": - return ">"; - case '"': - return """; - case "'": - return "'"; - } - }); -} - function parseRange(specifier = ":", {columnCount, rowCount}) { specifier += ""; if (!specifier.match(/^[A-Z]*[0-9]*:[A-Z]*[0-9]*$/)) diff --git a/test/xlsx-test.js b/test/xlsx-test.js index ca1f61e7..7e7ee868 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -75,7 +75,7 @@ test("FileAttachment.xlsx reads sheets with different types", (t) => { {A: "one", C: "twothree"}, { A: "plain text", - B: `link&</a>"'?`, + B: `https://example.com?q=" link&"'?`, C: 2, D: 10, E: 12, From 1a0345ebd5d272c7fd8af9a6e041b04e53144f5a Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Tue, 14 Sep 2021 10:01:56 -0700 Subject: [PATCH 28/34] Prettier --- src/xlsx.js | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index 3156577e..8ca653b9 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -2,14 +2,17 @@ export class Workbook { constructor(workbook) { Object.defineProperties(this, { _: {value: workbook}, - sheetNames: {value: workbook.worksheets.map(s => s.name), enumerable: true} + sheetNames: { + value: workbook.worksheets.map((s) => s.name), + enumerable: true, + }, }); } sheet(name, {range, headers = false} = {}) { const sname = typeof name === "number" ? this.sheetNames[name] - : this.sheetNames.includes(name += "") + : this.sheetNames.includes((name += "")) ? name : null; if (sname == null) throw new Error(`Sheet not found: ${name}`); From 5daef26a2be1563e359729fadb54ff8e46220f5e Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Tue, 14 Sep 2021 10:03:25 -0700 Subject: [PATCH 29/34] Pass options through --- src/xlsx.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index 8ca653b9..b36b15ee 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -8,7 +8,7 @@ export class Workbook { }, }); } - sheet(name, {range, headers = false} = {}) { + sheet(name, options) { const sname = typeof name === "number" ? this.sheetNames[name] @@ -17,11 +17,11 @@ export class Workbook { : null; if (sname == null) throw new Error(`Sheet not found: ${name}`); const sheet = this._.getWorksheet(sname); - return extract(sheet, {range, headers}); + return extract(sheet, options); } } -function extract(sheet, {range, headers}) { +function extract(sheet, {range, headers = false} = {}) { let [[c0, r0], [c1, r1]] = parseRange(range, sheet); const headerRow = headers && sheet._rows[r0++]; let names = new Set(["#"]); From 92c4af1d9c737ac9dae1dad59594f8fb5409d13a Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Tue, 14 Sep 2021 10:34:49 -0700 Subject: [PATCH 30/34] Rename helper functions for clarity, range tests --- src/xlsx.js | 12 ++++++++---- test/xlsx-test.js | 20 +++++++++++++++++--- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index b36b15ee..d9b81137 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -26,7 +26,7 @@ function extract(sheet, {range, headers = false} = {}) { const headerRow = headers && sheet._rows[r0++]; let names = new Set(["#"]); for (let n = c0; n <= c1; n++) { - let name = (headerRow ? valueOf(headerRow._cells[n]) : null) || AA(n); + let name = (headerRow ? valueOf(headerRow._cells[n]) : null) || toColumn(n); while (names.has(name)) name += "_"; names.add(name); } @@ -71,14 +71,16 @@ function parseRange(specifier = ":", {columnCount, rowCount}) { if (!specifier.match(/^[A-Z]*[0-9]*:[A-Z]*[0-9]*$/)) throw new Error("Malformed range specifier"); const [[c0 = 0, r0 = 0], [c1 = columnCount - 1, r1 = rowCount - 1]] = - specifier.split(":").map(NN); + specifier.split(":").map(fromCellReference); return [ [c0, r0], [c1, r1], ]; } -function AA(c) { +// Returns the default column name for a zero-based column index. +// For example: 0 => A, 1 => B, 25 => Z, 26 => AA, 27 => AB. +function toColumn(c) { let sc = ""; c++; do { @@ -87,7 +89,9 @@ function AA(c) { return sc; } -function NN(s) { +// Returns the zero-based indexes from a cell reference. +// For example: "A1" -> [0, 0], "B2" -> [1, 1], "AA10" -> [26, 9]. +function fromCellReference(s) { const [, sc, sr] = s.match(/^([A-Z]*)(\d*)$/i); let c = undefined; if (sc) { diff --git a/test/xlsx-test.js b/test/xlsx-test.js index 7e7ee868..62a36cc7 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -113,6 +113,23 @@ test("FileAttachment.xlsx reads sheets with headers", (t) => { t.end(); }); +test("FileAttachment.xlsx throws on invalid ranges", (t) => { + const workbook = new Workbook(mockWorkbook({Sheet1: []})); + const malformed = new Error("Malformed range specifier"); + + t.throws(() => t.same(workbook.sheet(0, {range: ""})), malformed); + t.throws(() => t.same(workbook.sheet(0, {range: "-:"})), malformed); + t.throws(() => t.same(workbook.sheet(0, {range: " :"})), malformed); + t.throws( + () => t.same(workbook.sheet(0, {range: "a1:"})), + malformed, + "lowercase" + ); + t.throws(() => t.same(workbook.sheet(0, {range: "1A:"})), malformed); + + t.end(); +}); + test("FileAttachment.xlsx reads sheet ranges", (t) => { const workbook = new Workbook( mockWorkbook({ @@ -125,9 +142,6 @@ test("FileAttachment.xlsx reads sheet ranges", (t) => { }) ); - // "" - t.throws(() => t.same(workbook.sheet(0, {range: ""}), entireSheet)); - // undefined // ":" const entireSheet = [ From 6f13d59bbc1b525efb33acb02ba056607afcbec5 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Tue, 14 Sep 2021 10:51:03 -0700 Subject: [PATCH 31/34] Simpler --- src/xlsx.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index d9b81137..33d317b2 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -93,11 +93,9 @@ function toColumn(c) { // For example: "A1" -> [0, 0], "B2" -> [1, 1], "AA10" -> [26, 9]. function fromCellReference(s) { const [, sc, sr] = s.match(/^([A-Z]*)(\d*)$/i); - let c = undefined; - if (sc) { - c = 0; + let c = 0; + if (sc) for (let i = 0; i < sc.length; i++) c += Math.pow(26, sc.length - i - 1) * (sc.charCodeAt(i) - 64); - } return [c ? c - 1 : undefined, sr ? +sr - 1 : undefined]; } From c52b73ff907169b13ead7425dc5a33257b2414ad Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Tue, 14 Sep 2021 10:55:28 -0700 Subject: [PATCH 32/34] Consistent comment format --- src/xlsx.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xlsx.js b/src/xlsx.js index 33d317b2..dd378aeb 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -79,7 +79,7 @@ function parseRange(specifier = ":", {columnCount, rowCount}) { } // Returns the default column name for a zero-based column index. -// For example: 0 => A, 1 => B, 25 => Z, 26 => AA, 27 => AB. +// For example: 0 -> "A", 1 -> "B", 25 -> "Z", 26 -> "AA", 27 -> "AB". function toColumn(c) { let sc = ""; c++; From 5b21a7908baa2acebe4cc466c8e29062c1b6292a Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Tue, 14 Sep 2021 10:58:12 -0700 Subject: [PATCH 33/34] Consistent regexes --- src/xlsx.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index dd378aeb..aa72c107 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -68,7 +68,7 @@ function valueOf(cell) { function parseRange(specifier = ":", {columnCount, rowCount}) { specifier += ""; - if (!specifier.match(/^[A-Z]*[0-9]*:[A-Z]*[0-9]*$/)) + if (!specifier.match(/^[A-Z]*\d*:[A-Z]*\d*$/)) throw new Error("Malformed range specifier"); const [[c0 = 0, r0 = 0], [c1 = columnCount - 1, r1 = rowCount - 1]] = specifier.split(":").map(fromCellReference); @@ -92,7 +92,7 @@ function toColumn(c) { // Returns the zero-based indexes from a cell reference. // For example: "A1" -> [0, 0], "B2" -> [1, 1], "AA10" -> [26, 9]. function fromCellReference(s) { - const [, sc, sr] = s.match(/^([A-Z]*)(\d*)$/i); + const [, sc, sr] = s.match(/^([A-Z]*)(\d*)$/); let c = 0; if (sc) for (let i = 0; i < sc.length; i++) From 0a59d0c57c6d84e7f47efabfdda0319cc261e6d6 Mon Sep 17 00:00:00 2001 From: Visnu Pitiyanuvath Date: Tue, 14 Sep 2021 14:55:52 -0700 Subject: [PATCH 34/34] Fix hyperlinks for certain cases --- src/xlsx.js | 11 ++++-- test/xlsx-test.js | 98 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 77 insertions(+), 32 deletions(-) diff --git a/src/xlsx.js b/src/xlsx.js index aa72c107..86203268 100644 --- a/src/xlsx.js +++ b/src/xlsx.js @@ -57,10 +57,13 @@ function valueOf(cell) { if (value.formula || value.sharedFormula) return value.result && value.result.error ? NaN : value.result; if (value.richText) return value.richText.map((d) => d.text).join(""); - if (value.text) - return value.hyperlink && value.hyperlink !== value.text - ? `${value.hyperlink} ${value.text}` - : value.text; + if (value.text) { + let {text} = value; + if (text.richText) text = text.richText.map((d) => d.text).join(""); + return value.hyperlink && value.hyperlink !== text + ? `${value.hyperlink} ${text}` + : text; + } return value; } return value; diff --git a/test/xlsx-test.js b/test/xlsx-test.js index 62a36cc7..773661a2 100644 --- a/test/xlsx-test.js +++ b/test/xlsx-test.js @@ -54,36 +54,78 @@ test("FileAttachment.xlsx reads sheets", (t) => { }); test("FileAttachment.xlsx reads sheets with different types", (t) => { - const workbook = new Workbook( - mockWorkbook({ - Sheet1: [ - ["one", null, {richText: [{text: "two"}, {text: "three"}]}, undefined], - [ - {text: "plain text"}, - {text: `link&"'?`, hyperlink: 'https://example.com?q="'}, - 2, - {formula: "=B2*5", result: 10}, - {sharedFormula: "=B2*6", result: 12}, - {sharedFormula: "=Z2*6", result: {error: "#REF!"}}, + t.same( + new Workbook( + mockWorkbook({ + Sheet1: [ + [], + [null, undefined], + ["hello", "", "0", "1"], + [1, 1.2], + [true, false], + [new Date(Date.UTC(2020, 0, 1)), {}], ], - [{}, new Date(Date.UTC(2020, 0, 1))], - [], - ], - }) + }) + ).sheet(0), + [ + {}, + {}, + {A: "hello", B: "", C: "0", D: "1"}, + {A: 1, B: 1.2}, + {A: true, B: false}, + {A: new Date(Date.UTC(2020, 0, 1)), B: {}}, + ], + "nullish, strings, numbers, booleans, dates, objects" ); - t.same(workbook.sheet(0), [ - {A: "one", C: "twothree"}, - { - A: "plain text", - B: `https://example.com?q=" link&"'?`, - C: 2, - D: 10, - E: 12, - F: NaN, - }, - {A: {}, B: new Date(Date.UTC(2020, 0, 1))}, - {}, - ]); + t.same( + new Workbook( + mockWorkbook({ + Sheet1: [ + [ + {richText: [{text: "two"}, {text: "three"}]}, // A + {text: "plain text"}, // B + {text: "https://example.com", hyperlink: "https://example.com"}, // C + { + text: {richText: [{text: "https://example.com"}]}, // D + hyperlink: "https://example.com", + }, + {text: `link&"'?`, hyperlink: 'https://example.com?q="'}, // E + { + text: {richText: [{text: "first"}, {text: "second"}]}, // F + hyperlink: "https://example.com", + }, + ], + ], + }) + ).sheet(0), + [ + { + A: "twothree", + B: "plain text", + C: "https://example.com", + D: "https://example.com", + E: `https://example.com?q=" link&"'?`, + F: "https://example.com firstsecond", + }, + ], + "rich text, text, hyperlink text" + ); + t.same( + new Workbook( + mockWorkbook({ + Sheet1: [ + [ + {formula: "=B2*5", result: 10}, + {sharedFormula: "=B2*6", result: 12}, + {sharedFormula: "=Z2*6", result: {error: "#REF!"}}, + ], + ], + }) + ).sheet(0), + [{A: 10, B: 12, C: NaN}], + "formula results, errors" + ); + t.end(); });