From 960ea1695825876fb88b5061fa1083458d162824 Mon Sep 17 00:00:00 2001 From: Bill Maxwell Date: Fri, 23 Feb 2024 16:52:30 -0700 Subject: [PATCH] handle json docs containing an object per line Large datasets in JSON sometimes come in files with an object entry per line. When looking for the schema, the tool could quickly list enough entries to fill the context window. This updates the tool to try and process larger files with the slurp functionality and treat all entries in the file as items in an array. This lets length functions work and allows grabbing a single entry. Signed-off-by: Bill Maxwell --- jq.js | 4 +++- tool.gpt | 18 +++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/jq.js b/jq.js index 15cc948..51b2a0d 100644 --- a/jq.js +++ b/jq.js @@ -7,6 +7,7 @@ function parseArgs() { const argName = val.substring(2); // Check if we are not at the last element and the next element doesn't start with '--' if (index < array.length - 1 && !array[index + 1].startsWith('--')) { + // Directly assign the following value to the argument args[argName] = array[index + 1]; } } @@ -21,7 +22,8 @@ const filter = args.filter || '.'; const jsonPath = args.jsonpath; const options = { input: 'file', - output: 'json' + output: 'json', + slurp: args.optionSlurp === 'true' }; jq.run(filter, jsonPath, options) diff --git a/tool.gpt b/tool.gpt index 6f8ccd4..fe2603c 100644 --- a/tool.gpt +++ b/tool.gpt @@ -8,17 +8,23 @@ You are an expert at using the jq cli and know all there is about the functions you will be given a filename that contains JSON data the requester believes answers the ${task}. Do not assume anything about the file structure until the schema has been determined. +When working with files take into account the possibility of null entries. + get the schema of the file from ${filename}. +if the schema was determined with slurp, then consider that in all future queries plan out the jq queries needed and get the data. 
--- name: schema -tools: execute -args: path: path to the json file +tools: execute,sys.stat +args: jsonPath: file and path to execute jq queries against description: return the schema of the JSON file. -You are given the filepath ${path} to a json file. +You are given the filepath ${jsonPath} to a json file. + +As an expert user of jq find out the schema of the file. +If the file is larger than 250kb, use slurp mode first to get the length. +Check the first two objects in slurp mode to ensure the schema is consistent. -As an expert user of jq find out the schema of the file You can recursively make requests to jq to understand each layer. if you encounter a list, pick the first one or two items and assume that all items follow the same structure. @@ -47,13 +53,15 @@ Then return the paths like: .key2.listOfThings[].thing ... return the list so it can be determined what to query by other tools +If Slurp mode was used also respond that slurp was true. --- name: execute description: execute the jq command line utility to parse data from json files tools: sys.exec args: jsonPath: file and path to execute jq queries against args: filter: the jq filter to pass to the command line +args: optionSlurp: pass false unless you need this, otherwise pass exactly 'true' #!/bin/bash -node jq.js --jsonpath ${jsonPath} --filter "${filter}" \ No newline at end of file +node jq.js --jsonpath ${jsonPath} --filter "${filter}" --optionSlurp ${optionSlurp} \ No newline at end of file