From 960ea1695825876fb88b5061fa1083458d162824 Mon Sep 17 00:00:00 2001
From: Bill Maxwell <cloudnautique@users.noreply.github.com>
Date: Fri, 23 Feb 2024 16:52:30 -0700
Subject: [PATCH] handle json docs containing an object per line

Large datasets in JSON sometimes come in files with
an object entry per line. When looking for the schema, the tool
could quickly list enough entries to fill the context window.

This updates the tool to try to process larger files with
the slurp functionality and treat all entries in the file as items
in an array. This lets length functions work and allows grabbing a
single entry.

Signed-off-by: Bill Maxwell <cloudnautique@users.noreply.github.com>
---
 jq.js    |  4 +++-
 tool.gpt | 18 +++++++++++++-----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/jq.js b/jq.js
index 15cc948..51b2a0d 100644
--- a/jq.js
+++ b/jq.js
@@ -7,6 +7,7 @@ function parseArgs() {
             const argName = val.substring(2);
             // Check if we are not at the last element and the next element doesn't start with '--'
             if (index < array.length - 1 && !array[index + 1].startsWith('--')) {
+                // Directly assign the following value to the argument
                 args[argName] = array[index + 1];
             }
         }
@@ -21,7 +22,8 @@ const filter = args.filter || '.';
 const jsonPath = args.jsonpath;
 const options = {
     input: 'file',
-    output: 'json'
+    output: 'json',
+    slurp: args.optionSlurp === 'true'
 };
 
 jq.run(filter, jsonPath, options)
diff --git a/tool.gpt b/tool.gpt
index 6f8ccd4..fe2603c 100644
--- a/tool.gpt
+++ b/tool.gpt
@@ -8,17 +8,23 @@ You are an expert at using the jq cli and know all there is about the functions
 you will be given a filename that contains JSON data the requester believes answers the ${task}.
 Do not assume anything about the file structure until the schema has been determined.
 
+When working with files take into account the possibility of null entries.
+
 get the schema of the file from ${filename}.
+if the schema was determined with slurp, then consider that in all future queries
 plan out the jq queries needed and get the data.
 ---
 name: schema
-tools: execute
-args: path: path to the json file
+tools: execute,sys.stat
+args: jsonPath: file and path to execute jq queries against
 description: return the schema of the JSON file.
 
-You are given the filepath ${path} to a json file.
+You are given the filepath ${jsonPath} to a json file.
+
+As an expert user of jq find out the schema of the file.
+If the file is larger than 250kb, use slurp mode first to get the length.
+Check the first two objects in slurp mode to ensure the schema is consistent.
 
-As an expert user of jq find out the schema of the file
 
 You can recursively make requests to jq to understand each layer.
 if you encounter a list, pick the first one or two items and assume that all items follow the same structure.
@@ -47,13 +53,15 @@ Then return the paths like:
 .key2.listOfThings[].thing
 ...
 return the list so it can be determined what to query by other tools
+If Slurp mode was used also respond that slurp was true.
 ---
 name: execute
 description: execute the jq command line utility to parse data from json files
 tools: sys.exec
 args: jsonPath: file and path to execute jq queries against
 args: filter: the jq filter to pass to the command line
+args: optionSlurp: pass false unless you need this, otherwise pass exactly 'true'
 
 #!/bin/bash
 
-node jq.js --jsonpath ${jsonPath} --filter "${filter}"
\ No newline at end of file
+node jq.js --jsonpath ${jsonPath} --filter "${filter}" --optionSlurp ${optionSlurp}
\ No newline at end of file