From c0f2ffb6fddcf7d55c4bba7cf7455a20f353beef Mon Sep 17 00:00:00 2001 From: Darren Shepherd Date: Fri, 12 Apr 2024 15:29:44 -0700 Subject: [PATCH] feat: Add parser skip feature If a text node starts with !word, where word is any [-\w]+ word, the entire node is skipped until a line that is exactly --- is found. --- pkg/parser/parser.go | 38 +++++++++++---------- pkg/parser/parser_test.go | 70 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 17 deletions(-) create mode 100644 pkg/parser/parser_test.go diff --git a/pkg/parser/parser.go b/pkg/parser/parser.go index afe5ce78..5ab5295d 100644 --- a/pkg/parser/parser.go +++ b/pkg/parser/parser.go @@ -13,7 +13,9 @@ import ( ) var ( - sepRegex = regexp.MustCompile(`^\s*---+\s*$`) + sepRegex = regexp.MustCompile(`^\s*---+\s*$`) + strictSepRegex = regexp.MustCompile(`^---\n$`) + skipRegex = regexp.MustCompile(`^![-\w]+\s*$`) ) func normalize(key string) string { @@ -160,6 +162,8 @@ type context struct { tool types.Tool instructions []string inBody bool + skipNode bool + seenParam bool } func (c *context) finish(tools *[]types.Tool) { @@ -170,17 +174,6 @@ func (c *context) finish(tools *[]types.Tool) { *c = context{} } -func commentEmbedded(line string) (string, bool) { - for _, i := range []string{"#", "# ", "//", "// "} { - prefix := i + "gptscript:" - cut, ok := strings.CutPrefix(line, prefix) - if ok { - return strings.TrimSpace(cut) + "\n", ok - } - } - return line, false -} - func Parse(input io.Reader) ([]types.Tool, error) { scan := bufio.NewScanner(input) @@ -197,16 +190,21 @@ func Parse(input io.Reader) ([]types.Tool, error) { } line := scan.Text() + "\n" - if embeddedLine, ok := commentEmbedded(line); ok { - // Strip special comments to allow embedding the preamble in python or other interpreted languages - line = embeddedLine - } - if sepRegex.MatchString(line) { + if context.skipNode { + if strictSepRegex.MatchString(line) { + context.finish(&tools) + continue + } + } else if sepRegex.MatchString(line) { 
context.finish(&tools) continue } + if context.skipNode { + continue + } + if !context.inBody { // If the very first line is #! just skip because this is a unix interpreter declaration if strings.HasPrefix(line, "#!") && lineNo == 1 { @@ -218,6 +216,11 @@ func Parse(input io.Reader) ([]types.Tool, error) { continue } + if !context.seenParam && skipRegex.MatchString(line) { + context.skipNode = true + continue + } + // Blank line if strings.TrimSpace(line) == "" { continue @@ -227,6 +230,7 @@ func Parse(input io.Reader) ([]types.Tool, error) { if isParam, err := isParam(line, &context.tool); err != nil { return nil, NewErrLine("", lineNo, err) } else if isParam { + context.seenParam = true continue } } diff --git a/pkg/parser/parser_test.go b/pkg/parser/parser_test.go new file mode 100644 index 00000000..02a61fd3 --- /dev/null +++ b/pkg/parser/parser_test.go @@ -0,0 +1,70 @@ +package parser + +import ( + "strings" + "testing" + + "github.com/gptscript-ai/gptscript/pkg/types" + "github.com/hexops/autogold/v2" + "github.com/stretchr/testify/require" +) + +func TestParse(t *testing.T) { + var input = ` +first +--- +name: second +--- + +!third + +name: third +--- +name: fourth +!forth dont skip +--- +name: fifth + +#!ignore +--- +!skip +name: six + +---- +name: bad + --- +name: bad +-- +name: bad +--- +name: bad +--- +name: seven +` + out, err := Parse(strings.NewReader(input)) + require.NoError(t, err) + autogold.Expect([]types.Tool{ + { + Instructions: "first", + Source: types.ToolSource{LineNo: 1}, + }, + { + Parameters: types.Parameters{Name: "second"}, + Source: types.ToolSource{LineNo: 4}, + }, + { + Parameters: types.Parameters{Name: "fourth"}, + Instructions: "!forth dont skip", + Source: types.ToolSource{LineNo: 11}, + }, + { + Parameters: types.Parameters{Name: "fifth"}, + Instructions: "#!ignore", + Source: types.ToolSource{LineNo: 14}, + }, + { + Parameters: types.Parameters{Name: "seven"}, + Source: types.ToolSource{LineNo: 30}, + }, + }).Equal(t, out) +}