diff --git a/pkg/monitor/display.go b/pkg/monitor/display.go
index c84e9cdb..041c366f 100644
--- a/pkg/monitor/display.go
+++ b/pkg/monitor/display.go
@@ -288,7 +288,9 @@ func (d *display) Stop(output string, err error) {
 	defer d.callLock.Unlock()
 
 	log.Fields("runID", d.dump.ID, "output", output, "err", err).Debugf("Run stopped")
-	log.Fields("runID", d.dump.ID, "total", d.usage.TotalTokens, "prompt", d.usage.PromptTokens, "completion", d.usage.CompletionTokens).Infof("usage ")
+	if d.usage.TotalTokens > 0 {
+		log.Fields("runID", d.dump.ID, "total", d.usage.TotalTokens, "prompt", d.usage.PromptTokens, "completion", d.usage.CompletionTokens).Infof("usage ")
+	}
 	d.dump.Output = output
 	d.dump.Err = err
 	if d.dumpState != "" {
diff --git a/pkg/openai/client.go b/pkg/openai/client.go
index a04ce2d4..2c72bfa8 100644
--- a/pkg/openai/client.go
+++ b/pkg/openai/client.go
@@ -308,9 +308,6 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
 		Model:     messageRequest.Model,
 		Messages:  msgs,
 		MaxTokens: messageRequest.MaxTokens,
-		StreamOptions: &openai.StreamOptions{
-			IncludeUsage: true,
-		},
 	}
 
 	if messageRequest.Temperature == nil {
@@ -350,6 +347,9 @@ func (c *Client) Call(ctx context.Context, messageRequest types.CompletionReques
 	var cacheResponse bool
 	if c.setSeed {
 		request.Seed = ptr(c.seed(request))
+		request.StreamOptions = &openai.StreamOptions{
+			IncludeUsage: true,
+		}
 	}
 	response, ok, err := c.fromCache(ctx, messageRequest, request)
 	if err != nil {
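For context, a minimal standalone sketch of the guard added in `display.Stop`: the usage line is logged only when the provider actually reported token counts. The `Usage` struct and `logUsage` helper here are simplified stand-ins invented for this sketch, not the real `pkg/monitor` types.

```go
package main

import "fmt"

// Usage is a simplified stand-in for the token-usage fields that
// display.Stop reads from d.usage in the diff above.
type Usage struct {
	TotalTokens      int
	PromptTokens     int
	CompletionTokens int
}

// logUsage mirrors the guarded logging added in the diff: a "usage" line is
// emitted only when token counts were actually reported, so runs that never
// requested stream usage no longer produce an all-zero usage entry.
func logUsage(runID string, u Usage) {
	if u.TotalTokens > 0 {
		fmt.Printf("usage runID=%s total=%d prompt=%d completion=%d\n",
			runID, u.TotalTokens, u.PromptTokens, u.CompletionTokens)
	}
}

func main() {
	logUsage("run-1", Usage{TotalTokens: 42, PromptTokens: 30, CompletionTokens: 12}) // logged
	logUsage("run-2", Usage{})                                                        // skipped: no usage reported
}
```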
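And a sketch of the client-side change: `StreamOptions` moves out of the unconditional request literal and is now set only on the `setSeed` path, alongside the deterministic seed. The types below are simplified stand-ins for the go-openai ones referenced in the diff, and the model name is just an example value.

```go
package main

import "fmt"

// Simplified stand-ins for the github.com/sashabaranov/go-openai types
// referenced in the diff.
type StreamOptions struct {
	IncludeUsage bool
}

type ChatCompletionRequest struct {
	Model         string
	Seed          *int
	StreamOptions *StreamOptions
}

func ptr[T any](v T) *T { return &v }

// buildRequest mirrors the new control flow: IncludeUsage is requested only
// when the request is seeded, rather than on every request as before.
func buildRequest(model string, setSeed bool, seed int) ChatCompletionRequest {
	request := ChatCompletionRequest{Model: model}
	if setSeed {
		request.Seed = ptr(seed)
		request.StreamOptions = &StreamOptions{IncludeUsage: true}
	}
	return request
}

func main() {
	seeded := buildRequest("gpt-4o", true, 7)
	fmt.Println("seeded requests usage:", seeded.StreamOptions != nil) // true

	unseeded := buildRequest("gpt-4o", false, 0)
	fmt.Println("unseeded requests usage:", unseeded.StreamOptions != nil) // false
}
```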