Skip to content

Commit 802d8a8

Browse files
chore: updated how we store accuracy result
Instead of storing multiple documents per accuracy test run (one for each prompt + model response), we now store a single document per accuracy result, with all the prompt + model responses nested under it.
1 parent c04ed9a commit 802d8a8

22 files changed

+644
-583
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
"generate": "./scripts/generate.sh",
3232
"test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --coverage --testPathIgnorePatterns=/tests/accuracy/",
3333
"pre:test:accuracy": "npm run build:compile",
34-
"test:accuracy": "sh ./scripts/run-accuracy-tests.sh"
34+
"test:accuracy": "sh ./scripts/accuracy/run-accuracy-tests.sh"
3535
},
3636
"license": "Apache-2.0",
3737
"devDependencies": {

resources/test-summary-template.html

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -167,28 +167,28 @@
167167
font-family: "Monaco", "Menlo", monospace;
168168
font-size: 12px;
169169
max-height: 400px;
170+
max-width: 1300px;
170171
overflow-y: auto;
171172
}
172-
.accuracy-perfect {
173-
background-color: #d4edda;
174-
color: #155724;
173+
.run-status {
174+
text-transform: capitalize;
175+
}
176+
.chip {
175177
padding: 2px 6px;
176178
border-radius: 3px;
177179
font-weight: bold;
178180
}
179-
.accuracy-good {
181+
.perfect {
182+
background-color: #d4edda;
183+
color: #155724;
184+
}
185+
.good {
180186
background-color: #fff3cd;
181187
color: #856404;
182-
padding: 2px 6px;
183-
border-radius: 3px;
184-
font-weight: bold;
185188
}
186-
.accuracy-poor {
189+
.poor {
187190
background-color: #f8d7da;
188191
color: #721c24;
189-
padding: 2px 6px;
190-
border-radius: 3px;
191-
font-weight: bold;
192192
}
193193
.tool-call {
194194
background: #e9ecef;
@@ -303,13 +303,17 @@ <h1>📊 MongoDB MCP Server - Accuracy Test Summary</h1>
303303
<div class="header-info">
304304
<h2>📊 Current Run Information</h2>
305305
<div class="info-grid">
306+
<div class="info-item">
307+
<div class="info-label">Commit SHA</div>
308+
<div class="info-value">{{commitSHA}}</div>
309+
</div>
306310
<div class="info-item">
307311
<div class="info-label">Accuracy Run ID</div>
308312
<div class="info-value">{{accuracyRunId}}</div>
309313
</div>
310314
<div class="info-item">
311-
<div class="info-label">Commit SHA</div>
312-
<div class="info-value">{{commitSHA}}</div>
315+
<div class="info-label">Accuracy Run Status</div>
316+
<div class="info-value">{{accuracyRunStatus}}</div>
313317
</div>
314318
<div class="info-item">
315319
<div class="info-label">Run Created On</div>
@@ -347,13 +351,17 @@ <h2>📈 Test Results Summary</h2>
347351
<div class="header-info">
348352
<h2>🔄 Baseline Comparison</h2>
349353
<div class="info-grid">
354+
<div class="info-item">
355+
<div class="info-label">Baseline Commit SHA</div>
356+
<div class="info-value">{{baselineCommitSHA}}</div>
357+
</div>
350358
<div class="info-item">
351359
<div class="info-label">Baseline Accuracy Run ID</div>
352360
<div class="info-value">{{baselineAccuracyRunId}}</div>
353361
</div>
354362
<div class="info-item">
355-
<div class="info-label">Baseline Commit SHA</div>
356-
<div class="info-value">{{baselineCommitSHA}}</div>
363+
<div class="info-label">Baseline Accuracy Run Status</div>
364+
<div class="info-value">{{baselineAccuracyRunStatus}}</div>
357365
</div>
358366
<div class="info-item">
359367
<div class="info-label">Baseline Run Created On</div>

scripts/run-accuracy-tests.sh renamed to scripts/accuracy/run-accuracy-tests.sh

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,39 +8,38 @@ export MDB_ACCURACY_RUN_ID=$(npx uuid v4)
88
# export MDB_AZURE_OPEN_AI_API_KEY=""
99
# export MDB_AZURE_OPEN_AI_API_URL=""
1010

11-
# For providing a mongodb based storage to store accuracy snapshots
11+
# For providing a mongodb based storage to store accuracy result
1212
# export MDB_ACCURACY_MDB_URL=""
1313

1414
# By default we run all the tests under tests/accuracy folder unless a path is
1515
# specified in the command line. Such as:
1616
# npm run test:accuracy -- tests/accuracy/some-test.test.ts
1717
TEST_PATH_PATTERN="${1:-tests/accuracy}"
1818
shift || true
19+
echo "Running accuracy tests with MDB_ACCURACY_RUN_ID '$MDB_ACCURACY_RUN_ID' and TEST_PATH_PATTERN '$TEST_PATH_PATTERN'"
1920
node --experimental-vm-modules node_modules/jest/bin/jest.js --bail --testPathPatterns "$TEST_PATH_PATTERN" "$@"
2021

2122
# Preserving the exit code from test run to correctly notify in the CI
2223
# environments when the tests fail.
2324
JEST_EXIT_CODE=$?
2425

25-
# Each test run submits an accuracy snapshot entry with the accuracyRunStatus:
26+
# Each test run submits an accuracy result with the accuracyRunStatus:
2627
# "in-progress". When all the tests are done and jest exits with an exit code of
2728
# 0, we can safely mark accuracy run as finished otherwise failed.
2829

2930
# This "outside-the-tests-status-update" is arising out of the fact that each
3031
# test suite stores their own accuracy run data in the storage and this setup
3132
# might lead to data inconsistency when the tests fail. To overcome that each
32-
# accuracy snapshot entry has a status which by default is "in-progress" and is
33+
# accuracy result entry has a status which by default is "in-progress" and is
3334
# updated when the tests either pass (all our accuracy tests are supposed to
3435
# pass unless some error occurs during the test runs), or fail.
3536

3637
# This is necessary when comparing one accuracy run with another as we wouldn't
3738
# want to compare against an incomplete run.
38-
if [ $JEST_EXIT_CODE -eq 0 ]; then
39-
MDB_ACCURACY_RUN_STATUS="done" npx tsx scripts/update-accuracy-run-status.ts || echo "Warning: Failed to update accuracy run status to 'done'"
40-
npx tsx scripts/generate-test-summary.ts || echo "Warning: Failed to generate test summary HTML report"
41-
else
42-
MDB_ACCURACY_RUN_STATUS="failed" npx tsx scripts/update-accuracy-run-status.ts || echo "Warning: Failed to update accuracy run status to 'failed'"
43-
fi
39+
export MDB_ACCURACY_RUN_STATUS=$([ $JEST_EXIT_CODE -eq 0 ] && echo "done" || echo "failed")
40+
npx tsx scripts/accuracy/update-accuracy-run-status.ts || echo "Warning: Failed to update accuracy run status to '$MDB_ACCURACY_RUN_STATUS'"
4441

42+
# This is optional, but we do it anyway to generate a readable summary report.
43+
npx tsx scripts/generate-test-summary.ts || echo "Warning: Failed to generate test summary HTML report"
4544

4645
exit $JEST_EXIT_CODE
Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,21 @@
1-
import { getAccuracySnapshotStorage } from "../tests/accuracy/sdk/accuracy-snapshot-storage/get-snapshot-storage.js";
2-
import { AccuracyRunStatus } from "../tests/accuracy/sdk/accuracy-snapshot-storage/snapshot-storage.js";
1+
import { getAccuracyResultStorage } from "../../tests/accuracy/sdk/accuracy-result-storage/get-accuracy-result-storage.js";
2+
import { AccuracyRunStatus } from "../../tests/accuracy/sdk/accuracy-result-storage/result-storage.js";
3+
import { getCommitSHA } from "../../tests/accuracy/sdk/git-info.js";
34

45
const envAccuracyRunId = process.env.MDB_ACCURACY_RUN_ID;
56
const envAccuracyRunStatus = process.env.MDB_ACCURACY_RUN_STATUS;
7+
const commitSHA = await getCommitSHA();
68

79
if (
810
!envAccuracyRunId ||
11+
!commitSHA ||
912
(envAccuracyRunStatus !== AccuracyRunStatus.Done && envAccuracyRunStatus !== AccuracyRunStatus.Failed)
1013
) {
1114
process.exit(1);
1215
}
1316

1417
console.time(`Marked accuracy run id - ${envAccuracyRunId} as ${envAccuracyRunStatus} in`);
15-
const storage = await getAccuracySnapshotStorage();
16-
await storage.updateAccuracyRunStatus(envAccuracyRunId, envAccuracyRunStatus);
18+
const storage = getAccuracyResultStorage();
19+
await storage.updateRunStatus(commitSHA, envAccuracyRunId, envAccuracyRunStatus);
1720
await storage.close();
1821
console.timeEnd(`Marked accuracy run id - ${envAccuracyRunId} as ${envAccuracyRunStatus} in`);

0 commit comments

Comments
 (0)