Skip to content

Commit 52dee79

Browse files
ci: Try hard to preserve caches for default branch. (#5)
See doc comment in the Python script for more context.
1 parent 7dbfdf9 commit 52dee79

File tree

2 files changed

+100
-0
lines changed

2 files changed

+100
-0
lines changed

.github/workflows/ci.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ jobs:
1414
- uses: actions/checkout@v3
1515
- name: Install Bazel
1616
run: sudo npm install --location=global @bazel/bazelisk
17+
- name: Manually evict cache entry if applicable
18+
run: ACCESS_TOKEN='${{ secrets.GITHUB_TOKEN }}' python3 .github/workflows/evict.py
1719
- name: Mount bazel cache
1820
uses: actions/cache@v3
1921
with:

.github/workflows/evict.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/usr/bin/env python3
2+
3+
# GitHub's @actions/cache action has the following properties:
4+
# 1. It has a 10 GB limit (the bazel cache for this repo is about 2.4 GB).
5+
# 2. It uses branch-specific caches.
6+
# - Feature branches can access caches from the default branch,
7+
# but not the other way around.
8+
# - Feature branches cannot access caches from each other.
9+
# - Cache eviction operates on an LRU policy.
10+
# https://github.community/t/github-actions-cache-eviction-policy/143754/3
11+
#
12+
# Together, these factors imply that if you make multiple pushes to a feature
13+
# branch, such as when fixing bugs, GitHub will evict the older caches,
14+
# including that for the default branch. The problem with this is that
15+
# if there is a new separate feature branch, or a test-on-merge operation,
16+
# those will get cache misses, causing a 20 min CI time.
17+
#
18+
# The solution to this is to "pin" at least one cache entry for the default branch.
19+
# So if there is only one cache entry for the default branch, we manually evict
20+
# a cache entry from a non-default branch (on an LRU basis). That should create
21+
# enough space for a new cache entry.
22+
23+
import datetime
24+
import requests
25+
import os
26+
27+
# Name of the default branch, as it appears at the end of a cache entry's `ref`.
DEFAULT_BRANCH_NAME = 'scip-ruby/master'

# GitHub REST endpoint for this repository's Actions cache entries.
# A single entry is addressed as '<CACHES_URL>/<id>'.
CACHES_URL = 'https://api.github.com/repos/sourcegraph/scip-ruby/actions/caches'

# 10 GB limit: https://github.com/actions/cache#cache-limits
GITHUB_CACHE_LIMIT_BYTES = 10_000_000_000
33+
34+
def partition(xs, f):
    """Split *xs* into the items that satisfy *f* and the items that do not.

    The input is traversed exactly once and *f* is called exactly once per
    item, so one-shot iterators (generators, ``iter(...)``) and stateful
    predicates are handled correctly.

    Args:
        xs: Any iterable of items.
        f: Predicate called with each item; its truthiness picks the bucket.

    Returns:
        A ``(good, bad)`` tuple of lists, each preserving the input order.
    """
    good, bad = [], []
    for x in xs:
        (good if f(x) else bad).append(x)
    return (good, bad)
38+
39+
def _parse_github_timestamp(raw):
    """Parse an ISO 8601 timestamp string into a timezone-aware datetime.

    ``datetime.fromisoformat`` before Python 3.11 rejects a trailing ``Z``
    and any fractional part that is not exactly 3 or 6 digits, so both are
    normalised before parsing.
    """
    text = raw.strip()
    if text.endswith(('Z', 'z')):
        text = text[:-1] + '+00:00'

    head, dot, tail = text.partition('.')
    if dot:
        # Split the fractional digits from whatever follows (the UTC offset).
        digit_count = len(tail) - len(tail.lstrip('0123456789'))
        digits, suffix = tail[:digit_count], tail[digit_count:]
        text = '{}.{}{}'.format(head, digits[:6].ljust(6, '0'), suffix)

    parsed = datetime.datetime.fromisoformat(text)
    if parsed.tzinfo is None:
        # NOTE(review): naive timestamps are assumed to be UTC so that all
        # values stay mutually comparable -- confirm against the API.
        parsed = parsed.replace(tzinfo=datetime.timezone.utc)
    return parsed


def default_main():
    """Evict one non-default-branch cache entry when the cache is nearly full.

    Reads the access token from the ``ACCESS_TOKEN`` environment variable and
    lists the repository's cache entries via ``CACHES_URL``. Nothing is
    evicted when:

    * the cache is empty,
    * current usage plus one average-sized entry stays under 90% of
      ``GITHUB_CACHE_LIMIT_BYTES``,
    * the default branch already has more than one cache entry, or
    * there is no cache entry belonging to another branch.

    Otherwise the least recently accessed entry of a non-default branch is
    deleted. If the ``DRY_RUN`` environment variable is set to a non-empty
    value, the entry is only printed, not deleted.

    Raises:
        KeyError: if ``ACCESS_TOKEN`` is not set.
        requests.HTTPError: if listing the cache entries fails.
    """
    access_token = os.environ['ACCESS_TOKEN']
    headers = {
        'Accept': 'application/vnd.github.v3+json',
        'Authorization': 'token {}'.format(access_token),
    }
    # Without a timeout a stalled connection would hang the CI job.
    timeout_seconds = 30

    # NOTE(review): only one page is fetched; 100 is assumed to be the
    # largest page size the endpoint allows -- confirm, and paginate if the
    # repository can hold more entries than that.
    response = requests.get(
        CACHES_URL,
        headers=headers,
        params={'per_page': 100},
        timeout=timeout_seconds,
    )
    # Fail loudly on auth/permission errors instead of a confusing KeyError.
    response.raise_for_status()
    entries = response.json()['actions_caches']
    if not entries:
        print('GitHub Actions cache is empty.')
        print('Not manually evicting any entry.')
        return

    sizes = [x['size_in_bytes'] for x in entries]
    used_bytes = sum(sizes)
    avg_size = used_bytes / len(sizes)
    # Estimate whether one more average-sized entry still fits comfortably.
    if avg_size + used_bytes < 0.90 * GITHUB_CACHE_LIMIT_BYTES:
        # Don't evict anything, we'll probably be fine.
        remaining_bytes = GITHUB_CACHE_LIMIT_BYTES - used_bytes
        print('Remaining space in cache {:.2f} GB'.format(remaining_bytes / 1_000_000_000))
        print('Not manually evicting any entry.')
        return

    default_branch_cache_entries, other_branch_cache_entries = partition(
        entries,
        lambda x: x['ref'].endswith(DEFAULT_BRANCH_NAME),
    )
    if len(default_branch_cache_entries) > 1:
        # Even if the cache action decides to evict a cache entry
        # for the default branch, it'll be OK, since we'll at least have
        # one cache entry left. This is assuming that we don't have a ginormous
        # cache entry, but that's OK.
        print('Found multiple cache entries for {}'.format(DEFAULT_BRANCH_NAME))
        print('Not manually evicting any entry.')
        return

    if not other_branch_cache_entries:
        print('Expected 1+ cache entries for non-default branches but found 0.')
        print('Normally, this should be impossible. :thinking_face:')
        return

    # LRU: pick the entry whose last access is the furthest in the past.
    earliest_entry = min(
        other_branch_cache_entries,
        key=lambda x: _parse_github_timestamp(x['last_accessed_at']),
    )

    if os.getenv('DRY_RUN'):
        print('dry run: Will evict:\n{}'.format(earliest_entry))
        return

    print('requesting deletion of cache entry:\n{}'.format(earliest_entry))

    entry_url = '{}/{}'.format(CACHES_URL, earliest_entry['id'])

    # Eviction is best-effort: report the status rather than failing the job.
    res = requests.delete(entry_url, headers=headers, timeout=timeout_seconds)
    print('cache deletion status: {}'.format(res.status_code))
96+
97+
# Run the eviction check only when executed as a script, not on import.
if __name__ == '__main__':
    default_main()

0 commit comments

Comments
 (0)