Skip to content

Commit c92c5fa

Browse files
committed
chore: refactor provenance related checks
Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent 6e025b2 commit c92c5fa

20 files changed

+341
-781
lines changed

src/macaron/repo_finder/provenance_finder.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,22 @@
77
import tempfile
88

99
from packageurl import PackageURL
10+
from pydriller import Git
1011

1112
from macaron.config.defaults import defaults
1213
from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type
14+
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
1315
from macaron.slsa_analyzer.checks.provenance_available_check import ProvenanceAvailableException
16+
from macaron.slsa_analyzer.ci_service import GitHubActions
17+
from macaron.slsa_analyzer.ci_service.base_ci_service import NoneCIService
1418
from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES, JFrogMavenRegistry, NPMRegistry
1519
from macaron.slsa_analyzer.package_registry.npm_registry import NPMAttestationAsset
1620
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload
1721
from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError
1822
from macaron.slsa_analyzer.provenance.loader import load_provenance_payload
23+
from macaron.slsa_analyzer.provenance.slsa import SLSAProvenanceData
1924
from macaron.slsa_analyzer.provenance.witness import is_witness_provenance_payload, load_witness_verifier_config
25+
from macaron.slsa_analyzer.specs.ci_spec import CIInfo
2026

2127
logger: logging.Logger = logging.getLogger(__name__)
2228

@@ -223,3 +229,152 @@ def find_gav_provenance(purl: PackageURL, jfrog_registry: JFrogMavenRegistry) ->
223229
provenance = provenances[0]
224230

225231
return provenance
232+
233+
def find_provenance_from_ci(self, analyze_ctx: AnalyzeContext, git_obj: Git | None) -> InTotoPayload | None:
    """Try to find provenance from CI services of the repository.

    Note that we stop going through the CI services once we encounter a CI service
    that does host provenance assets.

    This method also loads the provenance payloads into the ``CIInfo`` object where
    the provenance assets are found.

    Parameters
    ----------
    analyze_ctx: AnalyzeContext
        The context of the ongoing analysis.
    git_obj: Git | None
        The Pydriller Git object representing the repository, if any.

    Returns
    -------
    InTotoPayload | None
        The provenance payload, or None if not found.
    """
    provenance_extensions = defaults.get_list(
        "slsa.verifier",
        "provenance_extensions",
        fallback=["intoto.jsonl"],
    )
    component = analyze_ctx.component
    ci_info_entries = analyze_ctx.dynamic_data["ci_services"]

    if not component.repository:
        logger.debug("Unable to find a provenance because a repository was not found for %s.", component.purl)
        return None

    repo_full_name = component.repository.full_name
    for ci_info in ci_info_entries:
        ci_service = ci_info["service"]

        if isinstance(ci_service, NoneCIService):
            continue

        if isinstance(ci_service, GitHubActions):
            # Find the release for the software component version being analyzed.

            digest = component.repository.commit_sha
            tag = None
            if git_obj:
                # Use the software component commit to find the tag.
                if not digest:
                    logger.debug("Cannot retrieve asset provenance without commit digest.")
                    return None
                for _tag in git_obj.repo.tags:
                    try:
                        # Compare hex digests: a GitPython commit object never compares
                        # equal to a plain string, so the SHA must be extracted first.
                        tag_commit = _tag.commit.hexsha
                    except ValueError as error:
                        # Tags that point to a tree or blob have no commit to compare.
                        logger.debug("Commit of tag is a blob or tree: %s", error)
                        continue
                    if tag_commit == digest:
                        tag = str(_tag)
                        break

            # Without a Git object no tag can be resolved, so this also covers that case.
            if not tag:
                logger.debug("Could not find the tag matching commit: %s", digest)
                return None

            # Get the correct release using the tag.
            release_payload = ci_service.api_client.get_release_by_tag(repo_full_name, tag)
            if not release_payload:
                logger.debug("Failed to find release matching tag: %s", tag)
                return None

            # Store the release data for other checks.
            ci_info["release"] = release_payload

            # Get the provenance assets.
            for prov_ext in provenance_extensions:
                provenance_assets = ci_service.api_client.fetch_assets(
                    release_payload,
                    ext=prov_ext,
                )
                if not provenance_assets:
                    continue

                logger.info("Found the following provenance assets:")
                for provenance_asset in provenance_assets:
                    logger.info("* %s", provenance_asset.url)

                # Store the provenance assets for other checks.
                ci_info["provenance_assets"].extend(provenance_assets)

                # Download the provenance assets and load the provenance payloads.
                self.download_provenances_from_github_actions_ci_service(
                    ci_info,
                )

                # Only the first extension that yields assets is considered.
                # TODO consider how to handle multiple payloads here.
                return ci_info["provenances"][0].payload if ci_info["provenances"] else None

    return None
327+
328+
def download_provenances_from_github_actions_ci_service(self, ci_info: CIInfo) -> None:
    """Download provenances from GitHub Actions.

    Each asset listed in ``ci_info["provenance_assets"]`` is downloaded into a temporary
    directory and loaded as a provenance payload. The successfully loaded payloads
    replace ``ci_info["provenances"]``. Assets that are too large, fail to download, or
    fail to load are skipped.

    Parameters
    ----------
    ci_info: CIInfo
        A ``CIInfo`` instance that holds a GitHub Actions CI service object.
    """
    ci_service = ci_info["service"]
    prov_assets = ci_info["provenance_assets"]

    # The size limit is the same for every asset, so read the configuration once.
    max_download_size = defaults.getint(
        "slsa.verifier",
        "max_download_size",
        fallback=1000000,
    )

    try:
        with tempfile.TemporaryDirectory() as temp_path:
            downloaded_provs = []
            for prov_asset in prov_assets:
                # Check the size before downloading.
                if prov_asset.size_in_bytes > max_download_size:
                    logger.info(
                        "Skip verifying the provenance %s: asset size too large.",
                        prov_asset.name,
                    )
                    continue

                # NOTE(review): the asset name comes from the release metadata; confirm it
                # cannot contain path separators that would escape the temporary directory.
                provenance_filepath = os.path.join(temp_path, prov_asset.name)

                if not ci_service.api_client.download_asset(
                    prov_asset.url,
                    provenance_filepath,
                ):
                    logger.debug(
                        "Could not download the provenance %s. Skip verifying...",
                        prov_asset.name,
                    )
                    continue

                # Read the provenance while the temporary file still exists.
                try:
                    payload = load_provenance_payload(provenance_filepath)
                except LoadIntotoAttestationError as error:
                    logger.error("Error loading provenance: %s", error)
                    continue

                # Add the provenance file.
                downloaded_provs.append(SLSAProvenanceData(payload=payload, asset=prov_asset))

            # Persist the provenance payloads into the CIInfo object.
            ci_info["provenances"] = downloaded_provs
    except OSError as error:
        logger.error("Error while storing provenance in the temporary directory: %s", error)

src/macaron/slsa_analyzer/analyze_context.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def provenances(self) -> dict[str, list[InTotoV01Statement | InTotoV1Statement]]
154154
result: dict[str, list[InTotoV01Statement | InTotoV1Statement]] = defaultdict(list)
155155
for ci_info in ci_services:
156156
result[ci_info["service"].name].extend(
157-
prov_asset.payload.statement for prov_asset in ci_info["provenances"]
157+
provenance.payload.statement for provenance in ci_info["provenances"]
158158
)
159159
package_registry_entries = self.dynamic_data["package_registries"]
160160
for package_registry_entry in package_registry_entries:

0 commit comments

Comments
 (0)