|
7 | 7 | import tempfile
|
8 | 8 |
|
9 | 9 | from packageurl import PackageURL
|
| 10 | +from pydriller import Git |
10 | 11 |
|
11 | 12 | from macaron.config.defaults import defaults
|
12 | 13 | from macaron.repo_finder.commit_finder import AbstractPurlType, determine_abstract_purl_type
|
| 14 | +from macaron.slsa_analyzer.analyze_context import AnalyzeContext |
13 | 15 | from macaron.slsa_analyzer.checks.provenance_available_check import ProvenanceAvailableException
|
| 16 | +from macaron.slsa_analyzer.ci_service import GitHubActions |
| 17 | +from macaron.slsa_analyzer.ci_service.base_ci_service import NoneCIService |
14 | 18 | from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES, JFrogMavenRegistry, NPMRegistry
|
15 | 19 | from macaron.slsa_analyzer.package_registry.npm_registry import NPMAttestationAsset
|
16 | 20 | from macaron.slsa_analyzer.provenance.intoto import InTotoPayload
|
17 | 21 | from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError
|
18 | 22 | from macaron.slsa_analyzer.provenance.loader import load_provenance_payload
|
| 23 | +from macaron.slsa_analyzer.provenance.slsa import SLSAProvenanceData |
19 | 24 | from macaron.slsa_analyzer.provenance.witness import is_witness_provenance_payload, load_witness_verifier_config
|
| 25 | +from macaron.slsa_analyzer.specs.ci_spec import CIInfo |
20 | 26 |
|
21 | 27 | logger: logging.Logger = logging.getLogger(__name__)
|
22 | 28 |
|
@@ -223,3 +229,152 @@ def find_gav_provenance(purl: PackageURL, jfrog_registry: JFrogMavenRegistry) ->
|
223 | 229 | provenance = provenances[0]
|
224 | 230 |
|
225 | 231 | return provenance
|
| 232 | + |
def find_provenance_from_ci(self, analyze_ctx: AnalyzeContext, git_obj: Git | None) -> InTotoPayload | None:
    """Try to find provenance from CI services of the repository.

    Note that the search ends at the first GitHub Actions CI service entry:
    whatever is (or is not) found there is returned, and later entries are
    not examined.

    This method also stores the matching release, the provenance assets and
    the loaded provenance payloads in the ``CIInfo`` entry they were found for.

    Parameters
    ----------
    analyze_ctx: AnalyzeContext
        The context of the ongoing analysis.
    git_obj: Git | None
        The Pydriller Git object representing the repository, if any.

    Returns
    -------
    InTotoPayload | None
        The provenance payload, or None if not found.
    """
    provenance_extensions = defaults.get_list(
        "slsa.verifier",
        "provenance_extensions",
        fallback=["intoto.jsonl"],
    )
    component = analyze_ctx.component
    ci_info_entries = analyze_ctx.dynamic_data["ci_services"]

    if not component.repository:
        logger.debug("Unable to find a provenance because a repository was not found for %s.", component.purl)
        return None

    repo_full_name = component.repository.full_name
    for ci_info in ci_info_entries:
        ci_service = ci_info["service"]

        if isinstance(ci_service, NoneCIService):
            continue

        if isinstance(ci_service, GitHubActions):
            # Find the release for the software component version being analyzed.
            digest = component.repository.commit_sha
            tag = None
            if git_obj:
                # Use the software component commit to find the tag.
                if not digest:
                    logger.debug("Cannot retrieve asset provenance without commit digest.")
                    return None
                for _tag in git_obj.repo.tags:
                    try:
                        # ``_tag.commit`` is a commit object, not a string; comparing it
                        # directly with the digest string never matches, so compare the
                        # hexadecimal SHA instead.
                        tag_commit = str(_tag.commit)
                    except ValueError as error:
                        # The tag does not resolve to a commit (e.g. it targets a blob or tree).
                        logger.debug("Commit of tag is a blob or tree: %s", error)
                        continue
                    if tag_commit == digest:
                        tag = str(_tag)
                        break

            if not tag:
                logger.debug("Could not find the tag matching commit: %s", digest)
                return None

            # Get the correct release using the tag.
            release_payload = ci_service.api_client.get_release_by_tag(repo_full_name, tag)
            if not release_payload:
                logger.debug("Failed to find release matching tag: %s", tag)
                return None

            # Store the release data for other checks.
            ci_info["release"] = release_payload

            # Get the provenance assets.
            for prov_ext in provenance_extensions:
                provenance_assets = ci_service.api_client.fetch_assets(
                    release_payload,
                    ext=prov_ext,
                )
                if not provenance_assets:
                    continue

                logger.info("Found the following provenance assets:")
                for provenance_asset in provenance_assets:
                    logger.info("* %s", provenance_asset.url)

                # Store the provenance assets for other checks.
                ci_info["provenance_assets"].extend(provenance_assets)

            # Download the provenance assets and load the provenance payloads.
            self.download_provenances_from_github_actions_ci_service(
                ci_info,
            )

            # TODO consider how to handle multiple payloads here.
            return ci_info["provenances"][0].payload if ci_info["provenances"] else None

    return None
| 327 | + |
def download_provenances_from_github_actions_ci_service(self, ci_info: CIInfo) -> None:
    """Download provenances from GitHub Actions.

    Each asset in ``ci_info["provenance_assets"]`` is downloaded into a temporary
    directory and parsed. Assets that are too large, fail to download, or fail to
    load are skipped. The successfully loaded payloads are stored in
    ``ci_info["provenances"]``.

    Parameters
    ----------
    ci_info: CIInfo,
        A ``CIInfo`` instance that holds a GitHub Actions git service object.
    """
    ci_service = ci_info["service"]
    prov_assets = ci_info["provenance_assets"]

    # The size limit does not change between assets, so read it once.
    max_download_size = defaults.getint(
        "slsa.verifier",
        "max_download_size",
        fallback=1000000,
    )

    try:
        with tempfile.TemporaryDirectory() as temp_path:
            downloaded_provs: list[SLSAProvenanceData] = []
            for prov_asset in prov_assets:
                # Check the size before downloading.
                if prov_asset.size_in_bytes > max_download_size:
                    logger.info(
                        "Skip verifying the provenance %s: asset size too large.",
                        prov_asset.name,
                    )
                    continue

                # The asset name is supplied remotely; keep only its final path
                # component so the file cannot be written outside the temporary directory.
                provenance_filepath = os.path.join(temp_path, os.path.basename(prov_asset.name))

                if not ci_service.api_client.download_asset(
                    prov_asset.url,
                    provenance_filepath,
                ):
                    logger.debug(
                        "Could not download the provenance %s. Skip verifying...",
                        prov_asset.name,
                    )
                    continue

                # Read the provenance.
                try:
                    payload = load_provenance_payload(provenance_filepath)
                except LoadIntotoAttestationError as error:
                    logger.error("Error loading provenance: %s", error)
                    continue

                # Add the provenance file.
                downloaded_provs.append(SLSAProvenanceData(payload=payload, asset=prov_asset))

            # Persist the provenance payloads into the CIInfo object.
            ci_info["provenances"] = downloaded_provs
    except OSError as error:
        logger.error("Error while storing provenance in the temporary directory: %s", error)
0 commit comments