Skip to content

Commit c3a9a08

Browse files
authored
feat: add support for JFrog Artifactory and witness provenances produced on GitLab CI (#349)
In this PR (#349): * Introduce the concept of package registries, which are places where artifacts and provenances are potentially published. * Add support for JFrog Artifactory as a package registry. Note that (1) only Artifactory repos following the Maven layout are supported, and (2) artifact & provenance discovery only works for projects built using Gradle. * Add support for Witness provenances, specifically the `witness_provenance_l1_check`. Note that only projects built on GitLab CI are supported. --------- Signed-off-by: Nathan Nguyen <[email protected]>
1 parent dc37e31 commit c3a9a08

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+4218
-1134
lines changed

src/macaron/__main__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from macaron.policy_engine.policy_engine import run_policy_engine, show_prelude
2323
from macaron.slsa_analyzer.analyzer import Analyzer
2424
from macaron.slsa_analyzer.git_service import GIT_SERVICES
25+
from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES
2526

2627
logger: logging.Logger = logging.getLogger(__name__)
2728

@@ -142,6 +143,8 @@ def perform_action(action_args: argparse.Namespace) -> None:
142143
try:
143144
for git_service in GIT_SERVICES:
144145
git_service.load_defaults()
146+
for package_registry in PACKAGE_REGISTRIES:
147+
package_registry.load_defaults()
145148
except ConfigurationError as error:
146149
logger.error(error)
147150
sys.exit(os.EX_USAGE)

src/macaron/config/defaults.ini

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,3 +337,20 @@ provenance_extensions =
337337
max_download_size = 70000000
338338
# This is the timeout (in seconds) to run the SLSA verifier.
339339
timeout = 120
340+
341+
# Witness provenance. See: https://github.com/testifysec/witness.
342+
[provenance.witness]
343+
# The allowed values of the `predicateType` field in the provenance (data type: list).
344+
# For more details, see:
345+
# https://github.com/in-toto/attestation/tree/main/spec/v0.1.0#statement
346+
predicate_types =
347+
https://witness.testifysec.com/attestation-collection/v0.1
348+
artifact_extensions =
349+
jar
350+
351+
# Package registries.
352+
# [package_registry.jfrog.maven]
353+
# In this example, the Maven repo can be accessed at `https://internal.registry.org/repo-name`.
354+
# hostname = internal.registry.org
355+
# repo = repo-name
356+
# download_timeout = 120

src/macaron/slsa_analyzer/analyze_context.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from macaron.slsa_analyzer.slsa_req import ReqName, SLSAReq, get_requirements_dict
2020
from macaron.slsa_analyzer.specs.build_spec import BuildSpec
2121
from macaron.slsa_analyzer.specs.ci_spec import CIInfo
22+
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
2223

2324
logger: logging.Logger = logging.getLogger(__name__)
2425

@@ -38,6 +39,8 @@ class ChecksOutputs(TypedDict):
3839
# class uses inlined functions, which is not supported by Protocol.
3940
expectation: Expectation | None
4041
"""The expectation to verify the provenance for this repository."""
42+
package_registries: list[PackageRegistryInfo]
43+
"""The package registries for this repository."""
4144

4245

4346
class AnalyzeContext:
@@ -82,6 +85,7 @@ def __init__(
8285
git_service=NoneGitService(),
8386
build_spec=BuildSpec(tools=[]),
8487
ci_services=[],
88+
package_registries=[],
8589
is_inferred_prov=True,
8690
expectation=None,
8791
)
@@ -93,12 +97,19 @@ def provenances(self) -> dict:
9397
Returns
9498
-------
9599
dict
100+
A dictionary in which each key is a CI service's name and each value is
101+
the corresponding provenance payload.
96102
"""
97103
try:
98104
ci_services = self.dynamic_data["ci_services"]
99105
result = {}
100106
for ci_info in ci_services:
101-
result[ci_info["service"].name] = ci_info["provenances"]
107+
result[ci_info["service"].name] = [payload.statement for payload in ci_info["provenances"]]
108+
package_registry_entries = self.dynamic_data["package_registries"]
109+
for package_registry_entry in package_registry_entries:
110+
result[package_registry_entry.package_registry.name] = [
111+
provenance.payload.statement for provenance in package_registry_entry.provenances
112+
]
102113
return result
103114
except KeyError:
104115
return {}

src/macaron/slsa_analyzer/analyzer.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,13 @@
4444
from macaron.slsa_analyzer.database_store import store_analyze_context_to_db
4545
from macaron.slsa_analyzer.git_service import GIT_SERVICES, BaseGitService
4646
from macaron.slsa_analyzer.git_service.base_git_service import NoneGitService
47+
from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES
4748
from macaron.slsa_analyzer.provenance.expectations.expectation_registry import ExpectationRegistry
49+
from macaron.slsa_analyzer.provenance.intoto import InTotoV01Payload
4850
from macaron.slsa_analyzer.registry import registry
4951
from macaron.slsa_analyzer.specs.ci_spec import CIInfo
5052
from macaron.slsa_analyzer.specs.inferred_provenance import Provenance
53+
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
5154

5255
logger: logging.Logger = logging.getLogger(__name__)
5356

@@ -808,7 +811,10 @@ def perform_checks(self, analyze_ctx: AnalyzeContext) -> dict[str, CheckResult]:
808811
ci_service.load_defaults()
809812
ci_service.set_api_client()
810813

811-
if ci_service.is_detected(analyze_ctx.component.repository.fs_path):
814+
if ci_service.is_detected(
815+
repo_path=analyze_ctx.component.repository.fs_path,
816+
git_service=analyze_ctx.dynamic_data["git_service"],
817+
):
812818
logger.info("The repo uses %s CI service.", ci_service.name)
813819

814820
# Parse configuration files and generate IRs.
@@ -825,7 +831,20 @@ def perform_checks(self, analyze_ctx: AnalyzeContext) -> dict[str, CheckResult]:
825831
callgraph=callgraph,
826832
provenance_assets=[],
827833
latest_release={},
828-
provenances=[Provenance().payload],
834+
provenances=[InTotoV01Payload(statement=Provenance().payload)],
835+
)
836+
)
837+
838+
# Determine the package registries.
839+
# We match the repo against package registries through build tools.
840+
build_tools = analyze_ctx.dynamic_data["build_spec"]["tools"]
841+
for package_registry in PACKAGE_REGISTRIES:
842+
for build_tool in build_tools:
843+
if package_registry.is_detected(build_tool):
844+
analyze_ctx.dynamic_data["package_registries"].append(
845+
PackageRegistryInfo(
846+
build_tool=build_tool,
847+
package_registry=package_registry,
829848
)
830849
)
831850

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This module defines classes and interfaces related to assets.
5+
6+
Assets are files published from some build.
7+
"""
8+
9+
from typing import Protocol
10+
11+
12+
class AssetLocator(Protocol):
    """Structural interface for objects that describe and fetch a published asset."""

    @property
    def name(self) -> str:
        """Return the file name of the asset."""

    @property
    def url(self) -> str:
        """Return the URL at which the asset can be found."""

    @property
    def size_in_bytes(self) -> int:
        """Return the asset's size, measured in bytes."""

    def download(self, dest: str) -> bool:
        """Fetch the asset and store it locally.

        Parameters
        ----------
        dest : str
            The local path the asset is written to.
            This is the full path of the target file, i.e. it must include the file name.

        Returns
        -------
        bool
            ``True`` when the download succeeds, ``False`` otherwise.
        """

src/macaron/slsa_analyzer/build_tool/gradle.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
import logging
1010
import os
11+
import subprocess # nosec B404
1112

13+
import macaron
1214
from macaron.config.defaults import defaults
1315
from macaron.config.global_config import global_config
1416
from macaron.dependency_analyzer import DependencyAnalyzer, DependencyAnalyzerError, DependencyTools
@@ -135,3 +137,107 @@ def get_dep_analyzer(self, repo_path: str) -> CycloneDxGradle:
135137
)
136138

137139
raise DependencyAnalyzerError(f"Unsupported SBOM generator for Gradle: {tool_name}.")
140+
141+
def get_gradle_exec(self, repo_path: str) -> str:
    """Pick the Gradle launcher to use for a repository.

    Parameters
    ----------
    repo_path: str
        The absolute path to a repository containing Gradle projects.

    Returns
    -------
    str
        The path to the repository's own ``gradlew`` when it is an executable file;
        otherwise the path to the ``gradlew`` under Macaron's ``resources`` directory.
    """
    wrapper_path = os.path.join(repo_path, "gradlew")
    wrapper_is_usable = os.path.isfile(wrapper_path) and os.access(wrapper_path, os.X_OK)

    if not wrapper_is_usable:
        # The repository ships no runnable wrapper, so fall back to Macaron's built-in one.
        return os.path.join(macaron.MACARON_PATH, "resources", "gradlew")

    # Prefer the wrapper that comes with the repository.
    return wrapper_path
161+
162+
def get_group_ids(self, repo_path: str) -> set[str]:
    """Collect the group ids of every Gradle project found in a repository.

    A Gradle project is a directory containing a ``build.gradle`` file.
    According to Gradle's documentation, there is a one-to-one mapping between
    a "project" and a ``build.gradle`` file.
    See: https://docs.gradle.org/current/javadoc/org/gradle/api/Project.html.

    Note: This method assumes that projects nested in a parent project directory
    have the same group id as the parent. This is consistent with the behavior of
    the ``get_build_dirs`` method.

    Parameters
    ----------
    repo_path: str
        The absolute path to a repository containing Gradle projects.

    Returns
    -------
    set[str]
        The set of group ids of all Gradle projects in the repository.
    """
    launcher = self.get_gradle_exec(repo_path)

    # One lookup per project directory reported by ``get_build_dirs``.
    candidates = (
        self.get_group_id(
            gradle_exec=launcher,
            project_path=os.path.join(repo_path, project_relpath),
        )
        for project_relpath in self.get_build_dirs(repo_path)
    )

    # Projects without a (non-empty) group id are left out of the result.
    return {candidate for candidate in candidates if candidate}
197+
198+
def get_group_id(self, gradle_exec: str, project_path: str) -> str | None:
199+
"""Get the group id of a Gradle project.
200+
201+
A Gradle project is a directory containing a ``build.gradle`` file.
202+
According to the Gradle's documentation, there is a one-to-one mapping between
203+
a "project" and a ``build.gradle`` file.
204+
See: https://docs.gradle.org/current/javadoc/org/gradle/api/Project.html.
205+
206+
Parameters
207+
----------
208+
gradle_exec: str
209+
The absolute path to the Gradle executable.
210+
211+
project_path : str
212+
The absolute path to the Gradle project.
213+
214+
Returns
215+
-------
216+
str | None
217+
The group id of the project, if exists.
218+
"""
219+
try:
220+
result = subprocess.run( # nosec B603
221+
[gradle_exec, "properties"],
222+
capture_output=True,
223+
cwd=project_path,
224+
check=False,
225+
)
226+
except (subprocess.CalledProcessError, OSError) as error:
227+
logger.debug("Could not capture the group id of the Gradle project at %s", project_path)
228+
logger.debug("Error: %s", error)
229+
return None
230+
231+
if result.returncode == 0:
232+
lines = result.stdout.decode().split("\n")
233+
for line in lines:
234+
if line.startswith("group: "):
235+
group = line.replace("group: ", "")
236+
# The value of group here can be an empty string.
237+
if group:
238+
return group
239+
break
240+
241+
logger.debug("Could not capture the group id of the repo at %s", project_path)
242+
logger.debug("Stderr:\n%s", result.stderr)
243+
return None

src/macaron/slsa_analyzer/checks/build_as_code_check.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import logging
77
import os
8+
from typing import Any
89

910
from sqlalchemy import ForeignKey
1011
from sqlalchemy.orm import Mapped, mapped_column
@@ -22,6 +23,7 @@
2223
from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI
2324
from macaron.slsa_analyzer.ci_service.jenkins import Jenkins
2425
from macaron.slsa_analyzer.ci_service.travis import Travis
26+
from macaron.slsa_analyzer.provenance.intoto import InTotoV01Payload
2527
from macaron.slsa_analyzer.registry import registry
2628
from macaron.slsa_analyzer.slsa_req import ReqName
2729
from macaron.slsa_analyzer.specs.ci_spec import CIInfo
@@ -202,8 +204,12 @@ def _check_build_tool(
202204
else "However, could not find a passing workflow run.",
203205
]
204206
check_result["justification"].extend(justification)
205-
if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]:
206-
predicate = ci_info["provenances"][0]["predicate"]
207+
if (
208+
ctx.dynamic_data["is_inferred_prov"]
209+
and ci_info["provenances"]
210+
and isinstance(ci_info["provenances"][0], InTotoV01Payload)
211+
):
212+
predicate: Any = ci_info["provenances"][0].statement["predicate"]
207213
predicate["buildType"] = f"Custom {ci_service.name}"
208214
predicate["builder"]["id"] = deploy_action_source_link
209215
predicate["invocation"]["configSource"]["uri"] = (
@@ -261,8 +267,12 @@ def _check_build_tool(
261267
else "However, could not find a passing workflow run.",
262268
]
263269
check_result["justification"].extend(justification_cmd)
264-
if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]:
265-
predicate = ci_info["provenances"][0]["predicate"]
270+
if (
271+
ctx.dynamic_data["is_inferred_prov"]
272+
and ci_info["provenances"]
273+
and isinstance(ci_info["provenances"][0], InTotoV01Payload)
274+
):
275+
predicate = ci_info["provenances"][0].statement["predicate"]
266276
predicate["buildType"] = f"Custom {ci_service.name}"
267277
predicate["builder"]["id"] = bash_source_link
268278
predicate["invocation"]["configSource"]["uri"] = (
@@ -300,8 +310,13 @@ def _check_build_tool(
300310
f"The target repository uses build tool {build_tool.name}"
301311
+ f" in {ci_service.name} using {deploy_kw} to deploy."
302312
)
303-
if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]:
304-
predicate = ci_info["provenances"][0]["predicate"]
313+
314+
if (
315+
ctx.dynamic_data["is_inferred_prov"]
316+
and ci_info["provenances"]
317+
and isinstance(ci_info["provenances"][0], InTotoV01Payload)
318+
):
319+
predicate = ci_info["provenances"][0].statement["predicate"]
305320
predicate["buildType"] = f"Custom {ci_service.name}"
306321
predicate["builder"]["id"] = config_name
307322
predicate["invocation"]["configSource"]["uri"] = (

src/macaron/slsa_analyzer/checks/build_service_check.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import logging
77
import os
8+
from typing import Any
89

910
from sqlalchemy import ForeignKey
1011
from sqlalchemy.orm import Mapped, mapped_column
@@ -20,6 +21,7 @@
2021
from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI
2122
from macaron.slsa_analyzer.ci_service.jenkins import Jenkins
2223
from macaron.slsa_analyzer.ci_service.travis import Travis
24+
from macaron.slsa_analyzer.provenance.intoto import InTotoV01Payload
2325
from macaron.slsa_analyzer.registry import registry
2426
from macaron.slsa_analyzer.slsa_req import ReqName
2527
from macaron.slsa_analyzer.specs.ci_spec import CIInfo
@@ -183,8 +185,12 @@ def _check_build_tool(
183185
)
184186
]
185187

186-
if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]:
187-
predicate = ci_info["provenances"][0]["predicate"]
188+
if (
189+
ctx.dynamic_data["is_inferred_prov"]
190+
and ci_info["provenances"]
191+
and isinstance(ci_info["provenances"][0], InTotoV01Payload)
192+
):
193+
predicate: Any = ci_info["provenances"][0].statement["predicate"]
188194
predicate["buildType"] = f"Custom {ci_service.name}"
189195
predicate["builder"]["id"] = bash_source_link
190196
predicate["invocation"]["configSource"]["uri"] = (
@@ -219,8 +225,12 @@ def _check_build_tool(
219225
)
220226
]
221227

222-
if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]:
223-
predicate = ci_info["provenances"][0]["predicate"]
228+
if (
229+
ctx.dynamic_data["is_inferred_prov"]
230+
and ci_info["provenances"]
231+
and isinstance(ci_info["provenances"][0], InTotoV01Payload)
232+
):
233+
predicate = ci_info["provenances"][0].statement["predicate"]
224234
predicate["buildType"] = f"Custom {ci_service.name}"
225235
predicate["builder"]["id"] = config_name
226236
predicate["invocation"]["configSource"]["uri"] = (

0 commit comments

Comments
 (0)