Skip to content

Commit 8cc1d82

Browse files
authored
chore: allow independent extraction of repo and commit from provenance (#708)
Signed-off-by: Ben Selwyn-Smith <[email protected]>
1 parent 00cddcf commit 8cc1d82

File tree

7 files changed

+224
-157
lines changed

7 files changed

+224
-157
lines changed

src/macaron/errors.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -64,10 +64,6 @@ class ProvenanceError(MacaronError):
6464
"""When there is an error while extracting from provenance."""
6565

6666

67-
class JsonError(MacaronError):
68-
"""When there is an error while extracting from JSON."""
69-
70-
7167
class InvalidAnalysisTargetError(MacaronError):
7268
"""When a valid Analysis Target cannot be constructed."""
7369

src/macaron/json_tools.py

Lines changed: 11 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2,16 +2,17 @@
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
33

44
"""This module provides utility functions for JSON data."""
5-
5+
import logging
66
from typing import TypeVar
77

8-
from macaron.errors import JsonError
98
from macaron.util import JsonType
109

1110
T = TypeVar("T", bound=JsonType)
1211

12+
logger: logging.Logger = logging.getLogger(__name__)
13+
1314

14-
def json_extract(entry: JsonType, keys: list[str], type_: type[T]) -> T:
15+
def json_extract(entry: JsonType, keys: list[str], type_: type[T]) -> T | None:
1516
"""Return the value found by following the list of depth-sequential keys inside the passed JSON dictionary.
1617
1718
The value must be of the passed type.
@@ -27,24 +28,22 @@ def json_extract(entry: JsonType, keys: list[str], type_: type[T]) -> T:
2728
2829
Returns
2930
-------
30-
T:
31+
T | None:
3132
The found value as the type of the type parameter.
32-
33-
Raises
34-
------
35-
JsonError
36-
Raised if an error occurs while searching for or validating the value.
3733
"""
3834
target = entry
3935

4036
for index, key in enumerate(keys):
4137
if not isinstance(target, dict):
42-
raise JsonError(f"Expect the value .{'.'.join(keys[:index])} to be a dict.")
38+
logger.debug("Expect the value .%s to be a dict.", ".".join(keys[:index]))
39+
return None
4340
if key not in target:
44-
raise JsonError(f"JSON key '{key}' not found in .{'.'.join(keys[:index])}.")
41+
logger.debug("JSON key '%s' not found in .%s", key, ".".join(keys[:index]))
42+
return None
4543
target = target[key]
4644

4745
if isinstance(target, type_):
4846
return target
4947

50-
raise JsonError(f"Expect the value .{'.'.join(keys)} to be of type '{type_}'.")
48+
logger.debug("Expect the value .%s to be of type %s", ".".join(keys), type_)
49+
return None

src/macaron/parsers/actionparser.py

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717

1818
from macaron.config.defaults import defaults
1919
from macaron.config.global_config import global_config
20-
from macaron.errors import JsonError, ParseError
20+
from macaron.errors import ParseError
2121
from macaron.json_tools import json_extract
2222

2323
logger: logging.Logger = logging.getLogger(__name__)
@@ -90,11 +90,7 @@ def get_run_step(step: dict[str, Any]) -> str | None:
9090
str | None
9191
The inlined run script or None if the run step cannot be validated.
9292
"""
93-
try:
94-
return json_extract(step, ["Exec", "Run", "Value"], str)
95-
except JsonError as error:
96-
logger.debug(error)
97-
return None
93+
return json_extract(step, ["Exec", "Run", "Value"], str)
9894

9995

10096
def get_step_input(step: dict[str, Any], key: str) -> str | None:
@@ -115,8 +111,4 @@ def get_step_input(step: dict[str, Any], key: str) -> str | None:
115111
str | None
116112
The input value or None if it doesn't exist or the parsed object validation fails.
117113
"""
118-
try:
119-
return json_extract(step, ["Exec", "Inputs", key, "Value", "Value"], str)
120-
except JsonError as error:
121-
logger.debug(error)
122-
return None
114+
return json_extract(step, ["Exec", "Inputs", key, "Value", "Value"], str)

src/macaron/repo_finder/provenance_extractor.py

Lines changed: 81 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
"""This module contains methods for extracting repository and commit metadata from provenance files."""
55
import logging
66

7-
from macaron.errors import JsonError, ProvenanceError
7+
from macaron.errors import ProvenanceError
88
from macaron.json_tools import json_extract
99
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV1Payload, InTotoV01Payload
1010
from macaron.util import JsonType
@@ -17,7 +17,7 @@
1717
SLSA_V1_DIGEST_SET_GIT_ALGORITHMS = ["sha1", "gitCommit"]
1818

1919

20-
def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str, str]:
20+
def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str | None, str | None]:
2121
"""Extract the repository and commit metadata from the passed provenance payload.
2222
2323
Parameters
@@ -35,129 +35,137 @@ def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str
3535
ProvenanceError
3636
If the extraction process fails for any reason.
3737
"""
38-
repo = ""
39-
commit = ""
4038
predicate_type = payload.statement.get("predicateType")
41-
try:
42-
if isinstance(payload, InTotoV1Payload):
43-
if predicate_type == "https://slsa.dev/provenance/v1":
44-
repo, commit = _extract_from_slsa_v1(payload)
45-
elif isinstance(payload, InTotoV01Payload):
46-
if predicate_type == "https://slsa.dev/provenance/v0.2":
47-
repo, commit = _extract_from_slsa_v02(payload)
48-
if predicate_type == "https://slsa.dev/provenance/v0.1":
49-
repo, commit = _extract_from_slsa_v01(payload)
50-
if predicate_type == "https://witness.testifysec.com/attestation-collection/v0.1":
51-
repo, commit = _extract_from_witness_provenance(payload)
52-
except JsonError as error:
53-
logger.debug(error)
54-
raise ProvenanceError("JSON exception while extracting from provenance.") from error
55-
56-
if not repo or not commit:
57-
msg = (
58-
f"Extraction from provenance not supported for versions: "
59-
f"predicate_type {predicate_type}, in-toto {str(type(payload))}."
60-
)
61-
logger.debug(msg)
62-
raise ProvenanceError(msg)
63-
64-
logger.debug("Extracted repo and commit from provenance: %s, %s", repo, commit)
65-
return repo, commit
66-
67-
68-
def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str, str]:
39+
if isinstance(payload, InTotoV1Payload):
40+
if predicate_type == "https://slsa.dev/provenance/v1":
41+
return _extract_from_slsa_v1(payload)
42+
elif isinstance(payload, InTotoV01Payload):
43+
if predicate_type == "https://slsa.dev/provenance/v0.2":
44+
return _extract_from_slsa_v02(payload)
45+
if predicate_type == "https://slsa.dev/provenance/v0.1":
46+
return _extract_from_slsa_v01(payload)
47+
if predicate_type == "https://witness.testifysec.com/attestation-collection/v0.1":
48+
return _extract_from_witness_provenance(payload)
49+
50+
msg = (
51+
f"Extraction from provenance not supported for versions: "
52+
f"predicate_type {predicate_type}, in-toto {str(type(payload))}."
53+
)
54+
logger.debug(msg)
55+
raise ProvenanceError(msg)
56+
57+
58+
def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str | None, str | None]:
6959
"""Extract the repository and commit metadata from the slsa v01 provenance payload."""
7060
predicate: dict[str, JsonType] | None = payload.statement.get("predicate")
7161
if not predicate:
72-
raise ProvenanceError("No predicate in payload statement.")
62+
return None, None
7363

7464
# The repository URL and commit are stored inside an entry in the list of predicate -> materials.
7565
# In predicate -> recipe -> definedInMaterial we find the list index that points to the correct entry.
7666
list_index = json_extract(predicate, ["recipe", "definedInMaterial"], int)
67+
if not list_index:
68+
return None, None
69+
7770
material_list = json_extract(predicate, ["materials"], list)
71+
if not material_list:
72+
return None, None
73+
7874
if list_index >= len(material_list):
79-
raise ProvenanceError("Material list index outside of material list bounds.")
75+
logger.debug("Material list index outside of material list bounds.")
76+
return None, None
77+
8078
material = material_list[list_index]
8179
if not material or not isinstance(material, dict):
82-
raise ProvenanceError("Indexed material list entry is invalid.")
80+
logger.debug("Indexed material list entry is invalid.")
81+
return None, None
8382

83+
repo = None
8484
uri = json_extract(material, ["uri"], str)
85-
86-
repo = _clean_spdx(uri)
85+
if uri:
86+
repo = _clean_spdx(uri)
8787

8888
digest_set = json_extract(material, ["digest"], dict)
89+
if not digest_set:
90+
return repo, None
8991
commit = _extract_commit_from_digest_set(digest_set, SLSA_V01_DIGEST_SET_GIT_ALGORITHMS)
9092

91-
if not commit:
92-
raise ProvenanceError("Failed to extract commit hash from provenance.")
93-
94-
return repo, commit
93+
return repo, commit or None
9594

9695

97-
def _extract_from_slsa_v02(payload: InTotoV01Payload) -> tuple[str, str]:
96+
def _extract_from_slsa_v02(payload: InTotoV01Payload) -> tuple[str | None, str | None]:
9897
"""Extract the repository and commit metadata from the slsa v02 provenance payload."""
9998
predicate: dict[str, JsonType] | None = payload.statement.get("predicate")
10099
if not predicate:
101-
raise ProvenanceError("No predicate in payload statement.")
100+
logger.debug("No predicate in payload statement.")
101+
return None, None
102102

103103
# The repository URL and commit are stored within the predicate -> invocation -> configSource object.
104104
# See https://slsa.dev/spec/v0.2/provenance
105+
repo = None
105106
uri = json_extract(predicate, ["invocation", "configSource", "uri"], str)
106-
if not uri:
107-
raise ProvenanceError("Failed to extract repository URL from provenance.")
108-
repo = _clean_spdx(uri)
107+
if uri:
108+
repo = _clean_spdx(uri)
109109

110110
digest_set = json_extract(predicate, ["invocation", "configSource", "digest"], dict)
111+
if not digest_set:
112+
return repo, None
111113
commit = _extract_commit_from_digest_set(digest_set, SLSA_V02_DIGEST_SET_GIT_ALGORITHMS)
112114

113-
if not commit:
114-
raise ProvenanceError("Failed to extract commit hash from provenance.")
115-
116-
return repo, commit
115+
return repo, commit or None
117116

118117

119-
def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str, str]:
118+
def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str | None, str | None]:
120119
"""Extract the repository and commit metadata from the slsa v1 provenance payload."""
121120
predicate: dict[str, JsonType] | None = payload.statement.get("predicate")
122121
if not predicate:
123-
raise ProvenanceError("No predicate in payload statement.")
122+
logger.debug("No predicate in payload statement.")
123+
return None, None
124124

125125
build_def = json_extract(predicate, ["buildDefinition"], dict)
126+
if not build_def:
127+
return None, None
128+
126129
build_type = json_extract(build_def, ["buildType"], str)
130+
if not build_type:
131+
return None, None
127132

128133
# Extract the repository URL.
129-
repo = ""
134+
repo = None
130135
if build_type == "https://slsa-framework.github.io/gcb-buildtypes/triggered-build/v1":
131-
try:
132-
repo = json_extract(build_def, ["externalParameters", "sourceToBuild", "repository"], str)
133-
except JsonError:
136+
repo = json_extract(build_def, ["externalParameters", "sourceToBuild", "repository"], str)
137+
if not repo:
134138
repo = json_extract(build_def, ["externalParameters", "configSource", "repository"], str)
135139
if build_type == "https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1":
136140
repo = json_extract(build_def, ["externalParameters", "workflow", "repository"], str)
137141

138142
if not repo:
139-
raise ProvenanceError("Failed to extract repository URL from provenance.")
143+
logger.debug("Repo required to extract commit from SLSA v1.")
144+
return None, None
140145

141146
# Extract the commit hash.
142-
commit = ""
147+
commit = None
143148
deps = json_extract(build_def, ["resolvedDependencies"], list)
149+
if not deps:
150+
return repo, None
144151
for dep in deps:
145152
if not isinstance(dep, dict):
146153
continue
147154
uri = json_extract(dep, ["uri"], str)
155+
if not uri:
156+
continue
148157
url = _clean_spdx(uri)
149158
if url != repo:
150159
continue
151160
digest_set = json_extract(dep, ["digest"], dict)
161+
if not digest_set:
162+
continue
152163
commit = _extract_commit_from_digest_set(digest_set, SLSA_V1_DIGEST_SET_GIT_ALGORITHMS)
153164

154-
if not commit:
155-
raise ProvenanceError("Failed to extract commit hash from provenance.")
165+
return repo, commit or None
156166

157-
return repo, commit
158167

159-
160-
def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, str]:
168+
def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str | None, str | None]:
161169
"""Extract the repository and commit metadata from the witness provenance file found at the passed path.
162170
163171
To successfully return the commit and repository URL, the payload must respectively contain a Git attestation, and
@@ -175,11 +183,15 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st
175183
"""
176184
predicate: dict[str, JsonType] | None = payload.statement.get("predicate")
177185
if not predicate:
178-
raise ProvenanceError("No predicate in payload statement.")
186+
logger.debug("No predicate in payload statement.")
187+
return None, None
179188

180189
attestations = json_extract(predicate, ["attestations"], list)
181-
commit = ""
182-
repo = ""
190+
if not attestations:
191+
return None, None
192+
193+
repo = None
194+
commit = None
183195
for entry in attestations:
184196
if not isinstance(entry, dict):
185197
continue
@@ -193,10 +205,7 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st
193205
):
194206
repo = json_extract(entry, ["attestation", "projecturl"], str)
195207

196-
if not commit or not repo:
197-
raise ProvenanceError("Could not extract repo and commit from provenance.")
198-
199-
return repo, commit
208+
return repo or None, commit or None
200209

201210

202211
def _extract_commit_from_digest_set(digest_set: dict[str, JsonType], valid_algorithms: list[str]) -> str:
@@ -212,7 +221,8 @@ def _extract_commit_from_digest_set(digest_set: dict[str, JsonType], valid_algor
212221
value = digest_set.get(key)
213222
if isinstance(value, str):
214223
return value
215-
raise ProvenanceError(f"No valid digest in digest set: {digest_set.keys()} not in {valid_algorithms}")
224+
logger.debug("No valid digest in digest set: %s not in %s", digest_set.keys(), valid_algorithms)
225+
return ""
216226

217227

218228
def _clean_spdx(uri: str) -> str:

src/macaron/repo_finder/repo_finder_deps_dev.py

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99

1010
from packageurl import PackageURL
1111

12-
from macaron.errors import JsonError
1312
from macaron.repo_finder.provenance_extractor import json_extract
1413
from macaron.repo_finder.repo_finder_base import BaseRepoFinder
1514
from macaron.repo_finder.repo_validator import find_valid_repository_url
@@ -110,11 +109,11 @@ def _create_urls(self, purl: PackageURL) -> list[str]:
110109
return []
111110

112111
versions_keys = ["package", "versions"] if "package" in metadata else ["version"]
113-
try:
114-
versions = json_extract(metadata, versions_keys, list)
115-
latest_version = json_extract(versions[-1], ["versionKey", "version"], str)
116-
except JsonError as error:
117-
logger.debug("Could not extract 'version' from deps.dev response: %s", error)
112+
versions = json_extract(metadata, versions_keys, list)
113+
if not versions:
114+
return []
115+
latest_version = json_extract(versions[-1], ["versionKey", "version"], str)
116+
if not latest_version:
118117
return []
119118

120119
logger.debug("Found latest version: %s", latest_version)
@@ -161,11 +160,10 @@ def _read_json(self, json_data: str) -> list[str]:
161160
logger.debug("Failed to parse response from deps.dev: %s", error)
162161
return []
163162

164-
try:
165-
links_keys = ["version", "links"] if "version" in parsed else ["links"]
166-
links = json_extract(parsed, links_keys, list)
167-
except JsonError as error:
168-
logger.debug("Could not extract 'version' or 'links' from deps.dev response: %s", error)
163+
links_keys = ["version", "links"] if "version" in parsed else ["links"]
164+
links = json_extract(parsed, links_keys, list)
165+
if not links:
166+
logger.debug("Could not extract 'version' or 'links' from deps.dev response.")
169167
return []
170168

171169
result = []

0 commit comments

Comments
 (0)