4
4
"""This module contains methods for extracting repository and commit metadata from provenance files."""
5
5
import logging
6
6
7
- from macaron .errors import JsonError , ProvenanceError
7
+ from macaron .errors import ProvenanceError
8
8
from macaron .json_tools import json_extract
9
9
from macaron .slsa_analyzer .provenance .intoto import InTotoPayload , InTotoV1Payload , InTotoV01Payload
10
10
from macaron .util import JsonType
17
17
# Digest-set keys accepted as holding a Git commit hash when reading SLSA v1 provenance
# (passed to _extract_commit_from_digest_set by the SLSA v1 extractor).
SLSA_V1_DIGEST_SET_GIT_ALGORITHMS = ["sha1" , "gitCommit" ]
18
18
19
19
20
def extract_repo_and_commit_from_provenance(payload: InTotoPayload) -> tuple[str | None, str | None]:
    """Extract the repository and commit metadata from the passed provenance payload.

    The in-toto payload class and the statement's ``predicateType`` together select the
    format-specific extractor that performs the actual lookup.

    Parameters
    ----------
    payload: InTotoPayload
        The provenance payload to extract from.

    Returns
    -------
    tuple[str | None, str | None]
        The repository URL and the commit hash, in that order. Either element is None when the
        selected extractor could not find it in the payload.

    Raises
    ------
    ProvenanceError
        If there is no extractor for the payload's in-toto version and predicate type combination.
    """
    predicate_type = payload.statement.get("predicateType")

    if isinstance(payload, InTotoV1Payload):
        if predicate_type == "https://slsa.dev/provenance/v1":
            return _extract_from_slsa_v1(payload)
    elif isinstance(payload, InTotoV01Payload):
        # Predicate types carried by in-toto v0.1 statements, paired with their extractors.
        # Compared with == (not a dict lookup) so a non-hashable predicate type cannot raise.
        v01_extractors = (
            ("https://slsa.dev/provenance/v0.2", _extract_from_slsa_v02),
            ("https://slsa.dev/provenance/v0.1", _extract_from_slsa_v01),
            ("https://witness.testifysec.com/attestation-collection/v0.1", _extract_from_witness_provenance),
        )
        for supported_type, extractor in v01_extractors:
            if predicate_type == supported_type:
                return extractor(payload)

    # Nothing matched: the combination of payload class and predicate type is not supported.
    unsupported_msg = (
        "Extraction from provenance not supported for versions: "
        f"predicate_type {predicate_type}, in-toto {str(type(payload))}."
    )
    logger.debug(unsupported_msg)
    raise ProvenanceError(unsupported_msg)
56
+
57
+
58
def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str | None, str | None]:
    """Extract the repository and commit metadata from the slsa v01 provenance payload.

    Parameters
    ----------
    payload: InTotoV01Payload
        The in-toto v0.1 payload whose statement holds a SLSA v0.1 predicate.

    Returns
    -------
    tuple[str | None, str | None]
        The repository URL and the commit hash, in that order. Each element is None when it
        cannot be found in the predicate.
    """
    predicate: dict[str, JsonType] | None = payload.statement.get("predicate")
    if not predicate:
        logger.debug("No predicate in payload statement.")
        return None, None

    # The repository URL and commit are stored inside an entry in the list of predicate -> materials.
    # In predicate -> recipe -> definedInMaterial we find the list index that points to the correct entry.
    list_index = json_extract(predicate, ["recipe", "definedInMaterial"], int)
    # Compare against None explicitly: index 0 (the first material) is valid but falsy.
    if list_index is None:
        return None, None

    material_list = json_extract(predicate, ["materials"], list)
    if not material_list:
        return None, None

    # A negative index would silently wrap around to the end of the list, so it is rejected too.
    if not 0 <= list_index < len(material_list):
        logger.debug("Material list index outside of material list bounds.")
        return None, None

    material = material_list[list_index]
    if not material or not isinstance(material, dict):
        logger.debug("Indexed material list entry is invalid.")
        return None, None

    # The repository is optional: a missing URI still allows the commit to be reported.
    uri = json_extract(material, ["uri"], str)
    repo = _clean_spdx(uri) if uri else None

    digest_set = json_extract(material, ["digest"], dict)
    if not digest_set:
        return repo, None
    commit = _extract_commit_from_digest_set(digest_set, SLSA_V01_DIGEST_SET_GIT_ALGORITHMS)

    # An empty commit string means no accepted digest key was present; report it as None.
    return repo, commit or None
95
94
96
95
97
def _extract_from_slsa_v02(payload: InTotoV01Payload) -> tuple[str | None, str | None]:
    """Extract the repository and commit metadata from the slsa v02 provenance payload.

    Parameters
    ----------
    payload: InTotoV01Payload
        The in-toto v0.1 payload whose statement holds a SLSA v0.2 predicate.

    Returns
    -------
    tuple[str | None, str | None]
        The repository URL and the commit hash, in that order. Each element is None when it
        cannot be found in the predicate.
    """
    predicate: dict[str, JsonType] | None = payload.statement.get("predicate")
    if not predicate:
        logger.debug("No predicate in payload statement.")
        return None, None

    # Both values live in the predicate -> invocation -> configSource object.
    # See https://slsa.dev/spec/v0.2/provenance
    config_source = ["invocation", "configSource"]

    source_uri = json_extract(predicate, [*config_source, "uri"], str)
    repo = _clean_spdx(source_uri) if source_uri else None

    digests = json_extract(predicate, [*config_source, "digest"], dict)
    if not digests:
        return repo, None

    # An empty string from the digest lookup is normalised to None for the caller.
    commit = _extract_commit_from_digest_set(digests, SLSA_V02_DIGEST_SET_GIT_ALGORITHMS)
    return repo, commit or None
117
116
118
117
119
def _extract_from_slsa_v1(payload: InTotoV1Payload) -> tuple[str | None, str | None]:
    """Extract the repository and commit metadata from the slsa v1 provenance payload.

    The repository is read from the build definition's external parameters, at a location that
    depends on the build type. The commit is then taken from the resolved dependency whose URI
    matches that repository.

    Parameters
    ----------
    payload: InTotoV1Payload
        The in-toto v1 payload whose statement holds a SLSA v1 predicate.

    Returns
    -------
    tuple[str | None, str | None]
        The repository URL and the commit hash, in that order. Both are None when the repository
        cannot be found; the commit alone is None when no matching dependency carries a Git digest.
    """
    predicate: dict[str, JsonType] | None = payload.statement.get("predicate")
    if not predicate:
        logger.debug("No predicate in payload statement.")
        return None, None

    build_def = json_extract(predicate, ["buildDefinition"], dict)
    if not build_def:
        return None, None

    build_type = json_extract(build_def, ["buildType"], str)
    if not build_type:
        return None, None

    # Extract the repository URL.
    repo = None
    if build_type == "https://slsa-framework.github.io/gcb-buildtypes/triggered-build/v1":
        repo = json_extract(build_def, ["externalParameters", "sourceToBuild", "repository"], str)
        if not repo:
            # Fall back to the alternative location used by this build type.
            repo = json_extract(build_def, ["externalParameters", "configSource", "repository"], str)
    elif build_type == "https://slsa-framework.github.io/github-actions-buildtypes/workflow/v1":
        repo = json_extract(build_def, ["externalParameters", "workflow", "repository"], str)

    if not repo:
        logger.debug("Repo required to extract commit from SLSA v1.")
        return None, None

    # Extract the commit hash.
    commit = None
    deps = json_extract(build_def, ["resolvedDependencies"], list)
    if not deps:
        return repo, None
    for dep in deps:
        if not isinstance(dep, dict):
            continue
        uri = json_extract(dep, ["uri"], str)
        if not uri:
            continue
        # Only the dependency that refers to the repository itself can provide its commit.
        if _clean_spdx(uri) != repo:
            continue
        digest_set = json_extract(dep, ["digest"], dict)
        if not digest_set:
            continue
        found = _extract_commit_from_digest_set(digest_set, SLSA_V1_DIGEST_SET_GIT_ALGORITHMS)
        # Keep a previously found commit when this dependency has no accepted Git digest;
        # an empty result must not overwrite it.
        if found:
            commit = found

    return repo, commit
158
167
159
-
160
- def _extract_from_witness_provenance (payload : InTotoV01Payload ) -> tuple [str , str ]:
168
+ def _extract_from_witness_provenance (payload : InTotoV01Payload ) -> tuple [str | None , str | None ]:
161
169
"""Extract the repository and commit metadata from the witness provenance file found at the passed path.
162
170
163
171
To successfully return the commit and repository URL, the payload must respectively contain a Git attestation, and
@@ -175,11 +183,15 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st
175
183
"""
176
184
predicate : dict [str , JsonType ] | None = payload .statement .get ("predicate" )
177
185
if not predicate :
178
- raise ProvenanceError ("No predicate in payload statement." )
186
+ logger .debug ("No predicate in payload statement." )
187
+ return None , None
179
188
180
189
attestations = json_extract (predicate , ["attestations" ], list )
181
- commit = ""
182
- repo = ""
190
+ if not attestations :
191
+ return None , None
192
+
193
+ repo = None
194
+ commit = None
183
195
for entry in attestations :
184
196
if not isinstance (entry , dict ):
185
197
continue
@@ -193,10 +205,7 @@ def _extract_from_witness_provenance(payload: InTotoV01Payload) -> tuple[str, st
193
205
):
194
206
repo = json_extract (entry , ["attestation" , "projecturl" ], str )
195
207
196
- if not commit or not repo :
197
- raise ProvenanceError ("Could not extract repo and commit from provenance." )
198
-
199
- return repo , commit
208
+ return repo or None , commit or None
200
209
201
210
202
211
def _extract_commit_from_digest_set (digest_set : dict [str , JsonType ], valid_algorithms : list [str ]) -> str :
@@ -212,7 +221,8 @@ def _extract_commit_from_digest_set(digest_set: dict[str, JsonType], valid_algor
212
221
value = digest_set .get (key )
213
222
if isinstance (value , str ):
214
223
return value
215
- raise ProvenanceError (f"No valid digest in digest set: { digest_set .keys ()} not in { valid_algorithms } " )
224
+ logger .debug ("No valid digest in digest set: %s not in %s" , digest_set .keys (), valid_algorithms )
225
+ return ""
216
226
217
227
218
228
def _clean_spdx (uri : str ) -> str :
0 commit comments