Skip to content

Commit eb38298

Browse files
authored
Merge pull request #13745 from hugueskamba/hk_evaluate_code_fix
Scancode: Fix false positive reported by scancode output analyser script
2 parents 2d01a44 + 4ce6c8a commit eb38298

File tree

7 files changed

+1347
-86
lines changed

7 files changed

+1347
-86
lines changed

.travis.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,18 +70,18 @@ matrix:
7070
| ( grep -v '^tools/test/toolchains/api_test.py' || true ) \
7171
| while read file; do cp --parents "${file}" SCANCODE; done
7272
- scancode -l --json-pp scancode.json SCANCODE
73-
- python ./tools/test/travis-ci/scancode-evaluate.py -f scancode.json || true
73+
- python ./tools/test/travis-ci/scancode-evaluate.py scancode.json || true
7474
# run the same but for new files. All new files must have SPDX
7575
- >-
7676
git diff --name-only --diff-filter=A FETCH_HEAD..HEAD \
7777
| ( grep '.\(c\|cpp\|h\|hpp\|py\)$' || true ) \
7878
| ( grep -v '^tools/test/toolchains/api_test.py' || true ) \
7979
| while read file; do cp --parents "${file}" SCANCODE_NEW_FILES; done
8080
- scancode -l --json-pp scancode_new_files.json SCANCODE_NEW_FILES
81-
- python ./tools/test/travis-ci/scancode-evaluate.py -f scancode_new_files.json || true
81+
- python ./tools/test/travis-ci/scancode-evaluate.py scancode_new_files.json || true
8282
- cat scancode-evaluate.log
8383
- COUNT=$(cat scancode-evaluate.log | grep 'File:' | wc -l) || true
84-
- python ./tools/test/travis-ci/scancode-evaluate.py -f scancode_new_files.json
84+
- python ./tools/test/travis-ci/scancode-evaluate.py scancode_new_files.json
8585
- cat scancode-evaluate.log
8686
- COUNT_NEW_FILES=$(cat scancode-evaluate.log | grep 'File:' | wc -l) || true
8787
- |

tools/test/travis-ci/scancode-evaluate.py

Lines changed: 114 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -16,118 +16,149 @@
1616
limitations
1717
"""
1818

19-
# Asumptions for this script:
20-
# 1. directory_name is scanned directory.
21-
# Files are copied to this directory with full tree. As result, if we find
22-
# license offender, we can have full path (just scrape directory_name). We do this
23-
# magic because scancode allows to scan directories/one file.
24-
# 2. SPDX and license text is a must for all code files
25-
26-
import json
2719
import argparse
28-
import sys
29-
import os.path
20+
import json
3021
import logging
22+
import os.path
3123
import re
32-
33-
userlog = logging.getLogger("scancode-evaluate")
34-
userlog.setLevel(logging.INFO)
35-
logfile = os.path.join(os.getcwd(), 'scancode-evaluate.log')
36-
log_file_handler = logging.FileHandler(logfile, mode='w')
37-
userlog.addHandler(log_file_handler)
24+
import sys
25+
from enum import Enum
3826

3927
MISSING_LICENSE_TEXT = "Missing license header"
40-
MISSING_PERMISIVE_LICENSE_TEXT = "Non-permissive license"
28+
MISSING_PERMISSIVE_LICENSE_TEXT = "Non-permissive license"
4129
MISSING_SPDX_TEXT = "Missing SPDX license identifier"
4230

43-
def license_check(directory_name, file):
44-
""" Check licenses in the scancode json file for specified directory
31+
userlog = logging.getLogger("scancode-evaluate")
32+
33+
class ReturnCode(Enum):
    """Process exit statuses used by the license evaluation script."""

    # No license issue was reported.
    SUCCESS = 0
    # The scancode output was missing/unusable or license issues were found.
    ERROR = -1
38+
39+
40+
def init_logger():
    """Attach a file handler to the module logger and enable INFO records.

    The log file `scancode-evaluate.log` is opened in the current working
    directory with mode 'w', so any previous content is discarded.
    """
    userlog.setLevel(logging.INFO)
    log_file_path = os.path.join(os.getcwd(), 'scancode-evaluate.log')
    log_file_handler = logging.FileHandler(log_file_path, mode='w')
    userlog.addHandler(log_file_handler)
48+
49+
50+
def path_leaf(path):
    """Return the last component of a path.

    When the path ends with a separator the last component is empty, so the
    name of the directory preceding the separator is returned instead.
    """
    parent, leaf = os.path.split(path)
    if leaf:
        return leaf
    # Trailing separator: the wanted name is the last component of the parent.
    return os.path.basename(parent)
55+
56+
57+
def has_permissive_text_in_scancode_output(scancode_output_data_file_licenses):
    """Return True if at least one license in the scancode output is permissive.

    Args:
        scancode_output_data_file_licenses: iterable of license entries, each
            providing a 'category' key.
    """
    for license_entry in scancode_output_data_file_licenses:
        if license_entry['category'] == 'Permissive':
            return True
    return False
63+
64+
65+
def has_spdx_text_in_scancode_output(scancode_output_data_file_licenses):
    """Return True if at least one license in the scancode output has the spdx identifier.

    Args:
        scancode_output_data_file_licenses: iterable of license entries, each
            providing a 'matched_rule' mapping with an 'identifier' value.
    """
    for license_entry in scancode_output_data_file_licenses:
        rule_identifier = license_entry['matched_rule']['identifier']
        if 'spdx' in rule_identifier:
            return True
    return False
71+
72+
73+
def has_spdx_text_in_analysed_file(scanned_file_content):
    """Return True if the text of the file analysed by ScanCode contains an SPDX identifier.

    Args:
        scanned_file_content: full text of the scanned file.
    """
    spdx_match = re.search("SPDX-License-Identifier:?", scanned_file_content)
    return spdx_match is not None
76+
77+
78+
def license_check(scancode_output_path):
    """Check licenses in the scancode json file for specified directory.

    This function does not verify if file exists, should be done prior to the call.

    Every entry of type `file` is checked for: at least one detected license,
    at least one `Permissive` license, and an SPDX identifier (either matched
    by ScanCode or present in the text of the scanned file). Each failed check
    counts as one issue, so a single file can be reported more than once.

    Args:
        scancode_output_path: path to the scancode json output file (output from scancode --license --json-pp)

    Returns:
        0 if nothing found
        >0 - count how many license issues found
        ReturnCode.ERROR.value if the scancode output is not valid JSON or has no `files` attribute
    """
    try:
        with open(scancode_output_path, 'r') as read_file:
            scancode_output_data = json.load(read_file)
    except json.JSONDecodeError as jex:
        userlog.warning("JSON could not be decoded, Invalid JSON in body: %s", jex)
        return ReturnCode.ERROR.value

    if 'files' not in scancode_output_data:
        userlog.warning("Missing `files` attribute in %s", scancode_output_path)
        return ReturnCode.ERROR.value

    # One independent (path, reason) record per issue. Storing the scancode
    # entry itself and tagging it with the reason would make every record of a
    # file share (and overwrite) a single reason when the file fails several
    # checks.
    offenders = []

    for scancode_output_data_file in scancode_output_data['files']:
        if scancode_output_data_file['type'] != 'file':
            continue

        scanned_path = scancode_output_data_file['path']
        licenses = scancode_output_data_file['licenses']

        if not licenses:
            offenders.append((scanned_path, MISSING_LICENSE_TEXT))
            # check the next file in the scancode output
            continue

        if not has_permissive_text_in_scancode_output(licenses):
            offenders.append((scanned_path, MISSING_PERMISSIVE_LICENSE_TEXT))

        if not has_spdx_text_in_scancode_output(licenses):
            # Scancode does not recognize license notice in Python file headers.
            # Issue: https://github.com/nexB/scancode-toolkit/issues/1913
            # Therefore check if the file tested by ScanCode actually has a licence notice.
            file_path = os.path.abspath(scanned_path)
            try:
                with open(file_path, 'r') as read_file:
                    scanned_file_content = read_file.read()
            except UnicodeDecodeError:
                userlog.warning("Unable to look for SPDX text in `{}`:".format(file_path))
                # Ignore files that cannot be decoded
                # check the next file in the scancode output
                continue

            if not has_spdx_text_in_analysed_file(scanned_file_content):
                offenders.append((scanned_path, MISSING_SPDX_TEXT))

    if offenders:
        userlog.warning("Found files with missing license details, please review and fix")
        for offender_path, fail_reason in offenders:
            userlog.warning("File: %s reason: %s", path_leaf(offender_path), fail_reason)
    return len(offenders)
112141

142+
113143
def parse_args():
    """Parse command line arguments.

    Returns:
        The argparse namespace; its `scancode_output_path` attribute holds the
        single positional argument.
    """
    arg_parser = argparse.ArgumentParser(description="License check.")
    arg_parser.add_argument('scancode_output_path', help="scancode-toolkit output json file")
    parsed_args = arg_parser.parse_args()
    return parsed_args
121151

122-
if __name__ == "__main__":
123152

153+
if __name__ == "__main__":
    # Script entry point: exit with SUCCESS only when the scancode output
    # exists and license_check() reports zero issues.
    init_logger()
    args = parse_args()
    if not os.path.isfile(args.scancode_output_path):
        userlog.warning("Could not find the scancode json file")
        sys.exit(ReturnCode.ERROR.value)

    issue_count = license_check(args.scancode_output_path)
    if issue_count == 0:
        exit_status = ReturnCode.SUCCESS
    else:
        # Covers both a positive issue count and the ERROR value itself.
        exit_status = ReturnCode.ERROR
    sys.exit(exit_status.value)
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#!/usr/bin/env python
2+
# Copyright (c) 2020 Arm Limited and Contributors. All rights reserved.
3+
#
4+
# SPDX-License-Identifier: Apache-2.0
5+
import importlib
6+
import os
7+
import pytest
8+
9+
license_check = importlib.import_module("scancode-evaluate").license_check
10+
11+
STUBS_PATH = os.path.join(
12+
os.path.abspath(os.path.join(os.path.dirname(__file__))), "scancode_test"
13+
)
14+
15+
HEADER_WITHOUT_SPDX = "/* Copyright (C) Arm Limited, Inc - All Rights Reserved\
16+
* Unauthorized copying of this. file, via any medium is strictly prohibited\
17+
* Proprietary and confidential\
18+
*/"
19+
20+
HEADER_WITH_SPDX = "/* mbed Microcontroller Library\
21+
* Copyright (c) 2006-2013 ARM Limited\
22+
*\
23+
* SPDX-License-Identifier: Apache-2.0\
24+
* Licensed under the Apache License, Version 2.0 (the \"License\");\
25+
* you may not use this file except in compliance with the License.\
26+
* You may obtain a copy of the License at\
27+
*\
28+
* http://www.apache.org/licenses/LICENSE-2.0\
29+
*\
30+
* Unless required by applicable law or agreed to in writing, software\
31+
* distributed under the License is distributed on an \"AS IS\" BASIS,\
32+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\
33+
* See the License for the specific language governing permissions and\
34+
* limitations under the License.\
35+
*/"
36+
37+
@pytest.fixture()
def create_scanned_files():
    """Write the stub source files before a test and delete them afterwards.

    test3.h missing license notice
    test4.h with license notice
    test5.h with license notice
    """
    stub_files = [
        (os.path.join(STUBS_PATH, "test3.h"), HEADER_WITHOUT_SPDX),
        (os.path.join(STUBS_PATH, "test4.h"), HEADER_WITH_SPDX),
        (os.path.join(STUBS_PATH, "test5.h"), HEADER_WITH_SPDX),
    ]
    for stub_path, header_text in stub_files:
        with open(stub_path, "w") as stub_file:
            stub_file.write(header_text)
    # Hand control to the test; the code below runs as teardown.
    yield
    for stub_path, _ in stub_files:
        os.remove(stub_path)
58+
59+
60+
class TestScancodeEvaluate:
    """Tests for `license_check` driven by the JSON stubs under STUBS_PATH."""

    def test_missing_files_attribute(self):
        """ Missing `files` attribute in JSON.
            @inputs scancode_test/scancode_test_1.json
            @outputs -1
        """
        assert license_check(os.path.join(STUBS_PATH, "scancode_test_1.json")) == -1

    def test_various_combinations_permissive_license_with_spdx(self):
        """ Various combinations where at least one license in
            a file is permissive and has spdx in the matched_rule.identifier
            attribute.
            @inputs scancode_test/scancode_test_2.json
            @outputs 0
        """
        assert license_check(os.path.join(STUBS_PATH, "scancode_test_2.json")) == 0

    def test_missing_license_permissive_license_and_spdx(self, create_scanned_files):
        """ Test four files scanned with various issues.
            test.h: Missing license text (error count += 1)
            test3.h: Missing `Permissive` license text and `spdx` in matched_rule.identifier and not in file tested by ScanCode (error count += 2)
            test4.h: Missing `Permissive` license text and `spdx` in matched_rule.identifier but found in file tested by ScanCode (error count += 1)
            test5.h: Missing `spdx` in matched_rule.identifier but found in file tested by ScanCode. (error count += 0)
            @inputs scancode_test/scancode_test_3.json
            @outputs 4
        """
        assert license_check(os.path.join(STUBS_PATH, "scancode_test_3.json")) == 4

    def test_permissive_license_no_spdx(self, create_scanned_files):
        """ Multiple `Permissive` licenses in one file but none with `spdx` in
            matched_rule.identifier and not in file tested by ScanCode (error count += 1)
            @inputs scancode_test/scancode_test_4.json
            @outputs 1
        """
        assert license_check(os.path.join(STUBS_PATH, "scancode_test_4.json")) == 1
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"headers": [
3+
{
4+
"tool_name": "scancode test fail"
5+
}
6+
]
7+
}

0 commit comments

Comments
 (0)