You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
190 lines
8.1 KiB
Python
190 lines
8.1 KiB
Python
#
|
|
# Copyright (c) Contributors to the Open 3D Engine Project. For complete copyright and license terms please see the LICENSE at the root of this distribution.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0 OR MIT
|
|
#
|
|
#
|
|
|
|
import argparse
|
|
from collections import OrderedDict
|
|
import fnmatch
|
|
import json
|
|
import os
|
|
import pathlib
|
|
import re
|
|
import sys
|
|
|
|
|
|
class LicenseScanner:
    """Scan source trees for license and package info files.

    Walks directory trees looking for filenames that match the patterns in the
    config file, and can write the collected license texts and package info
    summaries to output files.

    :param config_file: Config file with license patterns and scanner settings
    """

    DEFAULT_CONFIG_FILE = 'scanner_config.json'
    DEFAULT_EXCLUDE_FILE = '.gitignore'
    DEFAULT_PACKAGE_INFO_FILE = 'PackageInfo.json'

    def __init__(self, config_file=None):
        self.config_file = config_file
        self.config_data = self._load_config()
        self.file_regex = self._load_file_regex(self.config_data['license_patterns'])
        self.package_info = self._load_file_regex(self.config_data['package_patterns'])
        self.excluded_directories = self._load_file_regex(self.config_data['excluded_directories'])

    def _load_config(self):
        """Load config from the provided file. Sets default file if one is not provided.

        :return: Parsed JSON content of the config file
        :raises FileNotFoundError: If the config file does not exist
        """
        if not self.config_file:
            # Default file expected in same dir as script
            script_directory = os.path.dirname(os.path.abspath(__file__))
            self.config_file = os.path.join(script_directory, self.DEFAULT_CONFIG_FILE)

        try:
            with open(self.config_file, encoding='utf8') as f:
                return json.load(f)
        except FileNotFoundError:
            print('Config file cannot be found')
            raise

    def _load_file_regex(self, patterns):
        """Returns regex object with case-insensitive matching from the list of filename patterns.

        :param patterns: Iterable of fnmatch-style filename patterns
        :return: Compiled regex matching any of the patterns, or None if there are no patterns
        """
        regex_patterns = [fnmatch.translate(pattern) for pattern in patterns]

        if not regex_patterns:
            print(f'Warning: No patterns from {patterns} found')
            return None

        return re.compile('|'.join(regex_patterns), re.IGNORECASE)

    def _ensure_package_info(self):
        """Return the package info regex, compiling the default filename pattern if none is configured."""
        if not self.package_info:
            # The fallback must be a compiled regex: callers invoke .match() on it.
            self.package_info = self._load_file_regex([self.DEFAULT_PACKAGE_INFO_FILE])
        return self.package_info

    def scan(self, paths=os.curdir):
        """Scan directory tree for filenames matching file_regex, package info, and exclusion files.

        :param paths: A single path or an iterable of paths of the directories to run scanner
        :return: Package paths and their corresponding file contents (None for files that
            could not be decoded)
        :rtype: Ordered dict
        """
        # A lone str/Path would otherwise be iterated character by character.
        if isinstance(paths, (str, os.PathLike)):
            paths = [paths]

        files = 0
        matching_files = OrderedDict()
        package_info = self._ensure_package_info()

        # Exclusions from the config take precedence; otherwise they are loaded
        # from the first exclusion file encountered during the walk.
        excluded_directories = self.excluded_directories
        if not excluded_directories:
            print(f'No excluded directory in config, looking for {self.DEFAULT_EXCLUDE_FILE} instead')

        for path in paths:
            for dirpath, dirnames, filenames in os.walk(path, topdown=True):
                # Ensure that results are sorted, independent of filesystem listing order
                dirnames.sort(key=str.casefold)
                filenames.sort(key=str.casefold)

                for name in filenames:
                    is_package = package_info.match(name) is not None
                    is_license = self.file_regex is not None and self.file_regex.match(name) is not None

                    if is_license or is_package:
                        file_path = os.path.join(dirpath, name)
                        matching_files[file_path] = self._get_file_contents(file_path)
                        files += 1
                        print(f'Matching file: {file_path}')

                    if is_package:
                        dirnames[:] = []  # Stop scanning subdirectories if package info file found

                    if name == self.DEFAULT_EXCLUDE_FILE and not self.excluded_directories:
                        contents = self._get_file_contents(os.path.join(dirpath, name))
                        if contents is not None:
                            # Blank lines and comments carry no pattern.
                            # NOTE(review): directory-only patterns such as 'build/' are kept verbatim
                            # and therefore never match the bare directory names os.walk yields.
                            ignore_list = [
                                line.strip() for line in contents.splitlines()
                                if line.strip() and not line.lstrip().startswith('#')
                            ]
                            ignore_list.append('.git')  # .gitignore doesn't usually have .git in its exclusions
                            excluded_directories = self._load_file_regex(ignore_list)

                # Remove directories that should not be scanned. The list is rebuilt in place
                # (os.walk honours in-place edits); removing items while iterating over the
                # same list would skip the sibling following each removed entry.
                if excluded_directories:
                    dirnames[:] = [d for d in dirnames if not excluded_directories.match(d)]

        print(f'{files} files found.')
        return matching_files

    def _get_file_contents(self, filepath):
        """Return the text of a UTF-8 file, or None if it cannot be decoded.

        :param filepath: Path of the file to read
        :return: File contents, or None when the file is not valid UTF-8
        """
        try:
            with open(filepath, encoding='utf8') as f:
                return f.read()
        except UnicodeDecodeError:
            print(f'Unable to read file: {filepath}')
            return None

    def create_license_file(self, licenses, filepath='NOTICES.txt'):
        """Creates file with all the provided license file contents.

        Entries whose filename matches the package info pattern, and entries with
        no readable content, are left out.

        :param licenses: Dict with package paths and their corresponding license file contents
        :param filepath: Path to write the file
        """
        license_separator = '------------------------------------'
        package_info = self._ensure_package_info()

        with open(filepath, 'w', encoding='utf8') as lf:
            for directory, license_text in licenses.items():
                if package_info.match(os.path.basename(directory)):
                    continue
                if license_text is None:
                    # Unreadable file: skip rather than writing the literal text "None".
                    continue
                license_output = '\n\n'.join([
                    f'{license_separator}',
                    f'Package path: {os.path.relpath(directory)}',
                    'License:',
                    f'{license_text}\n',
                ])
                lf.write(license_output)
        return None

    def create_package_file(self, packages, filepath='SPDX-Licenses.json', get_contents=False):
        """Creates file with all the provided SPDX package info summaries in json.

        With get_contents set, the 'LicenseFile' entry of each package info is resolved
        relative to the package info file and the license contents are returned instead
        of the raw package info.

        :param packages: Dict with package info paths and their corresponding file contents
        :param filepath: Path to write the file
        :param get_contents: Return license file contents keyed by license path instead of
            package info contents keyed by package info path
        :return: Paths and their corresponding contents, as selected by get_contents
        :rtype: Ordered dict
        :raises KeyError: If get_contents is set and a package info has no 'LicenseFile' entry
        """
        licenses = OrderedDict()
        package_json = []
        package_info = self._ensure_package_info()

        # Parse everything before opening the output so a malformed package info
        # does not leave a truncated file behind.
        for directory, package in packages.items():
            if not package_info.match(os.path.basename(directory)):
                continue
            if package is None:
                # Package info file could not be decoded; there is nothing to parse.
                continue
            package_obj = json.loads(package)
            package_json.append(package_obj)
            if get_contents:
                license_path = os.path.join(os.path.dirname(directory), pathlib.Path(package_obj['LicenseFile']))
                licenses[license_path] = self._get_file_contents(license_path)
            else:
                licenses[directory] = package

        with open(filepath, 'w', encoding='utf8') as pf:
            pf.write(json.dumps(package_json, indent=4))
        return licenses
|
|
|
|
|
|
def parse_args():
    """Parse the command line arguments of the license scanner script.

    :return: Namespace with config_file, license_file_path, package_file_path (each a
        pathlib.Path or None) and scan_path (a list of pathlib.Path)
    """
    parser = argparse.ArgumentParser(
        description='Script to run LicenseScanner and generate license file')
    parser.add_argument('--config-file', '-c', type=pathlib.Path, help='Config file for LicenseScanner')
    parser.add_argument('--license-file-path', '-l', type=pathlib.Path, help='Create license file in the provided path')
    parser.add_argument('--package-file-path', '-p', type=pathlib.Path, help='Create package summary file in the provided path')
    # nargs='+' yields a list when the option is given, so the default is a list as well;
    # a bare string default would make scan_path a str only when the option is omitted.
    parser.add_argument('--scan-path', '-s', default=[pathlib.Path(os.curdir)], type=pathlib.Path, nargs='+',
                        help='Path to scan, multiple space separated paths can be used')
    return parser.parse_args()
|
|
|
|
|
|
def main():
    """Run the license scanner from the command line.

    Scans the requested paths and writes the license file, the package summary
    file, or both. When both are requested, the license file is built from the
    license files referenced by the package info files.

    :return: 0 on success, 1 if a required file could not be found
    """
    try:
        args = parse_args()
        ls = LicenseScanner(args.config_file)
        scanned_path_data = ls.scan(args.scan_path)

        # The branches are mutually exclusive so each output file is written once;
        # writing them separately first would only produce files that the combined
        # step overwrites.
        if args.license_file_path and args.package_file_path:
            license_files = ls.create_package_file(scanned_path_data, args.package_file_path, True)
            ls.create_license_file(license_files, args.license_file_path)
        elif args.license_file_path:
            ls.create_license_file(scanned_path_data, args.license_file_path)
        elif args.package_file_path:
            ls.create_package_file(scanned_path_data, args.package_file_path)
    except FileNotFoundError as e:
        print(f'Type: {type(e).__name__}, Error: {e}')
        return 1
    return 0
|
|
|
|
|
|
# Script entry point: hand main()'s status to the interpreter as the process exit code.
if __name__ == '__main__':
    raise SystemExit(main())
|