# Source: o3de/scripts/license_scanner/license_scanner.py

#
# Copyright (c) Contributors to the Open 3D Engine Project. For complete copyright and license terms please see the LICENSE at the root of this distribution.
#
# SPDX-License-Identifier: Apache-2.0 OR MIT
#
#
import argparse
from collections import OrderedDict
import fnmatch
import json
import os
import pathlib
import re
import sys
class LicenseScanner:
    """Class to contain license scanner.

    Scans source trees for license files using the filename patterns from a JSON config
    file and can write the collected license texts and package summaries to output files.

    :param config_file: Config file with license patterns and scanner settings. When not
        provided, ``scanner_config.json`` in the same directory as this script is used.
    """

    DEFAULT_CONFIG_FILE = 'scanner_config.json'
    DEFAULT_EXCLUDE_FILE = '.gitignore'
    DEFAULT_PACKAGE_INFO_FILE = 'PackageInfo.json'

    def __init__(self, config_file=None):
        self.config_file = config_file
        self.config_data = self._load_config()
        # License patterns are required, so a missing key still raises KeyError.
        self.file_regex = self._load_file_regex(self.config_data['license_patterns'])
        # These two keys are optional: scan() has explicit fallbacks for them, so a config
        # that omits them must not fail here with KeyError.
        self.package_info = self._load_file_regex(self.config_data.get('package_patterns', []))
        self.excluded_directories = self._load_file_regex(self.config_data.get('excluded_directories', []))

    def _load_config(self):
        """Load config from the provided file. Sets default file if one is not provided.

        :return: Parsed JSON content of the config file
        :raises FileNotFoundError: If the config file does not exist
        """
        if not self.config_file:
            script_directory = os.path.dirname(os.path.abspath(__file__))  # Default file expected in same dir as script
            self.config_file = os.path.join(script_directory, self.DEFAULT_CONFIG_FILE)
        try:
            with open(self.config_file, encoding='utf8') as f:
                return json.load(f)
        except FileNotFoundError:
            print('Config file cannot be found')
            raise

    def _load_file_regex(self, patterns):
        """Returns regex object with case-insensitive matching from the list of filename patterns.

        :param patterns: Iterable of fnmatch-style filename patterns
        :return: Compiled regex matching any of the patterns, or None if there are no patterns
        """
        regex_patterns = [fnmatch.translate(pattern) for pattern in patterns or ()]
        if not regex_patterns:
            print(f'Warning: No patterns from {patterns} found')
            return None
        return re.compile('|'.join(regex_patterns), re.IGNORECASE)

    def _is_package_info(self, filename):
        """Return True if filename matches the package info patterns.

        Falls back to DEFAULT_PACKAGE_INFO_FILE when the config supplied no package patterns.
        """
        if not self.package_info:
            # Compile the default name; a plain string has no .match() method.
            self.package_info = self._load_file_regex([self.DEFAULT_PACKAGE_INFO_FILE])
        return bool(self.package_info.match(filename))

    def scan(self, paths=os.curdir):
        """Scan directory tree for filenames matching file_regex, package info, and exclusion files.

        :param paths: A single path (str or path-like) or an iterable of paths to scan
        :return: Package paths and their corresponding file contents. The content is None
            for a matching file that could not be decoded as UTF-8.
        :rtype: Ordered dict
        """
        files = 0
        matching_files = OrderedDict()

        # A lone str would otherwise be iterated character by character, and a lone Path
        # is not iterable at all.
        if isinstance(paths, (str, os.PathLike)):
            paths = [paths]

        if not self.excluded_directories:
            print(f'No excluded directory in config, looking for {self.DEFAULT_EXCLUDE_FILE} instead')

        # Exclusions built from the most recently seen .gitignore. Only consulted when the
        # config has no excluded directories, and kept for all directories visited afterwards.
        ignore_regex = None

        for path in paths:
            for dirpath, dirnames, filenames in os.walk(path, topdown=True):
                dirnames.sort(key=str.casefold)  # Ensure that results are sorted
                for file in filenames:
                    is_package_info = self._is_package_info(file)
                    is_license = bool(self.file_regex and self.file_regex.match(file))
                    if is_license or is_package_info:
                        file_path = os.path.join(dirpath, file)
                        matching_files[file_path] = self._get_file_contents(file_path)
                        files += 1
                        print(f'Matching file: {file_path}')
                        if is_package_info:
                            dirnames[:] = []  # Stop scanning subdirectories if package info file found
                    if file == self.DEFAULT_EXCLUDE_FILE and not self.excluded_directories:
                        ignore_contents = self._get_file_contents(os.path.join(dirpath, file))
                        if ignore_contents is not None:  # None means the file was not valid UTF-8
                            ignore_list = ignore_contents.splitlines()
                            ignore_list.append('.git')  # .gitignore doesn't usually have .git in its exclusions
                            ignore_regex = self._load_file_regex(ignore_list)

                # Remove directories that should not be scanned. Config exclusions take
                # precedence over .gitignore. The list is rebuilt in place rather than calling
                # remove() during iteration, which skips the entry after each removed one.
                active_exclusions = self.excluded_directories or ignore_regex
                if active_exclusions:
                    dirnames[:] = [name for name in dirnames if not active_exclusions.match(name)]

        print(f'{files} files found.')
        return matching_files

    def _get_file_contents(self, filepath):
        """Return the text of filepath decoded as UTF-8, or None if it cannot be decoded.

        :param filepath: Path of the file to read
        :raises FileNotFoundError: If the file does not exist
        """
        try:
            with open(filepath, encoding='utf8') as f:
                return f.read()
        except UnicodeDecodeError:
            print(f'Unable to read file: {filepath}')
            return None

    def create_license_file(self, licenses, filepath='NOTICES.txt'):
        """Creates file with all the provided license file contents.

        Entries whose filename matches the package info patterns are skipped, as are entries
        whose contents are None (files that could not be read).

        :param licenses: Dict with package paths and their corresponding license file contents
        :param filepath: Path to write the file
        """
        license_separator = '------------------------------------'
        with open(filepath, 'w', encoding='utf8') as lf:
            for directory, license_text in licenses.items():
                if self._is_package_info(os.path.basename(directory)):
                    continue
                if license_text is None:
                    # Avoid writing the literal text "None" as a license body.
                    print(f'Skipping unreadable license file: {directory}')
                    continue
                license_output = '\n\n'.join([
                    f'{license_separator}',
                    f'Package path: {os.path.relpath(directory)}',
                    'License:',
                    f'{license_text}\n'
                ])
                lf.write(license_output)

    def create_package_file(self, packages, filepath='SPDX-Licenses.json', get_contents=False):
        """Creates file with all the provided SPDX package info summaries in json.

        Entries that are not package info files are passed through to the returned dict
        unchanged. When get_contents is set, the ``LicenseFile`` path in each package info
        is followed (relative to the package info file) and its contents are added as well.

        :param packages: Dict with scanned file paths and their corresponding file contents
        :param filepath: Path to write the file
        :param get_contents: Also read the license file referenced by each package info
        :return: License file paths and their corresponding contents
        :rtype: Ordered dict
        """
        licenses = OrderedDict()
        package_json = []
        for directory, package in packages.items():
            if not self._is_package_info(os.path.basename(directory)):
                licenses[directory] = package
                continue
            if package is None:
                print(f'Skipping unreadable package info file: {directory}')
                continue
            package_obj = json.loads(package)
            package_json.append(package_obj)
            if get_contents:
                license_file = package_obj.get('LicenseFile') if isinstance(package_obj, dict) else None
                if not license_file:
                    print(f'No LicenseFile entry in package info: {directory}')
                    continue
                license_path = os.path.join(os.path.dirname(directory), pathlib.Path(license_file))
                licenses[license_path] = self._get_file_contents(license_path)

        # Written only after every package parsed, so a failure above does not leave a
        # truncated output file behind.
        with open(filepath, 'w', encoding='utf8') as pf:
            pf.write(json.dumps(package_json, indent=4))
        return licenses
def parse_args():
    """Parse the command-line arguments of the license scanner script.

    :return: Namespace with ``config_file``, ``license_file_path`` and ``package_file_path``
        (each a pathlib.Path or None) and ``scan_path`` (a list of pathlib.Path)
    """
    parser = argparse.ArgumentParser(
        description='Script to run LicenseScanner and generate license file')
    parser.add_argument('--config-file', '-c', type=pathlib.Path, help='Config file for LicenseScanner')
    parser.add_argument('--license-file-path', '-l', type=pathlib.Path, help='Create license file in the provided path')
    parser.add_argument('--package-file-path', '-p', type=pathlib.Path, help='Create package summary file in the provided path')
    # The default must already be a list: argparse passes a *string* default through `type`,
    # which would yield a single bare Path instead of the list that nargs='+' produces.
    parser.add_argument('--scan-path', '-s', default=[pathlib.Path(os.curdir)], type=pathlib.Path, nargs='+',
                        help='Path to scan, multiple space separated paths can be used')
    return parser.parse_args()
def main():
    """Run the scanner from the command line and write the requested output files.

    Scans the requested paths, then writes the license file and/or the package summary
    file depending on which output paths were given. When both are given, the package
    summary is generated again with license contents resolved and the license file is
    rewritten from that result.

    :return: 1 if a FileNotFoundError was raised, otherwise None
    """
    try:
        options = parse_args()
        scanner = LicenseScanner(options.config_file)
        scan_results = scanner.scan(options.scan_path)

        license_target = options.license_file_path
        package_target = options.package_file_path

        if license_target:
            scanner.create_license_file(scan_results, license_target)
        if package_target:
            scanner.create_package_file(scan_results, package_target)
        if license_target and package_target:
            # Follow the license files referenced by the package info files and rebuild
            # the license file from the combined result.
            resolved_licenses = scanner.create_package_file(scan_results, package_target, True)
            scanner.create_license_file(resolved_licenses, license_target)
    except FileNotFoundError as err:
        print(f'Type: {type(err).__name__}, Error: {err}')
        return 1
# Script entry point: the value returned by main() becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())