You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
o3de/scripts/commit_validation/fix_unicode.py

76 lines
2.7 KiB
Python

#
# All or portions of this file Copyright (c) Amazon.com, Inc. or its affiliates or
# its licensors.
#
# For complete copyright and license terms please see the LICENSE at the root of this
# distribution (the "License"). All use of this software is governed by the License,
# or, if provided, by the license below or the license accompanying this file. Do not
# remove or modify any license notices. This file is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
import argparse
import fnmatch
import os
handled_file_patterns = [
'*.c', '*.cc', '*.cpp', '*.cxx', '*.h', '*.hpp', '*.hxx', '*.inl', '*.m', '*.mm', '*.cs', '*.java',
'*.py', '*.lua', '*.bat', '*.cmd', '*.sh', '*.js',
'*.cmake', 'CMakeLists.txt'
]
replacement_map = {
0xA0: ' ',
0xA6: '|',
0x2019: '\'',
0x2014: '-',
0x2191: '^',
0x2212: '-',
0x2217: '*',
0x2248: 'is close to',
0xFEFF: '',
}
def fixUnicode(input_file):
try:
basename = os.path.basename(input_file)
for pattern in handled_file_patterns:
if fnmatch.fnmatch(basename, pattern):
with open(input_file, 'r', encoding='utf-8', errors='replace') as fh:
fileContents = fh.read()
modified = False
for uni, repl in replacement_map.items():
uni_str = chr(uni)
if uni_str in fileContents:
fileContents = fileContents.replace(uni_str, repl)
modified = True
if modified:
with open(input_file, 'w') as destination_file:
destination_file.writelines(fileContents)
print(f'[INFO] Patched {input_file}')
break
except (IOError, UnicodeDecodeError) as err:
print('[ERROR] reading {}: {}'.format(input_file, err))
return
def main():
"""script main function"""
parser = argparse.ArgumentParser(description='This script replaces unicode characters, some of them are replaced for spaces (e.g. xA0), others are replaced with the escape sequence',
formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('file_or_dir', type=str, nargs='+',
help='list of files or directories to search within for files to fix up unicode characters')
args = parser.parse_args()
for input_file in args.file_or_dir:
if os.path.isdir(input_file):
for dp, dn, filenames in os.walk(input_file):
for f in filenames:
fixUnicode(os.path.join(dp, f))
else:
fixUnicode(input_file)
#entrypoint
if __name__ == '__main__':
main()