Address commit re-run corner case.

Signed-off-by: John <jonawals@amazon.com>
parent 4cf384c2c5
commit 4dd39c3929

@@ -87,7 +87,7 @@ class TestImpact:
         try:
             # Attempt to generate a diff between the src and dst commits
-            logger.error(f"Source '{self._src_commit}' and destination '{self._dst_commit}' will be diff'd.")
+            logger.info(f"Source '{self._src_commit}' and destination '{self._dst_commit}' will be diff'd.")
             diff_path = pathlib.Path(pathlib.PurePath(self._temp_workspace).joinpath(f"changelist.{self._instance_id}.diff"))
             self._repo.create_diff_file(self._src_commit, self._dst_commit, diff_path, multi_branch)
         except RuntimeError as e:
@@ -219,28 +219,37 @@ class TestImpact:
         try:
             # Persistent storage location
             if s3_bucket:
-                persistent_storage = PersistentStorageS3(self._config, suite, s3_bucket, s3_top_level_dir, self._source_of_truth_branch)
+                persistent_storage = PersistentStorageS3(self._config, suite, self._dst_commit, s3_bucket, s3_top_level_dir, self._source_of_truth_branch)
             else:
-                persistent_storage = PersistentStorageLocal(self._config, suite)
+                persistent_storage = PersistentStorageLocal(self._config, suite, self._dst_commit)
         except SystemError as e:
             logger.warning(f"The persistent storage encountered an irrecoverable error, test impact analysis will be disabled: '{e}'")
             persistent_storage = None

         if persistent_storage:
-            # Flag to signify whether or not this is a re-run (multiple runs of the same commit)
-            # Right now, we don't fully support re-runs but in the future we will have an extra subfolder for each commit hash with the
-            # last run hash that was used for the first run for the commit so we can retreive the same reference point for building the
-            # change list to ensure each subsequent run is using the same data but for the time being, just perform a regular run
-            is_rerun = False
+            # Flag for the corner case where:
+            # 1. TIAF was already run previously for this commit.
+            # 2. There was no last commit hash when TIAF last ran on this commit (due to no coverage data existing yet for this branch).
+            # 3. TIAF has not been run on any other commits between the run for this commit and the last run for this commit.
+            # The above results in TIAF being stuck in a state of generating an empty change list (and thus doing no work until another
+            # commit comes in), which is problematic if the commit needs to be re-run for whatever reason, so in these conditions we
+            # revert back to a regular test run until another commit comes in.
+            cannot_rerun_with_instrumentation = False
             if persistent_storage.has_historic_data:
                 logger.info("Historic data found.")
                 self._src_commit = persistent_storage.last_commit_hash
-                # Perform some basic sanity checks on the commit hashes to ensure confidence in the integrity of the environment
-                if self._src_commit == self._dst_commit:
-                    logger.info(f"Source commit '{self._src_commit}' and destination commit '{self._dst_commit}', implying this is a re-run. A regular sequence will instead be performed.")
-                    persistent_storage = None
-                    is_rerun = True
+                # Check to see if this is a re-run for this commit before any other changes have come in
+                if persistent_storage.is_repeat_sequence:
+                    if persistent_storage.can_rerun_sequence:
+                        logger.info(f"This sequence is being re-run before any other changes have come in so the last commit '{persistent_storage.this_commit_last_commit_hash}' used for the previous sequence will be used instead.")
+                        self._src_commit = persistent_storage.this_commit_last_commit_hash
+                    else:
+                        logger.info(f"This sequence is being re-run before any other changes have come in but there is no useful historic data. A regular sequence will be performed instead.")
+                        persistent_storage = None
+                        cannot_rerun_with_instrumentation = True
                 else:
                     self._attempt_to_generate_change_list()
             else:
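
In short: a re-run of the same commit now reuses the reference point recorded on that commit's first run, and only falls back to a regular (non-analysis) run when no such reference point exists. A minimal sketch of the decision flow, with a hypothetical `storage` object standing in for the PersistentStorage instance:

from typing import Optional

def pick_src_commit(storage, fallback_src: str) -> Optional[str]:
    """Return the commit to diff against, or None to force a regular run (sketch only)."""
    if storage.is_repeat_sequence:            # the branch last ran on this very commit
        if storage.can_rerun_sequence:        # a usable prior reference point was recorded
            return storage.this_commit_last_commit_hash
        return None                           # no usable history: revert to a regular run
    return fallback_src                       # normal case: diff against the branch's last commit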
@@ -268,7 +277,7 @@ class TestImpact:
                 args.append(f"--changelist={self._change_list_path}")
                 logger.info(f"Change list is set to '{self._change_list_path}'.")
         else:
-            if self._is_source_of_truth_branch and not is_rerun:
+            if self._is_source_of_truth_branch and not cannot_rerun_with_instrumentation:
                 # Use seed sequence (instrumented all tests) for coverage updating branches so we can generate the coverage bed for future sequences
                 sequence_type = "seed"
                 # We always continue after test failures when seeding to ensure we capture the coverage for all test targets
@@ -314,14 +323,18 @@ class TestImpact:
         logger.info(f"Args: {unpacked_args}")
         runtime_result = subprocess.run([str(self._tiaf_bin)] + args)
         report = None

         # If the sequence completed (with or without failures) we will update the historical meta-data
         if runtime_result.returncode == 0 or runtime_result.returncode == 7:
             logger.info("Test impact analysis runtime returned successfully.")
-            if self._is_source_of_truth_branch and persistent_storage is not None:
-                persistent_storage.update_and_store_historic_data(self._dst_commit)
+            # Get the sequence report the runtime generated
             with open(report_file) as json_file:
                 report = json.load(json_file)
+            # Attempt to store the historic data for this branch and sequence
+            if self._is_source_of_truth_branch and persistent_storage is not None:
+                persistent_storage.update_and_store_historic_data()
         else:
             logger.error(f"The test impact analysis runtime returned with error: '{runtime_result.returncode}'.")

@@ -15,23 +15,39 @@ logger = get_logger(__file__)

 # Abstraction for the persistent storage required by TIAF to store and retrieve the branch coverage data and other meta-data
 class PersistentStorage(ABC):

-    def __init__(self, config: dict, suite: str):
+    WORKSPACE_KEY = "workspace"
+    LAST_RUNS_KEY = "last_runs"
+    ACTIVE_KEY = "active"
+    ROOT_KEY = "root"
+    RELATIVE_PATHS_KEY = "relative_paths"
+    TEST_IMPACT_DATA_FILES_KEY = "test_impact_data_files"
+    LAST_COMMIT_HASH_KEY = "last_commit_hash"
+    COVERAGE_DATA_KEY = "coverage_data"
+
+    def __init__(self, config: dict, suite: str, commit: str):
         """
         Initializes the persistent storage into a state for which there is no historic data available.

         @param config: The runtime configuration to obtain the data file paths from.
         @param suite: The test suite for which the historic data will be obtained for.
+        @param commit: The commit hash for this build.
         """

         # Work on the assumption that there is no historic meta-data (a valid state to be in, should none exist)
         self._last_commit_hash = None
         self._has_historic_data = False
+        self._has_previous_last_commit_hash = False
+        self._this_commit_hash = commit
+        self._this_commit_hash_last_commit_hash = None
+        self._historic_data = None
+        logger.info(f"Attempting to access persistent storage for the commit {self._this_commit_hash}")

         try:
             # The runtime expects the coverage data to be in the location specified in the config file (unless overridden with
             # the --datafile command line argument, which the TIAF scripts do not do)
-            self._active_workspace = pathlib.Path(config["workspace"]["active"]["root"])
-            unpacked_coverage_data_file = config["workspace"]["active"]["relative_paths"]["test_impact_data_files"][suite]
+            self._active_workspace = pathlib.Path(config[self.WORKSPACE_KEY][self.ACTIVE_KEY][self.ROOT_KEY])
+            unpacked_coverage_data_file = config[self.WORKSPACE_KEY][self.ACTIVE_KEY][self.RELATIVE_PATHS_KEY][self.TEST_IMPACT_DATA_FILES_KEY][suite]
         except KeyError as e:
             raise SystemError(f"The config does not contain the key {str(e)}.")
@@ -45,17 +61,36 @@ class PersistentStorage(ABC):
         """

         self._has_historic_data = False
+        self._has_previous_last_commit_hash = False
         try:
-            historic_data = json.loads(historic_data_json)
-            self._last_commit_hash = historic_data["last_commit_hash"]
+            self._historic_data = json.loads(historic_data_json)
+
+            # Last commit hash for this branch
+            self._last_commit_hash = self._historic_data[self.LAST_COMMIT_HASH_KEY]
             logger.info(f"Last commit hash '{self._last_commit_hash}' found.")
+
+            if self.LAST_RUNS_KEY in self._historic_data:
+                # Last commit hash for the sequence that was run for this commit previously (if any)
+                if self._this_commit_hash in self._historic_data[self.LAST_RUNS_KEY]:
+                    # 'None' is a valid value for the previously used last commit hash if there was no coverage data at that time
+                    self._this_commit_hash_last_commit_hash = self._historic_data[self.LAST_RUNS_KEY][self._this_commit_hash]
+                    self._has_previous_last_commit_hash = self._this_commit_hash_last_commit_hash is not None
+                    if self._has_previous_last_commit_hash:
+                        logger.info(f"Last commit hash '{self._this_commit_hash_last_commit_hash}' was used previously for this commit.")
+                    else:
+                        logger.info(f"Prior sequence data found for this commit but it is empty (there was no coverage data available at that time).")
+                else:
+                    logger.info(f"No prior sequence data found for commit '{self._this_commit_hash}', this is the first sequence for this commit.")
+            else:
+                logger.info(f"No prior sequence data found for any commits.")

             # Create the active workspace directory where the coverage data file will be placed and unpack the coverage data so
             # it is accessible by the runtime
             self._active_workspace.mkdir(exist_ok=True)
             with open(self._unpacked_coverage_data_file, "w", newline='\n') as coverage_data:
-                coverage_data.write(historic_data["coverage_data"])
+                coverage_data.write(self._historic_data[self.COVERAGE_DATA_KEY])
             self._has_historic_data = True
         except json.JSONDecodeError:
@@ -65,20 +100,31 @@ class PersistentStorage(ABC):
         except EnvironmentError as e:
             logger.error(f"There was a problem the coverage data file '{self._unpacked_coverage_data_file}': '{e}'.")

-    def _pack_historic_data(self, last_commit_hash: str):
+    def _pack_historic_data(self):
         """
         Packs the current historic data into a JSON file for serializing.

-        @param last_commit_hash: The commit hash to associate the coverage data (and any other meta data) with.
         @return: The packed historic data in JSON format.
         """

         try:
             # Attempt to read the existing coverage data
             if self._unpacked_coverage_data_file.is_file():
+                if not self._historic_data:
+                    self._historic_data = {}
+
+                # Last commit hash for this branch
+                self._historic_data[self.LAST_COMMIT_HASH_KEY] = self._this_commit_hash
+
+                # Last commit hash for this commit
+                if not self.LAST_RUNS_KEY in self._historic_data:
+                    self._historic_data[self.LAST_RUNS_KEY] = {}
+                self._historic_data[self.LAST_RUNS_KEY][self._this_commit_hash] = self._last_commit_hash
+
+                # Coverage data for this branch
                 with open(self._unpacked_coverage_data_file, "r") as coverage_data:
-                    historic_data = {"last_commit_hash": last_commit_hash, "coverage_data": coverage_data.read()}
-                    return json.dumps(historic_data)
+                    self._historic_data[self.COVERAGE_DATA_KEY] = coverage_data.read()
+                    return json.dumps(self._historic_data)
             else:
                 logger.info(f"No coverage data exists at location '{self._unpacked_coverage_data_file}'.")
         except EnvironmentError as e:
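
After this change the packed payload carries, per branch, the coverage bed, the branch's last sequenced commit, and a per-commit record of the reference point used on each commit's first run. An illustrative (not authoritative) shape of that payload, with hash values borrowed from this commit's header purely as examples:

# Illustrative shape of the packed historic data (hash values hypothetical):
historic_data = {
    "last_commit_hash": "4dd39c3929",        # last commit sequenced on this branch
    "last_runs": {
        "4dd39c3929": "4cf384c2c5"           # per commit: the src commit its first run diffed against
    },
    "coverage_data": "<unpacked coverage file contents>"
}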
@@ -97,16 +143,14 @@ class PersistentStorage(ABC):
         """
         pass

-    def update_and_store_historic_data(self, last_commit_hash: str):
+    def update_and_store_historic_data(self):
         """
         Updates the historic data and stores it in the designated persistent storage location.
-
-        @param last_commit_hash: The commit hash to associate the coverage data (and any other meta data) with.
         """

-        historic_data_json = self._pack_historic_data(last_commit_hash)
+        historic_data_json = self._pack_historic_data()
         if historic_data_json:
-            logger.info(f"Attempting to store historic data with new last commit hash '{last_commit_hash}'...")
+            logger.info(f"Attempting to store historic data with new last commit hash '{self._this_commit_hash}'...")
             self._store_historic_data(historic_data_json)
             logger.info("The historic data was successfully stored.")
@@ -119,4 +163,16 @@ class PersistentStorage(ABC):
     @property
     def last_commit_hash(self):
         return self._last_commit_hash
+
+    @property
+    def is_repeat_sequence(self):
+        return self._last_commit_hash == self._this_commit_hash
+
+    @property
+    def this_commit_last_commit_hash(self):
+        return self._this_commit_hash_last_commit_hash
+
+    @property
+    def can_rerun_sequence(self):
+        return self._has_previous_last_commit_hash
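
Taken together, the new properties let callers detect a re-run and recover the original reference point without reaching into the underlying dictionary. A small worked example of the state a re-run would observe, using hypothetical hashes:

# Hypothetical state for a re-run of commit "abc" whose first run diffed against "123":
last_commit_hash = "abc"                # branch's last sequenced commit
this_commit_hash = "abc"                # commit being built now
this_commit_last_commit_hash = "123"    # recorded on the first run of "abc"

assert last_commit_hash == this_commit_hash        # is_repeat_sequence -> True
assert this_commit_last_commit_hash is not None    # can_rerun_sequence -> True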

@@ -15,19 +15,24 @@ logger = get_logger(__file__)

 # Implementation of local persistent storage
 class PersistentStorageLocal(PersistentStorage):

-    def __init__(self, config: str, suite: str):
+    HISTORIC_KEY = "historic"
+    DATA_KEY = "data"
+
+    def __init__(self, config: str, suite: str, commit: str):
         """
         Initializes the persistent storage with any local historic data available.

         @param config: The runtime config file to obtain the data file paths from.
         @param suite: The test suite for which the historic data will be obtained for.
+        @param commit: The commit hash for this build.
         """

-        super().__init__(config, suite)
+        super().__init__(config, suite, commit)

         try:
             # Attempt to obtain the local persistent data location specified in the runtime config file
-            self._historic_workspace = pathlib.Path(config["workspace"]["historic"]["root"])
-            historic_data_file = pathlib.Path(config["workspace"]["historic"]["relative_paths"]["data"])
+            self._historic_workspace = pathlib.Path(config[self.WORKSPACE_KEY][self.HISTORIC_KEY][self.ROOT_KEY])
+            historic_data_file = pathlib.Path(config[self.WORKSPACE_KEY][self.HISTORIC_KEY][self.RELATIVE_PATHS_KEY][self.DATA_KEY])
             # Attempt to unpack the local historic data file
             self._historic_data_file = self._historic_workspace.joinpath(historic_data_file)
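
For reference, the nested keys read by the base class and this local implementation imply a runtime config of roughly the following shape; the concrete paths and suite name here are hypothetical:

# Hypothetical runtime config fragment matching the keys read by the storage classes:
config = {
    "workspace": {
        "active": {
            "root": "/path/to/active/workspace",
            "relative_paths": {
                "test_impact_data_files": {"main": "TestImpactData.main.json"}
            }
        },
        "historic": {
            "root": "/path/to/historic/workspace",
            "relative_paths": {"data": "historic_data.json"}
        }
    }
}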

@@ -18,16 +18,23 @@ logger = get_logger(__file__)

 # Implementation of s3 bucket persistent storage
 class PersistentStorageS3(PersistentStorage):

-    def __init__(self, config: dict, suite: str, s3_bucket: str, root_dir: str, branch: str):
+    META_KEY = "meta"
+    BUILD_CONFIG_KEY = "build_config"
+
+    def __init__(self, config: dict, suite: str, commit: str, s3_bucket: str, root_dir: str, branch: str):
         """
         Initializes the persistent storage with the specified s3 bucket.

         @param config: The runtime config file to obtain the data file paths from.
         @param suite: The test suite for which the historic data will be obtained for.
+        @param commit: The commit hash for this build.
         @param s3_bucket: The s3 bucket to use for storing and retrieving historic data.
+        @param root_dir: The root directory to use for the historic data object.
+        @param branch: The branch to retrieve the historic data for.
         """

-        super().__init__(config, suite)
+        super().__init__(config, suite, commit)

         try:
             # We store the historic data as compressed JSON
@@ -37,8 +44,8 @@ class PersistentStorageS3(PersistentStorage):
             historic_data_file = f"historic_data.{object_extension}"

             # The location of the data is in the form <root_dir>/<branch>/<config> so the build config of each branch gets its own historic data
-            self._dir = f'{root_dir}/{branch}/{config["meta"]["build_config"]}'
-            self._historic_data_key = f'{self._dir}/{historic_data_file}'
+            self._historic_data_dir = f'{root_dir}/{branch}/{config[self.META_KEY][self.BUILD_CONFIG_KEY]}'
+            self._historic_data_key = f'{self._historic_data_dir}/{historic_data_file}'

             logger.info(f"Attempting to retrieve historic data for branch '{branch}' at location '{self._historic_data_key}' on bucket '{s3_bucket}'...")
             self._s3 = boto3.resource("s3")
@@ -49,7 +56,7 @@ class PersistentStorageS3(PersistentStorage):
                 logger.info(f"Historic data found for branch '{branch}'.")

                 # Archive the existing object with the name of the existing last commit hash
-                #archive_key = f"{self._dir}/archive/{self._last_commit_hash}.{object_extension}"
+                #archive_key = f"{self._historic_data_dir}/archive/{self._last_commit_hash}.{object_extension}"
                 #logger.info(f"Archiving existing historic data to '{archive_key}' in bucket '{self._bucket.name}'...")
                 #self._bucket.copy({"Bucket": self._bucket.name, "Key": self._historic_data_key}, archive_key)
                 #logger.info(f"Archiving complete.")
