diff --git a/scripts/build/TestImpactAnalysis/tiaf.py b/scripts/build/TestImpactAnalysis/tiaf.py
index 6a43ad3c70..28de00d6ba 100644
--- a/scripts/build/TestImpactAnalysis/tiaf.py
+++ b/scripts/build/TestImpactAnalysis/tiaf.py
@@ -87,7 +87,7 @@ class TestImpact:
         try:
             # Attempt to generate a diff between the src and dst commits
-            logger.error(f"Source '{self._src_commit}' and destination '{self._dst_commit}' will be diff'd.")
+            logger.info(f"Source '{self._src_commit}' and destination '{self._dst_commit}' will be diff'd.")
             diff_path = pathlib.Path(pathlib.PurePath(self._temp_workspace).joinpath(f"changelist.{self._instance_id}.diff"))
             self._repo.create_diff_file(self._src_commit, self._dst_commit, diff_path, multi_branch)
         except RuntimeError as e:
@@ -219,28 +219,37 @@ class TestImpact:
         try:
             # Persistent storage location
             if s3_bucket:
-                persistent_storage = PersistentStorageS3(self._config, suite, s3_bucket, s3_top_level_dir, self._source_of_truth_branch)
+                persistent_storage = PersistentStorageS3(self._config, suite, self._dst_commit, s3_bucket, s3_top_level_dir, self._source_of_truth_branch)
             else:
-                persistent_storage = PersistentStorageLocal(self._config, suite)
+                persistent_storage = PersistentStorageLocal(self._config, suite, self._dst_commit)
         except SystemError as e:
             logger.warning(f"The persistent storage encountered an irrecoverable error, test impact analysis will be disabled: '{e}'")
             persistent_storage = None

         if persistent_storage:
-            # Flag to signify whether or not this is a re-run (multiple runs of the same commit)
-            # Right now, we don't fully support re-runs but in the future we will have an extra subfolder for each commit hash with the
-            # last run hash that was used for the first run for the commit so we can retreive the same reference point for building the
-            # change list to ensure each subsequent run is using the same data but for the time being, just perform a regular run
-            is_rerun = False
+
+            # Flag for the corner case where:
+            # 1. TIAF was already run previously for this commit.
+            # 2. There was no last commit hash when TIAF last ran on this commit (due to no coverage data existing yet for this branch).
+            # 3. TIAF has not been run on any other commits between the run for this commit and the last run for this commit.
+            # The above results in TIAF being stuck in a state of generating an empty change list (and thus doing no work until another
+            # commit comes in), which is problematic if the commit needs to be re-run for whatever reason, so in these conditions we
+            # revert back to a regular test run until another commit comes in.
+            cannot_rerun_with_instrumentation = False
+
             if persistent_storage.has_historic_data:
                 logger.info("Historic data found.")
                 self._src_commit = persistent_storage.last_commit_hash
-                # Perform some basic sanity checks on the commit hashes to ensure confidence in the integrity of the environment
-                if self._src_commit == self._dst_commit:
-                    logger.info(f"Source commit '{self._src_commit}' and destination commit '{self._dst_commit}', implying this is a re-run. A regular sequence will instead be performed.")
-                    persistent_storage = None
-                    is_rerun = True
+
+                # Check to see if this is a re-run for this commit before any other changes have come in
+                if persistent_storage.is_repeat_sequence:
+                    if persistent_storage.can_rerun_sequence:
+                        logger.info(f"This sequence is being re-run before any other changes have come in, so the last commit '{persistent_storage.this_commit_last_commit_hash}' used for the previous sequence will be used instead.")
+                        self._src_commit = persistent_storage.this_commit_last_commit_hash
+                    else:
+                        logger.info(f"This sequence is being re-run before any other changes have come in, but there is no useful historic data. A regular sequence will be performed instead.")
+                        persistent_storage = None
+                        cannot_rerun_with_instrumentation = True
                 else:
                     self._attempt_to_generate_change_list()
             else:
@@ -268,7 +277,7 @@ class TestImpact:
                 args.append(f"--changelist={self._change_list_path}")
                 logger.info(f"Change list is set to '{self._change_list_path}'.")
             else:
-                if self._is_source_of_truth_branch and not is_rerun:
+                if self._is_source_of_truth_branch and not cannot_rerun_with_instrumentation:
                     # Use seed sequence (instrumented all tests) for coverage updating branches so we can generate the coverage bed for future sequences
                     sequence_type = "seed"
                     # We always continue after test failures when seeding to ensure we capture the coverage for all test targets
@@ -314,14 +323,18 @@ class TestImpact:
         logger.info(f"Args: {unpacked_args}")
         runtime_result = subprocess.run([str(self._tiaf_bin)] + args)
         report = None
-
+        # If the sequence completed (with or without failures) we will update the historical meta-data
         if runtime_result.returncode == 0 or runtime_result.returncode == 7:
             logger.info("Test impact analysis runtime returned successfully.")
-            if self._is_source_of_truth_branch and persistent_storage is not None:
-                persistent_storage.update_and_store_historic_data(self._dst_commit)
+
+            # Get the sequence report the runtime generated
             with open(report_file) as json_file:
                 report = json.load(json_file)
+
+            # Attempt to store the historic data for this branch and sequence
+            if self._is_source_of_truth_branch and persistent_storage is not None:
+                persistent_storage.update_and_store_historic_data()
         else:
             logger.error(f"The test impact analysis runtime returned with error: '{runtime_result.returncode}'.")
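
Note: the re-run handling added to tiaf.py above condenses to the decision flow sketched below. This is illustrative only, not code from this change; the helper name resolve_sequence is hypothetical, and storage stands in for the persistent storage instance with the properties the diff introduces.

    # Hypothetical condensation of the re-run logic above; `storage` exposes the
    # members the diff adds (is_repeat_sequence, can_rerun_sequence,
    # this_commit_last_commit_hash) plus the existing last_commit_hash.
    def resolve_sequence(storage, src_commit):
        """Returns (source commit to diff against, whether instrumented TIA can run)."""
        if storage.is_repeat_sequence:
            if storage.can_rerun_sequence:
                # Reuse the reference point recorded for this commit's first run
                return storage.this_commit_last_commit_hash, True
            # No useful historic data: fall back to a regular, uninstrumented run
            return src_commit, False
        # Normal case: diff against the branch's last recorded commit
        return storage.last_commit_hash, True
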
diff --git a/scripts/build/TestImpactAnalysis/tiaf_persistent_storage.py b/scripts/build/TestImpactAnalysis/tiaf_persistent_storage.py
index 1ee3ac7e8c..3fff05b549 100644
--- a/scripts/build/TestImpactAnalysis/tiaf_persistent_storage.py
+++ b/scripts/build/TestImpactAnalysis/tiaf_persistent_storage.py
@@ -15,23 +15,39 @@ logger = get_logger(__file__)

 # Abstraction for the persistent storage required by TIAF to store and retrieve the branch coverage data and other meta-data
 class PersistentStorage(ABC):
-    def __init__(self, config: dict, suite: str):
+
+    WORKSPACE_KEY = "workspace"
+    LAST_RUNS_KEY = "last_runs"
+    ACTIVE_KEY = "active"
+    ROOT_KEY = "root"
+    RELATIVE_PATHS_KEY = "relative_paths"
+    TEST_IMPACT_DATA_FILES_KEY = "test_impact_data_files"
+    LAST_COMMIT_HASH_KEY = "last_commit_hash"
+    COVERAGE_DATA_KEY = "coverage_data"
+
+    def __init__(self, config: dict, suite: str, commit: str):
         """
         Initializes the persistent storage into a state for which there is no historic data available.

         @param config: The runtime configuration to obtain the data file paths from.
         @param suite:  The test suite for which the historic data will be obtained for.
+        @param commit: The commit hash for this build.
         """

         # Work on the assumption that there is no historic meta-data (a valid state to be in, should none exist)
         self._last_commit_hash = None
         self._has_historic_data = False
+        self._has_previous_last_commit_hash = False
+        self._this_commit_hash = commit
+        self._this_commit_hash_last_commit_hash = None
+        self._historic_data = None
+
+        logger.info(f"Attempting to access persistent storage for the commit {self._this_commit_hash}")
         try:
             # The runtime expects the coverage data to be in the location specified in the config file (unless overridden with
             # the --datafile command line argument, which the TIAF scripts do not do)
-            self._active_workspace = pathlib.Path(config["workspace"]["active"]["root"])
-            unpacked_coverage_data_file = config["workspace"]["active"]["relative_paths"]["test_impact_data_files"][suite]
+            self._active_workspace = pathlib.Path(config[self.WORKSPACE_KEY][self.ACTIVE_KEY][self.ROOT_KEY])
+            unpacked_coverage_data_file = config[self.WORKSPACE_KEY][self.ACTIVE_KEY][self.RELATIVE_PATHS_KEY][self.TEST_IMPACT_DATA_FILES_KEY][suite]
         except KeyError as e:
             raise SystemError(f"The config does not contain the key {str(e)}.")
@@ -45,17 +61,36 @@ class PersistentStorage(ABC):
         """

         self._has_historic_data = False
+        self._has_previous_last_commit_hash = False
         try:
-            historic_data = json.loads(historic_data_json)
-            self._last_commit_hash = historic_data["last_commit_hash"]
+            self._historic_data = json.loads(historic_data_json)
+
+            # Last commit hash for this branch
+            self._last_commit_hash = self._historic_data[self.LAST_COMMIT_HASH_KEY]
             logger.info(f"Last commit hash '{self._last_commit_hash}' found.")

+            if self.LAST_RUNS_KEY in self._historic_data:
+                # Last commit hash for the sequence that was run for this commit previously (if any)
+                if self._this_commit_hash in self._historic_data[self.LAST_RUNS_KEY]:
+                    # 'None' is a valid value for the previously used last commit hash if there was no coverage data at that time
+                    self._this_commit_hash_last_commit_hash = self._historic_data[self.LAST_RUNS_KEY][self._this_commit_hash]
+                    self._has_previous_last_commit_hash = self._this_commit_hash_last_commit_hash is not None
+
+                    if self._has_previous_last_commit_hash:
+                        logger.info(f"Last commit hash '{self._this_commit_hash_last_commit_hash}' was used previously for this commit.")
+                    else:
+                        logger.info(f"Prior sequence data found for this commit but it is empty (there was no coverage data available at that time).")
+                else:
+                    logger.info(f"No prior sequence data found for commit '{self._this_commit_hash}', this is the first sequence for this commit.")
+            else:
+                logger.info(f"No prior sequence data found for any commits.")
+
             # Create the active workspace directory where the coverage data file will be placed and unpack the coverage data so
             # it is accessible by the runtime
             self._active_workspace.mkdir(exist_ok=True)
             with open(self._unpacked_coverage_data_file, "w", newline='\n') as coverage_data:
-                coverage_data.write(historic_data["coverage_data"])
+                coverage_data.write(self._historic_data[self.COVERAGE_DATA_KEY])

             self._has_historic_data = True
         except json.JSONDecodeError:
@@ -65,20 +100,31 @@ class PersistentStorage(ABC):
         except EnvironmentError as e:
             logger.error(f"There was a problem the coverage data file '{self._unpacked_coverage_data_file}': '{e}'.")

-    def _pack_historic_data(self, last_commit_hash: str):
+    def _pack_historic_data(self):
         """
         Packs the current historic data into a JSON file for serializing.

-        @param last_commit_hash: The commit hash to associate the coverage data (and any other meta data) with.
-        @return: The packed historic data in JSON format.
+        @return: The packed historic data in JSON format.
         """

         try:
             # Attempt to read the existing coverage data
             if self._unpacked_coverage_data_file.is_file():
+                if not self._historic_data:
+                    self._historic_data = {}
+
+                # Last commit hash for this branch
+                self._historic_data[self.LAST_COMMIT_HASH_KEY] = self._this_commit_hash
+
+                # Last commit hash for this commit
+                if not self.LAST_RUNS_KEY in self._historic_data:
+                    self._historic_data[self.LAST_RUNS_KEY] = {}
+                self._historic_data[self.LAST_RUNS_KEY][self._this_commit_hash] = self._last_commit_hash
+
+                # Coverage data for this branch
                 with open(self._unpacked_coverage_data_file, "r") as coverage_data:
-                    historic_data = {"last_commit_hash": last_commit_hash, "coverage_data": coverage_data.read()}
-                return json.dumps(historic_data)
+                    self._historic_data[self.COVERAGE_DATA_KEY] = coverage_data.read()
+                return json.dumps(self._historic_data)
             else:
                 logger.info(f"No coverage data exists at location '{self._unpacked_coverage_data_file}'.")
         except EnvironmentError as e:
@@ -97,16 +143,14 @@ class PersistentStorage(ABC):
         """
         pass

-    def update_and_store_historic_data(self, last_commit_hash: str):
+    def update_and_store_historic_data(self):
         """
         Updates the historic data and stores it in the designated persistent storage location.
-
-        @param last_commit_hash: The commit hash to associate the coverage data (and any other meta data) with.
         """

-        historic_data_json = self._pack_historic_data(last_commit_hash)
+        historic_data_json = self._pack_historic_data()
         if historic_data_json:
-            logger.info(f"Attempting to store historic data with new last commit hash '{last_commit_hash}'...")
+            logger.info(f"Attempting to store historic data with new last commit hash '{self._this_commit_hash}'...")
             self._store_historic_data(historic_data_json)
             logger.info("The historic data was successfully stored.")
@@ -119,4 +163,16 @@ class PersistentStorage(ABC):
     @property
     def last_commit_hash(self):
-        return self._last_commit_hash
\ No newline at end of file
+        return self._last_commit_hash
+
+    @property
+    def is_repeat_sequence(self):
+        return self._last_commit_hash == self._this_commit_hash
+
+    @property
+    def this_commit_last_commit_hash(self):
+        return self._this_commit_hash_last_commit_hash
+
+    @property
+    def can_rerun_sequence(self):
+        return self._has_previous_last_commit_hash
\ No newline at end of file
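
For context, once _pack_historic_data runs with these changes, the serialized historic data takes roughly the shape below. The keys are the class constants defined in the diff; every hash value here is invented for illustration.

    # Illustrative shape only; commit hashes are made up.
    historic_data = {
        "last_commit_hash": "1a2b3c",  # LAST_COMMIT_HASH_KEY: branch-wide reference commit
        "coverage_data": "...",        # COVERAGE_DATA_KEY: written to the active workspace on unpack
        "last_runs": {                 # LAST_RUNS_KEY: per-commit reference points
            "1a2b3c": "4d5e6f",        # commit -> last commit hash used for its first run
            "7a8b9c": None             # None: no coverage data existed when that commit first ran
        }
    }
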
""" - super().__init__(config, suite) + super().__init__(config, suite, commit) try: # Attempt to obtain the local persistent data location specified in the runtime config file - self._historic_workspace = pathlib.Path(config["workspace"]["historic"]["root"]) - historic_data_file = pathlib.Path(config["workspace"]["historic"]["relative_paths"]["data"]) + self._historic_workspace = pathlib.Path(config[self.WORKSPACE_KEY][self.HISTORIC_KEY][self.ROOT_KEY]) + historic_data_file = pathlib.Path(config[self.WORKSPACE_KEY][self.HISTORIC_KEY][self.RELATIVE_PATHS_KEY][self.DATA_KEY]) # Attempt to unpack the local historic data file self._historic_data_file = self._historic_workspace.joinpath(historic_data_file) diff --git a/scripts/build/TestImpactAnalysis/tiaf_persistent_storage_s3.py b/scripts/build/TestImpactAnalysis/tiaf_persistent_storage_s3.py index 1a279855ea..074caf73a1 100644 --- a/scripts/build/TestImpactAnalysis/tiaf_persistent_storage_s3.py +++ b/scripts/build/TestImpactAnalysis/tiaf_persistent_storage_s3.py @@ -18,16 +18,23 @@ logger = get_logger(__file__) # Implementation of s3 bucket persistent storage class PersistentStorageS3(PersistentStorage): - def __init__(self, config: dict, suite: str, s3_bucket: str, root_dir: str, branch: str): + + META_KEY = "meta" + BUILD_CONFIG_KEY = "build_config" + + def __init__(self, config: dict, suite: str, commit: str, s3_bucket: str, root_dir: str, branch: str): """ Initializes the persistent storage with the specified s3 bucket. @param config: The runtime config file to obtain the data file paths from. @param suite: The test suite for which the historic data will be obtained for. + @param commit: The commit hash for this build. @param s3_bucket: The s3 bucket to use for storing nd retrieving historic data. + @param root_dir: The root directory to use for the historic data object. + @branch branch: The branch to retrieve the historic data for. """ - super().__init__(config, suite) + super().__init__(config, suite, commit) try: # We store the historic data as compressed JSON @@ -37,8 +44,8 @@ class PersistentStorageS3(PersistentStorage): historic_data_file = f"historic_data.{object_extension}" # The location of the data is in the form // so the build config of each branch gets its own historic data - self._dir = f'{root_dir}/{branch}/{config["meta"]["build_config"]}' - self._historic_data_key = f'{self._dir}/{historic_data_file}' + self._historic_data_dir = f'{root_dir}/{branch}/{config[self.META_KEY][self.BUILD_CONFIG_KEY]}' + self._historic_data_key = f'{self._historic_data_dir}/{historic_data_file}' logger.info(f"Attempting to retrieve historic data for branch '{branch}' at location '{self._historic_data_key}' on bucket '{s3_bucket}'...") self._s3 = boto3.resource("s3") @@ -49,7 +56,7 @@ class PersistentStorageS3(PersistentStorage): logger.info(f"Historic data found for branch '{branch}'.") # Archive the existing object with the name of the existing last commit hash - #archive_key = f"{self._dir}/archive/{self._last_commit_hash}.{object_extension}" + #archive_key = f"{self._historic_data_dir}/archive/{self._last_commit_hash}.{object_extension}" #logger.info(f"Archiving existing historic data to '{archive_key}' in bucket '{self._bucket.name}'...") #self._bucket.copy({"Bucket": self._bucket.name, "Key": self._historic_data_key}, archive_key) #logger.info(f"Archiving complete.")