Address commit re-run corner case.

Signed-off-by: John <jonawals@amazon.com>
parent 4cf384c2c5
commit 4dd39c3929

@@ -87,7 +87,7 @@ class TestImpact:
         try:
             # Attempt to generate a diff between the src and dst commits
-            logger.error(f"Source '{self._src_commit}' and destination '{self._dst_commit}' will be diff'd.")
+            logger.info(f"Source '{self._src_commit}' and destination '{self._dst_commit}' will be diff'd.")
             diff_path = pathlib.Path(pathlib.PurePath(self._temp_workspace).joinpath(f"changelist.{self._instance_id}.diff"))
             self._repo.create_diff_file(self._src_commit, self._dst_commit, diff_path, multi_branch)
         except RuntimeError as e:
@@ -219,28 +219,37 @@ class TestImpact:
         try:
             # Persistent storage location
             if s3_bucket:
-                persistent_storage = PersistentStorageS3(self._config, suite, s3_bucket, s3_top_level_dir, self._source_of_truth_branch)
+                persistent_storage = PersistentStorageS3(self._config, suite, self._dst_commit, s3_bucket, s3_top_level_dir, self._source_of_truth_branch)
             else:
-                persistent_storage = PersistentStorageLocal(self._config, suite)
+                persistent_storage = PersistentStorageLocal(self._config, suite, self._dst_commit)
         except SystemError as e:
             logger.warning(f"The persistent storage encountered an irrecoverable error, test impact analysis will be disabled: '{e}'")
             persistent_storage = None

         if persistent_storage:
-            # Flag to signify whether or not this is a re-run (multiple runs of the same commit)
-            # Right now, we don't fully support re-runs but in the future we will have an extra subfolder for each commit hash with the
-            # last run hash that was used for the first run for the commit so we can retreive the same reference point for building the
-            # change list to ensure each subsequent run is using the same data but for the time being, just perform a regular run
-            is_rerun = False
+            # Flag for the corner case where:
+            # 1. TIAF was already run previously for this commit.
+            # 2. There was no last commit hash when TIAF last ran on this commit (due to no coverage data existing yet for this branch).
+            # 3. TIAF has not been run on any other commits between the run for this commit and the last run for this commit.
+            # The above results in TIAF being stuck in a state of generating an empty change list (and thus doing no work until another
+            # commit comes in), which is problematic if the commit needs to be re-run for whatever reason, so in these conditions we
+            # revert back to a regular test run until another commit comes in.
+            cannot_rerun_with_instrumentation = False
             if persistent_storage.has_historic_data:
                 logger.info("Historic data found.")
                 self._src_commit = persistent_storage.last_commit_hash
-                # Perform some basic sanity checks on the commit hashes to ensure confidence in the integrity of the environment
-                if self._src_commit == self._dst_commit:
-                    logger.info(f"Source commit '{self._src_commit}' and destination commit '{self._dst_commit}', implying this is a re-run. A regular sequence will instead be performed.")
-                    persistent_storage = None
-                    is_rerun = True
+                # Check to see if this is a re-run for this commit before any other changes have come in
+                if persistent_storage.is_repeat_sequence:
+                    if persistent_storage.can_rerun_sequence:
+                        logger.info(f"This sequence is being re-run before any other changes have come in so the last commit '{persistent_storage.this_commit_last_commit_hash}' used for the previous sequence will be used instead.")
+                        self._src_commit = persistent_storage.this_commit_last_commit_hash
+                    else:
+                        logger.info(f"This sequence is being re-run before any other changes have come in but there is no useful historic data. A regular sequence will be performed instead.")
+                        persistent_storage = None
+                        cannot_rerun_with_instrumentation = True
                 else:
                     self._attempt_to_generate_change_list()
             else:
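
In short: a re-run of the same commit now reuses the reference point recorded on that commit's first run, and only falls back to a regular (non-analysis) run when no such reference point exists. A minimal sketch of the decision flow, with a hypothetical `storage` object standing in for the PersistentStorage instance:

from typing import Optional

def pick_src_commit(storage, fallback_src: str) -> Optional[str]:
    """Return the commit to diff against, or None to force a regular run (sketch only)."""
    if storage.is_repeat_sequence:            # the branch last ran on this very commit
        if storage.can_rerun_sequence:        # a usable prior reference point was recorded
            return storage.this_commit_last_commit_hash
        return None                           # no usable history: revert to a regular run
    return fallback_src                       # normal case: diff against the branch's last commit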
@@ -268,7 +277,7 @@ class TestImpact:
                 args.append(f"--changelist={self._change_list_path}")
                 logger.info(f"Change list is set to '{self._change_list_path}'.")
         else:
-            if self._is_source_of_truth_branch and not is_rerun:
+            if self._is_source_of_truth_branch and not cannot_rerun_with_instrumentation:
                 # Use seed sequence (instrumented all tests) for coverage updating branches so we can generate the coverage bed for future sequences
                 sequence_type = "seed"
                 # We always continue after test failures when seeding to ensure we capture the coverage for all test targets
@@ -314,14 +323,18 @@ class TestImpact:
         logger.info(f"Args: {unpacked_args}")
         runtime_result = subprocess.run([str(self._tiaf_bin)] + args)
         report = None

         # If the sequence completed (with or without failures) we will update the historical meta-data
         if runtime_result.returncode == 0 or runtime_result.returncode == 7:
             logger.info("Test impact analysis runtime returned successfully.")
-            if self._is_source_of_truth_branch and persistent_storage is not None:
-                persistent_storage.update_and_store_historic_data(self._dst_commit)
+            # Get the sequence report the runtime generated
             with open(report_file) as json_file:
                 report = json.load(json_file)
+            # Attempt to store the historic data for this branch and sequence
+            if self._is_source_of_truth_branch and persistent_storage is not None:
+                persistent_storage.update_and_store_historic_data()
         else:
             logger.error(f"The test impact analysis runtime returned with error: '{runtime_result.returncode}'.")

@@ -15,23 +15,39 @@ logger = get_logger(__file__)

 # Abstraction for the persistent storage required by TIAF to store and retrieve the branch coverage data and other meta-data
 class PersistentStorage(ABC):

-    def __init__(self, config: dict, suite: str):
+    WORKSPACE_KEY = "workspace"
+    LAST_RUNS_KEY = "last_runs"
+    ACTIVE_KEY = "active"
+    ROOT_KEY = "root"
+    RELATIVE_PATHS_KEY = "relative_paths"
+    TEST_IMPACT_DATA_FILES_KEY = "test_impact_data_files"
+    LAST_COMMIT_HASH_KEY = "last_commit_hash"
+    COVERAGE_DATA_KEY = "coverage_data"
+
+    def __init__(self, config: dict, suite: str, commit: str):
         """
         Initializes the persistent storage into a state for which there is no historic data available.

         @param config: The runtime configuration to obtain the data file paths from.
         @param suite: The test suite for which the historic data will be obtained for.
+        @param commit: The commit hash for this build.
         """

         # Work on the assumption that there is no historic meta-data (a valid state to be in, should none exist)
         self._last_commit_hash = None
         self._has_historic_data = False
+        self._has_previous_last_commit_hash = False
+        self._this_commit_hash = commit
+        self._this_commit_hash_last_commit_hash = None
+        self._historic_data = None
+        logger.info(f"Attempting to access persistent storage for the commit {self._this_commit_hash}")

         try:
             # The runtime expects the coverage data to be in the location specified in the config file (unless overridden with
             # the --datafile command line argument, which the TIAF scripts do not do)
-            self._active_workspace = pathlib.Path(config["workspace"]["active"]["root"])
-            unpacked_coverage_data_file = config["workspace"]["active"]["relative_paths"]["test_impact_data_files"][suite]
+            self._active_workspace = pathlib.Path(config[self.WORKSPACE_KEY][self.ACTIVE_KEY][self.ROOT_KEY])
+            unpacked_coverage_data_file = config[self.WORKSPACE_KEY][self.ACTIVE_KEY][self.RELATIVE_PATHS_KEY][self.TEST_IMPACT_DATA_FILES_KEY][suite]
         except KeyError as e:
             raise SystemError(f"The config does not contain the key {str(e)}.")
@@ -45,17 +61,36 @@ class PersistentStorage(ABC):
         """

         self._has_historic_data = False
+        self._has_previous_last_commit_hash = False
         try:
-            historic_data = json.loads(historic_data_json)
-            self._last_commit_hash = historic_data["last_commit_hash"]
+            self._historic_data = json.loads(historic_data_json)
+
+            # Last commit hash for this branch
+            self._last_commit_hash = self._historic_data[self.LAST_COMMIT_HASH_KEY]
             logger.info(f"Last commit hash '{self._last_commit_hash}' found.")
+
+            if self.LAST_RUNS_KEY in self._historic_data:
+                # Last commit hash for the sequence that was run for this commit previously (if any)
+                if self._this_commit_hash in self._historic_data[self.LAST_RUNS_KEY]:
+                    # 'None' is a valid value for the previously used last commit hash if there was no coverage data at that time
+                    self._this_commit_hash_last_commit_hash = self._historic_data[self.LAST_RUNS_KEY][self._this_commit_hash]
+                    self._has_previous_last_commit_hash = self._this_commit_hash_last_commit_hash is not None
+                    if self._has_previous_last_commit_hash:
+                        logger.info(f"Last commit hash '{self._this_commit_hash_last_commit_hash}' was used previously for this commit.")
+                    else:
+                        logger.info(f"Prior sequence data found for this commit but it is empty (there was no coverage data available at that time).")
+                else:
+                    logger.info(f"No prior sequence data found for commit '{self._this_commit_hash}', this is the first sequence for this commit.")
+            else:
+                logger.info(f"No prior sequence data found for any commits.")

             # Create the active workspace directory where the coverage data file will be placed and unpack the coverage data so
             # it is accessible by the runtime
             self._active_workspace.mkdir(exist_ok=True)
             with open(self._unpacked_coverage_data_file, "w", newline='\n') as coverage_data:
-                coverage_data.write(historic_data["coverage_data"])
+                coverage_data.write(self._historic_data[self.COVERAGE_DATA_KEY])
             self._has_historic_data = True
         except json.JSONDecodeError:
@@ -65,20 +100,31 @@ class PersistentStorage(ABC):
         except EnvironmentError as e:
             logger.error(f"There was a problem the coverage data file '{self._unpacked_coverage_data_file}': '{e}'.")

-    def _pack_historic_data(self, last_commit_hash: str):
+    def _pack_historic_data(self):
         """
         Packs the current historic data into a JSON file for serializing.

-        @param last_commit_hash: The commit hash to associate the coverage data (and any other meta data) with.
         @return: The packed historic data in JSON format.
         """

         try:
             # Attempt to read the existing coverage data
             if self._unpacked_coverage_data_file.is_file():
+                if not self._historic_data:
+                    self._historic_data = {}
+
+                # Last commit hash for this branch
+                self._historic_data[self.LAST_COMMIT_HASH_KEY] = self._this_commit_hash
+
+                # Last commit hash for this commit
+                if not self.LAST_RUNS_KEY in self._historic_data:
+                    self._historic_data[self.LAST_RUNS_KEY] = {}
+                self._historic_data[self.LAST_RUNS_KEY][self._this_commit_hash] = self._last_commit_hash
+
+                # Coverage data for this branch
                 with open(self._unpacked_coverage_data_file, "r") as coverage_data:
-                    historic_data = {"last_commit_hash": last_commit_hash, "coverage_data": coverage_data.read()}
-                    return json.dumps(historic_data)
+                    self._historic_data[self.COVERAGE_DATA_KEY] = coverage_data.read()
+                    return json.dumps(self._historic_data)
             else:
                 logger.info(f"No coverage data exists at location '{self._unpacked_coverage_data_file}'.")
         except EnvironmentError as e:
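
After this change the packed payload carries, per branch, the coverage bed, the branch's last sequenced commit, and a per-commit record of the reference point used on each commit's first run. An illustrative (not authoritative) shape of that payload, with hash values borrowed from this commit's header purely as examples:

# Illustrative shape of the packed historic data (hash values hypothetical):
historic_data = {
    "last_commit_hash": "4dd39c3929",        # last commit sequenced on this branch
    "last_runs": {
        "4dd39c3929": "4cf384c2c5"           # per commit: the src commit its first run diffed against
    },
    "coverage_data": "<unpacked coverage file contents>"
}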
@@ -97,16 +143,14 @@ class PersistentStorage(ABC):
         """
         pass

-    def update_and_store_historic_data(self, last_commit_hash: str):
+    def update_and_store_historic_data(self):
         """
         Updates the historic data and stores it in the designated persistent storage location.
-
-        @param last_commit_hash: The commit hash to associate the coverage data (and any other meta data) with.
         """

-        historic_data_json = self._pack_historic_data(last_commit_hash)
+        historic_data_json = self._pack_historic_data()
         if historic_data_json:
-            logger.info(f"Attempting to store historic data with new last commit hash '{last_commit_hash}'...")
+            logger.info(f"Attempting to store historic data with new last commit hash '{self._this_commit_hash}'...")
             self._store_historic_data(historic_data_json)
             logger.info("The historic data was successfully stored.")
@@ -119,4 +163,16 @@ class PersistentStorage(ABC):
     @property
     def last_commit_hash(self):
         return self._last_commit_hash
+
+    @property
+    def is_repeat_sequence(self):
+        return self._last_commit_hash == self._this_commit_hash
+
+    @property
+    def this_commit_last_commit_hash(self):
+        return self._this_commit_hash_last_commit_hash
+
+    @property
+    def can_rerun_sequence(self):
+        return self._has_previous_last_commit_hash
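
Taken together, the new properties let callers detect a re-run and recover the original reference point without reaching into the underlying dictionary. A small worked example of the state a re-run would observe, using hypothetical hashes:

# Hypothetical state for a re-run of commit "abc" whose first run diffed against "123":
last_commit_hash = "abc"                # branch's last sequenced commit
this_commit_hash = "abc"                # commit being built now
this_commit_last_commit_hash = "123"    # recorded on the first run of "abc"

assert last_commit_hash == this_commit_hash        # is_repeat_sequence -> True
assert this_commit_last_commit_hash is not None    # can_rerun_sequence -> True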

@@ -15,19 +15,24 @@ logger = get_logger(__file__)

 # Implementation of local persistent storage
 class PersistentStorageLocal(PersistentStorage):

-    def __init__(self, config: str, suite: str):
+    HISTORIC_KEY = "historic"
+    DATA_KEY = "data"
+
+    def __init__(self, config: str, suite: str, commit: str):
         """
         Initializes the persistent storage with any local historic data available.

         @param config: The runtime config file to obtain the data file paths from.
         @param suite: The test suite for which the historic data will be obtained for.
+        @param commit: The commit hash for this build.
         """

-        super().__init__(config, suite)
+        super().__init__(config, suite, commit)

         try:
             # Attempt to obtain the local persistent data location specified in the runtime config file
-            self._historic_workspace = pathlib.Path(config["workspace"]["historic"]["root"])
-            historic_data_file = pathlib.Path(config["workspace"]["historic"]["relative_paths"]["data"])
+            self._historic_workspace = pathlib.Path(config[self.WORKSPACE_KEY][self.HISTORIC_KEY][self.ROOT_KEY])
+            historic_data_file = pathlib.Path(config[self.WORKSPACE_KEY][self.HISTORIC_KEY][self.RELATIVE_PATHS_KEY][self.DATA_KEY])
             # Attempt to unpack the local historic data file
             self._historic_data_file = self._historic_workspace.joinpath(historic_data_file)
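
For reference, the nested keys read by the base class and this local implementation imply a runtime config of roughly the following shape; the concrete paths and suite name here are hypothetical:

# Hypothetical runtime config fragment matching the keys read by the storage classes:
config = {
    "workspace": {
        "active": {
            "root": "/path/to/active/workspace",
            "relative_paths": {
                "test_impact_data_files": {"main": "TestImpactData.main.json"}
            }
        },
        "historic": {
            "root": "/path/to/historic/workspace",
            "relative_paths": {"data": "historic_data.json"}
        }
    }
}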

@@ -18,16 +18,23 @@ logger = get_logger(__file__)

 # Implementation of s3 bucket persistent storage
 class PersistentStorageS3(PersistentStorage):

-    def __init__(self, config: dict, suite: str, s3_bucket: str, root_dir: str, branch: str):
+    META_KEY = "meta"
+    BUILD_CONFIG_KEY = "build_config"
+
+    def __init__(self, config: dict, suite: str, commit: str, s3_bucket: str, root_dir: str, branch: str):
         """
         Initializes the persistent storage with the specified s3 bucket.

         @param config: The runtime config file to obtain the data file paths from.
         @param suite: The test suite for which the historic data will be obtained for.
+        @param commit: The commit hash for this build.
         @param s3_bucket: The s3 bucket to use for storing and retrieving historic data.
+        @param root_dir: The root directory to use for the historic data object.
+        @param branch: The branch to retrieve the historic data for.
         """

-        super().__init__(config, suite)
+        super().__init__(config, suite, commit)

         try:
             # We store the historic data as compressed JSON
@@ -37,8 +44,8 @@ class PersistentStorageS3(PersistentStorage):
             historic_data_file = f"historic_data.{object_extension}"

             # The location of the data is in the form <root_dir>/<branch>/<config> so the build config of each branch gets its own historic data
-            self._dir = f'{root_dir}/{branch}/{config["meta"]["build_config"]}'
-            self._historic_data_key = f'{self._dir}/{historic_data_file}'
+            self._historic_data_dir = f'{root_dir}/{branch}/{config[self.META_KEY][self.BUILD_CONFIG_KEY]}'
+            self._historic_data_key = f'{self._historic_data_dir}/{historic_data_file}'

             logger.info(f"Attempting to retrieve historic data for branch '{branch}' at location '{self._historic_data_key}' on bucket '{s3_bucket}'...")
             self._s3 = boto3.resource("s3")
@@ -49,7 +56,7 @@ class PersistentStorageS3(PersistentStorage):
                 logger.info(f"Historic data found for branch '{branch}'.")

                 # Archive the existing object with the name of the existing last commit hash
-                #archive_key = f"{self._dir}/archive/{self._last_commit_hash}.{object_extension}"
+                #archive_key = f"{self._historic_data_dir}/archive/{self._last_commit_hash}.{object_extension}"
                 #logger.info(f"Archiving existing historic data to '{archive_key}' in bucket '{self._bucket.name}'...")
                 #self._bucket.copy({"Bucket": self._bucket.name, "Key": self._historic_data_key}, archive_key)
                 #logger.info(f"Archiving complete.")
