add code to support inference test

mravanelli · mravanelli · commit eccbea71d7eb · 2023-07-26T14:47:04.000-04:00
diff --git a/tests/utils/recipe_tests.py b/tests/utils/recipe_tests.py
@@ -182,7 +182,8 @@ def check_performance(
     check field of the csv recipe file
     For instance: performance_check=[train_log.txt, train loss, <=15, epoch: 2]),
     will check the variable "train_loss" in the train_log.txt at epoch 2. It will
-    raise an error if the train_loss is >15.
+    raise an error if the train_loss is >15. If epoch is -1, we check the last
+    line of the file.
 
     Arguments
     ---------
@@ -228,10 +229,18 @@ def check_performance(
 
     # Fitler the lines
     lines_filt = []
+    last_line = ""
     for line in lines:
+        if len(line.strip()) > 0:
+            last_line = line
         if epoch in line:
             lines_filt.append(line)
 
+    # If epoch: -1, we take the last line of the file
+    epoch_id = int(epoch.split(":")[-1].strip())
+    if epoch_id == -1:
+        lines_filt.append(last_line)
+
     # Raising an error if there are no lines after applying the filter
     if len(lines_filt) == 0:
         print(
@@ -243,16 +252,15 @@ def check_performance(
     for line in lines_filt:
 
         # Search variable value
-        pattern = variable + ": " + "(.*?) "
-        var_value = re.search(pattern, line)
+        var_value = extract_value(line, variable)
 
         if var_value is None:
             print(
                 "\tERROR: The file %s of recipe %s does not contain the variable %s (needed for performance checks)"
                 % (filename, recipe_id, variable)
             )
             return False
-        var_value = float(var_value.group(1))
+        var_value = float(var_value)
         check = check_threshold(threshold, var_value)
 
         if not (check):
@@ -266,6 +274,41 @@ def check_performance(
     return check
 
 
+def extract_value(string, key):
+    """Extracts from the input string the value given a key. For instance:
+    input_string = "Epoch loaded: 49 - test loss: 4.71e-01, test PER: 14.21"
+    print(extract_value(input_string, "test loss"))    # Output: 0.471
+    print(extract_value(input_string, "test PER"))     # Output: 14.21
+
+    Arguments
+    ---------
+    string: str
+        The input string. It should be in the format mentioned above.
+    key: str
+        The key argument to extract.
+
+    Returns
+    ---------
+    value: float or str
+        The value corresponding to the specified key.
+    """
+    escaped_key = re.escape(key)
+
+    # Create the regular expression pattern to match the argument and its corresponding value
+    pattern = r"(?P<key>{})\s*:\s*(?P<value>[-+]?\d*\.\d+([eE][-+]?\d+)?)".format(
+        escaped_key
+    )
+
+    # Search for the pattern in the input string
+    match = re.search(pattern, string)
+
+    if match:
+        value = match.group("value")
+        return value
+    else:
+        return None
+
+
 def check_threshold(threshold, value):
     """Checks if the value satisfied the threshold constraints.
 
@@ -411,56 +454,36 @@ def run_recipe_tests(
 
     # Download all upfront
     if download_only:
-        for i, recipe_id in enumerate(test_script.keys()):
-            # If we are interested in performance checks only, skip
-            check_str = test_check[recipe_id].strip()
-            if run_tests_with_checks_only:
-                if len(check_str) == 0:
-                    continue
+        download_only_test(
+            test_script,
+            test_hparam,
+            test_flag,
+            test_check,
+            run_opts,
+            run_tests_with_checks_only,
+            output_folder,
+        )
+        return False
 
-            print(
-                "(%i/%i) Collecting pretrained models for %s..."
-                % (i + 1, len(test_script.keys()), recipe_id)
-            )
+    # Run  script (check how to get std out, std err and save them in files)
+    check = True
+    for i, recipe_id in enumerate(test_script.keys()):
 
+        # Check if the output folder is specified in test_field
+        spec_outfold = False
+        if "--output_folder" in test_flag[recipe_id]:
+            pattern = r"--output_folder\s*=?\s*([^\s']+|'[^']*')"
+            match = re.search(pattern, test_flag[recipe_id])
+            output_fold = match.group(1).strip("'")
+            spec_outfold = True
+        else:
             output_fold = os.path.join(output_folder, recipe_id)
             os.makedirs(output_fold, exist_ok=True)
-            stdout_file = os.path.join(output_fold, "stdout.txt")
-            stderr_file = os.path.join(output_fold, "stderr.txt")
-
-            cmd = (
-                "python -c 'import sys;from hyperpyyaml import load_hyperpyyaml;import speechbrain;"
-                "hparams_file, run_opts, overrides = speechbrain.parse_arguments(sys.argv[1:]);"
-                "fin=open(hparams_file);hparams = load_hyperpyyaml(fin, overrides);fin.close();"
-                # 'speechbrain.create_experiment_directory(experiment_directory=hparams["output_folder"],'
-                # 'hyperparams_to_save=hparams_file,overrides=overrides,);'
-            )
-            with open(test_hparam[recipe_id]) as hparam_file:
-                for line in hparam_file:
-                    if "pretrainer" in line:
-                        cmd += 'hparams["pretrainer"].collect_files();hparams["pretrainer"].load_collected(device="cpu");'
-                    elif "from_pretrained" in line:
-                        field = line.split(":")[0].strip()
-                        cmd += f'hparams["{field}"]'
-            cmd += (
-                "' "
-                + test_hparam[recipe_id]
-                + " --output_folder="
-                + output_fold
-                + " "
-                + test_flag[recipe_id]
-                + " "
-                + run_opts
-            )
-
-            # Prepare the test
-            run_test_cmd(cmd, stdout_file, stderr_file)
 
-        return False
+        # Create files for storing standard input and standard output
+        stdout_file = os.path.join(output_fold, "stdout.txt")
+        stderr_file = os.path.join(output_fold, "stderr.txt")
 
-    # Run  script (check how to get std out, std err and save them in files)
-    check = True
-    for i, recipe_id in enumerate(test_script.keys()):
         # If we are interested in performance checks only, skip
         check_str = test_check[recipe_id].strip()
         if run_tests_with_checks_only:
@@ -472,11 +495,6 @@ def run_recipe_tests(
             % (i + 1, len(test_script.keys()), recipe_id)
         )
 
-        output_fold = os.path.join(output_folder, recipe_id)
-        os.makedirs(output_fold, exist_ok=True)
-        stdout_file = os.path.join(output_fold, "stdout.txt")
-        stderr_file = os.path.join(output_fold, "stderr.txt")
-
         # Check for setup scripts
         setup_script = os.path.join(
             "tests/recipes/setup",
@@ -494,14 +512,15 @@ def run_recipe_tests(
             + test_script[recipe_id]
             + " "
             + test_hparam[recipe_id]
-            + " --output_folder="
-            + output_fold
             + " "
             + test_flag[recipe_id]
             + " "
             + run_opts
         )
 
+        if not spec_outfold:
+            cmd = cmd + " --output_folder=" + output_fold
+
         # add --debug if no do_checks to save testing time
         if not do_checks:
             cmd += " --debug --debug_persistently"
@@ -536,6 +555,83 @@ def run_recipe_tests(
     return check
 
 
+def download_only_test(
+    test_script,
+    test_hparam,
+    test_flag,
+    test_check,
+    run_opts,
+    run_tests_with_checks_only,
+    output_folder,
+):
+    """Downloads only the needed data (useful for off-line tests).
+
+    Arguments
+    ---------
+    test_script: dict
+        A Dictionary containing recipe IDs as keys and test_scripts as values.
+    test_hparam: dict
+        A dictionary containing recipe IDs as keys and hparams as values.
+    test_flag: dict
+        A dictionary containing recipe IDs as keys and the test flags as values.
+    test_check: dict
+        A dictionary containing recipe IDs as keys and the checks as values.
+    run_opts: str
+        Running options to append to each test.
+    run_tests_with_checks_only: str
+            Running options to append to each test.
+    run_tests_with_checks_only: bool
+        If True skips all tests that do not have performance check criteria defined.
+    output_folder: path
+        The output folder where to store all the test outputs.
+    """
+
+    for i, recipe_id in enumerate(test_script.keys()):
+        # If we are interested in performance checks only, skip
+        check_str = test_check[recipe_id].strip()
+        if run_tests_with_checks_only:
+            if len(check_str) == 0:
+                continue
+
+        print(
+            "(%i/%i) Collecting pretrained models for %s..."
+            % (i + 1, len(test_script.keys()), recipe_id)
+        )
+
+        output_fold = os.path.join(output_folder, recipe_id)
+        os.makedirs(output_fold, exist_ok=True)
+        stdout_file = os.path.join(output_fold, "stdout.txt")
+        stderr_file = os.path.join(output_fold, "stderr.txt")
+
+        cmd = (
+            "python -c 'import sys;from hyperpyyaml import load_hyperpyyaml;import speechbrain;"
+            "hparams_file, run_opts, overrides = speechbrain.parse_arguments(sys.argv[1:]);"
+            "fin=open(hparams_file);hparams = load_hyperpyyaml(fin, overrides);fin.close();"
+            # 'speechbrain.create_experiment_directory(experiment_directory=hparams["output_folder"],'
+            # 'hyperparams_to_save=hparams_file,overrides=overrides,);'
+        )
+        with open(test_hparam[recipe_id]) as hparam_file:
+            for line in hparam_file:
+                if "pretrainer" in line:
+                    cmd += 'hparams["pretrainer"].collect_files();hparams["pretrainer"].load_collected(device="cpu");'
+                elif "from_pretrained" in line:
+                    field = line.split(":")[0].strip()
+                    cmd += f'hparams["{field}"]'
+        cmd += (
+            "' "
+            + test_hparam[recipe_id]
+            + " --output_folder="
+            + output_fold
+            + " "
+            + test_flag[recipe_id]
+            + " "
+            + run_opts
+        )
+
+        # Prepare the test
+        run_test_cmd(cmd, stdout_file, stderr_file)
+
+
 def load_yaml_test(
     recipe_folder="tests/recipes",
     script_field="Script_file",