| Brad Bishop | c04b3f4 | 2020-05-01 08:17:59 -0400 | [diff] [blame] | 1 | #!/usr/bin/env python3 | 
 | 2 | # SPDX-License-Identifier: Apache-2.0 | 
 | 3 | """ | 
 | 4 | A tool for validating entity manager configurations. | 
 | 5 | """ | 
| Patrick Williams | 809fbdc | 2025-05-09 10:50:39 -0400 | [diff] [blame] | 6 |  | 
| Brad Bishop | c04b3f4 | 2020-05-01 08:17:59 -0400 | [diff] [blame] | 7 | import argparse | 
 | 8 | import json | 
| Brad Bishop | c04b3f4 | 2020-05-01 08:17:59 -0400 | [diff] [blame] | 9 | import os | 
| Potin Lai | 0f3a4d9 | 2023-12-05 00:13:55 +0800 | [diff] [blame] | 10 | import re | 
| Brad Bishop | c04b3f4 | 2020-05-01 08:17:59 -0400 | [diff] [blame] | 11 | import sys | 
| Patrick Williams | f8f6027 | 2025-05-03 01:36:31 -0400 | [diff] [blame] | 12 | from concurrent.futures import ProcessPoolExecutor | 
| Brad Bishop | c04b3f4 | 2020-05-01 08:17:59 -0400 | [diff] [blame] | 13 |  | 
| Patrick Williams | 809fbdc | 2025-05-09 10:50:39 -0400 | [diff] [blame] | 14 | import jsonschema.exceptions | 
| Patrick Williams | fa8ee87 | 2022-12-07 07:00:42 -0600 | [diff] [blame] | 15 | import jsonschema.validators | 
| Patrick Williams | 809fbdc | 2025-05-09 10:50:39 -0400 | [diff] [blame] | 16 | import referencing | 
 | 17 | from referencing.jsonschema import DRAFT202012 | 
| Patrick Williams | fa8ee87 | 2022-12-07 07:00:42 -0600 | [diff] [blame] | 18 |  | 
# File name of the root schema, looked up in the "schemas" directory when
# --schema is not given on the command line.
DEFAULT_SCHEMA_FILENAME = "global.json"
 | 20 |  | 
 | 21 |  | 
def get_default_thread_count() -> int:
    """
    Return the default number of parallel workers.

    Uses the size of the current process's CPU affinity set when the
    platform exposes ``os.sched_getaffinity``; otherwise falls back to
    ``os.cpu_count()``, or 1 when that is unavailable.
    """
    affinity_query = getattr(os, "sched_getaffinity", None)
    if affinity_query is None:
        # Platform has no affinity API; use the total CPU count instead.
        total_cpus = os.cpu_count()
        return total_cpus if total_cpus else 1

    # Pid 0 means "the calling process"; this respects affinity settings.
    return len(affinity_query(0))
 | 32 |  | 
 | 33 |  | 
# Group 1 captures a quoted string (double or single quoted), consuming
# backslash escapes pairwise so that a string ending in an escaped backslash
# (e.g. "C:\\") terminates at the right quote.
# Group 2 captures a comment (/* multi-line */ or // to end of line).  The
# single-line form stops at the first CR or LF without requiring a "$"
# anchor, so it also works on files with CRLF line endings.
_STRING_OR_COMMENT_RE = re.compile(
    r"""("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*')|(/\*.*?\*/|//[^\r\n]*)""",
    re.DOTALL,
)


def remove_c_comments(string):
    """
    Return ``string`` with C-style comments removed.

    Both ``/* ... */`` and ``// ...`` comments are stripped.  Comment
    markers that appear inside quoted strings are left untouched, and line
    terminators following a ``//`` comment are preserved.

    :param string: text (typically JSON with comments) to clean up.
    :returns: the text with every comment replaced by the empty string.
    """

    def _replacer(match):
        # A match is either a string (keep verbatim) or a comment (drop).
        if match.group(2) is not None:
            return ""
        return match.group(1)

    return _STRING_OR_COMMENT_RE.sub(_replacer, string)
 | 47 |  | 
 | 48 |  | 
def _default_location(*parts):
    """Return ``parts`` joined below the parent of this script's directory."""
    script_dir = os.path.dirname(os.path.realpath(__file__))
    return os.path.join(os.path.dirname(script_dir), *parts)


def _fail(message):
    """Print ``message`` to stderr and exit with status 2 (setup error)."""
    print(message, file=sys.stderr)
    sys.exit(2)


def main():
    """
    Command-line entry point: validate configurations against the schema.

    Parses the command line, loads every configuration file (stripping
    C-style comments), validates each one in a pool of worker processes and
    prints a summary of failures and unexpected passes.

    Exit status: 0 when everything validated as expected, 1 when at least
    one configuration failed or unexpectedly passed, 2 when the schema,
    configurations or expected-fails file could not be located or read.
    """
    parser = argparse.ArgumentParser(
        description="Entity manager configuration validator",
    )
    parser.add_argument(
        "-s",
        "--schema",
        help=(
            "Use the specified schema file instead of the default "
            "(__file__/../../schemas/global.json)"
        ),
    )
    parser.add_argument(
        "-c",
        "--config",
        action="append",
        help=(
            "Validate the specified configuration files (can be "
            "specified more than once) instead of the default "
            "(__file__/../../configurations/**.json)"
        ),
    )
    parser.add_argument(
        "-e",
        "--expected-fails",
        help=(
            "A file with a list of configurations to ignore should "
            "they fail to validate"
        ),
    )
    parser.add_argument(
        "-k",
        "--continue",
        action="store_true",
        help="keep validating after a failure",
    )
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="be noisy"
    )
    parser.add_argument(
        "-t",
        "--threads",
        type=int,
        default=get_default_thread_count(),
        help="Number of threads to use for parallel validation (default: number of CPUs)",
    )
    args = parser.parse_args()

    # ProcessPoolExecutor rejects a non-positive worker count with a bare
    # ValueError; report it as a normal usage error instead.
    if args.threads < 1:
        parser.error("--threads must be a positive integer")

    schema_file = args.schema
    if schema_file is None:
        try:
            schema_file = _default_location(
                "schemas", DEFAULT_SCHEMA_FILENAME
            )
        except (NameError, OSError):
            _fail(f"Could not guess location of {DEFAULT_SCHEMA_FILENAME}")

    config_files = list(args.config or [])
    if not config_files:
        try:
            configs_dir = _default_location("configurations")
        except (NameError, OSError):
            configs_dir = None
        # os.walk() silently yields nothing for a missing directory, so the
        # directory has to be checked explicitly to report the problem.
        if configs_dir is None or not os.path.isdir(configs_dir):
            _fail("Could not guess location of configurations")
        for root, _, files in os.walk(configs_dir):
            config_files.extend(
                os.path.join(root, f) for f in files if f.endswith(".json")
            )

    configs = []
    for config_file in config_files:
        try:
            with open(config_file, encoding="utf-8") as fd:
                configs.append(json.loads(remove_c_comments(fd.read())))
        except (OSError, ValueError) as err:
            # OSError: missing/unreadable file.  ValueError: malformed JSON
            # (json.JSONDecodeError) or undecodable bytes.
            _fail(f"Could not parse config file: {config_file} ({err})")

    expected_fails = []
    if args.expected_fails:
        try:
            with open(args.expected_fails) as fd:
                expected_fails = [line.strip() for line in fd]
        except (OSError, ValueError):
            _fail(f"Could not read expected fails file: {args.expected_fails}")

    # Outcomes are tracked by full path so that files sharing a base name
    # (or whose names are substrings of one another) are not confused.
    invalid = set()
    unexpected_pass = set()

    # "continue" is a keyword, so the attribute cannot be read with a dot.
    should_continue = getattr(args, "continue")

    with ProcessPoolExecutor(max_workers=args.threads) as executor:
        # Submit all validation tasks.  The expected-fails list holds bare
        # file names, so only the base name is handed to the worker.
        pending = [
            (
                config_file,
                executor.submit(
                    validate_single_config,
                    args,
                    os.path.basename(config_file),
                    config,
                    expected_fails,
                    schema_file,
                ),
            )
            for config_file, config in zip(config_files, configs)
        ]

        # Collect results in submission order, blocking on each future.
        for config_file, future in pending:
            is_invalid, is_unexpected_pass = future.result()
            if is_invalid:
                invalid.add(config_file)
            if is_unexpected_pass:
                unexpected_pass.add(config_file)

            # Stop at the first problem unless --continue was given.
            if (is_invalid or is_unexpected_pass) and not should_continue:
                executor.shutdown(wait=False, cancel_futures=True)
                break

    exit_status = 0
    if invalid or unexpected_pass:
        exit_status = 1
        unexpected_pass_suffix = " **"
        print("results:")
        for f in config_files:
            if f in unexpected_pass:
                print(f"  '{f}' passed!{unexpected_pass_suffix}")
            if f in invalid:
                print(f"  '{f}' failed!")

        if unexpected_pass:
            print("\n** configuration expected to fail")

    sys.exit(exit_status)
 | 207 |  | 
 | 208 |  | 
# Validators already built in this process, keyed by absolute schema path.
# Each worker process validates many configurations against the same schema,
# so the schema tree only needs to be read and checked once per process.
_VALIDATOR_CACHE = {}


def validator_from_file(schema_file):
    """
    Build a Draft 2020-12 validator for the schema stored in ``schema_file``.

    Every ``.json`` file found in the schema file's directory and its
    subdirectories is parsed (after stripping C-style comments), checked as
    a Draft 2020-12 schema and added to a ``referencing`` registry under its
    path relative to that directory, so that ``$ref``s between schema files
    can be resolved.

    :param schema_file: path to the root schema; relative paths are
        interpreted against the current working directory.
    :returns: a ``Draft202012Validator`` for the root schema.  Repeated
        calls with the same path in one process return the same instance.
    :raises OSError: if the root schema file cannot be opened.
    """
    # Normalise so that the comparison with walked paths below is reliable
    # and so that a bare file name yields a walkable directory.
    schema_file = os.path.abspath(schema_file)

    cached = _VALIDATOR_CACHE.get(schema_file)
    if cached is not None:
        return cached

    # Root directory of the schema file; walked for referenced schemas.
    schema_path = os.path.dirname(schema_file)

    root_schema = None
    registry = referencing.Registry()

    for dirpath, _, filenames in os.walk(schema_path):
        for filename in filenames:
            if not filename.endswith(".json"):
                continue

            full_file_path = os.path.join(dirpath, filename)

            # The URI is the path relative to schema_path, always written
            # with forward slashes regardless of the platform separator.
            relative_uri = os.path.relpath(
                full_file_path, schema_path
            ).replace(os.sep, "/")

            with open(full_file_path, "r", encoding="utf-8") as fd:
                schema_contents = json.loads(remove_c_comments(fd.read()))

            jsonschema.validators.Draft202012Validator.check_schema(
                schema_contents
            )

            registry = registry.with_resource(
                uri=relative_uri,
                resource=referencing.Resource.from_contents(
                    schema_contents, default_specification=DRAFT202012
                ),
            )

            # Remember the contents of the requested schema as the root.
            if full_file_path == schema_file:
                root_schema = schema_contents

    if root_schema is None:
        # The walk did not pick up the root schema (for example, its name
        # does not end in ".json").  Load it directly; a missing file
        # raises here instead of producing a validator around None.
        with open(schema_file, "r", encoding="utf-8") as fd:
            root_schema = json.loads(remove_c_comments(fd.read()))
        jsonschema.validators.Draft202012Validator.check_schema(root_schema)

    validator = jsonschema.validators.Draft202012Validator(
        root_schema, registry=registry
    )
    _VALIDATOR_CACHE[schema_file] = validator

    return validator
 | 252 |  | 
 | 253 |  | 
def validate_single_config(
    args, filename, config, expected_fails, schema_file
):
    """
    Validate one parsed configuration against the schema.

    :param args: parsed command-line namespace; only ``args.verbose`` is
        read here.
    :param filename: name of the configuration, looked up in
        ``expected_fails`` and used in the verbose error message.
    :param config: the parsed configuration to validate.
    :param expected_fails: collection of names that are allowed to fail.
    :param schema_file: path of the root schema file.
    :returns: a tuple ``(is_invalid, is_unexpected_pass)``.  The first is
        true when validation failed for a configuration not listed in
        ``expected_fails``; the second is true when a configuration listed
        there validated successfully.
    """
    failure_is_expected = filename in expected_fails

    try:
        validator_from_file(schema_file).validate(config)
    except jsonschema.exceptions.ValidationError as error:
        if failure_is_expected:
            # A listed configuration failed: nothing to report.
            return (False, False)
        if args.verbose:
            print(f"Validation Error for {filename}: {error}")
        return (True, False)

    # Validation succeeded; that is only a problem for listed configurations.
    return (False, failure_is_expected)
| Alexander Hansen | a47bdad | 2025-04-11 16:05:28 +0200 | [diff] [blame] | 274 |  | 
 | 275 |  | 
# Run the validator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()