From 53a0fa0784fa01eee6d17391a43c074ee728142c Mon Sep 17 00:00:00 2001
From: Mariusz Banach
Date: Wed, 18 Feb 2026 00:18:29 +0100
Subject: [PATCH] MAESTRO: align legacy adapter with CLI output

---
 ...er-analyzer-Phase-02-Engine-Refactoring.md |   2 +-
 backend/app/engine/analyzer.py                |   6 +
 .../app/engine/scanners/_legacy_adapter.py    | 116 +++++++++++++++---
 backend/tests/engine/test_cli_parity.py       |  78 ++++++++++++
 4 files changed, 181 insertions(+), 21 deletions(-)
 create mode 100644 backend/tests/engine/test_cli_parity.py

diff --git a/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md b/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md
index fda0ef4..ca72159 100644
--- a/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md
+++ b/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md
@@ -70,6 +70,6 @@ backend/app/engine/
 - [x] `pytest backend/tests/engine/` passes with all tests green
 - [x] All 106+ tests are registered in the scanner registry (`ScannerRegistry.get_all()` returns 106+ scanners)
   - Verified `ScannerRegistry.get_all()` returns 106 scanners (IDs 1-106, none missing).
-- [ ] Analysis of `backend/tests/fixtures/sample_headers.txt` produces results matching original CLI output
+- [x] Analysis of `backend/tests/fixtures/sample_headers.txt` produces results matching original CLI output
 - [ ] `ruff check backend/` passes with zero errors
 - [ ] Run `/speckit.analyze` to verify consistency
diff --git a/backend/app/engine/analyzer.py b/backend/app/engine/analyzer.py
index d5b6a61..8c3742a 100644
--- a/backend/app/engine/analyzer.py
+++ b/backend/app/engine/analyzer.py
@@ -8,6 +8,7 @@ from .models import AnalysisRequest, AnalysisResult, ReportMetadata, Severity, T
 from .parser import HeaderParser, ParsedHeader
 from .scanner_base import BaseScanner
 from .scanner_registry import ScannerRegistry
+from .scanners._legacy_adapter import configure_legacy
 
 ProgressCallback = Callable[[int, int, str], None]
 
@@ -34,6 +35,11 @@ class HeaderAnalyzer:
         progress_callback: ProgressCallback | None = None,
     ) -> AnalysisResult:
         start = perf_counter()
+        configure_legacy(
+            resolve=request.config.resolve,
+            decode_all=request.config.decode_all,
+            include_unusual=True,
+        )
         headers = self._parser.parse(request.headers)
         scanners = self._select_scanners(request)
         total_tests = len(scanners)
diff --git a/backend/app/engine/scanners/_legacy_adapter.py b/backend/app/engine/scanners/_legacy_adapter.py
index cbcbae4..bc32023 100644
--- a/backend/app/engine/scanners/_legacy_adapter.py
+++ b/backend/app/engine/scanners/_legacy_adapter.py
@@ -16,8 +16,23 @@ _ARRAY_TESTS: set[int] | None = None
 _BASE_HANDLED: list[str] | None = None
 _BASE_APPLIANCES: set[str] | None = None
 _CONTEXT_SIGNATURE: tuple[tuple[str, str], ...] | None = None
+_CONTEXT_CONFIG: tuple[bool, bool, bool] | None = None
 _CONTEXT_ANALYSIS = None
 
+_LEGACY_CONFIG = {
+    "resolve": False,
+    "decode_all": False,
+    "include_unusual": True,
+}
+
+
+def configure_legacy(
+    *, resolve: bool, decode_all: bool, include_unusual: bool = True
+) -> None:
+    _LEGACY_CONFIG["resolve"] = bool(resolve)
+    _LEGACY_CONFIG["decode_all"] = bool(decode_all)
+    _LEGACY_CONFIG["include_unusual"] = bool(include_unusual)
+
 
 def _load_legacy_module() -> object:
     global _LEGACY_MODULE
@@ -38,11 +53,7 @@ def _load_legacy_module() -> object:
     module = importlib.util.module_from_spec(spec)
     spec.loader.exec_module(module)
 
-    try:
-        module.logger.options["log"] = "none"
-        module.logger.options["nocolor"] = True
-    except Exception:
-        pass
+    _apply_legacy_options(module)
 
     _LEGACY_MODULE = module
     return module
@@ -75,6 +86,26 @@ def _load_test_catalog() -> tuple[dict[int, tuple[str, str]], set[int]]:
     return catalog, array_ids
 
 
+def _apply_legacy_options(module: object) -> None:
+    options = getattr(module, "options", None)
+    if isinstance(options, dict):
+        options["dont_resolve"] = not _LEGACY_CONFIG["resolve"]
+        options["nocolor"] = True
+        options["format"] = "json"
+        options["log"] = "none"
+        options["debug"] = False
+        options["verbose"] = False
+
+    try:
+        module.logger.options["log"] = "none"
+        module.logger.options["nocolor"] = True
+        module.logger.options["format"] = "json"
+        module.logger.options["debug"] = False
+        module.logger.options["verbose"] = False
+    except Exception:
+        pass
+
+
 def _reset_legacy_state(module: object) -> None:
     if _BASE_HANDLED is not None:
         module.SMTPHeadersAnalysis.Handled_Spam_Headers = list(_BASE_HANDLED)
@@ -97,22 +128,37 @@ def _build_text(headers: list[ParsedHeader]) -> str:
 
 
 def _get_analysis(headers: list[ParsedHeader]) -> object:
-    global _CONTEXT_SIGNATURE, _CONTEXT_ANALYSIS
+    global _CONTEXT_SIGNATURE, _CONTEXT_ANALYSIS, _CONTEXT_CONFIG
 
     module = _load_legacy_module()
-    _load_test_catalog()
+    catalog, _array_ids = _load_test_catalog()
+    _apply_legacy_options(module)
+
+    tests_to_run = sorted(catalog.keys())
+    config_signature = (
+        _LEGACY_CONFIG["resolve"],
+        _LEGACY_CONFIG["decode_all"],
+        _LEGACY_CONFIG["include_unusual"],
+    )
     signature = _headers_signature(headers)
-    if _CONTEXT_ANALYSIS is None or _CONTEXT_SIGNATURE != signature:
+    if (
+        _CONTEXT_ANALYSIS is None
+        or _CONTEXT_SIGNATURE != signature
+        or _CONTEXT_CONFIG != config_signature
+    ):
         _reset_legacy_state(module)
-        analyzer = module.SMTPHeadersAnalysis(module.logger, resolve=False, decode_all=False)
+        analyzer = module.SMTPHeadersAnalysis(
+            module.logger,
+            resolve=_LEGACY_CONFIG["resolve"],
+            decode_all=_LEGACY_CONFIG["decode_all"],
+            testsToRun=tests_to_run,
+            includeUnusual=_LEGACY_CONFIG["include_unusual"],
+        )
         analyzer.headers = [(h.index, h.name, h.value) for h in headers]
         analyzer.text = _build_text(headers)
-        try:
-            analyzer.logger.options["log"] = "none"
-            analyzer.logger.options["nocolor"] = True
-        except Exception:
-            pass
+        _apply_legacy_options(module)
         _CONTEXT_SIGNATURE = signature
+        _CONTEXT_CONFIG = config_signature
         _CONTEXT_ANALYSIS = analyzer
 
     return _CONTEXT_ANALYSIS
@@ -148,21 +194,51 @@ def _combine_payloads(payloads: list[tuple[str, str, str, str]]) -> tuple[str, s
     values: list[str] = []
     analyses: list[str] = []
     descriptions: list[str] = []
+    saw_header_dash = False
+    saw_value_dash = False
+    saw_header_empty = False
+    saw_value_empty = False
 
     for header, value, analysis, description in payloads:
-        if header and header != "-":
+        if header == "-":
+            saw_header_dash = True
+        elif header:
             headers.append(header)
-        if value and value != "-":
+        else:
+            saw_header_empty = True
+
+        if value == "-":
+            saw_value_dash = True
+        elif value:
             values.append(value)
+        else:
+            saw_value_empty = True
+
         if analysis:
             analyses.append(analysis)
         if description:
             descriptions.append(description)
 
-    header_name = ", ".join(dict.fromkeys(headers)) if headers else "-"
-    header_value = "\n".join(values) if values else "-"
-    analysis = "\n\n".join(analyses).strip()
-    description = "\n\n".join(descriptions).strip()
+    if headers:
+        header_name = ", ".join(dict.fromkeys(headers))
+    elif saw_header_dash:
+        header_name = "-"
+    elif saw_header_empty:
+        header_name = ""
+    else:
+        header_name = "-"
+
+    if values:
+        header_value = "\n".join(values)
+    elif saw_value_dash:
+        header_value = "-"
+    elif saw_value_empty:
+        header_value = ""
+    else:
+        header_value = "-"
+
+    analysis = "\n\n".join(analyses)
+    description = "\n\n".join(descriptions)
 
     return header_name, header_value, analysis, description
 
diff --git a/backend/tests/engine/test_cli_parity.py b/backend/tests/engine/test_cli_parity.py
new file mode 100644
index 0000000..8b6b7f3
--- /dev/null
+++ b/backend/tests/engine/test_cli_parity.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+import importlib.util
+from pathlib import Path
+
+from app.engine.analyzer import HeaderAnalyzer
+from app.engine.logger import Logger as EngineLogger
+from app.engine.models import AnalysisRequest, TestStatus
+
+
+FIXTURES_DIR = Path(__file__).resolve().parents[1] / "fixtures"
+
+
+def _load_legacy_module() -> object:
+    legacy_path = Path(__file__).resolve().parents[3] / "decode-spam-headers.py"
+    spec = importlib.util.spec_from_file_location(
+        "legacy_decode_spam_headers", legacy_path
+    )
+    if spec is None or spec.loader is None:
+        raise RuntimeError("Unable to load legacy decode-spam-headers module.")
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return module
+
+
+def _legacy_output(raw_headers: str) -> dict[str, dict[str, str]]:
+    module = _load_legacy_module()
+    module.options["dont_resolve"] = True
+    module.options["nocolor"] = True
+    module.options["format"] = "json"
+    module.options["log"] = "none"
+    module.options["debug"] = False
+    module.options["verbose"] = False
+    module.logger = module.Logger(module.options)
+
+    base = module.SMTPHeadersAnalysis(module.logger, False, False, [], True)
+    standard, decode_all, array_tests = base.getAllTests()
+    max_test = max(int(test[0]) for test in (standard + decode_all + array_tests))
+    tests_to_run = list(range(max_test + 5))
+
+    analyzer = module.SMTPHeadersAnalysis(
+        module.logger, False, False, tests_to_run, True
+    )
+    output = analyzer.parse(raw_headers)
+    for payload in output.values():
+        for key in ("header", "value", "analysis", "description"):
+            if key in payload and isinstance(payload[key], str):
+                payload[key] = EngineLogger.noColors(payload[key])
+    return output
+
+
+def _engine_output(raw_headers: str) -> dict[str, dict[str, str]]:
+    analyzer = HeaderAnalyzer()
+    result = analyzer.analyze(
+        AnalysisRequest(
+            headers=raw_headers,
+            config={
+                "resolve": False,
+                "decode_all": False,
+                "test_ids": [],
+            },
+        )
+    )
+    return {
+        item.test_name: {
+            "header": item.header_name,
+            "value": item.header_value,
+            "analysis": item.analysis,
+            "description": item.description,
+        }
+        for item in result.results
+        if item.status == TestStatus.success
+    }
+
+
+def test_engine_matches_legacy_cli_output_for_sample_headers() -> None:
+    raw_headers = (FIXTURES_DIR / "sample_headers.txt").read_text(encoding="utf-8")
+    assert _engine_output(raw_headers) == _legacy_output(raw_headers)