From 53a0fa0784fa01eee6d17391a43c074ee728142c Mon Sep 17 00:00:00 2001
From: Mariusz Banach <mariusz@outflank.nl>
Date: Wed, 18 Feb 2026 00:18:29 +0100
Subject: [PATCH] MAESTRO: align legacy adapter with CLI output

---
 ...er-analyzer-Phase-02-Engine-Refactoring.md |   2 +-
 backend/app/engine/analyzer.py                |   6 +
 .../app/engine/scanners/_legacy_adapter.py    | 116 +++++++++++++++---
 backend/tests/engine/test_cli_parity.py       |  78 ++++++++++++
 4 files changed, 181 insertions(+), 21 deletions(-)
 create mode 100644 backend/tests/engine/test_cli_parity.py

diff --git a/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md b/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md
index fda0ef4..ca72159 100644
--- a/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md	
+++ b/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md	
@@ -70,6 +70,6 @@ backend/app/engine/
 - [x] `pytest backend/tests/engine/` passes with all tests green
 - [x] All 106+ tests are registered in the scanner registry (`ScannerRegistry.get_all()` returns 106+ scanners)
   - Verified `ScannerRegistry.get_all()` returns 106 scanners (IDs 1-106, none missing).
-- [ ] Analysis of `backend/tests/fixtures/sample_headers.txt` produces results matching original CLI output
+- [x] Analysis of `backend/tests/fixtures/sample_headers.txt` produces results matching original CLI output
 - [ ] `ruff check backend/` passes with zero errors
 - [ ] Run `/speckit.analyze` to verify consistency
diff --git a/backend/app/engine/analyzer.py b/backend/app/engine/analyzer.py
index d5b6a61..8c3742a 100644
--- a/backend/app/engine/analyzer.py
+++ b/backend/app/engine/analyzer.py
@@ -8,6 +8,7 @@ from .models import AnalysisRequest, AnalysisResult, ReportMetadata, Severity, T
 from .parser import HeaderParser, ParsedHeader
 from .scanner_base import BaseScanner
 from .scanner_registry import ScannerRegistry
+from .scanners._legacy_adapter import configure_legacy
 
 
 ProgressCallback = Callable[[int, int, str], None]
@@ -34,6 +35,11 @@ class HeaderAnalyzer:
         progress_callback: ProgressCallback | None = None,
     ) -> AnalysisResult:
         start = perf_counter()
+        configure_legacy(
+            resolve=request.config.resolve,
+            decode_all=request.config.decode_all,
+            include_unusual=True,
+        )
         headers = self._parser.parse(request.headers)
         scanners = self._select_scanners(request)
         total_tests = len(scanners)
diff --git a/backend/app/engine/scanners/_legacy_adapter.py b/backend/app/engine/scanners/_legacy_adapter.py
index cbcbae4..bc32023 100644
--- a/backend/app/engine/scanners/_legacy_adapter.py
+++ b/backend/app/engine/scanners/_legacy_adapter.py
@@ -16,8 +16,23 @@ _ARRAY_TESTS: set[int] | None = None
 _BASE_HANDLED: list[str] | None = None
 _BASE_APPLIANCES: set[str] | None = None
 _CONTEXT_SIGNATURE: tuple[tuple[str, str], ...] | None = None
+_CONTEXT_CONFIG: tuple[bool, bool, bool] | None = None  # (resolve, decode_all, include_unusual) the cached analysis was built with
 _CONTEXT_ANALYSIS = None
 
+_LEGACY_CONFIG = {  # module-level settings; overwritten in place by configure_legacy()
+    "resolve": False,  # passed as resolve=; its negation becomes options["dont_resolve"]
+    "decode_all": False,  # passed as decode_all= to SMTPHeadersAnalysis
+    "include_unusual": True,  # passed as includeUnusual= to SMTPHeadersAnalysis
+}
+
+
+def configure_legacy(  # record the options that later legacy-analyzer builds will read
+    *, resolve: bool, decode_all: bool, include_unusual: bool = True
+) -> None:
+    _LEGACY_CONFIG["resolve"] = bool(resolve)  # bool() coerces any truthy/falsy input to a real bool
+    _LEGACY_CONFIG["decode_all"] = bool(decode_all)
+    _LEGACY_CONFIG["include_unusual"] = bool(include_unusual)  # NOTE(review): dict is module-global; confirm analyses never run concurrently with different configs
+
 
 def _load_legacy_module() -> object:
     global _LEGACY_MODULE
@@ -38,11 +53,7 @@ def _load_legacy_module() -> object:
     module = importlib.util.module_from_spec(spec)
     spec.loader.exec_module(module)
 
-    try:
-        module.logger.options["log"] = "none"
-        module.logger.options["nocolor"] = True
-    except Exception:
-        pass
+    _apply_legacy_options(module)
 
     _LEGACY_MODULE = module
     return module
@@ -75,6 +86,26 @@ def _load_test_catalog() -> tuple[dict[int, tuple[str, str]], set[int]]:
     return catalog, array_ids
 
 
+def _apply_legacy_options(module: object) -> None:  # copy the adapter's settings onto the loaded legacy module
+    options = getattr(module, "options", None)  # None when the module exposes no `options` attribute
+    if isinstance(options, dict):  # anything other than a dict is left untouched
+        options["dont_resolve"] = not _LEGACY_CONFIG["resolve"]  # legacy flag is the inverse of `resolve`
+        options["nocolor"] = True
+        options["format"] = "json"
+        options["log"] = "none"
+        options["debug"] = False
+        options["verbose"] = False
+
+    try:  # same output settings, written to the logger's own options mapping
+        module.logger.options["log"] = "none"
+        module.logger.options["nocolor"] = True
+        module.logger.options["format"] = "json"
+        module.logger.options["debug"] = False
+        module.logger.options["verbose"] = False
+    except Exception:  # every failure is swallowed; NOTE(review): consider narrowing to the expected error types
+        pass
+
+
 def _reset_legacy_state(module: object) -> None:
     if _BASE_HANDLED is not None:
         module.SMTPHeadersAnalysis.Handled_Spam_Headers = list(_BASE_HANDLED)
@@ -97,22 +128,37 @@ def _build_text(headers: list[ParsedHeader]) -> str:
 
 
 def _get_analysis(headers: list[ParsedHeader]) -> object:
-    global _CONTEXT_SIGNATURE, _CONTEXT_ANALYSIS
+    global _CONTEXT_SIGNATURE, _CONTEXT_ANALYSIS, _CONTEXT_CONFIG
     module = _load_legacy_module()
-    _load_test_catalog()
+    catalog, _array_ids = _load_test_catalog()
+    _apply_legacy_options(module)  # re-sync legacy options with the current _LEGACY_CONFIG on every call
+
+    tests_to_run = sorted(catalog.keys())  # all catalog keys, ascending; computed every call, used only on rebuild
+    config_signature = (  # snapshot of the config, compared with the cached _CONTEXT_CONFIG below
+        _LEGACY_CONFIG["resolve"],
+        _LEGACY_CONFIG["decode_all"],
+        _LEGACY_CONFIG["include_unusual"],
+    )
 
     signature = _headers_signature(headers)
-    if _CONTEXT_ANALYSIS is None or _CONTEXT_SIGNATURE != signature:
+    if (  # rebuild when nothing is cached, or when the headers or the config differ
+        _CONTEXT_ANALYSIS is None
+        or _CONTEXT_SIGNATURE != signature
+        or _CONTEXT_CONFIG != config_signature
+    ):
         _reset_legacy_state(module)
-        analyzer = module.SMTPHeadersAnalysis(module.logger, resolve=False, decode_all=False)
+        analyzer = module.SMTPHeadersAnalysis(  # options now come from _LEGACY_CONFIG instead of hard-coded False
+            module.logger,
+            resolve=_LEGACY_CONFIG["resolve"],
+            decode_all=_LEGACY_CONFIG["decode_all"],
+            testsToRun=tests_to_run,
+            includeUnusual=_LEGACY_CONFIG["include_unusual"],
+        )
         analyzer.headers = [(h.index, h.name, h.value) for h in headers]
         analyzer.text = _build_text(headers)
-        try:
-            analyzer.logger.options["log"] = "none"
-            analyzer.logger.options["nocolor"] = True
-        except Exception:
-            pass
+        _apply_legacy_options(module)  # second call in this function: reapplies the options after construction
         _CONTEXT_SIGNATURE = signature
+        _CONTEXT_CONFIG = config_signature  # stored so a later config change forces a rebuild
         _CONTEXT_ANALYSIS = analyzer
     return _CONTEXT_ANALYSIS
 
@@ -148,21 +194,51 @@ def _combine_payloads(payloads: list[tuple[str, str, str, str]]) -> tuple[str, s
     values: list[str] = []
     analyses: list[str] = []
     descriptions: list[str] = []
+    saw_header_dash = False
+    saw_value_dash = False
+    saw_header_empty = False
+    saw_value_empty = False
 
     for header, value, analysis, description in payloads:
-        if header and header != "-":
+        if header == "-":
+            saw_header_dash = True
+        elif header:
             headers.append(header)
-        if value and value != "-":
+        else:
+            saw_header_empty = True
+
+        if value == "-":
+            saw_value_dash = True
+        elif value:
             values.append(value)
+        else:
+            saw_value_empty = True
+
         if analysis:
             analyses.append(analysis)
         if description:
             descriptions.append(description)
 
-    header_name = ", ".join(dict.fromkeys(headers)) if headers else "-"
-    header_value = "\n".join(values) if values else "-"
-    analysis = "\n\n".join(analyses).strip()
-    description = "\n\n".join(descriptions).strip()
+    if headers:
+        header_name = ", ".join(dict.fromkeys(headers))
+    elif saw_header_dash:
+        header_name = "-"
+    elif saw_header_empty:
+        header_name = ""
+    else:
+        header_name = "-"
+
+    if values:
+        header_value = "\n".join(values)
+    elif saw_value_dash:
+        header_value = "-"
+    elif saw_value_empty:
+        header_value = ""
+    else:
+        header_value = "-"
+
+    analysis = "\n\n".join(analyses)
+    description = "\n\n".join(descriptions)
     return header_name, header_value, analysis, description
 
 
diff --git a/backend/tests/engine/test_cli_parity.py b/backend/tests/engine/test_cli_parity.py
new file mode 100644
index 0000000..8b6b7f3
--- /dev/null
+++ b/backend/tests/engine/test_cli_parity.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+import importlib.util
+from pathlib import Path
+
+from app.engine.analyzer import HeaderAnalyzer
+from app.engine.logger import Logger as EngineLogger
+from app.engine.models import AnalysisRequest, TestStatus
+
+
+FIXTURES_DIR = Path(__file__).resolve().parents[1] / "fixtures"  # the "fixtures" folder one level above this file's directory
+
+
+def _load_legacy_module() -> object:  # load decode-spam-headers.py by path and execute it on every call
+    legacy_path = Path(__file__).resolve().parents[3] / "decode-spam-headers.py"
+    spec = importlib.util.spec_from_file_location(  # path-based load; the hyphenated name is not importable via `import`
+        "legacy_decode_spam_headers", legacy_path
+    )
+    if spec is None or spec.loader is None:  # no usable spec or loader for that path
+        raise RuntimeError("Unable to load legacy decode-spam-headers module.")
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)  # runs the script's top-level code; nothing here adds it to sys.modules
+    return module
+
+
+def _legacy_output(raw_headers: str) -> dict[str, dict[str, str]]:  # reference result straight from the legacy script
+    module = _load_legacy_module()
+    module.options["dont_resolve"] = True  # counterpart of "resolve": False in _engine_output
+    module.options["nocolor"] = True
+    module.options["format"] = "json"
+    module.options["log"] = "none"
+    module.options["debug"] = False
+    module.options["verbose"] = False
+    module.logger = module.Logger(module.options)  # replace the logger with one built from the options above
+
+    base = module.SMTPHeadersAnalysis(module.logger, False, False, [], True)  # used only for getAllTests(); presumably (resolve, decode_all, testsToRun, includeUnusual) — confirm order
+    standard, decode_all, array_tests = base.getAllTests()
+    max_test = max(int(test[0]) for test in (standard + decode_all + array_tests))  # largest int(test[0]) across the three lists
+    tests_to_run = list(range(max_test + 5))  # 0 .. max_test + 4; NOTE(review): the + 5 margin is unexplained — confirm intent
+
+    analyzer = module.SMTPHeadersAnalysis(
+        module.logger, False, False, tests_to_run, True
+    )
+    output = analyzer.parse(raw_headers)
+    for payload in output.values():  # rewrite the four text fields in place
+        for key in ("header", "value", "analysis", "description"):
+            if key in payload and isinstance(payload[key], str):  # skip missing keys and non-string values
+                payload[key] = EngineLogger.noColors(payload[key])  # presumably strips colour escape codes — confirm
+    return output
+
+
+def _engine_output(raw_headers: str) -> dict[str, dict[str, str]]:  # same headers, run through HeaderAnalyzer
+    analyzer = HeaderAnalyzer()
+    result = analyzer.analyze(
+        AnalysisRequest(
+            headers=raw_headers,
+            config={  # counterpart of the option values set in _legacy_output
+                "resolve": False,
+                "decode_all": False,
+                "test_ids": [],  # presumably means "no filter, run everything" — confirm against the scanner selection
+            },
+        )
+    )
+    return {  # keyed by test name, using the same four field names _legacy_output normalises
+        item.test_name: {
+            "header": item.header_name,
+            "value": item.header_value,
+            "analysis": item.analysis,
+            "description": item.description,
+        }
+        for item in result.results
+        if item.status == TestStatus.success  # results with any other status are left out of the comparison
+    }
+
+
+def test_engine_matches_legacy_cli_output_for_sample_headers() -> None:  # parity check on one shared fixture file
+    raw_headers = (FIXTURES_DIR / "sample_headers.txt").read_text(encoding="utf-8")
+    assert _engine_output(raw_headers) == _legacy_output(raw_headers)  # exact dict equality: same keys and same field text