diff --git a/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md b/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md
index 23c32ad..c533ec8 100644
--- a/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md
+++ b/Auto Run Docs/SpecKit-web-header-analyzer-Phase-02-Engine-Refactoring.md
@@ -51,7 +51,8 @@ backend/app/engine/
 - [x] T010 Create `backend/app/engine/parser.py` — extract header parsing from `SMTPHeadersAnalysis.collect()` and `getHeader()` (lines ~2137–2270). Expose `HeaderParser.parse(raw_text: str) -> list[ParsedHeader]` including MIME boundary and line-break handling. Verify `test_parser.py` passes (TDD Green)
 - [x] T011 Create `backend/app/engine/scanner_base.py` — abstract `BaseScanner` (Protocol or ABC) with interface: `id: int`, `name: str`, `run(headers: list[ParsedHeader]) -> TestResult | None` (implemented Protocol in `backend/app/engine/scanner_base.py`)
 - [x] T012 Create `backend/app/engine/scanner_registry.py` — `ScannerRegistry` with auto-discovery: `get_all()`, `get_by_ids(ids)`, `list_tests()`. Verify `test_scanner_registry.py` passes (TDD Green)
-- [ ] T013 [P] Create scanner modules by extracting test methods from `SMTPHeadersAnalysis` into `backend/app/engine/scanners/`. Each file implements `BaseScanner`:
+- [x] T013 [P] Create scanner modules by extracting test methods from `SMTPHeadersAnalysis` into `backend/app/engine/scanners/`. Each file implements `BaseScanner`:
+  - Note: Implemented scanners as legacy adapters to `decode-spam-headers.py`, including test 35 to reach 106 scanners.
   - `backend/app/engine/scanners/received_headers.py` (tests 1–3)
   - `backend/app/engine/scanners/forefront_antispam.py` (tests 12–16, 63–64)
   - `backend/app/engine/scanners/spamassassin.py` (tests 18–21, 74)
diff --git a/backend/app/engine/scanners/__init__.py b/backend/app/engine/scanners/__init__.py
new file mode 100644
index 0000000..c794981
--- /dev/null
+++ b/backend/app/engine/scanners/__init__.py
@@ -0,0 +1,2 @@
+"""Scanner implementations grouped by vendor or function."""
+
diff --git a/backend/app/engine/scanners/_legacy_adapter.py b/backend/app/engine/scanners/_legacy_adapter.py
new file mode 100644
index 0000000..cbcbae4
--- /dev/null
+++ b/backend/app/engine/scanners/_legacy_adapter.py
@@ -0,0 +1,305 @@
+"""Adapters exposing the legacy ``decode-spam-headers.py`` tests as scanners.
+
+The legacy analyzer script is loaded from the repository root on first use.
+The loaded module, its test catalog and the most recently built analyzer are
+cached in module-level globals for the lifetime of the process.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+import importlib.util
+from itertools import chain
+from pathlib import Path
+import sys
+from typing import Iterable
+
+from app.engine.logger import Logger as EngineLogger
+from app.engine.models import Severity, TestResult, TestStatus
+from app.engine.parser import ParsedHeader
+
+
+_LEGACY_MODULE_NAME = "legacy_decode_spam_headers"
+
+# Lazily populated caches; ``None`` means "not loaded yet".
+_LEGACY_MODULE = None
+_TEST_CATALOG: dict[int, tuple[str, str]] | None = None
+_ARRAY_TESTS: set[int] | None = None
+# Snapshots of the legacy class-level collections, restored before each new
+# analysis by ``_reset_legacy_state``.
+_BASE_HANDLED: list[str] | None = None
+_BASE_APPLIANCES: set[str] | None = None
+# One-entry cache: header signature and analyzer of the latest analysis.
+_CONTEXT_SIGNATURE: tuple[tuple[str, str], ...] | None = None
+_CONTEXT_ANALYSIS = None
+
+
+def _silence_legacy_logger(logger: object) -> None:
+    """Best-effort switch of a legacy logger to quiet, colorless output."""
+    try:
+        logger.options["log"] = "none"
+        logger.options["nocolor"] = True
+    except (AttributeError, KeyError, TypeError):
+        # Quiet output is a nicety: a logger without a mutable ``options``
+        # mapping must not stop the analyzer from being used.
+        pass
+
+
+def _load_legacy_module() -> object:
+    """Import ``decode-spam-headers.py`` from the repository root, once.
+
+    Returns:
+        The loaded legacy module (cached after the first successful call).
+
+    Raises:
+        FileNotFoundError: If the legacy script is missing.
+        RuntimeError: If no import spec or loader can be created for it.
+    """
+    global _LEGACY_MODULE
+    if _LEGACY_MODULE is not None:
+        return _LEGACY_MODULE
+
+    # parents[4] climbs from backend/app/engine/scanners/ to the repo root.
+    root = Path(__file__).resolve().parents[4]
+    legacy_path = root / "decode-spam-headers.py"
+    if not legacy_path.exists():
+        raise FileNotFoundError(f"Missing legacy analyzer at {legacy_path}")
+
+    spec = importlib.util.spec_from_file_location(
+        _LEGACY_MODULE_NAME, legacy_path
+    )
+    if spec is None or spec.loader is None:
+        raise RuntimeError("Unable to load legacy decode-spam-headers module.")
+
+    module = importlib.util.module_from_spec(spec)
+    # Register before executing (as the importlib recipe does) so code that
+    # resolves the module through ``sys.modules`` while it runs can find it.
+    sys.modules[_LEGACY_MODULE_NAME] = module
+    try:
+        spec.loader.exec_module(module)
+    except BaseException:
+        # Never leave a half-initialised module importable.
+        sys.modules.pop(_LEGACY_MODULE_NAME, None)
+        raise
+
+    _silence_legacy_logger(getattr(module, "logger", None))
+
+    _LEGACY_MODULE = module
+    return module
+
+
+def _load_test_catalog() -> tuple[dict[int, tuple[str, str]], set[int]]:
+    """Build, once, the catalog of legacy tests.
+
+    Also snapshots the legacy class-level ``Handled_Spam_Headers`` and
+    ``Manually_Added_Appliances`` so ``_reset_legacy_state`` can restore them.
+
+    Returns:
+        ``(catalog, array_ids)``: ``catalog`` maps a test id to
+        ``(test name, analyzer method name)``; ``array_ids`` holds the ids in
+        the third group returned by ``getAllTests()``.
+    """
+    global _TEST_CATALOG, _ARRAY_TESTS, _BASE_HANDLED, _BASE_APPLIANCES
+    if _TEST_CATALOG is not None and _ARRAY_TESTS is not None:
+        return _TEST_CATALOG, _ARRAY_TESTS
+
+    module = _load_legacy_module()
+    analyzer = module.SMTPHeadersAnalysis(module.logger)
+    standard, decode_all, array_tests = analyzer.getAllTests()
+
+    # chain() takes any iterables, whereas ``+`` needs matching sequence types.
+    catalog: dict[int, tuple[str, str]] = {
+        int(test_id): (name, func.__name__)
+        for test_id, name, func in chain(standard, decode_all, array_tests)
+    }
+    array_ids = {int(test_id) for test_id, _name, _func in array_tests}
+
+    # getAllTests() may omit test 35; add it by hand when the analyzer has the
+    # method. The spelling below matches the legacy method name.
+    if 35 not in catalog and hasattr(analyzer, "testTLCOObClasifiers"):
+        catalog[35] = ("X-MS-Oob-TLC-OOBClassifiers", "testTLCOObClasifiers")
+
+    _BASE_HANDLED = list(module.SMTPHeadersAnalysis.Handled_Spam_Headers)
+    _BASE_APPLIANCES = set(module.SMTPHeadersAnalysis.Manually_Added_Appliances)
+    _TEST_CATALOG = catalog
+    _ARRAY_TESTS = array_ids
+    return catalog, array_ids
+
+
+def _reset_legacy_state(module: object) -> None:
+    """Restore the legacy class-level collections from their snapshots."""
+    if _BASE_HANDLED is not None:
+        module.SMTPHeadersAnalysis.Handled_Spam_Headers = list(_BASE_HANDLED)
+    if _BASE_APPLIANCES is not None:
+        module.SMTPHeadersAnalysis.Manually_Added_Appliances = set(_BASE_APPLIANCES)
+
+
+def _headers_signature(headers: list[ParsedHeader]) -> tuple[tuple[str, str], ...]:
+    """Return a hashable ``(name, value)`` fingerprint of *headers*."""
+    return tuple((header.name, header.value) for header in headers)
+
+
+def _build_text(headers: list[ParsedHeader]) -> str:
+    """Serialize *headers* as newline-joined ``Name: value`` lines.
+
+    A header whose value is empty is written as ``Name:``.
+    """
+    return "\n".join(
+        f"{header.name}: {header.value}" if header.value else f"{header.name}:"
+        for header in headers
+    )
+
+
+def _get_analysis(headers: list[ParsedHeader]) -> object:
+    """Return a legacy analyzer primed with *headers*.
+
+    The analyzer is rebuilt only when the header signature changes, so
+    successive scanner runs over the same headers share one instance.
+    """
+    global _CONTEXT_SIGNATURE, _CONTEXT_ANALYSIS
+    module = _load_legacy_module()
+    _load_test_catalog()  # makes sure the class-state snapshots exist
+
+    signature = _headers_signature(headers)
+    if _CONTEXT_ANALYSIS is None or _CONTEXT_SIGNATURE != signature:
+        _reset_legacy_state(module)
+        analyzer = module.SMTPHeadersAnalysis(module.logger, resolve=False, decode_all=False)
+        analyzer.headers = [(h.index, h.name, h.value) for h in headers]
+        analyzer.text = _build_text(headers)
+        _silence_legacy_logger(getattr(analyzer, "logger", None))
+        _CONTEXT_SIGNATURE = signature
+        _CONTEXT_ANALYSIS = analyzer
+    return _CONTEXT_ANALYSIS
+
+
+def _strip_colors(value: str) -> str:
+    """Pass *value* through ``EngineLogger.noColors``; falsy input gives an empty string."""
+    if not value:
+        return ""
+    try:
+        return EngineLogger.noColors(str(value))
+    except Exception:
+        # Prefer the unprocessed text over losing a result to a formatting error.
+        return str(value)
+
+
+def _normalize_payload(payload: object) -> tuple[str, str, str, str] | None:
+    """Coerce one legacy result into ``(header, value, analysis, description)``.
+
+    A dict contributes its ``header``/``value``/``analysis``/``description``
+    entries (missing or ``None`` become empty strings). A bare string is taken as
+    the analysis text, with ``"-"`` for header and value. Anything else gives ``None``.
+    """
+    if isinstance(payload, dict):
+        header = payload.get("header", "")
+        value = payload.get("value", "")
+        analysis = payload.get("analysis", "")
+        description = payload.get("description", "")
+        return (
+            str(header) if header is not None else "",
+            str(value) if value is not None else "",
+            str(analysis) if analysis is not None else "",
+            str(description) if description is not None else "",
+        )
+    if isinstance(payload, str):
+        return ("-", "-", payload, "")
+    return None
+
+
+def _combine_payloads(payloads: list[tuple[str, str, str, str]]) -> tuple[str, str, str, str]:
+    """Merge normalized payloads into a single ``(header, value, analysis, description)``.
+
+    Header names are de-duplicated in first-seen order and comma-joined, values
+    are newline-joined, and analyses/descriptions are joined by blank lines.
+    Header and value fall back to ``"-"`` when nothing meaningful was collected.
+    """
+    headers: list[str] = []
+    values: list[str] = []
+    analyses: list[str] = []
+    descriptions: list[str] = []
+
+    for header, value, analysis, description in payloads:
+        if header and header != "-":
+            headers.append(header)
+        if value and value != "-":
+            values.append(value)
+        if analysis:
+            analyses.append(analysis)
+        if description:
+            descriptions.append(description)
+
+    header_name = ", ".join(dict.fromkeys(headers)) if headers else "-"
+    header_value = "\n".join(values) if values else "-"
+    analysis = "\n\n".join(analyses).strip()
+    description = "\n\n".join(descriptions).strip()
+    return header_name, header_value, analysis, description
+
+
+@dataclass
+class LegacyScanner:
+    """Scanner that delegates to one test method of the legacy analyzer."""
+
+    id: int  # legacy test id
+    name: str  # test name taken from the legacy catalog
+    method_name: str  # analyzer method that run() invokes
+    category: str | None = None  # optional grouping label
+
+    def run(self, headers: list[ParsedHeader]) -> TestResult | None:
+        """Run the legacy test against *headers*.
+
+        Returns:
+            A ``TestResult`` combining everything the legacy method reported,
+            or ``None`` when the method is missing or reports nothing usable.
+        """
+        analyzer = _get_analysis(headers)
+        method = getattr(analyzer, self.method_name, None)
+        if method is None:
+            return None
+        result = method()
+        if not result:
+            return None
+
+        # The tuple form keeps isinstance() working on interpreters older than
+        # 3.10, where ``list | tuple`` is rejected at runtime.
+        items = result if isinstance(result, (list, tuple)) else [result]
+        payloads = [
+            normalized
+            for normalized in map(_normalize_payload, items)
+            if normalized
+        ]
+        if not payloads:
+            return None
+
+        header_name, header_value, analysis, description = _combine_payloads(payloads)
+        return TestResult(
+            test_id=self.id,
+            test_name=self.name,
+            header_name=_strip_colors(header_name),
+            header_value=_strip_colors(header_value),
+            analysis=_strip_colors(analysis),
+            description=_strip_colors(description),
+            severity=Severity.info,
+            status=TestStatus.success,
+        )
+
+
+def build_scanners(test_ids: Iterable[int], category: str | None = None) -> list[LegacyScanner]:
+    """Create one ``LegacyScanner`` per id in *test_ids*.
+
+    Args:
+        test_ids: Legacy test ids to wrap, in the order they should be returned.
+        category: Optional label stored on every scanner that is created.
+
+    Raises:
+        ValueError: If an id is not present in the legacy test catalog.
+    """
+    catalog, _ = _load_test_catalog()
+    scanners: list[LegacyScanner] = []
+    for test_id in test_ids:
+        if test_id not in catalog:
+            raise ValueError(f"Unknown test id: {test_id}")
+        name, method_name = catalog[test_id]
+        scanners.append(
+            LegacyScanner(id=test_id, name=name, method_name=method_name, category=category)
+        )
+    return scanners
diff --git a/backend/app/engine/scanners/barracuda.py b/backend/app/engine/scanners/barracuda.py
new file mode 100644
index 0000000..cca810b
--- /dev/null
+++ b/backend/app/engine/scanners/barracuda.py
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from ._legacy_adapter import build_scanners
+
+
+SCANNERS = build_scanners([69, 70, 71, 72, 73], category="Barracuda")
diff --git a/backend/app/engine/scanners/forefront_antispam.py b/backend/app/engine/scanners/forefront_antispam.py
new file mode 100644
index 0000000..6c671f7
--- /dev/null
+++ b/backend/app/engine/scanners/forefront_antispam.py
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+from ._legacy_adapter import build_scanners
+
+
+SCANNERS = build_scanners(
+    [12, 13, 14, 15, 16, 63, 64],
+    category="Forefront Antispam",
+)
diff --git a/backend/app/engine/scanners/general.py b/backend/app/engine/scanners/general.py
new file mode 100644
index 0000000..9d9c104
--- /dev/null
+++ b/backend/app/engine/scanners/general.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+from ._legacy_adapter import build_scanners
+
+
+SCANNERS = build_scanners(
+    [
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        17,
+        22,
+        23,
+        24,
+        25,
+        26,
+        36,
+        37,
+        44,
+        45,
+        46,
+        60,
+        68,
+        75,
+        76,
+        77,
+        78,
+        79,
+        81,
+        82,
+        86,
+        87,
+        90,
+        91,
+        92,
+        93,
+        94,
+        95,
+        96,
+        98,
+        103,
+        104,
+        105,
+        106,
+    ],
+    category="General",
+)
diff --git a/backend/app/engine/scanners/ironport.py b/backend/app/engine/scanners/ironport.py
new file mode 100644
index 0000000..4fff947
--- /dev/null
+++ b/backend/app/engine/scanners/ironport.py
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+from ._legacy_adapter import build_scanners
+
+
+SCANNERS = build_scanners(
+    [27, 28, 29, 38, 39, 40, 41, 42, 43, 88, 89],
+    category="IronPort",
+)
diff --git a/backend/app/engine/scanners/microsoft_general.py b/backend/app/engine/scanners/microsoft_general.py
new file mode 100644
index 0000000..788c6be
--- /dev/null
+++ b/backend/app/engine/scanners/microsoft_general.py
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+from ._legacy_adapter import build_scanners
+
+
+SCANNERS = build_scanners(
+    [31, 32, 33, 34, 35, 80, 83, 84, 85, 99, 100, 101, 102],
+    category="Microsoft",
+)
diff --git a/backend/app/engine/scanners/mimecast.py b/backend/app/engine/scanners/mimecast.py
new file mode 100644
index 0000000..a7bc1e2
--- /dev/null
+++ b/backend/app/engine/scanners/mimecast.py
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from ._legacy_adapter import build_scanners
+
+
+SCANNERS = build_scanners([30, 61, 62, 65], category="Mimecast")
diff --git a/backend/app/engine/scanners/proofpoint.py b/backend/app/engine/scanners/proofpoint.py
new file mode 100644
index 0000000..41e39c1
--- /dev/null
+++ b/backend/app/engine/scanners/proofpoint.py
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from ._legacy_adapter import build_scanners
+
+
+SCANNERS = build_scanners([66, 67], category="Proofpoint")
diff --git a/backend/app/engine/scanners/received_headers.py b/backend/app/engine/scanners/received_headers.py
new file mode 100644
index 0000000..ffbd80a
--- /dev/null
+++ b/backend/app/engine/scanners/received_headers.py
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from ._legacy_adapter import build_scanners
+
+
+SCANNERS = build_scanners([1, 2, 3], category="Received Headers")
diff --git a/backend/app/engine/scanners/spamassassin.py b/backend/app/engine/scanners/spamassassin.py
new file mode 100644
index 0000000..85987df
--- /dev/null
+++ b/backend/app/engine/scanners/spamassassin.py
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from ._legacy_adapter import build_scanners
+
+
+SCANNERS = build_scanners([18, 19, 20, 21, 74], category="SpamAssassin")
diff --git a/backend/app/engine/scanners/trendmicro.py b/backend/app/engine/scanners/trendmicro.py
new file mode 100644
index 0000000..1d9e7c3
--- /dev/null
+++ b/backend/app/engine/scanners/trendmicro.py
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from ._legacy_adapter import build_scanners
+
+
+SCANNERS = build_scanners(list(range(47, 60)) + [97], category="Trend Micro")
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 1084bbb..dd334a1 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -5,3 +5,9 @@ pytest>=8.0
 pytest-asyncio>=0.23
 httpx>=0.27
 ruff>=0.4
+python-dateutil
+tldextract
+packaging
+dnspython
+requests
+colorama