diff --git a/.gitignore b/.gitignore index ec12c43..8b33c13 100644 --- a/.gitignore +++ b/.gitignore @@ -44,7 +44,7 @@ htmlcov/ .coverage.* .cache nosetests.xml -coverage.xml +coverage.* *.cover *.py,cover .hypothesis/ @@ -159,6 +159,9 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ +# VSCode +.vscode/ + # uv uv.lock diff --git a/README.md b/README.md index 32ec87a..74de9da 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,125 @@ An LSP for Docassemble YAML Interviews ```bash pip install . -python3 -m dayamlchecker `find . -name "*.yml" -path "*/questions/*" snot -path "*/.venv/*" -not -path "*/build/*"` # i.e. a space separated list of files +dayaml check # defaults to the current project; usually scans ./docassemble +dayaml format # defaults to the current project; usually scans ./docassemble +dayaml check path/to/yaml-or-dir +dayaml check --show-experimental path/to/yaml-or-dir +dayaml check --ignore-codes E410,E301 path/to/yaml-or-dir +dayaml check --format-on-success --no-url-check path/to/yaml-or-dir +dayaml format path/to/interview.yml + +# Backwards-compatible entry points +python3 -m dayamlchecker # defaults to the current project; usually scans ./docassemble +dayamlchecker # defaults to the current project; usually scans ./docassemble +python3 -m dayamlchecker `find . -name "*.yml" -path "*/questions/*" -not -path "*/.venv/*" -not -path "*/build/*"` # i.e. a space separated list of files +dayamlchecker `find . -name "*.yml" -path "*/questions/*" -not -path "*/.venv/*" -not -path "*/build/*"` +dayamlchecker-fmt path/to/interview.yml ``` +## Message Codes + +Validation output now includes stable message codes in the style of tools like pylint: + +```text +[E301] At interview.yml:12: Keys that shouldn't exist! ['not_a_real_key'] +``` + +Use `dayaml check --show-experimental ...` to include the legacy `REAL ERROR:` +prefix for non-experimental errors. + +Use `dayaml check --ignore-codes E410,E301 ...` to suppress specific +diagnostic codes when you need to waive known findings. + +`dayaml` also reads optional project settings from `pyproject.toml`: + +```toml +[tool.dayaml] +ignore_codes = ["E503", "E410"] +yaml_path = "docassemble" +args = ["--no-url-check"] +``` + +When you pass a project root that contains `pyproject.toml`, `dayaml` scans the +configured `yaml_path` relative to that file. If `yaml_path` is omitted, it +defaults to `docassemble`. + +When you omit file arguments, `dayaml check`, `dayaml format`, `dayamlchecker`, +and `dayamlchecker-fmt` all start from the current working directory. If that +directory contains a `pyproject.toml` (or is inside a project that does), they +then resolve `tool.dayaml.yaml_path` from the nearest project config. In the +common case where `yaml_path` is not set, the practical result is that running +these commands from a project root will scan `./docassemble`. + +`tool.dayaml.args` lets you set checker CLI defaults in the project config. +Those args are applied before the actual command-line args, so an explicit CLI +flag still wins. For example, `args = ["--no-url-check"]` disables URL checks +by default, while `dayaml check --url-check ...` turns them back on for one run. + +`dayaml check --format-on-success ...` validates each file first and then runs +the formatter on files that have no error-severity findings after ignore-code +filtering. This uses the already-read file content in memory, so it avoids a +separate checker-then-formatter pass over the same file. Formatting happens +before the later URL-check phase, so a run can still exit nonzero for URL +errors after formatting changes have already been written. Use +`--no-url-check` with this mode if you want the combined YAML-check-and-format +behavior without the later repository URL scan. + +### Real Errors + +| Code | Meaning | +| --- | --- | +| `E101` | Duplicate YAML key | +| `E102` | YAML parsing error | +| `E103` | Value should be a YAML string | +| `E111` | Invalid Mako syntax | +| `E112` | Mako compile error | +| `E121` | Python code block must be a YAML string | +| `E122` | Python syntax error | +| `E201` | Jinja2 syntax error | +| `E202` | Jinja2 template error | +| `E203` | JavaScript modifier must be a string | +| `E204` | Invalid JavaScript syntax | +| `E205` | JavaScript modifier must contain at least one `val()` call | +| `E206` | `val()` references a field not defined on this screen | +| `E207` | `val()` argument must be a quoted string literal | +| `E301` | Unknown YAML keys | +| `E302` | Malformed `show if` shorthand | +| `E303` | `show if: code` must be a YAML string | +| `E304` | `show if: code` has a Python syntax error | +| `E305` | `show if` dict must include `variable` or `code` | +| `E306` | No recognized block type found | +| `E307` | Block matches too many exclusive interview types | +| `E308` | Interview-order block is missing a matching show/hide guard | +| `E309` | Show/hide visibility logic is nested too deeply | +| `E401` | Python variable reference must be a YAML string | +| `E402` | Python variable reference cannot contain whitespace | +| `E403` | `objects` block must be a list or dict | +| `E404` | `fields: code` must be a YAML string | +| `E405` | Bare `fields` dict has no recognized field or `code` key | +| `E406` | `fields` must be a list or dict | +| `E407` | Field modifier `variable` must be a string | +| `E408` | Field modifier `variable` references a field not defined on this screen | +| `E409` | Field modifier `code` contains another validation error | +| `E410` | `show if: code` references a variable defined on the same screen | +| `E411` | Field modifier dict must include `variable` or `code` | +| `E412` | Field modifier shorthand references a field not defined on this screen | +| `E501` | Combobox widget is not accessible | +| `E502` | Field label is missing on a multi-field screen | +| `E503` | DOCX attachment is missing `tagged pdf` | +| `E504` | Bootstrap theme CSS has low contrast | +| `E505` | Image is missing alt text | +| `E506` | Markdown heading levels skip | +| `E507` | HTML heading levels skip | +| `E508` | Link has no accessible text | +| `E509` | Link text is too generic | + +### Conventions + +| Code | Meaning | +| --- | --- | +| `C101` | `validation code` should call `validation_error()` | + ## WCAG checks The checker includes WCAG-style checks for clear static accessibility failures in interview source. These checks run by default; use `--no-wcag` to disable them. @@ -16,7 +132,7 @@ The checker includes WCAG-style checks for clear static accessibility failures i ```bash python3 -m dayamlchecker path/to/interview.yml # WCAG checks on (default) python3 -m dayamlchecker --no-wcag path/to/interview.yml # WCAG checks off -python3 -m dayamlchecker --accessibility-error-on-widget combobox path/to/interview.yml # opt into combobox failures +python3 -m dayamlchecker --accessibility-error-on-widget combobox path/to/interview.yml # opt into combobox accessibility errors ``` Some accessibility checks are behind runtime options while the rules are still being evaluated. Right now `combobox` failures are default-off and can be enabled with `--accessibility-error-on-widget combobox`. @@ -41,7 +157,7 @@ Optional runtime-gated accessibility checks: - `combobox` usage, including `datatype: combobox` when `--accessibility-error-on-widget combobox` is enabled -Accessibility informational notes are also emitted for likely PDF accessibility issues: +Accessibility errors are also emitted for likely PDF accessibility issues: - DOCX attachments missing `tagged pdf: True` (set this in `features` or on the attachment) diff --git a/pyproject.toml b/pyproject.toml index 0bfb7fd..4269171 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ readme = "README.md" requires-python = ">=3.12" dependencies = [ "esprima>=4.0.1", + "jinja2>=3.1.6", "mako>=1.3.10", "black>=24.0.0", "ruamel.yaml>=0.18.0", @@ -21,21 +22,46 @@ authors = [ { name="Bryce Willey", email="bryce.steven.willey@gmail.com" }, ] -[tool.setuptools] -package-dir = { "" = "src" } +[dependency-groups] +dev = [ + "mypy>=1.19.1", + "pytest>=9.0.2", + "pytest-cov>=7.1.0", + "pytest-xdist>=3.8.0", + "types-requests>=2.32.0", +] -[tool.setuptools.package-data] -dayamlchecker = ["py.typed"] +[project.scripts] +dayaml = "dayamlchecker.cli:main" +dayamlchecker = "dayamlchecker.yaml_structure:main" +dayamlchecker-fmt = "dayamlchecker.code_formatter:main" [build-system] requires = ["setuptools >= 77.0.3"] build-backend = "setuptools.build_meta" -[project.scripts] -dayamlchecker = "dayamlchecker.yaml_structure:main" -dayamlchecker-fmt = "dayamlchecker.code_formatter:main" - [tool.mypy] mypy_path = "src" exclude = ["^build/"] explicit_package_bases = true + +[tool.setuptools] +package-dir = { "" = "src" } + +[tool.setuptools.package-data] +dayamlchecker = ["py.typed"] + +[tool.coverage.run] + branch = true + omit = [ + "tests/*", + ] + relative_files = true + source = [ + "src", + ] + +[tool.coverage.report] + omit = [ + "tests/*", + ] diff --git a/requirements.txt b/requirements.txt index 0ab72df..237fcdc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ esprima>=4.0.1 +jinja2>=3.0 mako>=1.3.10 black>=24.0.0 ruamel.yaml>=0.18.0 diff --git a/src/dayamlchecker/__main__.py b/src/dayamlchecker/__main__.py index fd1e456..f4d811f 100644 --- a/src/dayamlchecker/__main__.py +++ b/src/dayamlchecker/__main__.py @@ -1,5 +1,4 @@ from .yaml_structure import main -import sys - -sys.exit(main()) +if __name__ == "__main__": # pragma: no cover + raise SystemExit(main()) diff --git a/src/dayamlchecker/_files.py b/src/dayamlchecker/_files.py new file mode 100644 index 0000000..9a17f94 --- /dev/null +++ b/src/dayamlchecker/_files.py @@ -0,0 +1,182 @@ +"""Shared file-collection utilities used by both the formatter and the checker.""" + +from __future__ import annotations + +from dataclasses import dataclass +import os +from pathlib import Path +import shlex +import tomllib + + +@dataclass(frozen=True) +class DayamlProjectConfig: + project_root: Path + yaml_path: Path + ignore_codes: frozenset[str] + cli_args: tuple[str, ...] + + +def _normalize_ignore_codes(raw_codes: object) -> frozenset[str]: + if isinstance(raw_codes, str): + values = raw_codes.split(",") + elif isinstance(raw_codes, (list, tuple)): + values = [] + for item in raw_codes: + if isinstance(item, str): + values.extend(item.split(",")) + else: + return frozenset() + return frozenset(code.strip().upper() for code in values if code.strip()) + + +def _normalize_cli_args(raw_args: object) -> tuple[str, ...]: + if isinstance(raw_args, str): + return tuple(shlex.split(raw_args)) + if isinstance(raw_args, (list, tuple)): + return tuple(item for item in raw_args if isinstance(item, str)) + return () + + +def _load_dayaml_project_config(project_dir: Path) -> DayamlProjectConfig | None: + pyproject_path = project_dir / "pyproject.toml" + if not pyproject_path.is_file(): + return None + + with pyproject_path.open("rb") as stream: + pyproject = tomllib.load(stream) + + tool_section = pyproject.get("tool") + dayaml_section = ( + tool_section.get("dayaml", {}) if isinstance(tool_section, dict) else {} + ) + if not isinstance(dayaml_section, dict): + dayaml_section = {} + + yaml_path_value = dayaml_section.get("yaml_path", "docassemble") + yaml_path = Path("docassemble") + if isinstance(yaml_path_value, str) and yaml_path_value.strip(): + yaml_path = Path(yaml_path_value) + if not yaml_path.is_absolute(): + yaml_path = project_dir / yaml_path + + return DayamlProjectConfig( + project_root=project_dir, + yaml_path=yaml_path, + ignore_codes=_normalize_ignore_codes(dayaml_section.get("ignore_codes", ())), + cli_args=( + _normalize_cli_args(dayaml_section.get("args", ())) + + _normalize_cli_args(dayaml_section.get("check_args", ())) + ), + ) + + +def _find_nearest_pyproject_dir(path: Path) -> Path | None: + candidate = path if path.is_dir() else path.parent + for directory in (candidate, *candidate.parents): + if (directory / "pyproject.toml").is_file(): + return directory + return None + + +def _collect_dayaml_ignore_codes(paths: list[Path]) -> frozenset[str]: + ignore_codes: set[str] = set() + seen_projects: set[Path] = set() + + for path in paths: + project_dir = _find_nearest_pyproject_dir(path.resolve()) + if project_dir is None: + continue + project_dir = project_dir.resolve() + if project_dir in seen_projects: + continue + seen_projects.add(project_dir) + project_config = _load_dayaml_project_config(project_dir) + if project_config is not None: + ignore_codes.update(project_config.ignore_codes) + + return frozenset(ignore_codes) + + +def _collect_dayaml_cli_args(paths: list[Path]) -> tuple[str, ...]: + cli_args: list[str] = [] + seen_projects: set[Path] = set() + + for path in paths: + project_dir = _find_nearest_pyproject_dir(path.resolve()) + if project_dir is None: + continue + project_dir = project_dir.resolve() + if project_dir in seen_projects: + continue + seen_projects.add(project_dir) + project_config = _load_dayaml_project_config(project_dir) + if project_config is not None: + cli_args.extend(project_config.cli_args) + + return tuple(cli_args) + + +def _resolve_collection_path(path: Path) -> Path: + if not path.is_dir(): + return path + + project_config = _load_dayaml_project_config(path.resolve()) + if project_config is None: + return path + return project_config.yaml_path + + +def _is_default_ignored_dir(dirname: str) -> bool: + """Return True for directory names that should be skipped by default.""" + return ( + dirname.startswith(".git") + or dirname.startswith(".github") + or dirname.startswith(".venv") + or dirname == "build" + or dirname == "dist" + or dirname == "node_modules" + or dirname == "sources" + ) + + +def _collect_yaml_files( + paths: list[Path], + check_all: bool = False, + include_default_ignores: bool | None = None, +) -> list[Path]: + """Expand paths to a de-duplicated, sorted list of YAML files. + + - Files are included if they have .yml or .yaml extension + - Directories are recursively searched for YAML files + """ + if include_default_ignores is None: + include_default_ignores = not check_all + + yaml_files: list[Path] = [] + for path in paths: + path = _resolve_collection_path(path) + if path.is_dir(): + # Recursively find all YAML files, pruning ignored directories + for root, dirnames, filenames in os.walk(path, topdown=True): + root_path = Path(root) + if include_default_ignores: + if _is_default_ignored_dir(root_path.name): + dirnames[:] = [] + continue + dirnames[:] = [ + d for d in dirnames if not _is_default_ignored_dir(d) + ] + for filename in filenames: + if filename.lower().endswith((".yml", ".yaml")): + yaml_files.append(root_path / filename) + elif path.suffix.lower() in (".yml", ".yaml"): + yaml_files.append(path) + seen = set() + result = [] + for f in yaml_files: + resolved = f.resolve() + if resolved not in seen: + seen.add(resolved) + result.append(f) + return sorted(result) diff --git a/src/dayamlchecker/_jinja.py b/src/dayamlchecker/_jinja.py new file mode 100644 index 0000000..88e2ec1 --- /dev/null +++ b/src/dayamlchecker/_jinja.py @@ -0,0 +1,148 @@ +""" +Jinja2 detection, header validation, and pre-processing utilities for +docassemble YAML files. + +Per docassemble documentation, Jinja2 template processing must be explicitly +enabled by placing ``# use jinja`` as the very first line of a YAML file. +Files that contain Jinja syntax without that header are considered invalid. + +Reference: https://docassemble.org/docs/interviews.html#jinja2 +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass + +import jinja2 + +from dayamlchecker.messages import MessageCode + +__all__ = [ + "_contains_jinja_syntax", + "_has_jinja_header", + "preprocess_jinja", + "JinjaError", +] + + +@dataclass(frozen=True, slots=True) +class JinjaError: + """Structured error returned by :func:`preprocess_jinja`.""" + + code: str + message: str + + +_JINJA_SYNTAX_RE = re.compile( + r"({{.*?}}|{%-?.*?-?%}|{#.*?#})", + re.DOTALL, +) + + +def _contains_jinja_syntax(content: str) -> bool: + """Return True if *content* contains any Jinja template syntax. + + Looks for properly delimited Jinja constructs (``{{ ... }}``, ``{% ... %}``, + ``{# ... #}``) rather than simple substring checks to avoid false positives + from Python dicts or other curly-brace usage. + """ + return _JINJA_SYNTAX_RE.search(content) is not None + + +def _has_jinja_header(content: str) -> bool: + """Return True if the first line of *content* is exactly ``# use jinja``. + + Per docassemble documentation this header must appear on the very first line + of the file (written exactly this way) to enable Jinja2 template processing. + Leading whitespace before the ``#`` disqualifies the line. + """ + first_line = content.split("\n", 1)[0].rstrip() + return first_line == "# use jinja" + + +class _SilentUndefined(jinja2.Undefined): + """Undefined template variables render as empty string instead of raising. + + Docassemble interview templates reference runtime variables (user answers, + DA objects) that are unavailable during static analysis. This class makes + every undefined reference silently produce an empty value so that the + rendered YAML can still be structurally validated. + """ + + def __str__(self) -> str: + return "" + + def __iter__(self): + return iter([]) + + def __len__(self) -> int: + return 0 + + # Allow arbitrary attribute / item / call chaining without raising. + def __getattr__(self, name: str) -> "_SilentUndefined": + # Guard: don't swallow Python's own dunder / private lookups. + if name.startswith("_"): + raise AttributeError(name) + return _SilentUndefined() + + def __getitem__(self, key: object) -> "_SilentUndefined": # type: ignore[override] + # jinja2.Undefined declares __getitem__ as returning Never (it always + # raises UndefinedError). We intentionally return a _SilentUndefined + # instead so that subscript access on an undefined value chains + # silently rather than blowing up during static analysis. + return _SilentUndefined() + + def __call__(self, *args: object, **kwargs: object) -> "_SilentUndefined": # type: ignore[override] + # Same reasoning as __getitem__: jinja2.Undefined declares __call__ + # as returning Never, but we return _SilentUndefined so that calling + # an undefined value (e.g. {{ some_macro() }}) produces an empty + # value instead of raising during static analysis. + return _SilentUndefined() + + +def preprocess_jinja(content: str) -> tuple[str, list[JinjaError]]: + """Render *content* as a Jinja2 template and return ``(rendered, errors)``. + + Uses :class:`_SilentUndefined` so that template variables unavailable at + static-analysis time produce empty strings rather than raising errors. + Jinja2 *syntax* errors are captured and returned in *errors*; in that case + *rendered* equals *content* unchanged so the caller can still report the + problem against the original source. + + The ``# use jinja`` header line (if present) is passed through Jinja2 + unchanged — it is a plain YAML comment, not a Jinja2 construct. + + Args: + content: Raw file content, including the ``# use jinja`` header. + + Returns: + Tuple of ``(rendered_text, errors)``. A non-empty + *errors* list means the template had render errors. + """ + env = jinja2.Environment( + undefined=_SilentUndefined, + keep_trailing_newline=True, + ) + + try: + template = env.from_string(content) + except jinja2.exceptions.TemplateSyntaxError as ex: + return content, [ + JinjaError( + code=MessageCode.JINJA2_SYNTAX_ERROR, + message=f"Jinja2 syntax error at line {ex.lineno}: {ex.message}", + ) + ] + + try: + rendered = template.render() + except jinja2.exceptions.TemplateError as ex: + return content, [ + JinjaError( + code=MessageCode.JINJA2_TEMPLATE_ERROR, + message=f"Jinja2 template error: {ex}", + ) + ] + + return rendered, [] diff --git a/src/dayamlchecker/accessibility.py b/src/dayamlchecker/accessibility.py index e9e1144..eae9d63 100644 --- a/src/dayamlchecker/accessibility.py +++ b/src/dayamlchecker/accessibility.py @@ -3,6 +3,8 @@ import re from typing import Any, Optional +from dayamlchecker.messages import MessageCode + TEXT_SECTION_KEYS = ("question", "subquestion", "under", "help", "note", "html") FIELD_NON_LABEL_KEYS = { "code", @@ -44,6 +46,7 @@ @dataclass(frozen=True) class AccessibilityFinding: rule_id: str + code: str message: str line_number: int @@ -181,6 +184,7 @@ def _check_combobox_usage( findings.append( AccessibilityFinding( rule_id="combobox-not-accessible", + code=MessageCode.ACCESSIBILITY_COMBOBOX_NOT_ACCESSIBLE, message="Accessibility: screen uses `combobox`, which is not allowed for accessibility reasons", line_number=line_number, ) @@ -198,6 +202,7 @@ def _check_combobox_usage( findings.append( AccessibilityFinding( rule_id="combobox-not-accessible", + code=MessageCode.ACCESSIBILITY_COMBOBOX_NOT_ACCESSIBLE, message=( "Accessibility: field uses `datatype: combobox`, which is not allowed for accessibility reasons: " f"{field_label}" @@ -219,7 +224,10 @@ def _check_multifield_no_label_usage( return [] findings: list[AccessibilityFinding] = [] - for field in fields: + for field_index, field in enumerate(fields, start=1): + if _is_code_only_field(field): + continue + field_line = ( document_start_line + field.get("__line__", doc.get("__line__", 1)) - 1 ) @@ -233,10 +241,11 @@ def _check_multifield_no_label_usage( if not (has_no_label or label_is_blank or missing_label): continue - field_name = _extract_field_variable(field) or "" + field_name = _extract_field_variable(field) or f"field #{field_index}" findings.append( AccessibilityFinding( rule_id="no-label-on-multi-field-screen", + code=MessageCode.ACCESSIBILITY_NO_LABEL_MULTI_FIELD, message=( "Accessibility: `no label` or empty/missing field label is only allowed on single-field screens; " f"screen has {len(fields)} fields: {field_name}" @@ -282,8 +291,9 @@ def _check_tagged_pdf_for_docx( findings.append( AccessibilityFinding( rule_id="tagged-pdf-not-enabled", + code=MessageCode.ACCESSIBILITY_TAGGED_PDF_NOT_ENABLED, message=( - "Info: Accessibility: DOCX attachment detected without `tagged pdf: True`; " + "Accessibility: DOCX attachment detected without `tagged pdf: True`; " "set it on `features` or the attachment to improve generated PDF accessibility" ), line_number=line_number, @@ -333,6 +343,7 @@ def _check_theme_css_contrast( findings.append( AccessibilityFinding( rule_id="theme-contrast-too-low", + code=MessageCode.ACCESSIBILITY_THEME_CONTRAST_TOO_LOW, message=( "Accessibility: bootstrap theme CSS has low contrast for " f"{component} (ratio {ratio:.2f}:1, expected at least {WCAG_MIN_CONTRAST_RATIO:.1f}:1)" @@ -382,6 +393,7 @@ def _check_missing_alt_text( findings.append( AccessibilityFinding( rule_id="image-missing-alt-text", + code=MessageCode.ACCESSIBILITY_IMAGE_MISSING_ALT_TEXT, message=( f"Accessibility: markdown image in {section.location} is missing alt text: {snippet}" ), @@ -402,6 +414,7 @@ def _check_missing_alt_text( findings.append( AccessibilityFinding( rule_id="image-missing-alt-text", + code=MessageCode.ACCESSIBILITY_IMAGE_MISSING_ALT_TEXT, message=( f"Accessibility: [FILE ...] image in {section.location} is missing alt text: {snippet}" ), @@ -421,6 +434,7 @@ def _check_missing_alt_text( findings.append( AccessibilityFinding( rule_id="image-missing-alt-text", + code=MessageCode.ACCESSIBILITY_IMAGE_MISSING_ALT_TEXT, message=( f"Accessibility: HTML image in {section.location} is missing alt text: {img_tag.strip()}" ), @@ -449,6 +463,7 @@ def _check_markdown_heading_order( findings.append( AccessibilityFinding( rule_id="markdown-heading-level-skip", + code=MessageCode.ACCESSIBILITY_MARKDOWN_HEADING_LEVEL_SKIP, message=( "Accessibility: markdown heading levels skip " f"from H{previous_level} to H{current_level} in {section.location}: {line_text}" @@ -479,6 +494,7 @@ def _check_html_heading_order( findings.append( AccessibilityFinding( rule_id="html-heading-level-skip", + code=MessageCode.ACCESSIBILITY_HTML_HEADING_LEVEL_SKIP, message=( "Accessibility: HTML heading levels skip " f"from H{previous_level} to H{current_level} in {section.location}: {snippet}" @@ -510,6 +526,7 @@ def _check_empty_link_text( findings.append( AccessibilityFinding( rule_id="empty-link-text", + code=MessageCode.ACCESSIBILITY_EMPTY_LINK_TEXT, message=( f"Accessibility: link in {section.location} has no accessible text: {link['snippet']}" ), @@ -535,6 +552,7 @@ def _check_non_descriptive_link_text( findings.append( AccessibilityFinding( rule_id="non-descriptive-link-text", + code=MessageCode.ACCESSIBILITY_NON_DESCRIPTIVE_LINK_TEXT, message=( f"Accessibility: link text in {section.location} is too generic: {link['text'].strip() or link['snippet']}" ), @@ -592,10 +610,29 @@ def _iter_fields(doc: dict[str, Any]) -> list[dict[str, Any]]: return [field for field in fields if isinstance(field, dict)] +def _is_code_only_field(field: dict[str, Any]) -> bool: + keys = {str(key).strip() for key in field if key != "__line__"} + return keys == {"code"} + + def _extract_field_variable(field: dict[str, Any]) -> str: explicit = str(field.get("field") or "").strip() if explicit: return explicit + no_label_value = field.get("no label") + if isinstance(no_label_value, str): + candidate = no_label_value.strip() + if candidate and candidate.lower() not in { + "true", + "false", + "yes", + "no", + "1", + "0", + "on", + "off", + }: + return candidate for key, value in field.items(): if key in FIELD_NON_LABEL_KEYS: continue diff --git a/src/dayamlchecker/check_questions_urls.py b/src/dayamlchecker/check_questions_urls.py index a68b168..836d26f 100644 --- a/src/dayamlchecker/check_questions_urls.py +++ b/src/dayamlchecker/check_questions_urls.py @@ -4,6 +4,7 @@ from __future__ import annotations import argparse +import asyncio import os import pathlib import re @@ -665,6 +666,16 @@ def collect_urls( _DEAD_STATUS_CODES: frozenset[int] = frozenset({404, 410}) +_MAX_CONCURRENT_URL_CHECKS = 10 + + +@dataclass(frozen=True) +class _URLCheckOutcome: + url: str + status_code: int | None + was_unreachable: bool = False + repaired: bool = False + skipped: bool = False def _check_single_url( @@ -685,47 +696,112 @@ def _check_single_url( return None, True -def check_urls( +async def _check_single_url_async( session: requests.Session, - urls: Iterable[str], + url: str, timeout: int, - repair_candidates: dict[str, set[str]] | None = None, -) -> tuple[list[tuple[str, int]], list[str]]: - """Return (broken, unreachable) for the given *urls*. + *, + report_unreachable: bool = True, +) -> tuple[int | None, bool]: + return await asyncio.to_thread( + _check_single_url, + session, + url, + timeout, + report_unreachable=report_unreachable, + ) - *broken* contains ``(url, status_code)`` pairs for dead pages. - *unreachable* lists URLs that could not be fetched at all. - """ - broken: list[tuple[str, int]] = [] - unreachable: list[str] = [] - for url in sorted(urls): - # Skip whitelisted URLs (e.g., API endpoints requiring authentication) + +async def _check_url_with_repairs( + session: requests.Session, + url: str, + timeout: int, + repair_candidates: dict[str, set[str]], + semaphore: asyncio.Semaphore, +) -> _URLCheckOutcome: + async with semaphore: if is_whitelisted_url(url): - continue + return _URLCheckOutcome(url=url, status_code=None, skipped=True) - status_code, was_unreachable = _check_single_url(session, url, timeout) + status_code, was_unreachable = await _check_single_url_async( + session, url, timeout + ) if was_unreachable: - unreachable.append(url) - continue + return _URLCheckOutcome( + url=url, + status_code=status_code, + was_unreachable=True, + ) if status_code not in _DEAD_STATUS_CODES: - continue + return _URLCheckOutcome(url=url, status_code=status_code) - repaired = False - for candidate in sorted((repair_candidates or {}).get(url, ())): + for candidate in sorted(repair_candidates.get(url, ())): if is_whitelisted_url(candidate): - repaired = True - break - candidate_status, candidate_unreachable = _check_single_url( + return _URLCheckOutcome( + url=url, + status_code=status_code, + repaired=True, + ) + candidate_status, candidate_unreachable = await _check_single_url_async( session, candidate, timeout, report_unreachable=False, ) if not candidate_unreachable and candidate_status not in _DEAD_STATUS_CODES: - repaired = True - break - if not repaired: - broken.append((url, status_code)) + return _URLCheckOutcome( + url=url, + status_code=status_code, + repaired=True, + ) + + return _URLCheckOutcome(url=url, status_code=status_code) + + +async def _check_urls_async( + session: requests.Session, + urls: Iterable[str], + timeout: int, + repair_candidates: dict[str, set[str]], +) -> tuple[_URLCheckOutcome, ...]: + semaphore = asyncio.Semaphore(_MAX_CONCURRENT_URL_CHECKS) + tasks = [ + _check_url_with_repairs( + session, + url, + timeout, + repair_candidates, + semaphore, + ) + for url in sorted(urls) + ] + return tuple(await asyncio.gather(*tasks)) + + +def check_urls( + session: requests.Session, + urls: Iterable[str], + timeout: int, + repair_candidates: dict[str, set[str]] | None = None, +) -> tuple[list[tuple[str, int]], list[str]]: + """Return (broken, unreachable) for the given *urls*. + + *broken* contains ``(url, status_code)`` pairs for dead pages. + *unreachable* lists URLs that could not be fetched at all. + """ + outcomes = asyncio.run( + _check_urls_async(session, urls, timeout, repair_candidates or {}) + ) + broken: list[tuple[str, int]] = [] + unreachable: list[str] = [] + for outcome in outcomes: + if outcome.skipped: + continue + if outcome.was_unreachable: + unreachable.append(outcome.url) + continue + if outcome.status_code in _DEAD_STATUS_CODES and not outcome.repaired: + broken.append((outcome.url, outcome.status_code)) return broken, unreachable diff --git a/src/dayamlchecker/cli.py b/src/dayamlchecker/cli.py new file mode 100644 index 0000000..f9a420b --- /dev/null +++ b/src/dayamlchecker/cli.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import sys +from typing import TextIO + +from dayamlchecker.code_formatter import main as format_main +from dayamlchecker.yaml_structure import main as check_main + + +def _print_help(output_stream: TextIO) -> None: + print( + "usage: dayaml []\n\n" + "Commands:\n" + " check Validate Docassemble YAML files (defaults to the current working directory)\n" + " format Format Python code blocks in Docassemble YAML files (defaults to the current working directory)\n\n" + "Use 'dayaml --help' for command-specific options.\n" + "The 'check' command supports '--show-experimental'/\"--no-show-experimental\" to control inclusion of the legacy REAL ERROR prefix.", + file=output_stream, + ) + + +def main(argv: list[str] | None = None) -> int: + args = sys.argv[1:] if argv is None else argv + + if not args: + _print_help(sys.stderr) + return 2 + + if args[0] in {"-h", "--help"}: + _print_help(sys.stdout) + return 0 + + command, *command_args = args + if command == "check": + return check_main(command_args) + if command == "format": + return format_main(command_args) + + print(f"Unknown command: {command}", file=sys.stderr) + _print_help(sys.stderr) + return 2 + + +if __name__ == "__main__": # pragma: no cover + raise SystemExit(main()) diff --git a/src/dayamlchecker/code_formatter.py b/src/dayamlchecker/code_formatter.py index 26f2e4d..4261a62 100644 --- a/src/dayamlchecker/code_formatter.py +++ b/src/dayamlchecker/code_formatter.py @@ -15,9 +15,9 @@ from __future__ import annotations +import argparse import re import sys -import os from dataclasses import dataclass, field from pathlib import Path from typing import Any @@ -26,6 +26,12 @@ from ruamel.yaml import YAML from ruamel.yaml.comments import CommentedMap, CommentedSeq +from dayamlchecker._files import _collect_yaml_files +from dayamlchecker._jinja import ( + _contains_jinja_syntax, + _has_jinja_header, +) + __all__ = [ "format_yaml_file", "format_yaml_string", @@ -70,9 +76,7 @@ def _strip_common_indent(lines: list[str]) -> tuple[list[str], int]: indents = [] for line in lines: if line.strip(): # Skip empty/whitespace-only lines - match = re.match(r"^( *)", line) - if match: - indents.append(len(match.group(1))) + indents.append(len(line) - len(line.lstrip(" "))) if not indents: return lines, 0 @@ -254,16 +258,13 @@ def _collect_text_replacements_for_doc( replacements: list[tuple[int, int, str, tuple[str, ...]]] = [] if isinstance(doc, CommentedMap): - for key, value in list(doc.items()): + has_lc_key = hasattr(doc.lc, "key") + for key, value in doc.items(): key_str = str(key) current_path = path + (key_str,) # Only consider keys we care about and plain strings - if ( - key_str in config.python_keys - and isinstance(value, str) - and hasattr(doc.lc, "key") - ): + if key_str in config.python_keys and isinstance(value, str) and has_lc_key: try: key_line, _ = doc.lc.key(key) except Exception: @@ -308,6 +309,92 @@ def _collect_text_replacements_for_doc( return replacements +def _code_key_re(python_keys: set[str]) -> re.Pattern[str]: + """Build a regex matching block-scalar header lines for the given keys. + + Examples of matched lines:: + + code: | + validation code: > + code: |- + """ + # Sort longest-first so that "validation code" is attempted before "code". + alternatives = "|".join( + re.escape(k) for k in sorted(python_keys, key=len, reverse=True) + ) + return re.compile( + rf"^([ \t]*)({alternatives}):\s*[|>]", + re.MULTILINE, + ) + + +def _format_jinja_yaml_string( + yaml_content: str, + config: FormatterConfig, +) -> tuple[str, bool]: + """Format Python code blocks inside a '# use jinja' YAML file. + + Because Jinja syntax (``{{ }}``, ``{% %}``, ``{# #}``) is not valid YAML, + the file cannot be parsed by the normal YAML path. Instead this function + works directly on the raw text: + + 1. Locate every ``code:`` / ``validation code:`` block header line using a + regex. + 2. Use :func:`_find_block_body_span` to determine the exact line range of + each block body. + 3. **Skip** any body that itself contains Jinja syntax — those blocks + cannot be safely reformatted because the Jinja expressions may not be + valid Python. + 4. Format the remaining bodies with Black via :func:`format_python_code`. + 5. Apply replacements bottom-up so line indices stay valid. + + Returns the same ``(result, changed)`` tuple as :func:`format_yaml_string`. + """ + lines = yaml_content.splitlines(keepends=True) + replacements: list[tuple[int, int, str]] = [] + pattern = _code_key_re(config.python_keys) + + for line_idx, line in enumerate(lines): + if not pattern.match(line): + continue + + body_start, body_end, _body_indent = _find_block_body_span(lines, line_idx) + if body_end < body_start: + # Empty block body — nothing to format. + continue + + body = "".join(lines[body_start : body_end + 1]) + + # Don't try to format a code block that itself contains Jinja syntax; + # the expressions would make the Python invalid for Black. + if _contains_jinja_syntax(body): + continue + + try: + formatted = format_python_code(body, config) + except Exception: + # If Black can't parse the block, leave it as-is. + continue + + if _normalize_newlines(formatted) != _normalize_newlines(body): + replacements.append((body_start, body_end, formatted)) + + if not replacements: + return yaml_content, False + + # Apply from bottom to top so earlier indices are not invalidated. + replacements.sort(key=lambda t: t[0], reverse=True) + for start, end, new_text in replacements: + new_lines = new_text.splitlines(keepends=True) + # Preserve the absence of a trailing newline on the last body line. + if end >= start and not lines[end].endswith("\n") and new_lines: + new_lines[-1] = new_lines[-1].rstrip("\n") + lines[start : end + 1] = new_lines + + result = "".join(lines) + return result, result != yaml_content + + def format_yaml_string( yaml_content: str, config: FormatterConfig | None = None, @@ -325,6 +412,10 @@ def format_yaml_string( Returns: Tuple of (formatted YAML string, whether any changes were made) + + Raises: + Exception: If YAML parsing fails. Files with the ``# use jinja`` header + are handled through a separate text-based formatter path. """ if config is None: config = FormatterConfig() @@ -334,6 +425,16 @@ def format_yaml_string( yaml.width = 4096 # Prevent line wrapping in strings # Use ruamel's parser to obtain position metadata; we'll replace text + # Files that explicitly opt in via '# use jinja' must be pre-processed + # through Jinja2 before the YAML parser sees them, because Jinja syntax is + # not valid YAML. Files without the header are passed straight to the + # YAML parser; if Jinja-like syntax happens to appear (e.g. as literal + # example text in a template field) but is valid YAML, it will be handled + # normally. If it is not valid YAML the parser will surface a regular + # parse error. + if _has_jinja_header(yaml_content): + return _format_jinja_yaml_string(yaml_content, config) + # Load as a stream to handle multi-document YAML documents = list(yaml.load_all(yaml_content)) @@ -359,7 +460,8 @@ def format_yaml_string( lines[start : end + 1] = new_lines - return "".join(lines), bool(all_replacements) + result = "".join(lines) + return result, result != yaml_content def format_yaml_file( @@ -389,66 +491,9 @@ def format_yaml_file( return formatted, changed -def _collect_yaml_files( - paths: list[Path], - check_all: bool = False, - include_default_ignores: bool | None = None, -) -> list[Path]: - """ - Expand paths to a list of YAML files. - - - Files are included if they have .yml or .yaml extension - - Directories are recursively searched for YAML files - """ - - def _is_default_ignored_dir(dirname: str) -> bool: - return ( - dirname.startswith(".git") - or dirname.startswith(".github") - or dirname.startswith(".venv") - or dirname == "build" - or dirname == "dist" - or dirname == "node_modules" - or dirname == "sources" - ) - - if include_default_ignores is None: - include_default_ignores = not check_all - - yaml_files: list[Path] = [] - for path in paths: - if path.is_dir(): - # Recursively find all YAML files, pruning ignored directories - for root, dirnames, filenames in os.walk(path, topdown=True): - if include_default_ignores and _is_default_ignored_dir(Path(root).name): - dirnames[:] = [] - continue - if include_default_ignores: - dirnames[:] = [ - dirname - for dirname in dirnames - if not _is_default_ignored_dir(dirname) - ] - root_path = Path(root) - for filename in filenames: - if filename.lower().endswith((".yml", ".yaml")): - yaml_files.append(root_path / filename) - elif path.suffix.lower() in (".yml", ".yaml"): - yaml_files.append(path) - seen = set() - result = [] - for f in yaml_files: - resolved = f.resolve() - if resolved not in seen: - seen.add(resolved) - result.append(f) - return sorted(result) - - -def main() -> int: +def main(argv: list[str] | None = None) -> int: """CLI entry point.""" - import argparse - + raw_argv = sys.argv[1:] if argv is None else argv parser = argparse.ArgumentParser( description="Format Python code blocks in docassemble YAML files", formatter_class=argparse.RawDescriptionHelpFormatter, @@ -464,7 +509,7 @@ def main() -> int: ) parser.add_argument( "files", - nargs="+", + nargs="*", type=Path, help="YAML files or directories to format (directories are searched recursively)", ) @@ -488,7 +533,7 @@ def main() -> int: "-q", "--quiet", action="store_true", - help="Only output errors", + help="Suppress all output except errors", ) parser.add_argument( "--check-all", @@ -498,14 +543,51 @@ def main() -> int: "(.git*, .github*, sources)" ), ) + parser.add_argument( + "--no-summary", + action="store_true", + help="Do not print the summary line after processing", + ) - args = parser.parse_args() + args = parser.parse_args(raw_argv) + if not args.files: + args.files = [Path.cwd()] config = FormatterConfig( black_line_length=args.line_length, convert_indent_4_to_2=not args.no_indent_conversion, ) + cwd = Path.cwd().resolve() + + def _display(file_path: Path) -> Path: + resolved = file_path.resolve() + try: + return resolved.relative_to(cwd) + except ValueError: + pass + return resolved + + def _emit_error(message: str) -> None: + print(message, file=sys.stderr) + + progress_line_active = False + + def _emit_dot() -> None: + nonlocal progress_line_active + print(".", end="", flush=True) + progress_line_active = True + + def _finish_progress_line() -> None: + nonlocal progress_line_active + if progress_line_active: + print() + progress_line_active = False + + def _emit_line(message: str) -> None: + _finish_progress_line() + print(message) + # Collect all YAML files from paths (handles directories recursively) yaml_files = _collect_yaml_files(args.files, check_all=args.check_all) if not yaml_files: @@ -519,44 +601,47 @@ def main() -> int: for file_path in yaml_files: if not file_path.exists(): - print(f"Error: File not found: {file_path}", file=sys.stderr) files_error += 1 exit_code = 1 + _emit_error(f"error: {_display(file_path)} (file not found)") continue try: - _, changed = format_yaml_file( - file_path, - config=config, - write=not args.check, - ) + content = file_path.read_text(encoding="utf-8") + + formatted, changed = format_yaml_string(content, config) + if changed and not args.check: + file_path.write_text(formatted, encoding="utf-8") if changed: files_changed += 1 if args.check: - print(f"Would reformat: {file_path}") + _emit_line(f"Would reformat: {_display(file_path)}") exit_code = 1 elif not args.quiet: - print(f"Reformatted: {file_path}") + _emit_line(f"reformatted: {_display(file_path)}") else: files_unchanged += 1 if not args.quiet: - print(f"Unchanged: {file_path}") + _emit_dot() except Exception as e: - print(f"Error processing {file_path}: {e}", file=sys.stderr) files_error += 1 exit_code = 1 + _finish_progress_line() + _emit_error(f"error: {_display(file_path)}: {e}") - if not args.quiet: + if not args.check and not args.quiet and not args.no_summary: + _finish_progress_line() total = files_changed + files_unchanged + files_error - print() print( f"Summary: {files_changed} reformatted, {files_unchanged} unchanged, {files_error} errors ({total} total)" ) + elif not args.quiet: + _finish_progress_line() return exit_code -if __name__ == "__main__": +if __name__ == "__main__": # pragma: no cover sys.exit(main()) diff --git a/src/dayamlchecker/messages.py b/src/dayamlchecker/messages.py new file mode 100644 index 0000000..75f24fe --- /dev/null +++ b/src/dayamlchecker/messages.py @@ -0,0 +1,318 @@ +from __future__ import annotations + +from dataclasses import dataclass +from enum import StrEnum + + +@dataclass(frozen=True, slots=True) +class MessageDefinition: + code: str + summary: str + template: str + experimental: bool = True + + +class MessageCode(StrEnum): + YAML_DUPLICATE_KEY = "E101" + YAML_PARSE_ERROR = "E102" + JINJA2_SYNTAX_ERROR = "E201" + JINJA2_TEMPLATE_ERROR = "E202" + UNKNOWN_KEYS = "E301" + + YAML_STRING_TYPE = "E103" + MAKO_SYNTAX_ERROR = "E111" + MAKO_COMPILE_ERROR = "E112" + PYTHON_CODE_TYPE = "E121" + PYTHON_SYNTAX_ERROR = "E122" + + JS_MODIFIER_TYPE = "E203" + JS_INVALID_SYNTAX = "E204" + JS_MISSING_VAL_CALL = "E205" + JS_UNKNOWN_SCREEN_FIELD = "E206" + JS_VAL_ARG_NOT_QUOTED = "E207" + + SHOW_IF_MALFORMED = "E302" + SHOW_IF_CODE_TYPE = "E303" + SHOW_IF_CODE_SYNTAX = "E304" + SHOW_IF_DICT_KEYS = "E305" + NO_POSSIBLE_TYPES = "E306" + TOO_MANY_TYPES = "E307" + INTERVIEW_ORDER_UNMATCHED_GUARD = "E308" + NESTED_VISIBILITY_LOGIC = "E309" + + PYTHON_VAR_TYPE = "E401" + PYTHON_VAR_WHITESPACE = "E402" + OBJECTS_BLOCK_TYPE = "E403" + FIELDS_CODE_TYPE = "E404" + FIELDS_DICT_KEYS = "E405" + FIELDS_TYPE = "E406" + FIELD_MODIFIER_VARIABLE_TYPE = "E407" + FIELD_MODIFIER_UNKNOWN_VARIABLE_DICT = "E408" + FIELD_MODIFIER_CODE_ERROR = "E409" + FIELD_MODIFIER_SAME_SCREEN_CODE = "E410" + FIELD_MODIFIER_DICT_KEYS = "E411" + FIELD_MODIFIER_UNKNOWN_VARIABLE_STRING = "E412" + OBJECT_FIELD_CHOICES_CODE_DICT = "E413" + + ACCESSIBILITY_COMBOBOX_NOT_ACCESSIBLE = "E501" + ACCESSIBILITY_NO_LABEL_MULTI_FIELD = "E502" + ACCESSIBILITY_TAGGED_PDF_NOT_ENABLED = "E503" + ACCESSIBILITY_THEME_CONTRAST_TOO_LOW = "E504" + ACCESSIBILITY_IMAGE_MISSING_ALT_TEXT = "E505" + ACCESSIBILITY_MARKDOWN_HEADING_LEVEL_SKIP = "E506" + ACCESSIBILITY_HTML_HEADING_LEVEL_SKIP = "E507" + ACCESSIBILITY_EMPTY_LINK_TEXT = "E508" + ACCESSIBILITY_NON_DESCRIPTIVE_LINK_TEXT = "E509" + + VALIDATION_CODE_MISSING_VALIDATION_ERROR = "C101" + + +MESSAGE_DEFINITIONS: dict[str, MessageDefinition] = { + MessageCode.YAML_DUPLICATE_KEY: MessageDefinition( + code=MessageCode.YAML_DUPLICATE_KEY, + summary="Duplicate YAML key", + template="duplicate key '{key_name}'", + experimental=False, + ), + MessageCode.YAML_PARSE_ERROR: MessageDefinition( + code=MessageCode.YAML_PARSE_ERROR, + summary="YAML parsing error", + template="{error}", + experimental=False, + ), + MessageCode.JINJA2_SYNTAX_ERROR: MessageDefinition( + code=MessageCode.JINJA2_SYNTAX_ERROR, + summary="Jinja2 syntax error", + template="Jinja2 syntax error at line {line_number}: {message}", + experimental=False, + ), + MessageCode.JINJA2_TEMPLATE_ERROR: MessageDefinition( + code=MessageCode.JINJA2_TEMPLATE_ERROR, + summary="Jinja2 template error", + template="Jinja2 template error: {error}", + experimental=False, + ), + MessageCode.UNKNOWN_KEYS: MessageDefinition( + code=MessageCode.UNKNOWN_KEYS, + summary="Unknown YAML keys", + template="Keys that shouldn't exist! {keys}", + experimental=False, + ), + MessageCode.YAML_STRING_TYPE: MessageDefinition( + code=MessageCode.YAML_STRING_TYPE, + summary="Expected YAML string", + template="{value} isn't a string", + ), + MessageCode.MAKO_SYNTAX_ERROR: MessageDefinition( + code=MessageCode.MAKO_SYNTAX_ERROR, + summary="Invalid Mako syntax", + template="{error}", + ), + MessageCode.MAKO_COMPILE_ERROR: MessageDefinition( + code=MessageCode.MAKO_COMPILE_ERROR, + summary="Mako compile error", + template="{error}", + ), + MessageCode.PYTHON_CODE_TYPE: MessageDefinition( + code=MessageCode.PYTHON_CODE_TYPE, + summary="Expected Python code as YAML string", + template="code block must be a YAML string, is {value_type}", + ), + MessageCode.PYTHON_SYNTAX_ERROR: MessageDefinition( + code=MessageCode.PYTHON_SYNTAX_ERROR, + summary="Python syntax error", + template="Python syntax error: {message}", + ), + MessageCode.JS_MODIFIER_TYPE: MessageDefinition( + code=MessageCode.JS_MODIFIER_TYPE, + summary="JavaScript modifier must be string", + template="{modifier_key} must be a string, is {value_type}", + ), + MessageCode.JS_INVALID_SYNTAX: MessageDefinition( + code=MessageCode.JS_INVALID_SYNTAX, + summary="Invalid JavaScript syntax", + template="Invalid JavaScript syntax in {modifier_key}: {error}", + ), + MessageCode.JS_MISSING_VAL_CALL: MessageDefinition( + code=MessageCode.JS_MISSING_VAL_CALL, + summary="Missing val() call in JavaScript modifier", + template="{modifier_key} must contain at least one val() call to reference an on-screen field", + ), + MessageCode.JS_UNKNOWN_SCREEN_FIELD: MessageDefinition( + code=MessageCode.JS_UNKNOWN_SCREEN_FIELD, + summary="val() references field not defined on this screen", + template='{modifier_key} references val("{var_name}"), but "{var_name}" is not defined on this screen{caveat}', + ), + MessageCode.JS_VAL_ARG_NOT_QUOTED: MessageDefinition( + code=MessageCode.JS_VAL_ARG_NOT_QUOTED, + summary="val() argument must be quoted string literal", + template='val() argument must be a quoted string literal, not "{bad_arg}". Use val("...") or val(\'...\') instead', + ), + MessageCode.SHOW_IF_MALFORMED: MessageDefinition( + code=MessageCode.SHOW_IF_MALFORMED, + summary="Malformed show if shorthand", + template='show if value "{value}" appears to be malformed. Use YAML dict syntax: show if: {{ variable: var_name, is: value }} or show if: {{ code: ... }}', + ), + MessageCode.SHOW_IF_CODE_TYPE: MessageDefinition( + code=MessageCode.SHOW_IF_CODE_TYPE, + summary="show if code must be YAML string", + template="show if: code must be a YAML string", + ), + MessageCode.SHOW_IF_CODE_SYNTAX: MessageDefinition( + code=MessageCode.SHOW_IF_CODE_SYNTAX, + summary="show if code has Python syntax error", + template="show if: code has Python syntax error: {message}", + ), + MessageCode.SHOW_IF_DICT_KEYS: MessageDefinition( + code=MessageCode.SHOW_IF_DICT_KEYS, + summary="show if dict missing variable/code", + template='show if dict must have either "variable" key or "code" key', + ), + MessageCode.PYTHON_VAR_TYPE: MessageDefinition( + code=MessageCode.PYTHON_VAR_TYPE, + summary="Python variable reference must be YAML string", + template="The python var needs to be a YAML string, is {value}", + ), + MessageCode.PYTHON_VAR_WHITESPACE: MessageDefinition( + code=MessageCode.PYTHON_VAR_WHITESPACE, + summary="Python variable reference cannot contain whitespace", + template="The python var cannot have whitespace (is {value})", + ), + MessageCode.OBJECTS_BLOCK_TYPE: MessageDefinition( + code=MessageCode.OBJECTS_BLOCK_TYPE, + summary="Objects block must be list or dict", + template="Objects block needs to be a list or a dict, is {value}", + ), + MessageCode.FIELDS_CODE_TYPE: MessageDefinition( + code=MessageCode.FIELDS_CODE_TYPE, + summary="fields code must be YAML string", + template="fields: code must be a YAML string, is {value_type}", + ), + MessageCode.FIELDS_DICT_KEYS: MessageDefinition( + code=MessageCode.FIELDS_DICT_KEYS, + summary="fields must be a list unless using fields: {code: ...}", + template='fields must be a YAML list of field items. A dict is only valid for "fields: {{code: ...}}". {detail}', + ), + MessageCode.FIELDS_TYPE: MessageDefinition( + code=MessageCode.FIELDS_TYPE, + summary="fields must be list or dict", + template="fields should be a list or dict, is {value}", + ), + MessageCode.FIELD_MODIFIER_VARIABLE_TYPE: MessageDefinition( + code=MessageCode.FIELD_MODIFIER_VARIABLE_TYPE, + summary="field modifier variable must be string", + template="{modifier_key}: variable must be a string, got {value_type}", + ), + MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_DICT: MessageDefinition( + code=MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_DICT, + summary="field modifier variable references off-screen field", + template="{modifier_key}: variable: {ref_var} is not defined on this screen. Use {modifier_key}: {{ code: ... }} instead for variables from previous screens", + ), + MessageCode.FIELD_MODIFIER_CODE_ERROR: MessageDefinition( + code=MessageCode.FIELD_MODIFIER_CODE_ERROR, + summary="field modifier code has validation error", + template="{modifier_key}: code has {error}", + ), + MessageCode.FIELD_MODIFIER_SAME_SCREEN_CODE: MessageDefinition( + code=MessageCode.FIELD_MODIFIER_SAME_SCREEN_CODE, + summary="show if code references same-screen field", + template="{modifier_key}: code references variable(s) defined on this screen ({references}). Use {modifier_key}: or {modifier_key}: {{ variable: , is: ... }} instead", + ), + MessageCode.FIELD_MODIFIER_DICT_KEYS: MessageDefinition( + code=MessageCode.FIELD_MODIFIER_DICT_KEYS, + summary="field modifier dict missing variable/code", + template='{modifier_key} dict must have either "variable" or "code"', + ), + MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_STRING: MessageDefinition( + code=MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_STRING, + summary="field modifier shorthand references off-screen field", + template="{modifier_key}: {modifier_value} is not defined on this screen. Use {modifier_key}: {{ code: ... }} instead for variables from previous screens", + ), + MessageCode.OBJECT_FIELD_CHOICES_CODE_DICT: MessageDefinition( + code=MessageCode.OBJECT_FIELD_CHOICES_CODE_DICT, + summary="Object field choices cannot use nested code block", + template='Object-style fields cannot use "choices: {{ code: ... }}". Use a direct choices expression instead, for example "choices: some_object.choices()"', + ), + MessageCode.NO_POSSIBLE_TYPES: MessageDefinition( + code=MessageCode.NO_POSSIBLE_TYPES, + summary="No recognized block type found", + template="Couldn't identify a block type: no valid combination of keys found (looking for keys like question, include, metadata, code, objects, etc. See https://docassemble.org/docs.html)", + ), + MessageCode.TOO_MANY_TYPES: MessageDefinition( + code=MessageCode.TOO_MANY_TYPES, + summary="Block matches multiple exclusive types", + template="Too many types this block could be: {possible_types}", + ), + MessageCode.INTERVIEW_ORDER_UNMATCHED_GUARD: MessageDefinition( + code=MessageCode.INTERVIEW_ORDER_UNMATCHED_GUARD, + summary="Interview-order block missing matching guard", + template='interview-order style block references "{field_var}" without a matching guard for that field\'s show/hide logic; this can cause the interview to get stuck', + ), + MessageCode.NESTED_VISIBILITY_LOGIC: MessageDefinition( + code=MessageCode.NESTED_VISIBILITY_LOGIC, + summary="Visibility logic is nested too deeply", + template="show if/hide if visibility logic is nested {depth} levels on this screen (more than 2)", + ), + MessageCode.ACCESSIBILITY_COMBOBOX_NOT_ACCESSIBLE: MessageDefinition( + code=MessageCode.ACCESSIBILITY_COMBOBOX_NOT_ACCESSIBLE, + summary="Combobox widget is not accessible", + template="Accessibility: combobox widgets are not allowed for accessibility reasons", + ), + MessageCode.ACCESSIBILITY_NO_LABEL_MULTI_FIELD: MessageDefinition( + code=MessageCode.ACCESSIBILITY_NO_LABEL_MULTI_FIELD, + summary="Field label missing on multi-field screen", + template="Accessibility: no label or empty/missing field label is only allowed on single-field screens", + ), + MessageCode.ACCESSIBILITY_TAGGED_PDF_NOT_ENABLED: MessageDefinition( + code=MessageCode.ACCESSIBILITY_TAGGED_PDF_NOT_ENABLED, + summary="DOCX attachment may need tagged PDF enabled", + template="Accessibility: DOCX attachment detected without tagged pdf enabled", + ), + MessageCode.ACCESSIBILITY_THEME_CONTRAST_TOO_LOW: MessageDefinition( + code=MessageCode.ACCESSIBILITY_THEME_CONTRAST_TOO_LOW, + summary="Bootstrap theme CSS has low contrast", + template="Accessibility: bootstrap theme CSS has low contrast", + ), + MessageCode.ACCESSIBILITY_IMAGE_MISSING_ALT_TEXT: MessageDefinition( + code=MessageCode.ACCESSIBILITY_IMAGE_MISSING_ALT_TEXT, + summary="Image is missing alt text", + template="Accessibility: image is missing alt text", + ), + MessageCode.ACCESSIBILITY_MARKDOWN_HEADING_LEVEL_SKIP: MessageDefinition( + code=MessageCode.ACCESSIBILITY_MARKDOWN_HEADING_LEVEL_SKIP, + summary="Markdown heading levels skip", + template="Accessibility: markdown heading levels skip", + ), + MessageCode.ACCESSIBILITY_HTML_HEADING_LEVEL_SKIP: MessageDefinition( + code=MessageCode.ACCESSIBILITY_HTML_HEADING_LEVEL_SKIP, + summary="HTML heading levels skip", + template="Accessibility: HTML heading levels skip", + ), + MessageCode.ACCESSIBILITY_EMPTY_LINK_TEXT: MessageDefinition( + code=MessageCode.ACCESSIBILITY_EMPTY_LINK_TEXT, + summary="Link has no accessible text", + template="Accessibility: link has no accessible text", + ), + MessageCode.ACCESSIBILITY_NON_DESCRIPTIVE_LINK_TEXT: MessageDefinition( + code=MessageCode.ACCESSIBILITY_NON_DESCRIPTIVE_LINK_TEXT, + summary="Link text is too generic", + template="Accessibility: link text is too generic", + ), + MessageCode.VALIDATION_CODE_MISSING_VALIDATION_ERROR: MessageDefinition( + code=MessageCode.VALIDATION_CODE_MISSING_VALIDATION_ERROR, + summary="validation code should call validation_error()", + template="validation code does not call validation_error(); consider calling validation_error(...) to provide user-facing error messages", + ), +} + + +def format_message(code: str, **kwargs: object) -> str: + if code not in MESSAGE_DEFINITIONS: + raise ValueError(f"Unknown message code: {code!r}") + return MESSAGE_DEFINITIONS[code].template.format(**kwargs) + + +def is_experimental_code(code: str) -> bool: + if code not in MESSAGE_DEFINITIONS: + raise ValueError(f"Unknown message code: {code!r}") + return MESSAGE_DEFINITIONS[code].experimental diff --git a/src/dayamlchecker/yaml_structure.py b/src/dayamlchecker/yaml_structure.py index a85baac..ab65172 100644 --- a/src/dayamlchecker/yaml_structure.py +++ b/src/dayamlchecker/yaml_structure.py @@ -1,25 +1,37 @@ # Each doc, apply this to each block -import ast import argparse +import ast +from collections.abc import Mapping from dataclasses import dataclass, field from pathlib import Path import re import sys +from typing import Any, Callable, Literal, Optional, cast -from typing import Any, Optional -from dayamlchecker.accessibility import ( - AccessibilityLintOptions, - find_accessibility_findings, -) -from mako.template import Template as MakoTemplate # type: ignore[import-untyped] +import esprima # type: ignore[import-untyped] from mako.exceptions import ( # type: ignore[import-untyped] - SyntaxException, CompileException, + SyntaxException, ) -import esprima # type: ignore[import-untyped] +from mako.template import Template as MakoTemplate # type: ignore[import-untyped] from ruamel.yaml import YAML from ruamel.yaml.comments import CommentedMap, CommentedSeq +from ruamel.yaml.constructor import DuplicateKeyError from ruamel.yaml.error import MarkedYAMLError + +from dayamlchecker._files import ( + _collect_dayaml_cli_args, + _collect_dayaml_ignore_codes, + _collect_yaml_files, +) +from dayamlchecker._jinja import ( + _has_jinja_header, + preprocess_jinja, +) +from dayamlchecker.accessibility import ( + AccessibilityLintOptions, + find_accessibility_findings, +) from dayamlchecker.check_questions_urls import ( infer_package_dirs, infer_root as infer_url_check_root, @@ -27,6 +39,12 @@ print_url_check_report, run_url_check, ) +from dayamlchecker.code_formatter import FormatterConfig, format_yaml_string +from dayamlchecker.messages import MessageCode, format_message, is_experimental_code + +_RuamelYAML = YAML +_RuamelDuplicateKeyError = DuplicateKeyError +_RuamelMarkedYAMLError = MarkedYAMLError # TODO(brycew): # * DA is fine with mixed case it looks like (i.e. Subquestion, vs subquestion) @@ -36,11 +54,12 @@ # * is "gathered" a valid attr? # * handle "response" # * labels above fields? -# * if "# use jinja" at top, process whole file with Jinja: -# https://docassemble.org/docs/interviews.html#jinja2 -__all__ = ["find_errors_from_string", "find_errors", "_collect_yaml_files"] +__all__ = [ + "find_errors_from_string", + "find_errors", +] DEFAULT_LINT_MODE = "default" ACCESSIBILITY_LINT_MODE = "accessibility" @@ -56,6 +75,26 @@ def accessibility_options(self) -> AccessibilityLintOptions: ) +class _ProgressOutput: + def __init__(self) -> None: + self._line_active = False + + def dot(self) -> None: + print(".", end="", flush=True) + self._line_active = True + + def line(self, message: str) -> None: + if self._line_active: + print() + self._line_active = False + print(message) + + def finish(self) -> None: + if self._line_active: + print() + self._line_active = False + + # Global identifiers for _extract_conditional_fields_from_doc below. Should cover all show/hide style modifiers _IDENTIFIER_RE = re.compile(r"[A-Za-z_]\w*") _SIMPLE_IDENTIFIER_RE = re.compile(r"^[A-Za-z_]\w*$") @@ -78,6 +117,104 @@ def accessibility_options(self) -> AccessibilityLintOptions: # Ensure that if there's a space in the str, it's between quotes. space_in_str = re.compile("^[^ ]*['\"].* .*['\"][^ ]*$") +# ValidatorError is a 3-tuple of (error_message, line_number, message_code) +# where message_code is from MessageCode constants. +ValidatorError = tuple[str, int, str] + + +def parse_ignore_codes(raw_codes: str) -> frozenset[str]: + return frozenset( + code.strip().upper() for code in raw_codes.split(",") if code.strip() + ) + + +def _message_severity(code: str | None) -> Literal["error", "warning", "convention"]: + if code is None: + return "error" + if code.startswith("E"): + return "error" + if code.startswith("W"): + return "warning" + if code.startswith("C"): + return "convention" + return "error" + + +def _normalize_validator_error(err: object) -> ValidatorError: + if not isinstance(err, tuple): + raise TypeError( + "Validator errors must be tuples of " + "(message: str, line_number: int, code: str); " + f"got {type(err).__name__}: {err!r}" + ) + if len(err) != 3: + raise ValueError( + "Validator errors must be 3-tuples of " + "(message: str, line_number: int, code: str); " + f"got length {len(err)}: {err!r}" + ) + + err_msg, err_line, err_code = err + if not isinstance(err_msg, str): + raise TypeError( + "Validator error message must be a string; " + f"got {type(err_msg).__name__}: {err!r}" + ) + if not isinstance(err_line, int): + raise TypeError( + "Validator error line number must be an int; " + f"got {type(err_line).__name__}: {err!r}" + ) + if not isinstance(err_code, str): + raise TypeError( + "Validator error code must be a string; " + f"got {type(err_code).__name__}: {err!r}" + ) + + return (err_msg, err_line, err_code) + + +def _validator_error(code: str, line_number: int = 1, **kwargs: Any) -> ValidatorError: + return (format_message(code, **kwargs), line_number, code) + + +def _yaml_error( + *, + code: str, + line_number: int, + file_name: str, + err_str: str | None = None, + **kwargs: Any, +) -> "YAMLError": + return YAMLError( + err_str=err_str if err_str is not None else format_message(code, **kwargs), + line_number=line_number, + file_name=file_name, + experimental=is_experimental_code(code), + code=code, + ) + + +def _variable_candidates(var_expr: str) -> set[str]: + expr = var_expr.strip() + candidates = {expr} + if "." in expr: + parts = expr.split(".") + for i in range(len(parts), 0, -1): + candidates.add(".".join(parts[:i])) + expanded = set() + for candidate in candidates: + candidate = candidate.strip() + if not candidate: + continue + expanded.add(candidate) + # Accept both full indexed paths and their base paths, e.g.: + # children[i].parents["Other"] -> children[i].parents + while candidate.endswith("]") and "[" in candidate: # pragma: no branch + candidate = candidate[: candidate.rfind("[")].strip() + if candidate: # pragma: no branch + expanded.add(candidate) + return expanded class YAMLStr: @@ -86,7 +223,7 @@ class YAMLStr: def __init__(self, x): self.errors = [] if not isinstance(x, str): - self.errors = [(f"{x} isn't a string", 1)] + self.errors = [_validator_error(MessageCode.YAML_STRING_TYPE, value=x)] class MakoText: @@ -99,9 +236,21 @@ def __init__(self, x): x, strict_undefined=True, input_encoding="utf-8" ) except SyntaxException as ex: - self.errors = [(ex, ex.lineno)] + self.errors = [ + _validator_error( + MessageCode.MAKO_SYNTAX_ERROR, + ex.lineno, + error=str(ex), + ) + ] except CompileException as ex: - self.errors = [(ex, ex.lineno)] + self.errors = [ + _validator_error( + MessageCode.MAKO_COMPILE_ERROR, + ex.lineno, + error=str(ex), + ) + ] class MakoMarkdownText(MakoText): @@ -123,7 +272,10 @@ def __init__(self, x): self.errors = [] if not isinstance(x, str): self.errors = [ - (f"code block must be a YAML string, is {type(x).__name__}", 1) + _validator_error( + MessageCode.PYTHON_CODE_TYPE, + value_type=type(x).__name__, + ) ] return try: @@ -132,7 +284,69 @@ def __init__(self, x): # ex.lineno gives line number within the code block lineno = ex.lineno or 1 msg = ex.msg or str(ex) - self.errors = [(f"Python syntax error: {msg}", lineno)] + self.errors = [ + _validator_error(MessageCode.PYTHON_SYNTAX_ERROR, lineno, message=msg) + ] + + +class AcceptFieldValue: + """Validates the ``accept`` modifier on a Docassemble file-upload field. + + DA evaluates ``accept`` as a Python expression at runtime, so the YAML + value must be a Python string literal. This means the MIME type string + needs an extra layer of quoting: + + * ``accept: "'application/pdf,image/jpeg'"`` (YAML double-quotes wrapping + a Python single-quoted string) + * ``accept: |`` followed by ``"application/pdf,image/jpeg"`` on the next + line (block scalar whose content is a double-quoted Python string) + + A common mistake is writing the MIME string bare, e.g. + ``accept: application/pdf`` — YAML delivers ``application/pdf`` to DA, + which parses as Python division (``application / pdf``) and raises a + ``NameError`` at runtime. + """ + + _HINT = ( + "accept must be a Python string literal. " + "Wrap the MIME types in quotes so DA can eval them: " + "accept: \"'application/pdf,image/jpeg,image/png,image/tiff'\"" + ) + + def __init__(self, x): + self.errors = [] + if not isinstance(x, str): + self.errors = [ + _validator_error( + MessageCode.PYTHON_CODE_TYPE, + value_type=type(x).__name__, + ) + ] + return + try: + tree = ast.parse(x.strip(), mode="eval") + except SyntaxError as ex: + lineno = ex.lineno or 1 + msg = ex.msg or str(ex) + self.errors = [ + ( + f"{self._HINT}. Parser message: {msg}", + lineno, + MessageCode.PYTHON_SYNTAX_ERROR, + ) + ] + return + if not ( + isinstance(tree.body, ast.Constant) and isinstance(tree.body.value, str) + ): + self.errors = [ + ( + f"{self._HINT}. " + f"Got a {type(tree.body).__name__} expression instead of a string literal.", + 1, + MessageCode.PYTHON_SYNTAX_ERROR, + ) + ] class ValidationCode(PythonText): @@ -191,10 +405,7 @@ def __init__(self, x): # Otherwise, emit a warning suggesting use of validation_error(). # Use line number 1 because we don't have a more specific mapping here self.errors.append( - ( - "validation code does not call validation_error(); consider calling validation_error(...) to provide user-facing error messages", - 1, - ) + _validator_error(MessageCode.VALIDATION_CODE_MISSING_VALIDATION_ERROR) ) @@ -221,12 +432,23 @@ class JSShowIf: 3) That val() calls use quoted string literals for variable names """ - def __init__(self, x, modifier_key="js show if", screen_variables=None): + def __init__( + self, + x, + modifier_key="js show if", + screen_variables=None, + has_dynamic_fields: bool = False, + ): self.errors = [] self.screen_variables = screen_variables or set() + self._has_dynamic_fields = has_dynamic_fields if not isinstance(x, str): self.errors = [ - (f"{modifier_key} must be a string, is {type(x).__name__}", 1) + _validator_error( + MessageCode.JS_MODIFIER_TYPE, + modifier_key=modifier_key, + value_type=type(x).__name__, + ) ] return @@ -239,9 +461,11 @@ def __init__(self, x, modifier_key="js show if", screen_variables=None): parsed = esprima.parseScript(js_to_check, tolerant=False, loc=True).toDict() except esprima.Error as ex: self.errors.append( - ( - f"Invalid JavaScript syntax in {modifier_key}: {ex}", + _validator_error( + MessageCode.JS_INVALID_SYNTAX, getattr(ex, "lineNumber", 1) or 1, + modifier_key=modifier_key, + error=str(ex), ) ) return @@ -260,14 +484,14 @@ def __init__(self, x, modifier_key="js show if", screen_variables=None): ): val_calls.append(node) stack.extend(v for v in node.values() if isinstance(v, (dict, list))) - elif isinstance(node, list): + elif isinstance(node, list): # pragma: no branch stack.extend(node) if not val_calls: self.errors.append( - ( - f"{modifier_key} must contain at least one val() call to reference an on-screen field", - 1, + _validator_error( + MessageCode.JS_MISSING_VAL_CALL, + modifier_key=modifier_key, ) ) @@ -284,10 +508,18 @@ def __init__(self, x, modifier_key="js show if", screen_variables=None): if self.screen_variables and not self._references_screen_variable( var_name ): + caveat = ( + " (unable to fully validate screen variables because this screen uses fields: code)" + if self._has_dynamic_fields + else "" + ) self.errors.append( - ( - f'{modifier_key} references val("{var_name}"), but "{var_name}" is not defined on this screen', + _validator_error( + MessageCode.JS_UNKNOWN_SCREEN_FIELD, (call.get("loc", {}).get("start", {}).get("line", 1) or 1), + modifier_key=modifier_key, + var_name=var_name, + caveat=caveat, ) ) continue @@ -300,41 +532,21 @@ def __init__(self, x, modifier_key="js show if", screen_variables=None): or first_arg.get("type", "") ) self.errors.append( - ( - f'val() argument must be a quoted string literal, not "{bad_arg}". Use val("...") or val(\'...\') instead', + _validator_error( + MessageCode.JS_VAL_ARG_NOT_QUOTED, (call.get("loc", {}).get("start", {}).get("line", 1) or 1), + bad_arg=bad_arg, ) ) def _references_screen_variable(self, var_expr): if not isinstance(var_expr, str): return False - for candidate in self._variable_candidates(var_expr): + for candidate in _variable_candidates(var_expr): if candidate in self.screen_variables: return True return False - def _variable_candidates(self, var_expr): - expr = var_expr.strip() - candidates = {expr} - if "." in expr: - parts = expr.split(".") - for i in range(len(parts), 0, -1): - candidates.add(".".join(parts[:i])) - expanded = set() - for candidate in candidates: - candidate = candidate.strip() - if not candidate: - continue - expanded.add(candidate) - # Accept both full indexed paths and their base paths, e.g.: - # children[i].parents["Other"] -> children[i].parents - while candidate.endswith("]") and "[" in candidate: - candidate = candidate[: candidate.rfind("[")].strip() - if candidate: - expanded.add(candidate) - return expanded - class ShowIf: """Validator for show if field modifier (non-js variants) @@ -349,21 +561,20 @@ def __init__(self, x, context=None): if isinstance(x, str): # Shorthand form: show if: variable_name # This is only valid if variable_name refers to a yes/no field on the same screen - if ":" not in x and " " not in x: # Simple variable name + if ":" not in x and " " not in x: # pragma: no branch # We can't validate this here without screen context # This will be validated at a higher level with fields context pass - elif x.startswith("variable:") or x.startswith("code:"): + elif x.startswith("variable:") or x.startswith( + "code:" + ): # pragma: no branch # Malformed - these should be YAML dict format self.errors.append( - ( - f'show if value "{x}" appears to be malformed. Use YAML dict syntax: show if: {{ variable: var_name, is: value }} or show if: {{ code: ... }}', - 1, - ) + _validator_error(MessageCode.SHOW_IF_MALFORMED, value=x) ) - elif isinstance(x, dict): + elif isinstance(x, dict): # pragma: no branch # YAML dict form - if "variable" in x: + if "variable" in x: # pragma: no branch # First method: show if: { variable: field_name, is: value } # Can only reference fields on the same screen - we'll validate in context pass @@ -372,12 +583,7 @@ def __init__(self, x, context=None): # Validate Python syntax for the provided code block code_block = x.get("code") if not isinstance(code_block, str): - self.errors.append( - ( - f"show if: code must be a YAML string", - 1, - ) - ) + self.errors.append(_validator_error(MessageCode.SHOW_IF_CODE_TYPE)) else: try: ast.parse(code_block) @@ -385,15 +591,14 @@ def __init__(self, x, context=None): lineno = ex.lineno or 1 msg = ex.msg or str(ex) self.errors.append( - ( - f"show if: code has Python syntax error: {msg}", + _validator_error( + MessageCode.SHOW_IF_CODE_SYNTAX, lineno, + message=msg, ) ) else: - self.errors.append( - (f'show if dict must have either "variable" key or "code" key', 1) - ) + self.errors.append(_validator_error(MessageCode.SHOW_IF_DICT_KEYS)) class DAPythonVar: @@ -402,9 +607,9 @@ class DAPythonVar: def __init__(self, x): self.errors = [] if not isinstance(x, str): - self.errors = [(f"The python var needs to be a YAML string, is {x}", 1)] + self.errors = [_validator_error(MessageCode.PYTHON_VAR_TYPE, value=x)] elif " " in x and not space_in_str.search(x): - self.errors = [(f"The python var cannot have whitespace (is {x})", 1)] + self.errors = [_validator_error(MessageCode.PYTHON_VAR_WHITESPACE, value=x)] class DAType: @@ -420,7 +625,7 @@ def __init__(self, x): # The full typing desc of the var: TODO: how to use this? self.errors = [] if not (isinstance(x, list) or isinstance(x, dict)): - self.errors = [f"Objects block needs to be a list or a dict, is {x}"] + self.errors = [_validator_error(MessageCode.OBJECTS_BLOCK_TYPE, value=x)] # for entry in x: # ... # if not isinstance(x, Union[list[dict[DAPythonVar, DAType]], dict[DAPythonVar, DAType]]): @@ -428,6 +633,8 @@ def __init__(self, x): class DAFields: + object_field_keys = {"object", "object multiselect", "object radio"} + modifier_keys = { "code", "default", @@ -446,7 +653,6 @@ class DAFields: "disable if", "js enable if", "js disable if", - "__line__", } mako_keys = {"default", "hint", "label", "note"} @@ -454,37 +660,77 @@ class DAFields: js_modifier_keys = ("js show if", "js hide if", "js enable if", "js disable if") py_modifier_keys = ("show if", "hide if", "enable if", "disable if") + # Keys that are valid at the field-item level (i.e. inside a single field dict). + # When `fields` is written as a bare dict (single-field shorthand rather than a + # list), at least one of these keys must be present for it to look like a valid + # field descriptor rather than a malformed code-reference dict. + _field_item_keys = modifier_keys | { + "field", + "input type", + "note", + "html", + "raw html", + "address autocomplete", + "object", + "object multiselect", + "object radio", + "uncheck others", + "shuffle", + "required", + "read only", + "min", + "max", + "accept", + } + def __init__(self, x): self.errors = [] self.has_dynamic_fields_code = False if isinstance(x, dict): - if "code" not in x: - self.errors = [(f'fields dict must have "code" key, is {x}', 1)] + if "code" in x: + # Code-reference form: fields: {code: some_python_list_var} + if not isinstance(x.get("code"), str): + self.errors = [ + _validator_error( + MessageCode.FIELDS_CODE_TYPE, + value_type=type(x.get("code")).__name__, + ) + ] return - if not isinstance(x.get("code"), str): + if x.keys() & self._field_item_keys: self.errors = [ - ( - f'fields: code must be a YAML string, is {type(x.get("code")).__name__}', - 1, + _validator_error( + MessageCode.FIELDS_DICT_KEYS, + detail='This looks like a single field written as a dict; even one field must be written as a list item starting with "-".', ) ] + return + self.errors = [ + _validator_error( + MessageCode.FIELDS_DICT_KEYS, + detail=( + 'This dict is missing both a field entry and a "code" key: ' + f"{x}" + ), + ) + ] return if not isinstance(x, list): - self.errors = [(f"fields should be a list or dict, is {x}", 1)] + self.errors = [_validator_error(MessageCode.FIELDS_TYPE, value=x)] return self._validate_field_modifiers(x) def _line_for(self, field_item, code_line=1): - field_line = 1 - if isinstance(field_item, dict): - field_line = field_item.get("__line__", 1) - return field_line + max(code_line - 1, 0) + return _lc_line(field_item) + max(code_line - 1, 0) + + def _key_line_for(self, field_item, key, code_line=1): + return _lc_key_line(field_item, key) + max(code_line - 1, 0) def _extract_field_name(self, field_item): if not isinstance(field_item, dict): return None for key, value in field_item.items(): - if key in self.modifier_keys: + if key in self.modifier_keys: # pragma: no branch continue if isinstance(value, str): return value @@ -494,9 +740,9 @@ def _validate_python_modifier( self, modifier_key, modifier_value, field_item, screen_variables ): def references_screen_variable(var_expr): - if not isinstance(var_expr, str): + if not isinstance(var_expr, str): # pragma: no cover return False - candidates = self._variable_candidates(var_expr) + candidates = _variable_candidates(var_expr) if any(candidate in screen_variables for candidate in candidates): return True # In generic-object screens, x. often aliases another object path @@ -520,26 +766,33 @@ def references_screen_variable(var_expr): ref_var = modifier_value.get("variable") if not isinstance(ref_var, str): self.errors.append( - ( - f"{modifier_key}: variable must be a string, got {type(ref_var).__name__}", + _validator_error( + MessageCode.FIELD_MODIFIER_VARIABLE_TYPE, self._line_for(field_item), + modifier_key=modifier_key, + value_type=type(ref_var).__name__, ) ) elif not references_screen_variable(ref_var): self.errors.append( - ( - f"{modifier_key}: variable: {ref_var} is not defined on this screen. Use {modifier_key}: {{ code: ... }} instead for variables from previous screens", + _validator_error( + MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_DICT, self._line_for(field_item), + modifier_key=modifier_key, + ref_var=ref_var, ) ) elif "code" in modifier_value: code_text = modifier_value.get("code") validator = PythonText(code_text) for err in validator.errors: + err_msg, err_line, _err_code = _normalize_validator_error(err) self.errors.append( - ( - f"{modifier_key}: code has {err[0].lower()}", - self._line_for(field_item, err[1]), + _validator_error( + MessageCode.FIELD_MODIFIER_CODE_ERROR, + self._line_for(field_item, err_line), + modifier_key=modifier_key, + error=err_msg.lower(), ) ) if ( @@ -553,24 +806,31 @@ def references_screen_variable(var_expr): if same_screen_refs: refs = ", ".join(sorted(same_screen_refs)) self.errors.append( - ( - f"{modifier_key}: code references variable(s) defined on this screen ({refs}). Use {modifier_key}: or {modifier_key}: {{ variable: , is: ... }} instead", + _validator_error( + MessageCode.FIELD_MODIFIER_SAME_SCREEN_CODE, self._line_for(field_item), + modifier_key=modifier_key, + references=refs, ) ) else: self.errors.append( - ( - f'{modifier_key} dict must have either "variable" or "code"', + _validator_error( + MessageCode.FIELD_MODIFIER_DICT_KEYS, self._line_for(field_item), + modifier_key=modifier_key, ) ) - elif isinstance(modifier_value, str) and ":" not in modifier_value: + elif ( + isinstance(modifier_value, str) and ":" not in modifier_value + ): # pragma: no branch if not references_screen_variable(modifier_value): self.errors.append( - ( - f"{modifier_key}: {modifier_value} is not defined on this screen. Use {modifier_key}: {{ code: ... }} instead for variables from previous screens", + _validator_error( + MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_STRING, self._line_for(field_item), + modifier_key=modifier_key, + modifier_value=modifier_value, ) ) @@ -591,12 +851,32 @@ def _find_screen_variable_references_in_code(self, code_text, screen_variables): matches.add(screen_var) return matches + def _validate_object_field_choices(self, field_item): + datatype = field_item.get("datatype") + is_object_style_field = ( + isinstance(datatype, str) and datatype.lower().startswith("object") + ) or any(key in field_item for key in self.object_field_keys) + if not is_object_style_field: + return + + choices_value = field_item.get("choices") + if isinstance(choices_value, Mapping) and ( + set(choices_value.keys()) - {"__line__"} == {"code"} + ): + self.errors.append( + _validator_error( + MessageCode.OBJECT_FIELD_CHOICES_CODE_DICT, + self._key_line_for(field_item, "choices"), + ) + ) + def _validate_field_modifiers(self, fields_list): - self.has_dynamic_fields_code = any( # looking for any example in the field list. Note that there can be `code` and traditional non-code mixed in the same field list + self.has_dynamic_fields_code = any( # pragma: no branch + # looking for any example in the field list. Note that there can be `code` and traditional non-code mixed in the same field list isinstance(field_item, dict) and "code" in field_item and len(set(field_item.keys()) - {"code", "__line__"}) - == 0 # "code" is the only key other than __line__, so this is a dynamic fields block + == 0 # "code" is the only key, so this is a dynamic fields block for field_item in fields_list ) screen_variables = set() @@ -609,6 +889,20 @@ def _validate_field_modifiers(self, fields_list): if not isinstance(field_item, dict): continue + self._validate_object_field_choices(field_item) + + if "accept" in field_item: + validator = AcceptFieldValue(field_item["accept"]) + for err in validator.errors: + err_msg, err_line, err_code = _normalize_validator_error(err) + self.errors.append( + ( + err_msg, + self._key_line_for(field_item, "accept", err_line), + err_code, + ) + ) + for field_key in field_item: if isinstance(field_key, str) and field_key != "__line__": if ( @@ -619,15 +913,20 @@ def _validate_field_modifiers(self, fields_list): ( f'Invalid field key "{field_key}". docassemble field modifier keys are case-sensitive; use "{field_key.lower()}"', self._line_for(field_item), + MessageCode.FIELD_MODIFIER_DICT_KEYS, ) ) if field_key in self.mako_keys: the_mako = MakoText(str(field_item[field_key])) for err in the_mako.errors: + err_msg, err_line, err_code = _normalize_validator_error( + err + ) self.errors.append( ( - f"{field_key} value has {err[0]}", - self._line_for(field_item, err[1]), + f"{field_key} value has {err_msg}", + self._line_for(field_item, err_line), + err_code, ) ) @@ -637,20 +936,12 @@ def _validate_field_modifiers(self, fields_list): field_item[js_key], modifier_key=js_key, screen_variables=screen_variables, + has_dynamic_fields=self.has_dynamic_fields_code, ) for err in validator.errors: - err_msg = err[0] - if ( - self.has_dynamic_fields_code - and "not defined on this screen" in err_msg.lower() - ): - err_msg = ( - "Warning: " - + err_msg - + " (unable to fully validate screen variables because this screen uses fields: code)" - ) + err_msg, err_line, err_code = _normalize_validator_error(err) self.errors.append( - (err_msg, self._line_for(field_item, err[1])) + (err_msg, self._line_for(field_item, err_line), err_code) ) for py_key in self.py_modifier_keys: @@ -659,27 +950,6 @@ def _validate_field_modifiers(self, fields_list): py_key, field_item[py_key], field_item, screen_variables ) - def _variable_candidates(self, var_expr): - expr = var_expr.strip() - candidates = {expr} - if "." in expr: - parts = expr.split(".") - for i in range(len(parts), 0, -1): - candidates.add(".".join(parts[:i])) - expanded = set() - for candidate in candidates: - candidate = candidate.strip() - if not candidate: - continue - expanded.add(candidate) - # Accept both full indexed paths and their base paths, e.g.: - # children[i].parents["Other"] -> children[i].parents - while candidate.endswith("]") and "[" in candidate: - candidate = candidate[: candidate.rfind("[")].strip() - if candidate: - expanded.add(candidate) - return expanded - # type notes what the value for that dictionary key is, @@ -752,6 +1022,9 @@ def _variable_candidates(self, var_expr): "default validation messages": {}, "machine learning storage": {}, "scan for variables": {}, + "show if": { + "type": ShowIf, + }, "if": {}, "sets": {}, "initial": {}, @@ -773,7 +1046,6 @@ def _variable_candidates(self, var_expr): "on change": {}, "image sets": {}, "images": {}, - "interview help": {}, "continue button field": { "type": DAPythonVar, }, @@ -1111,21 +1383,37 @@ def __init__( line_number: int, file_name: str, experimental: bool = True, + code: str | None = None, ): self.err_str = err_str self.line_number = line_number self.file_name = file_name self.experimental = experimental - pass + self.code = code + + @property + def severity(self) -> Literal["error", "warning", "convention"]: + if self.code is not None: + return _message_severity(self.code) + lowered = self.err_str.lower() + if lowered.startswith("info:"): + return "convention" + if lowered.startswith("warning:"): + return "warning" + return "error" def __str__(self): - if not self.experimental: - return f"REAL ERROR: At {self.file_name}:{self.line_number}: {self.err_str}" - return f"At {self.file_name}:{self.line_number}: {self.err_str}" + return self.format() + + def format(self, *, show_experimental: bool = True) -> str: + code_prefix = f"[{self.code}] " if self.code else "" + if not self.experimental and show_experimental: + return f"REAL ERROR: {code_prefix}At {self.file_name}:{self.line_number}: {self.err_str}" + return f"{code_prefix}At {self.file_name}:{self.line_number}: {self.err_str}" def _make_yaml_parser() -> YAML: - yaml = YAML() + yaml = _RuamelYAML() yaml.allow_duplicate_keys = False return yaml @@ -1147,6 +1435,38 @@ def _normalize_expr(expr: str) -> str: return normalized.replace('"', "'") +def _lc_line(obj: Any) -> int: + """Return a 1-indexed line number from a ruamel.yaml object's position metadata. + + Falls back to 1 when no position data is available (e.g. plain dicts in tests). + """ + lc = getattr(obj, "lc", None) + if lc is not None: + line = getattr(lc, "line", None) + if line is not None: + return line + 1 + return 1 + + +def _lc_key_line(obj: Any, key: Any) -> int: + """Return a 1-indexed line number for a mapping key when available.""" + lc = getattr(obj, "lc", None) + if lc is not None: + key_getter = getattr(lc, "key", None) + if callable(key_getter): + try: + line_info = key_getter(key) + except (AttributeError, KeyError, TypeError): + line_info = None + if isinstance(line_info, tuple) and len(line_info) >= 1: + line = line_info[0] + if isinstance(line, int): + return line + 1 + # If key-specific location data is unavailable (e.g. no metadata for this key, + # unexpected type/shape, or non-integer line), fall back to the object's line. + return _lc_line(obj) + + def _contains_interview_order_marker(value: Any) -> bool: if isinstance(value, str): lowered = value.lower() @@ -1173,7 +1493,7 @@ def _extract_field_var_name(field_item: Any) -> Optional[str]: return None modifier_keys = DAFields.modifier_keys for key, value in field_item.items(): - if key in modifier_keys: + if key in modifier_keys: # pragma: no branch continue if isinstance(value, str): return value @@ -1307,7 +1627,7 @@ def _extract_conditional_fields_from_doc( { "field_var": field_var, "guards": guards, - "line_number": line_number + field_item.get("__line__", 1), + "line_number": line_number + _lc_line(field_item), } ) return conditional_fields @@ -1483,6 +1803,34 @@ def find_errors_from_string( if not input_file: input_file = "" + # Pre-process Jinja2 templates before YAML parsing only when the file + # explicitly opts in with '# use jinja' on the first line. + if _has_jinja_header(full_content): + rendered, render_errors = preprocess_jinja(full_content) + if render_errors: + return [ + _yaml_error( + code=e.code, + line_number=1, + file_name=input_file, + err_str=e.message, + ) + for e in render_errors + ] + # Strip the '# use jinja' header from the rendered output so the + # recursive call does not re-enter this branch. Add 1 to every + # returned line number to compensate for the removed header line. + _, _sep, rendered_body = rendered.partition("\n") + errors = find_errors_from_string( + rendered_body, + input_file=input_file, + lint_mode=lint_mode, + runtime_options=runtime_options, + ) + for err in errors: + err.line_number += 1 + return errors + exclusive_keys = [ key for key in types_of_blocks.keys() @@ -1492,25 +1840,52 @@ def find_errors_from_string( prior_conditional_fields: list[dict[str, Any]] = [] line_number = 1 for source_code in document_match.split(full_content): - lines_in_code = sum(l == "\n" for l in source_code) + lines_in_code = sum(source_line == "\n" for source_line in source_code) source_code = remove_trailing_dots.sub("", source_code) source_code = fix_tabs.sub(" ", source_code) try: doc = _with_line_metadata(yaml_parser.load(source_code)) except Exception as errMess: - if isinstance(errMess, MarkedYAMLError): + if isinstance(errMess, DuplicateKeyError): + # Extract just the key name from ruamel's verbose problem string: + # 'found duplicate key "foo" with value ... (original value: ...)' + key_match = re.match( + r'found duplicate key "([^"]+)"', errMess.problem or "" + ) + key_name = key_match.group(1) if key_match else "unknown" + dup_line = line_number + if errMess.problem_mark is not None: + dup_line = line_number + errMess.problem_mark.line + all_errors.append( + _yaml_error( + code=MessageCode.YAML_DUPLICATE_KEY, + line_number=dup_line, + file_name=input_file, + key_name=key_name, + ) + ) + elif isinstance(errMess, MarkedYAMLError): if errMess.context_mark is not None: errMess.context_mark.line += line_number - 1 if errMess.problem_mark is not None: errMess.problem_mark.line += line_number - 1 - all_errors.append( - YAMLError( - err_str=str(errMess), - line_number=line_number, - file_name=input_file, - experimental=False, + all_errors.append( + _yaml_error( + code=MessageCode.YAML_PARSE_ERROR, + line_number=line_number, + file_name=input_file, + error=str(errMess), + ) + ) + else: + all_errors.append( + _yaml_error( + code=MessageCode.YAML_PARSE_ERROR, + line_number=line_number, + file_name=input_file, + error=str(errMess), + ) ) - ) line_number += lines_in_code continue @@ -1536,6 +1911,8 @@ def find_errors_from_string( err_str=finding.message, line_number=finding.line_number, file_name=input_file, + experimental=is_experimental_code(finding.code), + code=finding.code, ) ) @@ -1557,13 +1934,11 @@ def find_errors_from_string( ] if len(any_types) == 0: all_errors.append( - YAMLError( - err_str=( - "Couldn't identify a block type: no valid combination of keys found " - "(looking for keys like question, include, metadata, code, objects, etc. See https://docassemble.org/docs.html)" - ), + _yaml_error( + code=MessageCode.NO_POSSIBLE_TYPES, line_number=line_number, file_name=input_file, + document=doc, ) ) posb_types = [block for block in exclusive_keys if block in doc_keys_lower] @@ -1574,10 +1949,11 @@ def find_errors_from_string( pass else: all_errors.append( - YAMLError( - err_str=f"Too many types this block could be: {posb_types}", + _yaml_error( + code=MessageCode.TOO_MANY_TYPES, line_number=line_number, file_name=input_file, + possible_types=posb_types, ) ) @@ -1592,11 +1968,11 @@ def find_errors_from_string( weird_keys.append(attr) if len(weird_keys) > 0: all_errors.append( - YAMLError( - err_str=f"Keys that shouldn't exist! {weird_keys}", + _yaml_error( + code=MessageCode.UNKNOWN_KEYS, line_number=line_number, file_name=input_file, - experimental=False, + keys=weird_keys, ) ) for key in doc.keys(): @@ -1606,10 +1982,12 @@ def find_errors_from_string( if lower_key in big_dict and "type" in big_dict[lower_key]: test = big_dict[lower_key]["type"](doc[key]) for err in test.errors: + err_msg, err_line, err_code = _normalize_validator_error(err) all_errors.append( - YAMLError( - err_str=f"{err[0]}", - line_number=err[1] + doc["__line__"] + line_number, + _yaml_error( + code=err_code, + err_str=err_msg, + line_number=err_line + _lc_line(doc) + line_number, file_name=input_file, ) ) @@ -1619,26 +1997,22 @@ def find_errors_from_string( ) for field_var, ref_line in unmatched_refs: all_errors.append( - YAMLError( - err_str=( - f'interview-order style block references "{field_var}" without a matching guard ' - f"for that field's show/hide logic; this can cause the interview to get stuck" - ), - line_number=doc["__line__"] + line_number + ref_line, + _yaml_error( + code=MessageCode.INTERVIEW_ORDER_UNMATCHED_GUARD, + line_number=_lc_line(doc) + line_number + ref_line, file_name=input_file, + field_var=field_var, ) ) nesting_depth = _max_screen_visibility_nesting_depth(doc) if nesting_depth > 2: all_errors.append( - YAMLError( - err_str=( - f"Warning: show if/hide if visibility logic is nested {nesting_depth} levels " - "on this screen (more than 2)" - ), - line_number=doc["__line__"] + line_number, + _yaml_error( + code=MessageCode.NESTED_VISIBILITY_LOGIC, + line_number=_lc_line(doc) + line_number, file_name=input_file, + depth=nesting_depth, ) ) @@ -1657,8 +2031,9 @@ def find_errors( ) -> list[YAMLError]: """Return list of YAMLError found in the given input_file - If the file has Docassemble's optional Jinja2 preprocessor directive at the top, - it is ignored and an empty list is returned. + If the file starts with the ``# use jinja`` header, the content is + pre-processed through Jinja2 (with undefined variables rendered as empty + strings) and the rendered output is then validated as normal YAML. Args: input_file (str): Path to the YAML file to check. @@ -1666,14 +2041,9 @@ def find_errors( Returns: list[YAMLError]: List of YAMLError instances found in the file. """ - with open(input_file, "r") as f: + with open(input_file, "r", encoding="utf-8") as f: full_content = f.read() - if full_content[:12] == "# use jinja\n": - print() - print(f"Ah Jinja! ignoring {input_file}") - return [] - return find_errors_from_string( full_content, input_file=input_file, @@ -1682,31 +2052,36 @@ def find_errors( ) -def _collect_yaml_files( - paths: list[Path], include_default_ignores: bool = True -) -> list[Path]: - from dayamlchecker.code_formatter import _collect_yaml_files as _formatter_collect - - return _formatter_collect(paths, include_default_ignores=include_default_ignores) - - -def _message_level(err_str: str) -> str: - lowered = err_str.lower() - if lowered.startswith("info:"): - return "info" - if lowered.startswith("warning:"): - return "warning" - return "error" - - def process_file( input_file, + quiet: bool = False, + display_path: str | None = None, + show_experimental: bool = False, lint_mode: str = DEFAULT_LINT_MODE, runtime_options: Optional[RuntimeOptions] = None, -) -> int: - """ + ignore_codes: frozenset[str] = frozenset(), + format_on_success: bool = False, + formatter_config: FormatterConfig | None = None, + ok_reporter: Callable[[], None] | None = None, + line_reporter: Callable[[str], None] | None = None, +) -> Literal["ok", "warning", "error", "skipped"]: + """Process a single file and report its validation status. + + Args: + input_file: Path to the YAML file to check. + quiet: If True, suppress output for successful and skipped files. + Errors are still printed. + display_path: Optional path string to use in output instead of the + full ``input_file`` path (e.g. a relative path). + show_experimental: If True, prefix non-experimental errors with + ``REAL ERROR:``. The default is False. Returns: - int: the number of errors found in the input_file + A string indicating the result of processing: + - "ok": The file was checked and no errors were found. + - "warning": The file was checked and only warnings/conventions were found. + - "error": The file was checked and one or more errors were found. + - "skipped": The file was not checked because it matches a known + pattern of files to ignore. """ for dumb_da_file in [ "pgcodecache.yml", @@ -1717,52 +2092,111 @@ def process_file( "examples.yml", ]: if input_file.endswith(dumb_da_file): - print() - print(f"ignoring {dumb_da_file}") - return 0 + if not quiet: + message = f"skipped: {display_path or input_file}" + if line_reporter is not None: + line_reporter(message) + else: + print(message) + return "skipped" - all_errors = find_errors( - input_file, lint_mode=lint_mode, runtime_options=runtime_options + with open(input_file, "r", encoding="utf-8") as f: + full_content = f.read() + + is_jinja = _has_jinja_header(full_content) + + all_errors = find_errors_from_string( + full_content, + input_file=display_path or input_file, + lint_mode=lint_mode, + runtime_options=runtime_options, ) + all_errors = [ + err + for err in all_errors + if err.code is None or err.code.upper() not in ignore_codes + ] - if len(all_errors) == 0: - print(".", end="") - return 0 - error_count = 0 - warning_count = 0 - info_count = 0 - for err in all_errors: - level = _message_level(err.err_str) - if level == "info": - info_count += 1 - elif level == "warning": - warning_count += 1 - else: - error_count += 1 - blocking_count = error_count + warning_count - print() - if info_count > 0: - print( - f"Found {len(all_errors)} issues ({error_count} errors, {warning_count} warnings, {info_count} infos):" + error_findings = [err for err in all_errors if err.severity == "error"] + warning_findings = [err for err in all_errors if err.severity == "warning"] + convention_findings = [err for err in all_errors if err.severity == "convention"] + + reformatted = False + if format_on_success and not error_findings: + formatted, changed = format_yaml_string( + full_content, + config=formatter_config, ) - elif warning_count > 0: - print( - f"Found {len(all_errors)} issues ({error_count} errors, {warning_count} warnings):" + if changed: + with open(input_file, "w", encoding="utf-8") as f: + f.write(formatted) + reformatted = True + + if len(all_errors) == 0: + if not quiet: + if reformatted: + message = f"reformatted: {display_path or input_file}" + if line_reporter is not None: + line_reporter(message) + else: + print(message) + elif ok_reporter is not None: + ok_reporter() + else: + label = "ok (jinja)" if is_jinja else "ok" + print(f"{label}: {display_path or input_file}") + return "ok" + + jinja_note = " (jinja)" if is_jinja else "" + + if error_findings: + header = ( + f"errors ({len(error_findings)}){jinja_note}: {display_path or input_file}" ) - else: - print(f"Found {len(all_errors)} errors:") - for err in all_errors: - print(f"{err}") - return blocking_count + if line_reporter is not None: + line_reporter(header) + else: + print(header) + for err in error_findings: + print(f" {err.format(show_experimental=show_experimental)}") + + if not quiet and warning_findings: + header = f"warnings ({len(warning_findings)}){jinja_note}: {display_path or input_file}" + if line_reporter is not None: + line_reporter(header) + else: + print(header) + for err in warning_findings: + print(f" {err.format(show_experimental=show_experimental)}") + + if not quiet and convention_findings: + header = f"conventions ({len(convention_findings)}){jinja_note}: {display_path or input_file}" + if line_reporter is not None: + line_reporter(header) + else: + print(header) + for err in convention_findings: + print(f" {err.format(show_experimental=show_experimental)}") + + if reformatted and not quiet: + message = f"reformatted: {display_path or input_file}" + if line_reporter is not None: + line_reporter(message) + else: + print(message) + if error_findings: + return "error" + return "warning" -def main(argv: Optional[list[str]] = None) -> int: + +def _build_arg_parser(*, require_files: bool = True) -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description="Validate Docassemble YAML files", ) parser.add_argument( "files", - nargs="+", + nargs="+" if require_files else "*", type=Path, help="YAML files or directories to validate (directories are searched recursively)", ) @@ -1774,6 +2208,35 @@ def main(argv: Optional[list[str]] = None) -> int: "(.git*, .github*, build, dist, node_modules, sources)" ), ) + parser.add_argument( + "-q", + "--quiet", + action="store_true", + help="Suppress all output except errors", + ) + parser.add_argument( + "--no-summary", + action="store_true", + help="Do not print the summary line after processing", + ) + parser.add_argument( + "--format-on-success", + action="store_true", + help="Format files that pass YAML validation before running URL checks", + ) + parser.add_argument( + "--ignore-codes", + default="", + help=( + "Comma-separated diagnostic codes to suppress, " 'for example: "E410,E301"' + ), + ) + parser.add_argument( + "--show-experimental", + action=argparse.BooleanOptionalAction, + default=False, + help='Prefix non-experimental errors with "REAL ERROR:" (default: off)', + ) parser.add_argument( "--no-wcag", dest="wcag", @@ -1860,9 +2323,25 @@ def main(argv: Optional[list[str]] = None) -> int: default="warning", help="How to report URLs that could not be reached at all (default: warning)", ) - args = parser.parse_args(argv) + return parser + + +def main(argv: list[str] | None = None) -> int: + raw_argv = sys.argv[1:] if argv is None else argv + bootstrap_parser = _build_arg_parser(require_files=False) + bootstrap_args, _ = bootstrap_parser.parse_known_args(raw_argv) + bootstrap_files = bootstrap_args.files or [Path.cwd()] + config_cli_args = _collect_dayaml_cli_args(bootstrap_files) + + parser = _build_arg_parser(require_files=False) + args = parser.parse_args([*config_cli_args, *raw_argv]) + if not args.files: + args.files = [Path.cwd()] lint_mode = ACCESSIBILITY_LINT_MODE if args.wcag else DEFAULT_LINT_MODE + ignore_codes = _collect_dayaml_ignore_codes(args.files) | parse_ignore_codes( + args.ignore_codes + ) runtime_options = RuntimeOptions( accessibility_error_on_widgets=frozenset( widget.strip().lower() @@ -1870,6 +2349,17 @@ def main(argv: Optional[list[str]] = None) -> int: if widget.strip() ) ) + formatter_config = FormatterConfig() if args.format_on_success else None + + cwd = Path.cwd().resolve() + + def _display(file_path: Path) -> Path: + resolved = file_path.resolve() + try: + return resolved.relative_to(cwd) + except ValueError: + pass + return resolved yaml_files = _collect_yaml_files( args.files, include_default_ignores=not args.check_all @@ -1878,16 +2368,36 @@ def main(argv: Optional[list[str]] = None) -> int: print("No YAML files found.", file=sys.stderr) return 1 - failed = False + files_ok = 0 + files_warning = 0 + files_error = 0 + files_skipped = 0 + progress = _ProgressOutput() if not args.quiet else None + for input_file in yaml_files: - error_count = process_file( + status = process_file( str(input_file), + quiet=args.quiet, + display_path=str(_display(input_file)), + show_experimental=args.show_experimental, lint_mode=lint_mode, runtime_options=runtime_options, + ignore_codes=ignore_codes, + format_on_success=args.format_on_success, + formatter_config=formatter_config, + ok_reporter=progress.dot if progress is not None else None, + line_reporter=progress.line if progress is not None else None, ) - if error_count > 0: - failed = True + if status == "ok": + files_ok += 1 + elif status == "warning": + files_warning += 1 + elif status == "error": + files_error += 1 + else: + files_skipped += 1 + url_check_failed = False if args.url_check: url_check_root = ( args.url_check_root.resolve() @@ -1905,12 +2415,23 @@ def main(argv: Optional[list[str]] = None) -> int: document_severity=args.document_url_severity, unreachable_severity=args.unreachable_url_severity, ) - print_url_check_report(url_check_result) + if not args.quiet: + cast(_ProgressOutput, progress).finish() + print_url_check_report(url_check_result) if url_check_result.has_errors(): - failed = True + url_check_failed = True + + if not args.quiet and not args.no_summary: + cast(_ProgressOutput, progress).finish() + total = files_ok + files_warning + files_error + files_skipped + print( + f"Summary: {files_ok} ok, {files_warning} warnings, {files_error} errors, {files_skipped} skipped ({total} total)" + ) + elif progress is not None: + progress.finish() - return 1 if failed else 0 + return 1 if files_error > 0 or url_check_failed else 0 -if __name__ == "__main__": +if __name__ == "__main__": # pragma: no cover sys.exit(main()) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..3407aec --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Test package markers for shared test helpers.""" diff --git a/tests/_cli_helpers.py b/tests/_cli_helpers.py new file mode 100644 index 0000000..c93bf58 --- /dev/null +++ b/tests/_cli_helpers.py @@ -0,0 +1,10 @@ +from dataclasses import dataclass + + +@dataclass +class RunResult: + """Shared helper capturing CLI invocation results for tests.""" + + returncode: int + stdout: str + stderr: str diff --git a/tests/fixtures/large_invalid_interview.yml b/tests/fixtures/large_invalid_interview.yml new file mode 100644 index 0000000..a0b5c07 --- /dev/null +++ b/tests/fixtures/large_invalid_interview.yml @@ -0,0 +1,282 @@ +# Intentionally broken docassemble interview fixture. +# +# This file covers all currently non-Jinja message codes from the registry. +# Each document has an id so the expected error code is obvious at a glance. + +--- +id: E301_unknown_keys +question: | + Fixture screen with unknown keys. +field: user_name +not_a_real_key: true +another_bad_key: nope + +--- +id: E101_yaml_duplicate_key +question: | + First version. +question: | + Duplicate version. + +--- +id: E102_yaml_parse_error +question: Hello +field: x +bad: [unclosed + +--- +id: W101_yaml_string_type +ga id: + - not + - a string +question: | + Non-string ga id should trigger YAMLStr validation. +field: ga_id_value + +--- +id: W111_mako_syntax_error +question: Hello +subquestion: | + % endfor +field: x + +--- +id: W112_mako_compile_error +question: Hello +subquestion: | + % if True +field: x + +--- +id: W121_python_code_type +code: + - some_list_item + +--- +id: W122_python_syntax_error +code: | + if True + x = 1 + +--- +id: W201_js_modifier_type +question: Test +fields: + - A: a + - B: b + js show if: + - nope + +--- +id: W204_js_unknown_screen_field +question: Test +fields: + - A: a + - B: b + js show if: | + val("missing_field") === true + +--- +id: W301_show_if_malformed +question: Test +field: top_level_show_if_malformed +show if: variable:a + +--- +id: W302_show_if_code_type +question: Test +field: top_level_show_if_code_type +show if: + code: + - a + - b + +--- +id: W303_show_if_code_syntax +question: Test +field: top_level_show_if_code_syntax +show if: + code: | + if True + x = 1 + +--- +id: W304_show_if_dict_keys +question: Test +field: top_level_show_if_dict_keys +show if: + unknown_key: something + +--- +id: W401_python_var_type +def: 123 +code: | + x = 1 + +--- +id: W402_python_var_whitespace +question: | + Fixture screen with a top-level field name containing whitespace. +field: some var name + +--- +id: W403_invalid_objects_block +objects: "MyObj: DAObject" + +--- +id: W404_fields_code_type +question: | + Fixture screen with a non-string fields: code reference. +fields: + code: 123 + +--- +id: W405_invalid_fields_dict +question: | + Fixture screen with an invalid fields dict. +fields: + not_code: value + +--- +id: W406_fields_scalar +question: | + Fixture screen with scalar fields. +fields: nope + +--- +id: W407_field_modifier_variable_type +question: Test +fields: + - A: a + - B: b + show if: + variable: + - a + is: yes + +--- +id: W408_field_modifier_unknown_variable_dict +question: Test +fields: + - A: a + - B: b + show if: + variable: prior_var + is: yes + +--- +id: W409_field_modifier_code_error +question: Test +fields: + - A: a + - B: b + show if: + code: | + if True + x = 1 + +--- +id: W410_field_modifier_same_screen_code +question: Test +fields: + - A: a + - B: b + show if: + code: | + a == 1 + +--- +id: W411_field_modifier_dict_keys +question: | + Fixture screen with malformed field metadata. +fields: + - Name: user name + datatype: text + - Follow up: follow_up + show if: + unknown_key: something + +--- +id: W412_field_modifier_unknown_variable_string +question: Test +fields: + - A: a + - B: b + show if: prior_var + +--- +id: W413_object_field_choices_code_dict +question: | + Choose enclosures +fields: + - no label: s3_enclosure_object.selected_enclosures + datatype: object_checkboxes + choices: + code: | + s3_enclosure_object.choices() + +--- +id: W601_no_possible_types +buttons: + - continue: true + +--- +id: W602_too_many_types +question: hi +code: | + x = 1 + +--- +id: W603_interview_order_source +question: Test +fields: + - First: a + datatype: yesnoradio + - Second: b + show if: a + +--- +id: W603_interview_order_unmatched_guard +mandatory: True +code: | + a + b + +--- +id: W604_W202_W203_W205_nested_js_visibility +question: | + Fixture screen with nested visibility and JavaScript modifier issues. +fields: + - Fruit: fruit + datatype: yesnoradio + - Vegetable: vegetable + datatype: yesnoradio + show if: fruit + - Grain: grain + datatype: yesnoradio + show if: vegetable + - Dairy: dairy + datatype: yesnoradio + show if: grain + - Broken JS syntax: broken_js_syntax + js show if: | + (val("fruit") === "apple" + - Missing val call: missing_val_call + js show if: | + true && false + - Unquoted val arg: unquoted_val + js show if: | + val(fruit) === "apple" + +--- +id: C101_missing_validation_error +question: | + Fixture screen with validation code that never calls validation_error(). +fields: + - Apples: number_apples + datatype: integer + - Oranges: number_oranges + datatype: integer +validation code: | + if number_apples + number_oranges != 10: + raise Exception("Bad total") diff --git a/tests/fixtures/large_invalid_jinja_syntax.yml b/tests/fixtures/large_invalid_jinja_syntax.yml new file mode 100644 index 0000000..ef03d91 --- /dev/null +++ b/tests/fixtures/large_invalid_jinja_syntax.yml @@ -0,0 +1,4 @@ +# use jinja +--- +id: E201_jinja_syntax_error +question: {% if %} diff --git a/tests/fixtures/large_invalid_jinja_template.yml b/tests/fixtures/large_invalid_jinja_template.yml new file mode 100644 index 0000000..e0d011a --- /dev/null +++ b/tests/fixtures/large_invalid_jinja_template.yml @@ -0,0 +1,4 @@ +# use jinja +--- +id: E202_jinja_template_error +question: {{ [1]|map("missing_filter")|list }} diff --git a/tests/fixtures/large_valid_interview.yml b/tests/fixtures/large_valid_interview.yml new file mode 100644 index 0000000..3a60e34 --- /dev/null +++ b/tests/fixtures/large_valid_interview.yml @@ -0,0 +1,68 @@ +# Intentionally valid docassemble interview fixture. +# +# Each block has an id so it is easy to refer to a specific section while +# reviewing validator behavior. All of these blocks should produce no errors. + +--- +id: VALID_simple_question +question: | + What is your full name? +field: user_name + +--- +id: VALID_fields_with_show_if +question: | + Tell us about your household. +fields: + - Do you have children?: has_children + datatype: yesnoradio + - How many children?: child_count + datatype: integer + show if: has_children + - Do you have pets?: has_pets + datatype: yesnoradio + - What kind of pets?: pet_types + datatype: text + show if: has_pets + +--- +id: VALID_js_show_if +question: | + Tell us about your transportation. +fields: + - Do you have a car?: has_car + datatype: yesnoradio + - Car make: car_make + js show if: | + val("has_car") === true + - Do you bike to work?: bikes_to_work + datatype: yesnoradio + - Bike brand: bike_brand + js show if: | + val("bikes_to_work") === true + +--- +id: VALID_objects_block +objects: + - client: Individual + - household: DAList + +--- +id: VALID_fields_dict_code_reference +question: | + Dynamic fields example. +fields: + code: generated_question_fields + +--- +id: VALID_validation_code +question: | + There should be ten fruit total. +fields: + - Apples: number_apples + datatype: integer + - Oranges: number_oranges + datatype: integer +validation code: | + if number_apples + number_oranges != 10: + validation_error("The numbers must add up to 10!") diff --git a/tests/test_accessibility_helpers.py b/tests/test_accessibility_helpers.py new file mode 100644 index 0000000..87b0865 --- /dev/null +++ b/tests/test_accessibility_helpers.py @@ -0,0 +1,295 @@ +from pathlib import Path +from unittest.mock import patch + +import pytest + +import dayamlchecker.accessibility as accessibility + + +def test_find_accessibility_findings_deduplicates_identical_results( + monkeypatch: pytest.MonkeyPatch, +) -> None: + finding = accessibility.AccessibilityFinding( + rule_id="duplicate-rule", + code="W799", + message="Accessibility: duplicate", + line_number=3, + ) + monkeypatch.setattr( + accessibility, + "_check_multifield_no_label_usage", + lambda doc, document_start_line: [finding, finding], + ) + monkeypatch.setattr( + accessibility, "_check_combobox_usage", lambda *args, **kwargs: [] + ) + monkeypatch.setattr(accessibility, "_check_tagged_pdf_for_docx", lambda *args: []) + monkeypatch.setattr( + accessibility, "_check_theme_css_contrast", lambda *args, **kwargs: [] + ) + monkeypatch.setattr(accessibility, "_iter_text_sections", lambda *args: []) + + assert accessibility.find_accessibility_findings( + doc={}, source_code="", document_start_line=1 + ) == [finding] + + +def test_combobox_check_skips_non_combobox_fields_when_enabled() -> None: + findings = accessibility._check_combobox_usage( + {"fields": [{"Name": "user_name", "datatype": "text"}]}, + "question: Hi\nfields:\n - Name: user_name\n", + 1, + options=accessibility.AccessibilityLintOptions( + error_on_widgets=frozenset({"combobox"}) + ), + ) + + assert findings == [] + + +def test_tagged_pdf_attachment_edge_cases() -> None: + dict_attachment = accessibility._check_tagged_pdf_for_docx( + {"attachments": {"template file": "letter.docx"}}, + "attachments:\n template file: letter.docx\n", + 1, + ) + skipped_attachments = accessibility._check_tagged_pdf_for_docx( + { + "attachments": [ + "not a dict", + {"name": "No template"}, + {"docx template file": "letter.docx", "tagged pdf": "yes"}, + ] + }, + "attachments: []\n", + 1, + ) + + assert len(dict_attachment) == 1 + assert skipped_attachments == [] + + +def test_text_sections_and_accessible_media_are_skipped() -> None: + source = """question: | + ![Logo](logo.png) + [FILE logo.png, 100px, Logo] + Logo +under: + content: +help: + label: Help text +""" + sections = accessibility._iter_text_sections( + { + "question": '![Logo](logo.png)\n[FILE logo.png, 100px, Logo]\nLogo', + "under": {"content": ''}, + "help": {"label": "Help text"}, + }, + source, + ) + + assert [section.location for section in sections] == [ + "question", + "under.content", + "help.label", + ] + assert accessibility._check_missing_alt_text(sections[0], source, 1) == [] + assert accessibility._check_empty_link_text(sections[1], source, 1) == [] + + +def test_line_field_truthy_and_attachment_helpers() -> None: + assert accessibility._find_top_level_key_line("question: Hi\n", "missing") is None + assert accessibility._absolute_line_number("question: Hi\n", 10, 1, "absent") == 10 + assert accessibility._find_snippet_line("question: Hi\n", "") is None + assert accessibility._find_snippet_line("question: Hi\n", "absent") is None + assert accessibility._iter_fields({"fields": {"Name": "user_name"}}) == [ + {"Name": "user_name"} + ] + assert accessibility._extract_field_variable({"Name": "user_name"}) == "user_name" + assert ( + accessibility._extract_field_variable({"Age": 42, "Name": "user_name"}) + == "user_name" + ) + assert ( + accessibility._extract_field_variable({"no label": "court_county"}) + == "court_county" + ) + assert accessibility._extract_field_variable({"no label": True}) == "" + assert accessibility._extract_field_variable({"no label": "true"}) == "" + assert accessibility._extract_field_label({"label": "Explicit"}) == "Explicit" + assert accessibility._is_truthy(1) + assert not accessibility._is_truthy(0) + assert accessibility._is_truthy("yes") + assert not accessibility._is_truthy("no") + assert accessibility._attachment_uses_docx({"template": "letter.docx"}) + assert accessibility._attachment_uses_docx({"filename": "letter.docx"}) + assert not accessibility._attachment_uses_docx({"filename": "letter.pdf"}) + + +def test_bootstrap_theme_css_loader_paths_and_errors( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + theme = tmp_path / "theme.css" + theme.write_text("body { color: #000; background: #fff; }", encoding="utf-8") + interview = tmp_path / "questions" / "interview.yml" + interview.parent.mkdir() + relative_theme = interview.parent / "relative.css" + relative_theme.write_text( + "body { color: #111; background: #fff; }", encoding="utf-8" + ) + monkeypatch.chdir(tmp_path) + + assert accessibility._load_bootstrap_theme_css("", input_file=None) is None + assert ( + accessibility._load_bootstrap_theme_css( + "https://example.com/theme.css", input_file=None + ) + is None + ) + assert accessibility._load_bootstrap_theme_css( + str(theme), input_file=None + ) == theme.read_text(encoding="utf-8") + assert accessibility._load_bootstrap_theme_css( + "relative.css", input_file=str(interview) + ) == relative_theme.read_text(encoding="utf-8") + assert ( + accessibility._load_bootstrap_theme_css( + "missing.css", input_file=str(interview) + ) + is None + ) + + with patch.object(accessibility.Path, "exists", side_effect=OSError): + assert ( + accessibility._load_bootstrap_theme_css(str(theme), input_file=None) is None + ) + + +def test_theme_contrast_skips_missing_css_and_unmatched_components( + monkeypatch: pytest.MonkeyPatch, +) -> None: + missing_css = accessibility._check_theme_css_contrast( + {"features": {"bootstrap theme": "missing.css"}}, + "features:\n bootstrap theme: missing.css\n", + 1, + ) + assert missing_css == [] + + monkeypatch.setattr( + accessibility, + "_load_bootstrap_theme_css", + lambda *args, **kwargs: ".unrelated { color: #777; background: #777; }", + ) + unmatched = accessibility._check_theme_css_contrast( + {"features": {"bootstrap theme": "theme.css"}}, + "features:\n bootstrap theme: theme.css\n", + 1, + ) + assert unmatched == [] + + +def test_css_parsing_and_color_helpers() -> None: + selector_props, variables = accessibility._parse_css_rules( + ".empty { } :root { --fg: #abc; } .body { color: var(--fg); }" + ) + assert variables == {"--fg": "#abc"} + assert selector_props + assert accessibility._best_component_color_pair( + [ + (["body"], {"color": "#000"}), + (["body"], {"background-color": "#fff"}), + ], + {}, + ["body"], + ) == ((0.0, 0.0, 0.0), (1.0, 1.0, 1.0)) + assert ( + accessibility._best_component_color_pair( + [([".other"], {"color": "#000"})], {}, ["body"] + ) + is None + ) + assert accessibility._parse_css_declarations( + "color: #000; background: #fff; ignored" + ) == { + "color": "#000", + "background": "#fff", + } + assert accessibility._parse_css_declarations(": missing; color: #000") == { + "color": "#000" + } + + assert accessibility._resolve_css_color(None, {}) is None + assert accessibility._resolve_css_color("var(--missing, #fff)", {}) == ( + 1.0, + 1.0, + 1.0, + ) + assert accessibility._resolve_css_color("var(--missing)", {}) is None + assert accessibility._resolve_css_color("not-a-color", {}) is None + assert accessibility._resolve_css_color("#abc", {}) == pytest.approx( + (170 / 255, 187 / 255, 204 / 255) + ) + assert accessibility._resolve_css_color("rgb(10, 20, 30)", {}) == pytest.approx( + (10 / 255, 20 / 255, 30 / 255) + ) + assert accessibility._resolve_css_color("rgb(10, 20)", {}) is None + assert accessibility._resolve_css_color("rgb(nope, 20, 30)", {}) is None + assert accessibility._resolve_css_color("black", {}) == (0.0, 0.0, 0.0) + assert accessibility._resolve_css_color("white", {}) == (1.0, 1.0, 1.0) + assert accessibility._resolve_css_color("rebeccapurple", {}) is None + + assert accessibility._extract_color_token("#abc") == "#abc" + assert accessibility._extract_color_token("rgb(1, 2, 3)") == "rgb(1, 2, 3)" + assert accessibility._extract_color_token("var(--fg)") == "var(--fg)" + assert accessibility._extract_color_token("border: #abcdef,") == "#abcdef" + assert accessibility._extract_color_token("color rgb(4,5,6)") == "rgb(4,5,6)" + assert accessibility._extract_color_token("color black") == "black" + assert accessibility._extract_color_token("none") is None + + assert accessibility._parse_rgb_channel("50%") == 0.5 + assert accessibility._parse_rgb_channel("300") == 1.0 + assert accessibility._parse_rgb_channel("-5") == 0.0 + assert accessibility._relative_luminance((0.0, 0.0, 0.0)) == 0.0 + + +def test_extract_color_token_loop_rgb_branch(monkeypatch: pytest.MonkeyPatch) -> None: + real_rgb_re = accessibility._RGB_COLOR_RE + + class DelayedRgbRegex: + def __init__(self) -> None: + self.calls = 0 + + def search(self, value: str): + self.calls += 1 + if self.calls == 1: + return None + return real_rgb_re.search(value) + + monkeypatch.setattr(accessibility, "_RGB_COLOR_RE", DelayedRgbRegex()) + + assert accessibility._extract_color_token("color rgb(4,5,6)") == "rgb(4,5,6)" + + +def test_extract_links_from_text_includes_html_attributes() -> None: + links = accessibility._extract_links_from_text( + '[Label](https://example.com) Next' + ) + + assert links == [ + { + "kind": "markdown", + "text": "Label", + "target": "https://example.com", + "aria_label": "", + "title": "", + "snippet": "[Label](https://example.com)", + }, + { + "kind": "html", + "text": "Next", + "target": "/next", + "aria_label": "", + "title": "Next title", + "snippet": 'Next', + }, + ] diff --git a/tests/test_check_questions_urls.py b/tests/test_check_questions_urls.py index e9aabe9..28ab104 100644 --- a/tests/test_check_questions_urls.py +++ b/tests/test_check_questions_urls.py @@ -1,14 +1,35 @@ +import runpy +import sys +import threading from pathlib import Path +from types import SimpleNamespace from typing import cast +import pytest +import requests from linkify_it import LinkifyIt # type: ignore[attr-defined,import-untyped] from requests import Session import dayamlchecker.check_questions_urls as check_questions_urls from dayamlchecker.check_questions_urls import ( + URLCheckResult, + URLIssue, + URLSourceCollection, + build_session, check_urls, + collect_urls, + collect_urls_from_files, extract_text_from_pdf, + extract_text_from_docx, extract_urls_from_file, + infer_root, + iter_document_files, + iter_question_files, + parse_args, + parse_ignore_urls, + parse_url_token, + print_url_check_report, + run_url_check, ) @@ -232,6 +253,28 @@ def __init__(self, _: Path) -> None: ) +def test_extract_text_from_pdf_skips_empty_page_text( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + file_path = tmp_path / "example.pdf" + file_path.write_bytes(b"%PDF-1.4\n") + + class FakePage: + def __init__(self, text: str) -> None: + self.text = text + + def extract_text(self) -> str: + return self.text + + class FakeReader: + def __init__(self, _: Path) -> None: + self.pages = [FakePage(""), FakePage("Kept text")] + + monkeypatch.setattr(check_questions_urls, "PdfReader", FakeReader) + + assert extract_text_from_pdf(file_path) == "Kept text" + + def test_extract_wrapped_pdf_url_repairs_finds_joined_candidate() -> None: repairs = check_questions_urls._extract_wrapped_pdf_url_repairs( "Visit https://www.courts.michigan.gov/49752a/siteassets/forms/scao-\n" @@ -245,6 +288,21 @@ def test_extract_wrapped_pdf_url_repairs_finds_joined_candidate() -> None: } +def test_extract_wrapped_pdf_url_repairs_skips_non_repair_lines() -> None: + assert ( + check_questions_urls._extract_wrapped_pdf_url_repairs( + "Visit https://example.org/complete\nnext line" + ) + == {} + ) + assert ( + check_questions_urls._extract_wrapped_pdf_url_repairs( + "Visit https://example.org/path-\nhttps://example.org/other" + ) + == {} + ) + + def test_check_urls_tries_pdf_repair_candidates_after_dead_link() -> None: class FakeResponse: def __init__(self, status_code: int) -> None: @@ -286,3 +344,720 @@ def get(self, url: str, **kwargs) -> FakeResponse: "https://www.courts.michigan.gov/49752a/siteassets/forms/scao-", "https://www.courts.michigan.gov/49752a/siteassets/forms/scao-approved/dhs1201d.pdf", ] + + +def test_check_urls_runs_primary_requests_concurrently() -> None: + class FakeResponse: + status_code = 200 + + def __enter__(self) -> "FakeResponse": + return self + + def __exit__(self, exc_type, exc, tb) -> None: + return None + + class FakeSession: + def __init__(self) -> None: + self.barrier = threading.Barrier(2, timeout=1) + self.lock = threading.Lock() + self.calls: list[str] = [] + + def get(self, url: str, **kwargs) -> FakeResponse: + with self.lock: + self.calls.append(url) + self.barrier.wait() + return FakeResponse() + + session = FakeSession() + broken, unreachable = check_urls( + cast(Session, session), + ["https://b.test", "https://a.test"], + timeout=10, + ) + + assert broken == [] + assert unreachable == [] + assert set(session.calls) == {"https://a.test", "https://b.test"} + + +def test_url_result_helpers_report_warnings_and_unique_urls() -> None: + result = URLCheckResult( + checked_url_count=1, + ignored_url_count=0, + issues=( + URLIssue( + severity="warning", + category="unreachable", + source_kind="yaml", + url="https://down.example", + sources=("question.yml",), + ), + ), + ) + collection = URLSourceCollection( + yaml_urls={"https://a.example": {"question.yml"}}, + document_urls={ + "https://a.example": {"template.docx"}, + "https://b.example": {"template.docx"}, + }, + yaml_concatenated={}, + document_concatenated={}, + yaml_repairs={}, + document_repairs={}, + ) + + assert result.has_warnings() + assert collection.unique_url_count == 2 + + +def test_parse_args_reads_all_url_checker_options( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr( + sys, + "argv", + [ + "check_questions_urls", + "--root", + "project", + "--timeout", + "3", + "--skip-templates", + "--ignore-urls", + "https://ignore.example", + "--question-url-severity", + "warning", + "--template-url-severity", + "error", + "--document-url-severity", + "ignore", + "--unreachable-url-severity", + "error", + ], + ) + + args = parse_args() + + assert args.root == "project" + assert args.timeout == 3 + assert args.skip_templates is True + assert args.ignore_urls == "https://ignore.example" + assert args.yaml_url_severity == "warning" + assert args.document_url_severity == "ignore" + assert args.unreachable_url_severity == "error" + + +def test_package_dir_iterators_handle_supplied_and_discovered_dirs( + tmp_path: Path, +) -> None: + missing_root = tmp_path / "missing-root" + assert list(check_questions_urls._iter_package_dirs(missing_root)) == [] + + package_dir = tmp_path / "docassemble" / "Demo" + package_dir.mkdir(parents=True) + (tmp_path / "docassemble" / "not-a-package.txt").write_text("x", encoding="utf-8") + assert list(check_questions_urls._iter_package_dirs(tmp_path)) == [package_dir] + + supplied_missing = tmp_path / "missing" + assert list( + check_questions_urls._iter_package_dirs( + tmp_path, package_dirs=[package_dir, package_dir, supplied_missing] + ) + ) == [package_dir.resolve()] + + +def test_question_and_document_file_iterators_filter_suffixes(tmp_path: Path) -> None: + package_dir = tmp_path / "docassemble" / "Demo" + questions = package_dir / "data" / "questions" + templates = package_dir / "data" / "templates" + questions.mkdir(parents=True) + templates.mkdir(parents=True) + question_file = questions / "interview.yml" + ignored_question = questions / "image.png" + template_pdf = templates / "letter.pdf" + template_text = templates / "letter.md" + ignored_template = templates / "archive.zip" + for file_path in [ + question_file, + ignored_question, + template_pdf, + template_text, + ignored_template, + ]: + file_path.write_text("x", encoding="utf-8") + + assert list(iter_question_files(tmp_path)) == [question_file] + assert set(iter_document_files(tmp_path)) == {template_pdf, template_text} + + empty_package = tmp_path / "docassemble" / "Empty" + empty_package.mkdir() + assert list(iter_question_files(tmp_path, package_dirs=[empty_package])) == [] + assert list(iter_document_files(tmp_path, package_dirs=[empty_package])) == [] + + +def test_infer_root_uses_docassemble_parent_common_parent_fallback_and_cwd( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + package_file = tmp_path / "docassemble" / "Demo" / "data" / "questions" / "a.yml" + package_file.parent.mkdir(parents=True) + package_file.write_text("question: Hi", encoding="utf-8") + outside_root = tmp_path.parent / f"{tmp_path.name}-outside-root" + outside_one = outside_root / "outside" / "one.yml" + outside_two = outside_root / "outside" / "nested" / "two.yml" + outside_one.parent.mkdir(parents=True) + outside_two.parent.mkdir() + outside_one.write_text("x", encoding="utf-8") + outside_two.write_text("x", encoding="utf-8") + + assert infer_root([package_file]) == tmp_path + assert infer_root([outside_one, outside_two]) == outside_one.parent + assert infer_root([], fallback=outside_one) == outside_one.resolve() + monkeypatch.chdir(tmp_path) + assert infer_root([]) == tmp_path + + +def test_build_session_sets_retrying_user_agent() -> None: + session = build_session() + + assert "ALActions-da_build-url-checker" in session.headers["User-Agent"] + assert "https://" in session.adapters + + +def test_extract_text_from_pdf_and_docx_error_paths( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + pdf_path = tmp_path / "bad.pdf" + docx_path = tmp_path / "bad.docx" + pdf_path.write_bytes(b"not really a pdf") + docx_path.write_bytes(b"not really a docx") + + class BrokenReader: + def __init__(self, _: Path) -> None: + raise RuntimeError("pdf exploded") + + def broken_docx(_: Path) -> object: + raise RuntimeError("docx exploded") + + monkeypatch.setattr(check_questions_urls, "PdfReader", BrokenReader) + monkeypatch.setattr(check_questions_urls, "docx2python", broken_docx) + + assert extract_text_from_pdf(pdf_path) == "" + assert extract_text_from_docx(docx_path) == "" + err = capsys.readouterr().err + assert "could not extract text from PDF" in err + assert "could not extract text from DOCX" in err + + +def test_extract_text_from_docx_success( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + docx_path = tmp_path / "ok.docx" + docx_path.write_bytes(b"docx") + + monkeypatch.setattr( + check_questions_urls, + "docx2python", + lambda _: SimpleNamespace(text="Text from document"), + ) + + assert extract_text_from_docx(docx_path) == "Text from document" + + +@pytest.mark.parametrize( + ("raw_url", "expected"), + [ + ("", (None, False)), + ("www.example.com", (None, False)), + ("https://a.example/pathhttps://b.example", (None, True)), + ('https://a.example/"quoted"', (None, False)), + ("https:///missing-host", (None, False)), + ( + "https://a.example/form?next=https://nested.example/path#frag", + ("https://a.example/form", False), + ), + ("https://a.example/path).", ("https://a.example/path", False)), + ], +) +def test_parse_url_token_edge_cases( + raw_url: str, expected: tuple[str | None, bool] +) -> None: + assert parse_url_token(raw_url) == expected + + +def test_parse_ignore_urls_skips_empty_and_malformed_values() -> None: + assert parse_ignore_urls("") == set() + assert parse_ignore_urls( + "\nhttps://keep.example, https://bad.examplehttps://other.example, not-a-url" + ) == {"https://keep.example"} + + +def test_comment_strippers_handle_token_errors_and_empty_blocks() -> None: + incomplete_python = "value = '''unterminated" + + assert ( + check_questions_urls._strip_python_comments(incomplete_python) + == incomplete_python + ) + assert check_questions_urls._extract_wrapped_pdf_url_repairs("one line only") == {} + assert ( + check_questions_urls._strip_python_comments_from_indented_block(["\n", " \n"]) + == "\n \n" + ) + + +def test_yaml_comment_stripping_handles_escaped_quotes_and_block_boundaries() -> None: + text = "\n".join( + [ + 'title: "He said \\"# not a comment\\"" # comment', + "code: |", + " # python comment", + " live = 'https://live.example'", + "next: value # trailing", + "", + ] + ) + + stripped = check_questions_urls._strip_yaml_comments(text) + + assert "# python comment" not in stripped + assert "https://live.example" in stripped + assert "# trailing" not in stripped + assert check_questions_urls._is_unescaped_double_quote('x = "quoted"', 4) + assert not check_questions_urls._is_unescaped_double_quote('x = \\"escaped"', 5) + + +def test_prepare_and_extract_urls_from_non_text_document_cases( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + linkify = LinkifyIt(options={"fuzzy_link": False}) + binary_path = tmp_path / "binary.yml" + binary_path.write_bytes(b"\xff\xfe") + empty_pdf = tmp_path / "empty.pdf" + empty_pdf.write_bytes(b"pdf") + docx_path = tmp_path / "links.docx" + docx_path.write_bytes(b"docx") + text_path = tmp_path / "links.txt" + text_path.write_text( + "https://example.com/reserved https://ok.test/path https://bad.test/ahttps://bad.test/b", + encoding="utf-8", + ) + + monkeypatch.setattr(check_questions_urls, "extract_text_from_pdf", lambda _: "") + monkeypatch.setattr( + check_questions_urls, "extract_text_from_docx", lambda _: "https://doc.test" + ) + + assert ( + check_questions_urls._prepare_text_for_url_extraction(text_path, "a # b") + == "a # b" + ) + assert check_questions_urls._extract_urls_from_file_detailed( + binary_path, linkify + ) == ([], [], {}) + assert check_questions_urls._extract_urls_from_file_detailed( + empty_pdf, linkify + ) == ([], [], {}) + assert check_questions_urls._extract_urls_from_file_detailed( + docx_path, linkify + ) == ( + ["https://doc.test"], + [], + {}, + ) + urls, concatenated, repairs = check_questions_urls._extract_urls_from_file_detailed( + text_path, linkify + ) + assert urls == ["https://ok.test/path"] + assert concatenated == ["https://bad.test/ahttps://bad.test/b"] + assert repairs == {} + + +def test_extract_urls_from_file_detailed_skips_matches_without_valid_urls( + tmp_path: Path, +) -> None: + text_path = tmp_path / "links.txt" + text_path.write_text("placeholder", encoding="utf-8") + + class FakeLinkify: + def match(self, text: str): + return [SimpleNamespace(url="mailto:help@example.org")] + + assert check_questions_urls._extract_urls_from_file_detailed( + text_path, cast(LinkifyIt, FakeLinkify()) + ) == ([], [], {}) + + +def test_collect_urls_from_files_tracks_sources_concatenated_and_repairs( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + first = tmp_path / "first.yml" + second = tmp_path / "second.yml" + first.write_text("x", encoding="utf-8") + second.write_text("x", encoding="utf-8") + + def fake_extract( + file_path: Path, linkify: LinkifyIt + ) -> tuple[list[str], list[str], dict[str, set[str]]]: + if file_path == first: + return ( + ["https://one.test"], + ["https://bad.testhttps://other.test"], + {"https://one.test": {"https://fixed.test"}}, + ) + return ["https://one.test", "https://two.test"], [], {} + + monkeypatch.setattr( + check_questions_urls, "_extract_urls_from_file_detailed", fake_extract + ) + + urls, concatenated, repairs = collect_urls_from_files([first, second], tmp_path) + + assert urls == { + "https://one.test": {"first.yml", "second.yml"}, + "https://two.test": {"second.yml"}, + } + assert concatenated == {"https://bad.testhttps://other.test": {"first.yml"}} + assert repairs == {"https://one.test": {"https://fixed.test"}} + + +def test_collect_urls_defaults_question_files_and_can_skip_documents( + tmp_path: Path, +) -> None: + package_dir = tmp_path / "docassemble" / "Demo" + questions = package_dir / "data" / "questions" + templates = package_dir / "data" / "templates" + questions.mkdir(parents=True) + templates.mkdir(parents=True) + (questions / "interview.yml").write_text( + "question: https://question.test", encoding="utf-8" + ) + (templates / "letter.md").write_text("https://template.test", encoding="utf-8") + + with_documents = collect_urls(tmp_path) + without_documents = collect_urls(tmp_path, check_documents=False) + + assert with_documents.yaml_urls == { + "https://question.test": {"docassemble/Demo/data/questions/interview.yml"} + } + assert with_documents.document_urls == { + "https://template.test": {"docassemble/Demo/data/templates/letter.md"} + } + assert without_documents.document_urls == {} + + +def test_check_single_url_reports_or_suppresses_unreachable( + capsys: pytest.CaptureFixture[str], +) -> None: + class FailingSession: + def get(self, url: str, **kwargs) -> object: + raise requests.Timeout("too slow") + + status, unreachable = check_questions_urls._check_single_url( + cast(Session, FailingSession()), "https://slow.test", 1 + ) + assert (status, unreachable) == (None, True) + assert "could not check https://slow.test" in capsys.readouterr().err + + status, unreachable = check_questions_urls._check_single_url( + cast(Session, FailingSession()), + "https://slow.test", + 1, + report_unreachable=False, + ) + assert (status, unreachable) == (None, True) + assert capsys.readouterr().err == "" + + +def test_check_urls_covers_skip_unreachable_broken_and_repair_paths() -> None: + class FakeResponse: + def __init__(self, status_code: int) -> None: + self.status_code = status_code + + def __enter__(self) -> "FakeResponse": + return self + + def __exit__(self, exc_type, exc, tb) -> None: + return None + + class FakeSession: + def get(self, url: str, **kwargs) -> FakeResponse: + if url == "https://unreachable.test": + raise requests.ConnectionError("offline") + if url == "https://ok.test": + return FakeResponse(200) + if url in { + "https://broken.test", + "https://repair-whitelisted.test", + "https://repair-bad.test", + "https://repair-unreachable.test", + "https://still-bad.test", + }: + return FakeResponse(404) + if url == "https://repair-candidate-unreachable.test": + raise requests.Timeout("offline") + raise AssertionError(url) + + broken, unreachable = check_urls( + cast(Session, FakeSession()), + [ + "https://api.openai.com/v1/chat/completions", + "https://broken.test", + "https://ok.test", + "https://repair-bad.test", + "https://repair-unreachable.test", + "https://repair-whitelisted.test", + "https://unreachable.test", + ], + timeout=1, + repair_candidates={ + "https://repair-whitelisted.test": {"https://api.openai.com/v1/models"}, + "https://repair-bad.test": {"https://still-bad.test"}, + "https://repair-unreachable.test": { + "https://repair-candidate-unreachable.test" + }, + }, + ) + + assert broken == [ + ("https://broken.test", 404), + ("https://repair-bad.test", 404), + ("https://repair-unreachable.test", 404), + ] + assert unreachable == ["https://unreachable.test"] + + +def test_append_issue_respects_ignore_and_document_severity() -> None: + issues: list[URLIssue] = [] + + check_questions_urls._append_issue( + issues, + category="broken", + source_kind="yaml", + url="https://ignored.test", + sources={"question.yml"}, + yaml_severity="ignore", + document_severity="warning", + unreachable_severity="warning", + ) + check_questions_urls._append_issue( + issues, + category="broken", + source_kind="template", + url="https://template.test", + sources={"template.docx"}, + yaml_severity="error", + document_severity="warning", + unreachable_severity="error", + status_code=410, + ) + + assert issues == [ + URLIssue( + severity="warning", + category="broken", + source_kind="template", + url="https://template.test", + sources=("template.docx",), + status_code=410, + ) + ] + + +def test_run_url_check_builds_sorted_issues_for_all_categories( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + collected = URLSourceCollection( + yaml_urls={ + "https://ignored.test": {"question.yml"}, + "https://broken-yaml.test": {"question.yml"}, + "https://unreachable-yaml.test": {"question.yml"}, + "https://shared-broken.test": {"question.yml"}, + }, + document_urls={ + "https://ignored.test": {"template.docx"}, + "https://broken-template.test": {"template.docx"}, + "https://unreachable-template.test": {"template.docx"}, + "https://shared-broken.test": {"template.docx"}, + }, + yaml_concatenated={"https://bad-yaml.testhttps://other.test": {"question.yml"}}, + document_concatenated={ + "https://bad-template.testhttps://other.test": {"template.docx"} + }, + yaml_repairs={"https://broken-yaml.test": {"https://fixed-yaml.test"}}, + document_repairs={ + "https://broken-template.test": {"https://fixed-template.test"} + }, + ) + captured_repairs: dict[str, set[str]] = {} + + monkeypatch.setattr( + check_questions_urls, "collect_urls", lambda **kwargs: collected + ) + monkeypatch.setattr(check_questions_urls, "build_session", lambda: object()) + + def fake_check_urls( + session: object, + urls: set[str], + timeout: int, + repair_candidates: dict[str, set[str]], + ): + captured_repairs.update(repair_candidates) + return ( + [ + ("https://broken-yaml.test", 404), + ("https://broken-template.test", 410), + ("https://shared-broken.test", 404), + ], + ["https://unreachable-yaml.test", "https://unreachable-template.test"], + ) + + monkeypatch.setattr(check_questions_urls, "check_urls", fake_check_urls) + + result = run_url_check(root=tmp_path, ignore_urls=["https://ignored.test"]) + + assert result.checked_url_count == 5 + assert result.ignored_url_count == 1 + assert captured_repairs["https://broken-yaml.test"] == {"https://fixed-yaml.test"} + assert [ + (issue.category, issue.source_kind, issue.url) for issue in result.issues + ] == [ + ("concatenated", "yaml", "https://bad-yaml.testhttps://other.test"), + ("broken", "yaml", "https://broken-yaml.test"), + ("broken", "yaml", "https://shared-broken.test"), + ("concatenated", "template", "https://bad-template.testhttps://other.test"), + ("broken", "template", "https://broken-template.test"), + ("broken", "template", "https://shared-broken.test"), + ("unreachable", "yaml", "https://unreachable-yaml.test"), + ("unreachable", "template", "https://unreachable-template.test"), + ] + + +def test_run_url_check_skips_network_when_no_urls( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr( + check_questions_urls, + "collect_urls", + lambda **kwargs: URLSourceCollection({}, {}, {}, {}, {}, {}), + ) + monkeypatch.setattr( + check_questions_urls, + "build_session", + lambda: pytest.fail("session should not be built without URLs"), + ) + + assert run_url_check(root=tmp_path).checked_url_count == 0 + + +def test_print_url_check_report_covers_empty_success_and_issue_buckets( + capsys: pytest.CaptureFixture[str], +) -> None: + print_url_check_report( + URLCheckResult(checked_url_count=0, ignored_url_count=1, issues=()) + ) + output = capsys.readouterr().out + assert "Ignoring 1 URL" in output + assert "No absolute URLs found" in output + + print_url_check_report( + URLCheckResult(checked_url_count=2, ignored_url_count=0, issues=()) + ) + assert "Checked 2 URLs" in capsys.readouterr().out + + print_url_check_report( + URLCheckResult( + checked_url_count=3, + ignored_url_count=0, + issues=( + URLIssue( + "error", + "broken", + "yaml", + "https://broken.test", + ("question.yml",), + 404, + ), + URLIssue( + "warning", + "unreachable", + "template", + "https://slow.test", + ("template.docx",), + ), + ), + ) + ) + output = capsys.readouterr().out + assert "URL checker errors" in output + assert "[404] https://broken.test" in output + assert "URL checker warnings" in output + assert "https://slow.test (found in: template.docx)" in output + + +def test_main_wires_args_and_returns_status( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + args = SimpleNamespace( + root=str(tmp_path), + timeout=7, + skip_templates=True, + ignore_urls="https://ignore.test", + yaml_url_severity="warning", + document_url_severity="ignore", + unreachable_url_severity="error", + ) + captured: dict[str, object] = {} + result = URLCheckResult( + checked_url_count=1, + ignored_url_count=0, + issues=( + URLIssue("error", "broken", "yaml", "https://bad.test", ("q.yml",), 404), + ), + ) + + def fake_run_url_check(**kwargs): + captured.update(kwargs) + return result + + monkeypatch.setattr(check_questions_urls, "parse_args", lambda: args) + monkeypatch.setattr(check_questions_urls, "run_url_check", fake_run_url_check) + monkeypatch.setattr(check_questions_urls, "print_url_check_report", lambda _: None) + + assert check_questions_urls.main() == 1 + assert captured == { + "root": tmp_path.resolve(), + "timeout": 7, + "check_documents": False, + "ignore_urls": {"https://ignore.test"}, + "yaml_severity": "warning", + "document_severity": "ignore", + "unreachable_severity": "error", + } + + monkeypatch.setattr( + check_questions_urls, + "run_url_check", + lambda **kwargs: URLCheckResult(0, 0, ()), + ) + assert check_questions_urls.main() == 0 + + +def test_module_main_guard_exits_with_main_status( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + monkeypatch.setattr( + sys, + "argv", + [ + "check_questions_urls.py", + "--root", + str(tmp_path), + "--skip-templates", + ], + ) + + with pytest.raises(SystemExit) as exc_info: + runpy.run_path(str(Path(check_questions_urls.__file__)), run_name="__main__") + + assert exc_info.value.code == 0 + assert "No absolute URLs found" in capsys.readouterr().out diff --git a/tests/test_code_formatter.py b/tests/test_code_formatter.py index fcdcf73..947ce3d 100644 --- a/tests/test_code_formatter.py +++ b/tests/test_code_formatter.py @@ -1,10 +1,18 @@ import unittest +from pathlib import Path +from unittest.mock import patch + +from dayamlchecker._files import _collect_yaml_files +from dayamlchecker._jinja import ( + _contains_jinja_syntax, + _has_jinja_header, +) from dayamlchecker.code_formatter import ( - format_python_code, - format_yaml_string, FormatterConfig, _convert_indent_4_to_2, _strip_common_indent, + format_python_code, + format_yaml_string, ) @@ -73,6 +81,17 @@ def test_format_nested_indentation(self): # Nested should be 4 spaces (2 * 2) instead of 8 (4 * 2) self.assertIn("\n x = 1", result) + def test_format_without_trailing_whitespace_strip_branch(self): + code = "x = 1\n" + config = FormatterConfig(strip_trailing_whitespace=False) + + result = format_python_code(code, config) + + self.assertEqual(result, "x = 1\n") + + def test_format_blank_code_returns_empty_string(self): + self.assertEqual(format_python_code("\n"), "") + class TestFormatYamlString(unittest.TestCase): def test_format_code_block(self): @@ -230,6 +249,152 @@ def test_only_code_block_text_changes(self): self.assertIn("if True:\n x = 1", result) +class TestContainsJinjaSyntax(unittest.TestCase): + def test_detects_variable_expression(self): + self.assertTrue(_contains_jinja_syntax("question: Hello {{ user }}!")) + + def test_detects_block_tag(self): + self.assertTrue(_contains_jinja_syntax("{% if condition %}yes{% endif %}")) + + def test_detects_comment_tag(self): + self.assertTrue(_contains_jinja_syntax("{# This is a comment #}")) + + def test_detects_whitespace_control_tags(self): + self.assertTrue(_contains_jinja_syntax("{%- if x -%}\ncontent\n{%- endif -%}")) + + def test_plain_yaml_not_detected(self): + self.assertFalse( + _contains_jinja_syntax("question: Hello world\ncode: |\n x = 1\n") + ) + + def test_python_single_brace_dict_not_detected(self): + self.assertFalse(_contains_jinja_syntax("code: |\n d = {key: value}\n")) + + def test_empty_string_not_detected(self): + self.assertFalse(_contains_jinja_syntax("")) + + def test_plain_text_with_percent_not_detected(self): + self.assertFalse(_contains_jinja_syntax("discount: 50%\n")) + + +class TestHasJinjaHeader(unittest.TestCase): + def test_exact_header_detected(self): + self.assertTrue(_has_jinja_header("# use jinja\nquestion: test\n")) + + def test_header_only_line(self): + self.assertTrue(_has_jinja_header("# use jinja")) + + def test_header_with_leading_whitespace_not_detected(self): + # First line must be exactly '# use jinja'; leading spaces disqualify it + self.assertFalse(_has_jinja_header(" # use jinja\nquestion: test\n")) + + def test_missing_space_not_detected(self): + self.assertFalse(_has_jinja_header("#use jinja\nquestion: test\n")) + + def test_no_header_returns_false(self): + self.assertFalse(_has_jinja_header("---\nquestion: test\n")) + + def test_empty_content_returns_false(self): + self.assertFalse(_has_jinja_header("")) + + +class TestFormatYamlStringJinja(unittest.TestCase): + """Valid Jinja files (with '# use jinja' header) are skipped unchanged. + Files that contain Jinja syntax WITHOUT the header are an error. + """ + + # --- valid Jinja files (should be returned unchanged) --- + + def test_jinja_variable_file_returned_unchanged(self): + yaml_content = ( + "# use jinja\n---\nquestion: Hello {{ user }}\ncode: |\n x = 1\n" + ) + result, changed = format_yaml_string(yaml_content) + self.assertEqual(result, yaml_content) + self.assertFalse(changed) + + def test_jinja_block_tag_returned_unchanged(self): + # No code blocks — nothing to format, returned unchanged. + yaml_content = ( + "# use jinja\n---\n{% if condition %}\nquestion: test\n{% endif %}\n" + ) + result, changed = format_yaml_string(yaml_content) + self.assertEqual(result, yaml_content) + self.assertFalse(changed) + + def test_jinja_comment_returned_unchanged(self): + # No code blocks — nothing to format, returned unchanged. + yaml_content = "# use jinja\n{# template comment #}\nquestion: test\n" + result, changed = format_yaml_string(yaml_content) + self.assertEqual(result, yaml_content) + self.assertFalse(changed) + + def test_jinja_code_block_with_jinja_syntax_not_modified(self): + # A code block that itself contains Jinja syntax must be left alone. + yaml_content = ( + "# use jinja\n" + "---\n" + "code: |\n" + " {% for item in items %}\n" + " x = {{ item }}\n" + " {% endfor %}\n" + ) + result, changed = format_yaml_string(yaml_content) + self.assertEqual(result, yaml_content) + self.assertFalse(changed) + + def test_jinja_mixed_blocks_only_clean_ones_formatted(self): + # Second code block has no Jinja — only that block should be formatted. + yaml_content = "# use jinja\n---\ncode: |\n x={{ y }}\n---\ncode: |\n z=1\n" + result, changed = format_yaml_string(yaml_content) + self.assertTrue(changed) + # First block (Jinja) must be untouched + self.assertIn("x={{ y }}", result) + # Second block (clean) must be formatted + self.assertIn("z = 1", result) + + def test_jinja_variable_file_already_formatted_unchanged(self): + # Already-formatted code block — no change expected. + yaml_content = ( + "# use jinja\n---\nquestion: Hello {{ user }}\ncode: |\n x = 1\n" + ) + result, changed = format_yaml_string(yaml_content) + self.assertFalse(changed) + self.assertEqual(result, yaml_content) + + def test_jinja_with_unformatted_code_is_now_formatted(self): + # A valid Jinja file whose code block has no Jinja syntax IS now formatted. + yaml_content = "# use jinja\n---\nquestion: Hello {{ user }}\ncode: |\n x=1\n" + result, changed = format_yaml_string(yaml_content) + self.assertTrue(changed) + self.assertIn("x = 1", result) + # Jinja expression preserved exactly + self.assertIn("{{ user }}", result) + # Header preserved + self.assertTrue(result.startswith("# use jinja\n")) + + # --- Jinja-like syntax without '# use jinja' header: treated as plain YAML --- + + def test_jinja_variable_without_header_no_error(self): + # {{ }} in a YAML value is valid YAML; formatter should return it unchanged. + yaml_content = "---\nquestion: Hello {{ user }}\n" + result, changed = format_yaml_string(yaml_content) + self.assertFalse(changed) + self.assertEqual(result, yaml_content) + + def test_jinja_block_tag_without_header_yaml_error(self): + # {% %} on its own line is not valid YAML; the YAML parser raises. + yaml_content = "---\n{% if condition %}\nquestion: test\n{% endif %}\n" + with self.assertRaises(Exception): + format_yaml_string(yaml_content) + + def test_jinja_comment_without_header_yaml_error(self): + # {# #} is not valid YAML; the YAML parser raises. + yaml_content = "{# template comment #}\nquestion: test\n" + with self.assertRaises(Exception): + format_yaml_string(yaml_content) + + class TestFormatterConfig(unittest.TestCase): def test_default_config(self): config = FormatterConfig() @@ -245,5 +410,349 @@ def test_custom_python_keys(self): self.assertNotIn("validation code", config.python_keys) +class TestReindent(unittest.TestCase): + """Tests for _reindent.""" + + def setUp(self): + from dayamlchecker.code_formatter import _reindent + + self._fn = _reindent + + def test_zero_indent_returns_unchanged(self): + text = "x = 1\ny = 2\n" + self.assertEqual(self._fn(text, 0), text) + + def test_negative_indent_returns_unchanged(self): + text = "x = 1\n" + self.assertEqual(self._fn(text, -1), text) + + def test_positive_indent_adds_prefix_to_non_empty_lines(self): + text = "x = 1\ny = 2\n" + result = self._fn(text, 2) + self.assertIn(" x = 1", result) + self.assertIn(" y = 2", result) + + def test_blank_lines_not_indented(self): + text = "x = 1\n\ny = 2\n" + result = self._fn(text, 2) + lines = result.splitlines() + # blank line should remain blank (not gain spaces) + self.assertIn("", lines) + + +class TestFindBlockBodySpan(unittest.TestCase): + """Tests for _find_block_body_span.""" + + def setUp(self): + from dayamlchecker.code_formatter import _find_block_body_span + + self._fn = _find_block_body_span + + def test_header_at_last_line_returns_empty_span(self): + # Only one line, header is at index 0 -> no body + lines = ["code: |\n"] + start, end, indent = self._fn(lines, 0) + self.assertGreater(start, end) + + def test_basic_body_span(self): + lines = ["code: |\n", " x = 1\n", " y = 2\n"] + start, end, indent = self._fn(lines, 0) + self.assertEqual(start, 1) + self.assertEqual(end, 2) + self.assertEqual(indent, 2) + + def test_blank_lines_inside_body_included(self): + lines = ["code: |\n", " x = 1\n", "\n", " y = 2\n", "question: |\\n"] + start, end, _ = self._fn(lines, 0) + # blank line at index 2 is part of the block + self.assertGreaterEqual(end, 3) + + def test_dedented_line_ends_body(self): + lines = ["code: |\n", " x = 1\n", "other: value\n"] + start, end, _ = self._fn(lines, 0) + self.assertEqual(end, 1) + + +class TestFormatYamlFile(unittest.TestCase): + """Tests for format_yaml_file().""" + + def test_format_writes_and_returns_changes(self): + import os + import tempfile + + from dayamlchecker.code_formatter import format_yaml_file + + content = "---\ncode: |\n x=1\n" + with tempfile.NamedTemporaryFile( + suffix=".yml", mode="w", delete=False, encoding="utf-8" + ) as f: + f.write(content) + fname = f.name + try: + result, changed = format_yaml_file(fname, write=True) + self.assertTrue(changed) + self.assertIn("x = 1", result) + # File should be updated on disk + self.assertIn("x = 1", Path(fname).read_text(encoding="utf-8")) + finally: + os.unlink(fname) + + def test_format_no_write_does_not_modify_file(self): + import os + import tempfile + + from dayamlchecker.code_formatter import format_yaml_file + + content = "---\ncode: |\n x=1\n" + with tempfile.NamedTemporaryFile( + suffix=".yml", mode="w", delete=False, encoding="utf-8" + ) as f: + f.write(content) + fname = f.name + try: + result, changed = format_yaml_file(fname, write=False) + self.assertTrue(changed) + # Original file must be unchanged + self.assertEqual(Path(fname).read_text(encoding="utf-8"), content) + finally: + os.unlink(fname) + + def test_format_unchanged_file_returns_false(self): + import os + import tempfile + + from dayamlchecker.code_formatter import format_yaml_file + + content = "---\ncode: |\n x = 1\n" + with tempfile.NamedTemporaryFile( + suffix=".yml", mode="w", delete=False, encoding="utf-8" + ) as f: + f.write(content) + fname = f.name + try: + _, changed = format_yaml_file(fname) + self.assertFalse(changed) + finally: + os.unlink(fname) + + +class TestCollectYamlFiles(unittest.TestCase): + """Tests for _collect_yaml_files.""" + + def test_check_all_flag_disables_ignores(self): + import tempfile + + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + visible = root / "visible.yml" + git_dir = root / ".git" + git_dir.mkdir() + hidden = git_dir / "hidden.yml" + visible.write_text("---\n", encoding="utf-8") + hidden.write_text("---\n", encoding="utf-8") + + result = _collect_yaml_files([root], check_all=True) + paths = [p.name for p in result] + self.assertIn("hidden.yml", paths) + + def test_venv_dir_is_ignored_by_default(self): + import tempfile + + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + venv_dir = root / ".venv" + venv_dir.mkdir() + (venv_dir / "env.yml").write_text("---\n", encoding="utf-8") + (root / "real.yml").write_text("---\n", encoding="utf-8") + + result = _collect_yaml_files([root]) + names = [p.name for p in result] + self.assertIn("real.yml", names) + self.assertNotIn("env.yml", names) + + def test_single_yaml_file_path_collected(self): + import os + import tempfile + + with tempfile.NamedTemporaryFile( + suffix=".yml", delete=False, mode="w", encoding="utf-8" + ) as f: + f.write("---\n") + fname = f.name + try: + result = _collect_yaml_files([Path(fname)]) + self.assertEqual(len(result), 1) + finally: + os.unlink(fname) + + def test_non_yaml_file_not_collected(self): + import os + import tempfile + + with tempfile.NamedTemporaryFile( + suffix=".txt", delete=False, mode="w", encoding="utf-8" + ) as f: + f.write("hello\n") + fname = f.name + try: + result = _collect_yaml_files([Path(fname)]) + self.assertEqual(result, []) + finally: + os.unlink(fname) + + +class TestFormatterConfigDefaults(unittest.TestCase): + def test_prefer_literal_blocks_default_true(self): + config = FormatterConfig() + self.assertTrue(config.prefer_literal_blocks) + + def test_strip_trailing_whitespace_default_true(self): + config = FormatterConfig() + self.assertTrue(config.strip_trailing_whitespace) + + def test_black_target_versions_default_empty(self): + config = FormatterConfig() + self.assertEqual(config.black_target_versions, set()) + + +class TestFormatYamlStringEdgeCases(unittest.TestCase): + """Additional edge-case tests for format_yaml_string.""" + + def test_empty_yaml_no_change(self): + result, changed = format_yaml_string("") + self.assertFalse(changed) + + def test_none_document_no_crash(self): + # A YAML stream with only '---' produces a None document + result, changed = format_yaml_string("---\n") + self.assertFalse(changed) + + def test_no_trailing_newline_on_last_body_line_preserved(self): + # Body line that doesn't end with \n should not gain one after replacement + yaml_content = "code: |\n x=1" + result, _ = format_yaml_string(yaml_content) + self.assertIn("x = 1", result) + + def test_jinja_last_body_line_without_trailing_newline_stays_without_newline(self): + yaml_content = "# use jinja\n---\ncode: |\n x=1" + + result, changed = format_yaml_string(yaml_content) + + self.assertTrue(changed) + self.assertTrue(result.endswith("x = 1")) + self.assertFalse(result.endswith("\n")) + + def test_format_with_custom_line_length(self): + # A very short line length forces Black to break the line + config = FormatterConfig(black_line_length=20) + yaml_content = ( + "---\ncode: |\n very_long_variable_name = another_long_variable\n" + ) + result, _ = format_yaml_string(yaml_content, config) + self.assertIsInstance(result, str) + + def test_strip_common_indent_all_empty_lines(self): + """_strip_common_indent returns original when all lines are blank.""" + lines = ["\n", " \n", "\n"] + result, indent = _strip_common_indent(lines) + self.assertEqual(indent, 0) + self.assertEqual(result, lines) + + def test_code_block_empty_body(self): + """A code block header with no body (immediately followed by another key) is unchanged.""" + yaml_content = "---\ncode: |\nquestion: Hello\n" + result, changed = format_yaml_string(yaml_content) + self.assertFalse(changed) + + def test_jinja_regex_path_already_formatted(self): + """Jinja file with already-formatted code block returns unchanged.""" + yaml_content = "# use jinja\n---\ncode: |\n x = 1\n" + result, changed = format_yaml_string(yaml_content) + self.assertFalse(changed) + + def test_jinja_regex_path_code_with_jinja_skipped(self): + """Jinja file code block that contains Jinja syntax is left alone.""" + yaml_content = "# use jinja\n---\ncode: |\n x = {{ value }}\n" + result, changed = format_yaml_string(yaml_content) + self.assertFalse(changed) + self.assertIn("{{ value }}", result) + + def test_jinja_regex_path_empty_code_block(self): + """Jinja file with empty code block body is unchanged.""" + yaml_content = "# use jinja\n---\ncode: |\nquestion: Hello\n" + result, changed = format_yaml_string(yaml_content) + self.assertFalse(changed) + + def test_jinja_regex_path_formatter_exception_leaves_block_unchanged(self): + yaml_content = "# use jinja\n---\ncode: |\n x=1\n" + + with patch( + "dayamlchecker.code_formatter.format_python_code", + side_effect=ValueError("boom"), + ): + result, changed = format_yaml_string(yaml_content) + + self.assertFalse(changed) + self.assertEqual(result, yaml_content) + + def test_collect_yaml_files_include_default_ignores_none(self): + """_collect_yaml_files with include_default_ignores=None defaults to True.""" + import tempfile + + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + visible = root / "visible.yml" + git_dir = root / ".git" + git_dir.mkdir() + hidden = git_dir / "hidden.yml" + visible.write_text("---\n", encoding="utf-8") + hidden.write_text("---\n", encoding="utf-8") + + # include_default_ignores=None should default to ignoring + result = _collect_yaml_files([root], include_default_ignores=None) + names = [p.name for p in result] + self.assertIn("visible.yml", names) + self.assertNotIn("hidden.yml", names) + + +class TestCollectTextReplacements(unittest.TestCase): + def test_skip_replacement_when_location_lookup_fails(self): + from ruamel.yaml import YAML + + from dayamlchecker.code_formatter import _collect_text_replacements_for_doc + + yaml_content = "---\ncode: |\n x=1\n" + yaml = YAML() + doc = yaml.load(yaml_content) + lines = yaml_content.splitlines(keepends=True) + + with patch.object(doc.lc, "key", side_effect=RuntimeError("boom")): + replacements = _collect_text_replacements_for_doc( + doc, lines, FormatterConfig() + ) + + self.assertEqual(replacements, []) + + def test_skip_replacement_when_code_is_already_formatted(self): + from ruamel.yaml import YAML + + from dayamlchecker.code_formatter import _collect_text_replacements_for_doc + + yaml_content = "---\ncode: |\n x = 1\n" + yaml = YAML() + doc = yaml.load(yaml_content) + lines = yaml_content.splitlines(keepends=True) + + with patch( + "dayamlchecker.code_formatter.format_python_code", + return_value=str(doc["code"]), + ): + replacements = _collect_text_replacements_for_doc( + doc, lines, FormatterConfig() + ) + + self.assertEqual(replacements, []) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_code_formatter_cli.py b/tests/test_code_formatter_cli.py index e0d0b44..cfeae6a 100644 --- a/tests/test_code_formatter_cli.py +++ b/tests/test_code_formatter_cli.py @@ -1,7 +1,14 @@ +import io +import os +from contextlib import redirect_stderr, redirect_stdout from pathlib import Path from tempfile import TemporaryDirectory +from unittest.mock import patch -from dayamlchecker.code_formatter import _collect_yaml_files +from tests._cli_helpers import RunResult + +from dayamlchecker._files import _collect_yaml_files +from dayamlchecker.code_formatter import main def test_formatter_collect_yaml_files_default_ignores_common_directories(): @@ -74,3 +81,345 @@ def test_formatter_collect_yaml_files_can_disable_default_ignores(): sources_file, ] ) + + +def test_formatter_collect_yaml_files_skips_ignored_root_directory_itself(): + with TemporaryDirectory() as tmp: + git_root = Path(tmp) / ".git" + git_root.mkdir() + hidden = git_root / "hidden.yml" + hidden.write_text("question: git\n", encoding="utf-8") + + collected = _collect_yaml_files([git_root]) + + assert collected == [] + + +def test_formatter_collect_yaml_files_reads_yaml_path_from_pyproject_root(): + with TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + "[tool.dayaml]\n" + 'yaml_path = "interviews"\n', + encoding="utf-8", + ) + configured_yaml = root / "interviews" / "formatter.yml" + default_yaml = root / "docassemble" / "ignored.yml" + configured_yaml.parent.mkdir(parents=True) + default_yaml.parent.mkdir(parents=True) + + configured_yaml.write_text("question: configured\n", encoding="utf-8") + default_yaml.write_text("question: default\n", encoding="utf-8") + + collected = _collect_yaml_files([root]) + + assert [path.resolve() for path in collected] == [configured_yaml.resolve()] + + +def _run_formatter(*args: str) -> RunResult: + stdout_buf = io.StringIO() + stderr_buf = io.StringIO() + with patch("sys.argv", ["dayamlchecker.code_formatter", *args]): + with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf): + returncode = main() + return RunResult(returncode, stdout_buf.getvalue(), stderr_buf.getvalue()) + + +def test_formatter_skips_jinja_file_with_message(): + """A Jinja file with no code blocks should still be handled without error.""" + with TemporaryDirectory() as tmp: + jinja_file = Path(tmp) / "interview.yml" + jinja_file.write_text( + "# use jinja\n---\nquestion: Hello {{ user }}\n", encoding="utf-8" + ) + + result = _run_formatter(str(jinja_file)) + + assert result.returncode == 0 + + +def test_formatter_jinja_count_in_summary(): + """A Jinja file with a clean (non-Jinja) code block is counted as reformatted.""" + with TemporaryDirectory() as tmp: + jinja_file = Path(tmp) / "interview.yml" + jinja_file.write_text( + "# use jinja\n---\nquestion: Hello {{ user }}\ncode: |\n x=1\n", + encoding="utf-8", + ) + + result = _run_formatter(str(jinja_file)) + + assert "1 reformatted" in result.stdout + assert "skipped (Jinja)" not in result.stdout + + +def test_formatter_jinja_not_in_summary_when_zero(): + """The old 'skipped (Jinja)' label should never appear in output.""" + with TemporaryDirectory() as tmp: + regular_file = Path(tmp) / "interview.yml" + regular_file.write_text("---\nquestion: Hello world\n", encoding="utf-8") + + result = _run_formatter(str(regular_file)) + + assert "skipped (Jinja)" not in result.stdout + + +def test_formatter_jinja_file_with_clean_code_is_formatted(): + """A Jinja file whose code block has no Jinja syntax should be formatted.""" + with TemporaryDirectory() as tmp: + jinja_file = Path(tmp) / "interview.yml" + original = "# use jinja\n---\nquestion: Hello {{ user }}\ncode: |\n x=1\n" + jinja_file.write_text(original, encoding="utf-8") + + result = _run_formatter(str(jinja_file)) + + assert result.returncode == 0 + assert "reformatted" in result.stdout + formatted = jinja_file.read_text(encoding="utf-8") + assert "x = 1" in formatted + assert "{{ user }}" in formatted + assert formatted.startswith("# use jinja\n") + + +def test_formatter_jinja_file_already_formatted_unchanged(): + """A Jinja file with already-formatted code should be reported unchanged.""" + with TemporaryDirectory() as tmp: + jinja_file = Path(tmp) / "interview.yml" + original = "# use jinja\n---\nquestion: Hello {{ user }}\ncode: |\n x = 1\n" + jinja_file.write_text(original, encoding="utf-8") + + result = _run_formatter(str(jinja_file)) + + assert result.returncode == 0 + assert "unchanged" in result.stdout + assert jinja_file.read_text(encoding="utf-8") == original + + +def test_formatter_jinja_file_with_jinja_in_code_block_not_modified(): + """A code block that itself contains Jinja syntax must not be touched.""" + with TemporaryDirectory() as tmp: + jinja_file = Path(tmp) / "interview.yml" + original = ( + "# use jinja\n" + "---\n" + "code: |\n" + " {% for item in items %}\n" + " x = {{ item }}\n" + " {% endfor %}\n" + ) + jinja_file.write_text(original, encoding="utf-8") + + result = _run_formatter(str(jinja_file)) + + assert result.returncode == 0 + assert jinja_file.read_text(encoding="utf-8") == original + + +def test_formatter_quiet_suppresses_output(): + with TemporaryDirectory() as tmp: + jinja_file = Path(tmp) / "interview.yml" + jinja_file.write_text( + "# use jinja\n---\nquestion: Hello {{ user }}\n", encoding="utf-8" + ) + + result = _run_formatter("--quiet", str(jinja_file)) + + assert result.returncode == 0 + # quiet suppresses all normal output + assert result.stdout.strip() == "" + + +def test_formatter_quiet_suppresses_reformatted_output_too(): + with TemporaryDirectory() as tmp: + interview = Path(tmp) / "interview.yml" + interview.write_text("---\ncode: |\n x=1\n", encoding="utf-8") + + result = _run_formatter("--quiet", str(interview)) + + assert result.returncode == 0 + assert result.stdout.strip() == "" + assert interview.read_text(encoding="utf-8") == "---\ncode: |\n x = 1\n" + + +def test_formatter_summary_shows_unchanged_for_already_formatted_jinja(): + """A Jinja file with already-formatted code blocks appears as 'unchanged'.""" + with TemporaryDirectory() as tmp: + jinja_file = Path(tmp) / "interview.yml" + jinja_file.write_text( + "# use jinja\n---\nquestion: Hello {{ user }}\n", encoding="utf-8" + ) + + result = _run_formatter(str(jinja_file)) + + assert "unchanged" in result.stdout + assert "skipped (Jinja)" not in result.stdout + + +def test_formatter_jinja_without_header_is_processed_as_plain_yaml(): + """A file with '{{ }}' and no header is handled through the normal YAML path.""" + with TemporaryDirectory() as tmp: + bad_file = Path(tmp) / "interview.yml" + original = "---\nquestion: Hello {{ user }}\n" + bad_file.write_text(original, encoding="utf-8") + + result = _run_formatter(str(bad_file)) + + assert result.returncode == 0 + assert result.stderr.strip() == "" + assert "unchanged" in result.stdout + assert bad_file.read_text(encoding="utf-8") == original + + +def test_formatter_jinja_without_header_with_code_block_is_formatted(): + """Without the header, valid YAML still gets normal code formatting.""" + with TemporaryDirectory() as tmp: + bad_file = Path(tmp) / "interview.yml" + original = "---\nquestion: Hello {{ user }}\ncode: |\n x=1\n" + bad_file.write_text(original, encoding="utf-8") + + result = _run_formatter(str(bad_file)) + + assert result.returncode == 0 + assert "reformatted" in result.stdout + assert bad_file.read_text(encoding="utf-8") == ( + "---\nquestion: Hello {{ user }}\ncode: |\n x = 1\n" + ) + + +def test_formatter_no_yaml_files_found(): + """main() returns 1 and prints error when no YAML files are found.""" + with TemporaryDirectory() as tmp: + txt = Path(tmp) / "readme.txt" + txt.write_text("not yaml\n", encoding="utf-8") + result = _run_formatter(str(txt)) + assert result.returncode == 1 + assert "no yaml files found" in result.stderr.lower() + + +def test_formatter_defaults_to_current_directory(): + with TemporaryDirectory() as tmp: + docassemble_dir = Path(tmp) / "docassemble" + docassemble_dir.mkdir() + interview = docassemble_dir / "interview.yml" + interview.write_text("---\ncode: |\n x=1\n", encoding="utf-8") + previous_cwd = Path.cwd() + + try: + os.chdir(tmp) + result = _run_formatter() + finally: + os.chdir(previous_cwd) + + assert result.returncode == 0 + assert "interview.yml" in result.stdout + assert "reformatted" in result.stdout + assert interview.read_text(encoding="utf-8") == "---\ncode: |\n x = 1\n" + + +def test_formatter_display_uses_absolute_path_for_file_outside_bases(): + with TemporaryDirectory() as base_tmp, TemporaryDirectory() as other_tmp: + base = Path(base_tmp) + outside = Path(other_tmp) / "outside.yml" + outside.write_text("---\ncode: |\n x = 1\n", encoding="utf-8") + + with patch( + "dayamlchecker.code_formatter._collect_yaml_files", return_value=[outside] + ): + result = _run_formatter(str(base)) + + assert result.returncode == 0 + assert result.stdout.startswith(".\n") + + +def test_formatter_display_prefers_path_relative_to_cwd(monkeypatch): + with TemporaryDirectory() as tmp: + root = Path(tmp) + scan_dir = root / "docassemble" + interview = scan_dir / "WorkflowDocs" / "data" / "questions" / "test.yml" + interview.parent.mkdir(parents=True, exist_ok=True) + interview.write_text("---\ncode: |\n x = 1\n", encoding="utf-8") + + previous_cwd = Path.cwd() + monkeypatch.chdir(root) + try: + result = _run_formatter(str(scan_dir)) + finally: + monkeypatch.chdir(previous_cwd) + + assert result.returncode == 0 + assert result.stdout.startswith(".\n") + + +def test_formatter_check_mode_does_not_write(): + """--check reports files that would change but doesn't modify them.""" + with TemporaryDirectory() as tmp: + f = Path(tmp) / "interview.yml" + original = "---\ncode: |\n x=1\n" + f.write_text(original, encoding="utf-8") + + result = _run_formatter("--check", str(f)) + + assert result.returncode == 1 + assert "would reformat" in result.stdout.lower() + assert f.read_text(encoding="utf-8") == original + + +def test_formatter_check_mode_unchanged_exits_zero(): + """--check returns 0 when no formatting changes are needed.""" + with TemporaryDirectory() as tmp: + f = Path(tmp) / "interview.yml" + f.write_text("---\ncode: |\n x = 1\n", encoding="utf-8") + + result = _run_formatter("--check", str(f)) + + assert result.returncode == 0 + + +def test_formatter_check_mode_unchanged_prints_dot_with_newline(): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "interview.yml" + f.write_text("---\ncode: |\n x = 1\n", encoding="utf-8") + + result = _run_formatter("--check", str(f)) + + assert result.returncode == 0 + assert result.stdout == ".\n" + + +def test_formatter_no_summary_flag(): + """--no-summary suppresses the summary line.""" + with TemporaryDirectory() as tmp: + f = Path(tmp) / "interview.yml" + f.write_text("---\ncode: |\n x = 1\n", encoding="utf-8") + + result = _run_formatter("--no-summary", str(f)) + + assert result.returncode == 0 + assert "summary" not in result.stdout.lower() + + +def test_formatter_file_not_found_reports_error(): + """A non-existent file path is reported as an error in stderr.""" + with TemporaryDirectory() as tmp: + # Do not create the file; pass a path that doesn't exist to the CLI. + missing = Path(tmp) / "missing.yml" + + result = _run_formatter(str(missing)) + + # The formatter should fail and report the error on stderr. + assert result.returncode != 0 + assert "error" in result.stderr.lower() + + +def test_formatter_summary_shows_error_count(): + """Summary includes error count when a file causes an exception.""" + with TemporaryDirectory() as tmp: + # Create a binary file with .yml extension that can't be decoded + bad = Path(tmp) / "bad.yml" + bad.write_bytes(b"\x80\x81\x82\x83") + + result = _run_formatter(str(bad)) + + assert result.returncode == 1 + assert "error" in result.stderr.lower() or "error" in result.stdout.lower() diff --git a/tests/test_dayaml_cli.py b/tests/test_dayaml_cli.py new file mode 100644 index 0000000..518503c --- /dev/null +++ b/tests/test_dayaml_cli.py @@ -0,0 +1,172 @@ +import io +import os +from contextlib import redirect_stderr, redirect_stdout +from pathlib import Path +from tempfile import TemporaryDirectory + +from tests._cli_helpers import RunResult + +from dayamlchecker.cli import main +from dayamlchecker.__main__ import main as package_main +from dayamlchecker.yaml_structure import main as checker_main + + +def _run_dayaml(*args: str) -> RunResult: + stdout_buf = io.StringIO() + stderr_buf = io.StringIO() + with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf): + returncode = main(list(args)) + return RunResult(returncode, stdout_buf.getvalue(), stderr_buf.getvalue()) + + +def test_dayaml_check_dispatches_to_checker_cli(): + with TemporaryDirectory() as tmp: + good = Path(tmp) / "good.yml" + good.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + + result = _run_dayaml("check", str(good)) + + assert result.returncode == 0 + assert "1 ok" in result.stdout + + +def test_dayaml_check_defaults_to_current_directory(): + with TemporaryDirectory() as tmp: + docassemble_dir = Path(tmp) / "docassemble" + docassemble_dir.mkdir() + good = docassemble_dir / "good.yml" + good.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + previous_cwd = Path.cwd() + + try: + os.chdir(tmp) + result = _run_dayaml("check") + finally: + os.chdir(previous_cwd) + + assert result.returncode == 0 + assert result.stdout.startswith(".\n") + assert "1 ok" in result.stdout + + +def test_dayaml_format_dispatches_to_formatter_cli(): + with TemporaryDirectory() as tmp: + interview = Path(tmp) / "interview.yml" + interview.write_text("---\ncode: |\n x=1\n", encoding="utf-8") + + result = _run_dayaml("format", str(interview)) + + assert result.returncode == 0 + assert "reformatted" in result.stdout + assert interview.read_text(encoding="utf-8") == "---\ncode: |\n x = 1\n" + + +def test_dayaml_format_defaults_to_current_directory(): + with TemporaryDirectory() as tmp: + docassemble_dir = Path(tmp) / "docassemble" + docassemble_dir.mkdir() + interview = docassemble_dir / "interview.yml" + interview.write_text("---\ncode: |\n x=1\n", encoding="utf-8") + previous_cwd = Path.cwd() + + try: + os.chdir(tmp) + result = _run_dayaml("format") + finally: + os.chdir(previous_cwd) + + assert result.returncode == 0 + assert "interview.yml" in result.stdout + assert "reformatted" in result.stdout + assert interview.read_text(encoding="utf-8") == "---\ncode: |\n x = 1\n" + + +def test_dayaml_help_lists_commands(): + result = _run_dayaml("--help") + + assert result.returncode == 0 + assert "check" in result.stdout + assert "format" in result.stdout + assert "defaults to the current working directory" in result.stdout + + +def test_dayaml_requires_known_command(): + result = _run_dayaml("unknown") + + assert result.returncode == 2 + assert "unknown command" in result.stderr.lower() + + +def test_dayaml_no_subcommand_shows_help_on_stderr(): + result = _run_dayaml() + + assert result.returncode == 2 + assert "check" in result.stderr + assert "format" in result.stderr + + +def test_dayaml_check_propagates_nonzero_for_invalid_file(): + with TemporaryDirectory() as tmp: + bad = Path(tmp) / "bad.yml" + bad.write_text("---\nnot_a_real_key: hello\n", encoding="utf-8") + + result = _run_dayaml("check", str(bad)) + + assert result.returncode != 0 + + +def test_dayaml_check_returns_nonzero_for_promoted_error_file(): + with TemporaryDirectory() as tmp: + warning_file = Path(tmp) / "warning.yml" + warning_file.write_text( + "---\n" + "question: Hello\n" + "fields:\n" + " - Preferred salutation: preferred_salutation\n" + " - Follow up: follow_up\n" + " show if:\n" + ' code: preferred_salutation == "Ms."\n', + encoding="utf-8", + ) + + result = _run_dayaml("check", str(warning_file)) + + assert result.returncode == 1 + assert "errors (1)" in result.stdout + assert "[E410]" in result.stdout + + +def test_dayaml_check_format_on_success_reformats_file(): + with TemporaryDirectory() as tmp: + interview = Path(tmp) / "interview.yml" + interview.write_text("mandatory: True\ncode: |\n x=1\n", encoding="utf-8") + + result = _run_dayaml( + "check", "--format-on-success", "--no-url-check", str(interview) + ) + + assert result.returncode == 0 + assert "reformatted:" in result.stdout + assert "interview.yml" in result.stdout + assert ( + interview.read_text(encoding="utf-8") + == "mandatory: True\ncode: |\n x = 1\n" + ) + + +def test_dayaml_format_check_flag_does_not_write(): + with TemporaryDirectory() as tmp: + interview = Path(tmp) / "interview.yml" + original = "---\ncode: |\n x=1\n" + interview.write_text(original, encoding="utf-8") + + result = _run_dayaml("format", "--check", str(interview)) + + assert result.returncode != 0 + assert "Would reformat" in result.stdout + assert interview.read_text(encoding="utf-8") == original + + +def test_package_main_aliases_yaml_structure_main(): + """The package entrypoint should directly expose the checker CLI main.""" + assert package_main is checker_main diff --git a/tests/test_files.py b/tests/test_files.py new file mode 100644 index 0000000..d1383d6 --- /dev/null +++ b/tests/test_files.py @@ -0,0 +1,168 @@ +from pathlib import Path +from unittest.mock import patch + +from dayamlchecker._files import ( + DayamlProjectConfig, + _collect_dayaml_cli_args, + _collect_dayaml_ignore_codes, + _load_dayaml_project_config, + _normalize_cli_args, + _normalize_ignore_codes, +) + + +def test_normalize_ignore_codes_handles_strings_and_invalid_types() -> None: + assert _normalize_ignore_codes(" e301, e410 , ,c101 ") == frozenset( + {"E301", "E410", "C101"} + ) + assert _normalize_ignore_codes(["e101, e102", 123, " c103 "]) == frozenset( + {"E101", "E102", "C103"} + ) + assert _normalize_ignore_codes(123) == frozenset() + + +def test_normalize_cli_args_handles_strings_and_invalid_types() -> None: + assert _normalize_cli_args('--flag "two words" --count=2') == ( + "--flag", + "two words", + "--count=2", + ) + assert _normalize_cli_args({"not": "supported"}) == () + + +def test_load_dayaml_project_config_ignores_non_mapping_dayaml_section( + tmp_path: Path, +) -> None: + (tmp_path / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + "[tool]\n" + 'dayaml = "not-a-table"\n', + encoding="utf-8", + ) + + config = _load_dayaml_project_config(tmp_path) + + assert config is not None + assert config.project_root == tmp_path + assert config.yaml_path == tmp_path / "docassemble" + assert config.ignore_codes == frozenset() + assert config.cli_args == () + + +def test_load_dayaml_project_config_handles_blank_and_absolute_yaml_paths( + tmp_path: Path, +) -> None: + blank_root = tmp_path / "blank" + blank_root.mkdir() + (blank_root / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + "[tool.dayaml]\n" + 'yaml_path = " "\n', + encoding="utf-8", + ) + + blank_config = _load_dayaml_project_config(blank_root) + + assert blank_config is not None + assert blank_config.yaml_path == blank_root / "docassemble" + + absolute_root = tmp_path / "absolute" + absolute_root.mkdir() + absolute_yaml_path = tmp_path / "shared-docassemble" + (absolute_root / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + "[tool.dayaml]\n" + f'yaml_path = "{absolute_yaml_path}"\n', + encoding="utf-8", + ) + + absolute_config = _load_dayaml_project_config(absolute_root) + + assert absolute_config is not None + assert absolute_config.yaml_path == absolute_yaml_path + + +def test_collect_dayaml_helpers_skip_duplicate_projects(tmp_path: Path) -> None: + project_root = tmp_path / "project" + nested = project_root / "docassemble" / "pkg" / "data" / "questions" + nested.mkdir(parents=True) + first = nested / "first.yml" + second = nested / "second.yml" + other_root = tmp_path / "other-project" + other_nested = other_root / "docassemble" / "pkg" / "data" / "questions" + other_nested.mkdir(parents=True) + third = other_nested / "third.yml" + first.write_text("question: first\n", encoding="utf-8") + second.write_text("question: second\n", encoding="utf-8") + third.write_text("question: third\n", encoding="utf-8") + (project_root / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + "[tool.dayaml]\n" + 'ignore_codes = ["e301", "e410"]\n' + 'args = "--no-url-check --template-url-severity error"\n' + 'check_args = ["--wcag", 123, "--url-check"]\n', + encoding="utf-8", + ) + (other_root / "pyproject.toml").write_text( + '[project]\nname = "other"\nversion = "0.1.0"\n\n' + "[tool.dayaml]\n" + 'ignore_codes = "e999"\n' + 'args = ["--strict"]\n', + encoding="utf-8", + ) + + paths = [first, second, third] + + assert _collect_dayaml_ignore_codes(paths) == frozenset({"E301", "E410", "E999"}) + assert _collect_dayaml_cli_args(paths) == ( + "--no-url-check", + "--template-url-severity", + "error", + "--wcag", + "--url-check", + "--strict", + ) + + +def test_collect_dayaml_helpers_continue_after_missing_project_config( + tmp_path: Path, +) -> None: + first_path = tmp_path / "first.yml" + second_path = tmp_path / "second.yml" + first_path.write_text("question: first\n", encoding="utf-8") + second_path.write_text("question: second\n", encoding="utf-8") + + missing_project = tmp_path / "missing-project" + loaded_project = tmp_path / "loaded-project" + config = DayamlProjectConfig( + project_root=loaded_project, + yaml_path=loaded_project / "docassemble", + ignore_codes=frozenset({"E777"}), + cli_args=("--from-config",), + ) + + with ( + patch( + "dayamlchecker._files._find_nearest_pyproject_dir", + side_effect=[missing_project, loaded_project], + ), + patch( + "dayamlchecker._files._load_dayaml_project_config", + side_effect=[None, config], + ), + ): + assert _collect_dayaml_ignore_codes([first_path, second_path]) == frozenset( + {"E777"} + ) + + with ( + patch( + "dayamlchecker._files._find_nearest_pyproject_dir", + side_effect=[missing_project, loaded_project], + ), + patch( + "dayamlchecker._files._load_dayaml_project_config", + side_effect=[None, config], + ), + ): + assert _collect_dayaml_cli_args([first_path, second_path]) == ("--from-config",) diff --git a/tests/test_yaml_structure.py b/tests/test_yaml_structure.py index 3598c4b..216d5ea 100644 --- a/tests/test_yaml_structure.py +++ b/tests/test_yaml_structure.py @@ -1,7 +1,94 @@ import unittest from pathlib import Path from tempfile import TemporaryDirectory -from dayamlchecker.yaml_structure import RuntimeOptions, find_errors_from_string +from unittest.mock import patch + +import jinja2 + +from dayamlchecker._jinja import JinjaError, _SilentUndefined, preprocess_jinja +from dayamlchecker.messages import ( + MESSAGE_DEFINITIONS, + MessageCode, + format_message, + is_experimental_code, +) +from dayamlchecker.yaml_structure import ( + RuntimeOptions, + YAMLError, + _lc_key_line, + _message_severity, + _variable_candidates, + find_errors_from_string, +) + +MESSAGE_CODE_PATTERN = r"^[EWC]\d{3}$" +LARGE_INVALID_INTERVIEW_FIXTURE = ( + Path(__file__).parent / "fixtures" / "large_invalid_interview.yml" +) +LARGE_INVALID_JINJA_SYNTAX_FIXTURE = ( + Path(__file__).parent / "fixtures" / "large_invalid_jinja_syntax.yml" +) +LARGE_INVALID_JINJA_TEMPLATE_FIXTURE = ( + Path(__file__).parent / "fixtures" / "large_invalid_jinja_template.yml" +) +LARGE_VALID_INTERVIEW_FIXTURE = ( + Path(__file__).parent / "fixtures" / "large_valid_interview.yml" +) +ALL_ERROR_CODE_FIXTURES = ( + LARGE_INVALID_INTERVIEW_FIXTURE, + LARGE_INVALID_JINJA_SYNTAX_FIXTURE, + LARGE_INVALID_JINJA_TEMPLATE_FIXTURE, +) +ACCESSIBILITY_ERROR_CODES = { + MessageCode.ACCESSIBILITY_COMBOBOX_NOT_ACCESSIBLE, + MessageCode.ACCESSIBILITY_NO_LABEL_MULTI_FIELD, + MessageCode.ACCESSIBILITY_TAGGED_PDF_NOT_ENABLED, + MessageCode.ACCESSIBILITY_THEME_CONTRAST_TOO_LOW, + MessageCode.ACCESSIBILITY_IMAGE_MISSING_ALT_TEXT, + MessageCode.ACCESSIBILITY_MARKDOWN_HEADING_LEVEL_SKIP, + MessageCode.ACCESSIBILITY_HTML_HEADING_LEVEL_SKIP, + MessageCode.ACCESSIBILITY_EMPTY_LINK_TEXT, + MessageCode.ACCESSIBILITY_NON_DESCRIPTIVE_LINK_TEXT, +} + + +def test_message_severity_handles_none_convention_and_unknown_codes(): + assert _message_severity(None) == "error" + assert _message_severity("W101") == "warning" + assert _message_severity("C101") == "convention" + assert _message_severity("Z999") == "error" + + +def test_yaml_error_severity_uses_warning_prefix_without_code(): + err = YAMLError(err_str="Warning: heads up", line_number=1, file_name="test.yml") + + assert err.severity == "warning" + + +def test_yaml_error_severity_uses_info_prefix_without_code(): + err = YAMLError(err_str="Info: note", line_number=1, file_name="test.yml") + + assert err.severity == "convention" + + +def test_yaml_error_severity_defaults_to_error_without_code_or_prefix(): + err = YAMLError(err_str="bad key", line_number=1, file_name="test.yml") + + assert err.severity == "error" + + +def test_lc_key_line_uses_key_specific_metadata_when_available(): + class Location: + line = 2 + + def key(self, key): + assert key == "question" + return (6, 0) + + class Node: + lc = Location() + + assert _lc_key_line(Node(), "question") == 7 class TestYAMLStructure(unittest.TestCase): @@ -23,7 +110,11 @@ def test_question_and_template_exclusive_error(self): """ errs = find_errors_from_string(invalid, input_file="") self.assertTrue( - any("Too many types this block could be" in e.err_str for e in errs), + any( + e.code == MessageCode.TOO_MANY_TYPES + and "Too many types this block could be" in e.err_str + for e in errs + ), f"Expected exclusivity error, got: {errs}", ) @@ -84,8 +175,11 @@ def test_duplicate_key_error(self): ) self.assertTrue( any( - "duplicate key" in e.err_str.lower() - or "found duplicate key" in e.err_str.lower() + e.code == MessageCode.YAML_DUPLICATE_KEY + and ( + "duplicate key" in e.err_str.lower() + or "found duplicate key" in e.err_str.lower() + ) for e in errs ), f"Expected duplicate key error, got: {errs}", @@ -122,6 +216,14 @@ def test_accessibility_markdown_image_missing_alt_text(self): ), f"Expected markdown alt text accessibility error, got: {errs}", ) + self.assertTrue( + any( + e.code == MessageCode.ACCESSIBILITY_IMAGE_MISSING_ALT_TEXT + and e.severity == "error" + for e in errs + ), + f"Expected coded accessibility error, got: {errs}", + ) def test_accessibility_file_tag_missing_alt_text(self): yaml_content = """question: | @@ -358,6 +460,21 @@ def test_accessibility_mode_still_reports_yaml_parse_errors(self): f"Expected YAML parse error in accessibility mode, got: {errs}", ) + def test_accessibility_mode_ignores_non_mapping_yaml_documents(self): + yaml_content = """--- +- item one +- item two +--- +question: | + Valid mapping +""" + errs = find_errors_from_string( + yaml_content, + input_file="", + lint_mode="accessibility", + ) + self.assertEqual(errs, []) + def test_accessibility_combobox_field_disabled_by_default(self): yaml_content = """question: | Pick one @@ -536,6 +653,75 @@ def test_accessibility_no_label_fails_multi_field_screen(self): ), f"Expected multi-field no-label accessibility error, got: {errs}", ) + self.assertTrue( + any( + e.code == MessageCode.ACCESSIBILITY_NO_LABEL_MULTI_FIELD + and e.severity == "error" + for e in errs + ), + f"Expected coded no-label accessibility error, got: {errs}", + ) + + def test_accessibility_no_label_uses_field_variable_in_message(self): + yaml_content = """question: | + Enter information +fields: + - First name: user_first + - no label: court_county +""" + errs = find_errors_from_string( + yaml_content, + input_file="", + lint_mode="accessibility", + ) + + self.assertTrue( + any( + e.code == MessageCode.ACCESSIBILITY_NO_LABEL_MULTI_FIELD + and "court_county" in e.err_str + for e in errs + ), + f"Expected no-label accessibility error to name field variable, got: {errs}", + ) + + def test_accessibility_code_only_field_allowed_multi_field_screen(self): + yaml_content = """question: | + Enter information +fields: + - code: | + dynamic_fields + - First name: user_first +""" + errs = find_errors_from_string( + yaml_content, + input_file="", + lint_mode="accessibility", + ) + self.assertFalse( + any("single-field screens" in e.err_str.lower() for e in errs), + f"Did not expect code-only field accessibility error, got: {errs}", + ) + + def test_accessibility_field_with_code_and_no_label_still_fails_multi_field_screen( + self, + ): + yaml_content = """question: | + Enter information +fields: + - First name: user_first + - code: | + dynamic_fields + datatype: checkboxes +""" + errs = find_errors_from_string( + yaml_content, + input_file="", + lint_mode="accessibility", + ) + self.assertTrue( + any("single-field screens" in e.err_str.lower() for e in errs), + f"Expected non-code-only missing-label accessibility error, got: {errs}", + ) def test_accessibility_empty_label_fails_multi_field_screen(self): yaml_content = """question: | @@ -573,7 +759,7 @@ def test_accessibility_no_label_false_does_not_count_as_label(self): f"Expected missing-label accessibility error, got: {errs}", ) - def test_accessibility_tagged_pdf_info_for_docx_without_setting(self): + def test_accessibility_tagged_pdf_error_for_docx_without_setting(self): yaml_content = """attachments: - name: Letter docx template file: letter_template.docx @@ -585,11 +771,12 @@ def test_accessibility_tagged_pdf_info_for_docx_without_setting(self): ) self.assertTrue( any( - e.err_str.lower().startswith("info:") + e.code == MessageCode.ACCESSIBILITY_TAGGED_PDF_NOT_ENABLED + and e.severity == "error" and "docx attachment detected" in e.err_str.lower() for e in errs ), - f"Expected tagged-pdf info note, got: {errs}", + f"Expected tagged-pdf accessibility error, got: {errs}", ) def test_accessibility_tagged_pdf_true_in_features_suppresses_info(self): @@ -749,7 +936,11 @@ def test_js_show_if_val_references_unknown_field(self): """ errs = find_errors_from_string(invalid, input_file="") self.assertTrue( - any("not defined on this screen" in e.err_str.lower() for e in errs), + any( + e.code == MessageCode.JS_UNKNOWN_SCREEN_FIELD + and "not defined on this screen" in e.err_str.lower() + for e in errs + ), f"Expected unknown field error, got: {errs}", ) @@ -772,7 +963,7 @@ def test_js_show_if_unknown_field_with_dynamic_fields_code_warns(self): errs = find_errors_from_string(warning_yaml, input_file="") self.assertTrue( any( - e.err_str.lower().startswith("warning:") + e.code == MessageCode.JS_UNKNOWN_SCREEN_FIELD and "unable to fully validate screen variables" in e.err_str.lower() for e in errs ), @@ -1009,7 +1200,11 @@ def test_show_if_variable_not_on_screen(self): """ errs = find_errors_from_string(invalid, input_file="") self.assertTrue( - any("not defined on this screen" in e.err_str.lower() for e in errs), + any( + e.code == MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_STRING + and "not defined on this screen" in e.err_str.lower() + for e in errs + ), f"Expected 'not defined on screen' error, got: {errs}", ) @@ -1068,7 +1263,11 @@ def test_hide_if_variable_not_on_screen(self): """ errs = find_errors_from_string(invalid, input_file="") self.assertTrue( - any("not defined on this screen" in e.err_str.lower() for e in errs), + any( + e.code == MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_STRING + and "not defined on this screen" in e.err_str.lower() + for e in errs + ), f"Expected 'not defined on screen' error, got: {errs}", ) @@ -1105,7 +1304,8 @@ def test_enable_if_variable_not_on_screen(self): errs = find_errors_from_string(invalid, input_file="") self.assertTrue( any( - "enable if" in e.err_str.lower() + e.code == MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_STRING + and "enable if" in e.err_str.lower() and "not defined on this screen" in e.err_str.lower() for e in errs ), @@ -1124,7 +1324,8 @@ def test_disable_if_variable_not_on_screen(self): errs = find_errors_from_string(invalid, input_file="") self.assertTrue( any( - "disable if" in e.err_str.lower() + e.code == MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_STRING + and "disable if" in e.err_str.lower() and "not defined on this screen" in e.err_str.lower() for e in errs ), @@ -1393,7 +1594,11 @@ def test_validation_code_missing_validation_error_warns(self): """ errs = find_errors_from_string(invalid, input_file="") self.assertTrue( - any("does not call validation_error" in e.err_str.lower() for e in errs), + any( + e.code == MessageCode.VALIDATION_CODE_MISSING_VALIDATION_ERROR + and "does not call validation_error" in e.err_str.lower() + for e in errs + ), f"Expected missing validation_error warning, got: {errs}", ) @@ -1457,18 +1662,39 @@ def test_fields_code_dict_valid(self): continue button field: interrogatory_questions """ errs = find_errors_from_string(valid, input_file="") - field_errors = [ - e - for e in errs - if "fields should be a list" in e.err_str.lower() - or "fields dict must have" in e.err_str.lower() - ] + field_errors = [e for e in errs if e.code == MessageCode.FIELDS_DICT_KEYS] self.assertEqual( len(field_errors), 0, f"Expected no fields-shape errors, got: {field_errors}", ) + def test_fields_single_field_dict_shorthand_errors_with_list_guidance(self): + """Error: a single field under fields must still be written as a YAML list item.""" + invalid = """ +question: | + What venue? +fields: + label: no label + field: M.venue.type + input type: radio + choices: + - label: ":building: Administrative" + value: admin + - label: ":landmark: County Circuit Court" + value: circuit +""" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue( + any( + e.code == MessageCode.FIELDS_DICT_KEYS + and "fields must be a yaml list of field items" in e.err_str.lower() + and "single field written as a dict" in e.err_str.lower() + for e in errs + ), + f"Expected fields bare-dict guidance error, got: {errs}", + ) + def test_fields_mako_templates_valid(self): """Error: fields with mako in them are checked.""" valid = """ @@ -1482,110 +1708,1656 @@ def test_fields_mako_templates_valid(self): continue button field: interrogatory_questions """ errs = find_errors_from_string(valid, input_file="") - field_errors = [ - e - for e in errs - if "default value has (indentationerror)" in e.err_str.lower() - ] + field_errors = [e for e in errs if "default value has" in e.err_str.lower()] self.assertEqual( len(field_errors), 1, - f"Expected one mako field errors, got: {field_errors}", + f"Expected one mako field error, got: {field_errors}", ) - def test_interview_order_reference_without_matching_guard_errors(self): - """Error when interview-order style code references a conditionally shown field without guard""" + def test_object_checkboxes_choices_code_dict_errors(self): + """Error: object-style fields must use a direct choices expression, not choices: {code: ...}.""" invalid = """ question: | - Eviction details + Choose enclosures fields: - - Reason: eviction_reason + - no label: s3_enclosure_object.selected_enclosures + datatype: object_checkboxes choices: - - Nonpayment - - Other - - Other details: other_details - show if: - variable: eviction_reason - is: Other ---- -id: interview_order -mandatory: True -code: | - other_details + code: | + s3_enclosure_object.choices() """ errs = find_errors_from_string(invalid, input_file="") self.assertTrue( - any( - 'references "other_details" without a matching guard' - in e.err_str.lower() - for e in errs - ), - f"Expected interview-order guard error, got: {errs}", + any(e.code == MessageCode.OBJECT_FIELD_CHOICES_CODE_DICT for e in errs), + f"Expected object choices code-dict error, got: {errs}", ) - def test_interview_order_reference_with_matching_guard_valid(self): - """No error when interview-order style code guards conditional field usage""" + def test_object_checkboxes_choices_direct_expression_valid(self): valid = """ question: | - Eviction details + Choose enclosures fields: - - Reason: eviction_reason - choices: - - Nonpayment - - Other - - Other details: other_details - show if: - variable: eviction_reason - is: Other ---- -id: interview_order -mandatory: True -code: | - if eviction_reason == "Other": - other_details + - no label: s3_enclosure_object.selected_enclosures + datatype: object_checkboxes + choices: s3_enclosure_object.choices() """ errs = find_errors_from_string(valid, input_file="") self.assertFalse( - any("without a matching guard" in e.err_str.lower() for e in errs), - f"Expected no interview-order guard error, got: {errs}", + any(e.code == MessageCode.OBJECT_FIELD_CHOICES_CODE_DICT for e in errs), + f"Did not expect object choices code-dict error, got: {errs}", ) - def test_interview_order_reference_with_showifdef_guard_valid(self): - """No error when interview-order code uses showifdef('') as guard""" - valid = """ + # -- _variable_candidates coverage -- + + def test_variable_candidates_empty_string(self): + """_variable_candidates with an empty/whitespace string returns empty set.""" + result = _variable_candidates(" ") + self.assertEqual(result, set()) + + def test_variable_candidates_indexed_path(self): + """_variable_candidates strips trailing brackets iteratively.""" + result = _variable_candidates('children[i].parents["Other"]') + self.assertIn("children[i].parents", result) + self.assertIn("children", result) + + # -- PythonText non-string input (lines 157–158) -- + + def test_python_code_block_non_string_error(self): + """Error: code block value must be a YAML string.""" + invalid = """ +code: + - some_list_item +""" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue( + any("code block must be a yaml string" in e.err_str.lower() for e in errs), + f"Expected code-block type error, got: {errs}", + ) + + # -- ValidationCode with raise/assert suppresses warning (lines 216–217) -- + + def test_validation_code_with_raise_still_warns(self): + """Validation code that only raises (no validation_error) should still warn.""" + yaml_str = """ question: | - Eviction details + Test fields: - - Reason: eviction_reason - choices: - - Nonpayment - - Other - - Other details: other_details + - Apples: apples + datatype: integer +validation code: | + if apples < 0: + raise Exception("negative") +""" + errs = find_errors_from_string(yaml_str, input_file="") + self.assertTrue( + any("does not call validation_error" in e.err_str.lower() for e in errs), + f"Expected missing validation_error warning for raise-only code, got: {errs}", + ) + + # -- ShowIf malformed string (lines 366–369) -- + + def test_show_if_malformed_string_variable_colon(self): + """Error: 'show if: variable:foo' as a plain string is malformed.""" + from dayamlchecker.yaml_structure import ShowIf + + validator = ShowIf("variable:a") + self.assertTrue( + any( + "appears to be malformed" in str(e[0]).lower() for e in validator.errors + ), + f"Expected malformed show if error, got: {validator.errors}", + ) + + def test_show_if_malformed_string_code_colon(self): + """Error: 'show if: code:True' as a plain string is malformed.""" + from dayamlchecker.yaml_structure import ShowIf + + validator = ShowIf("code:True") + self.assertTrue( + any( + "appears to be malformed" in str(e[0]).lower() for e in validator.errors + ), + f"Expected malformed show if error, got: {validator.errors}", + ) + + # -- ShowIf dict missing variable/code keys (line ~399) -- + + def test_show_if_dict_missing_variable_and_code(self): + """Error: show if dict with neither 'variable' nor 'code' key.""" + invalid = """ +question: Test +fields: + - First: a + - Second: b show if: - variable: eviction_reason - is: Other + unknown_key: something +""" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue( + any("must have either" in e.err_str.lower() for e in errs), + f"Expected show if dict key error, got: {errs}", + ) + + # -- ShowIf code: non-string value -- + + def test_show_if_code_non_string_error(self): + """Error: show if: code must be a YAML string.""" + invalid = """ +question: Test +fields: + - First: a + - Second: b + show if: + code: + - a + - b +""" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue( + any( + "code block must be a yaml string" in e.err_str.lower() + or "code must be a yaml string" in e.err_str.lower() + for e in errs + ), + f"Expected show if code type error, got: {[e.err_str for e in errs]}", + ) + + # -- DAPythonVar non-string / whitespace (lines 399–400) -- + + def test_field_var_with_whitespace_error(self): + """Error: a field variable name with spaces should be flagged.""" + invalid = """ +question: Test +field: some var name +""" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue( + any("whitespace" in e.err_str.lower() for e in errs), + f"Expected whitespace error for python var, got: {errs}", + ) + + # -- Nesting depth warning (line ~1580) -- + + def test_deeply_nested_show_if_warns(self): + """Warning when show if nesting depth exceeds 2.""" + deep = """ +question: Test +fields: + - A: a + datatype: yesnoradio + - B: b + datatype: yesnoradio + show if: a + - C: c + datatype: yesnoradio + show if: b + - D: d + show if: c +""" + errs = find_errors_from_string(deep, input_file="") + self.assertTrue( + any( + "nested" in e.err_str.lower() and "levels" in e.err_str.lower() + for e in errs + ), + f"Expected nesting depth warning, got: {errs}", + ) + + # -- Interview-order unmatched guard reference -- + + def test_interview_order_unmatched_guard_warning(self): + """Warning when interview-order code references conditional field without guard.""" + yaml_str = """ +question: Test +fields: + - First: a + datatype: yesnoradio + - Second: b + show if: a --- -id: interview_order mandatory: True code: | - if showifdef("other_details") and other_details: - pass + a + b """ - errs = find_errors_from_string(valid, input_file="") - self.assertFalse( + errs = find_errors_from_string(yaml_str, input_file="") + self.assertTrue( any("without a matching guard" in e.err_str.lower() for e in errs), - f"Expected no interview-order guard error with showifdef guard, got: {errs}", + f"Expected interview-order guard warning, got: {errs}", ) - def test_interview_order_all_conditional_modifiers_without_guard_error(self): - """Each conditional modifier should trigger interview-order guard mismatch when unguarded""" - cases = [ - ("show if", "eviction_reason == 'Other'"), - ("hide if", "eviction_reason == 'Other'"), - ("enable if", "eviction_reason == 'Other'"), - ("disable if", "eviction_reason == 'Other'"), - ("js show if", 'val("eviction_reason") === "Other"'), - ("js hide if", 'val("eviction_reason") === "Other"'), + # -- Unknown block keys -- + + def test_unknown_key_error(self): + """Error: keys that shouldn't exist are flagged.""" + invalid = """ +not_a_real_key: hello +another_bad_key: world +""" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue( + any("keys that shouldn't exist" in e.err_str.lower() for e in errs), + f"Expected unknown key error, got: {errs}", + ) + + # -- Non-string YAML key -- + + def test_non_string_key_error(self): + """Error: boolean/numeric keys are flagged as unexpected.""" + invalid = """ +True: hello +question: test +""" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue( + any("keys that shouldn't exist" in e.err_str.lower() for e in errs), + f"Expected non-string key error, got: {errs}", + ) + + # -- Enable/Disable if with code (touching lines ~537, ~588–589) -- + + def test_js_disable_if_valid(self): + """Valid: js disable if with proper val() call.""" + valid = """ +question: Test +fields: + - Watcher: watches + datatype: yesnoradio + - Show: show + js disable if: | + val("watches") === false +""" + errs = find_errors_from_string(valid, input_file="") + js_errors = [e for e in errs if "js disable if" in e.err_str.lower()] + self.assertEqual( + len(js_errors), 0, f"Expected no js disable if errors, got: {js_errors}" + ) + + def test_js_enable_if_references_unknown_field(self): + """Error: js enable if val() references a field not on this screen.""" + invalid = """ +question: Test +fields: + - Show: show + js enable if: | + val("nonexistent") === true +""" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue( + any("not defined on this screen" in e.err_str.lower() for e in errs), + f"Expected unknown field error, got: {errs}", + ) + + # -- YAML syntax error (MarkedYAMLError) -- + + def test_yaml_syntax_error_in_second_document(self): + """A MarkedYAMLError in one document is reported without crashing.""" + invalid = "---\nquestion: Hello\nfield: x\n---\nbad: [unclosed\n" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue( + any( + "parsing" in e.err_str.lower() or "flow" in e.err_str.lower() + for e in errs + ), + f"Expected YAML parse error, got: {errs}", + ) + + # -- Too many exclusive types -- + + def test_too_many_exclusive_types_error(self): + """Error when a block has multiple exclusive top-level keys.""" + invalid = """ +question: Hello +template: something +""" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue( + any("too many types" in e.err_str.lower() for e in errs), + f"Expected too-many-types error, got: {errs}", + ) + + # -- MakoText CompileException (lines 106–107) -- + + def test_mako_compile_error_in_subquestion(self): + """A Mako compile error in a subquestion is reported.""" + invalid = """ +question: Hello +subquestion: | + ${invalid mako +field: x +""" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue(len(errs) > 0, f"Expected mako error, got: {errs}") + + +class TestJinjaHandling(unittest.TestCase): + """Valid Jinja files (with '# use jinja' header) are pre-processed through + Jinja2 and the rendered output is passed to the structure checker. + Files with Jinja syntax but NO header are flagged as errors. + """ + + def test_valid_jinja_file_no_structure_errors(self): + """A valid Jinja file renders to checkable YAML and produces no errors.""" + content = "# use jinja\n---\nquestion: Hello {{ user }}\n" + errs = find_errors_from_string(content, input_file="") + self.assertEqual( + errs, [], f"Expected no errors for valid Jinja file, got: {errs}" + ) + + def test_valid_jinja_with_block_tags_no_errors(self): + """Jinja if-block with undefined (falsy) condition renders to empty YAML — no errors.""" + content = "# use jinja\n{% if condition %}\nquestion: test\n{% endif %}\n" + errs = find_errors_from_string(content, input_file="") + self.assertEqual(errs, []) + + def test_jinja_variable_without_header_no_jinja_error(self): + # {{ }} inside a YAML value is valid YAML; no Jinja-specific error. + content = "---\nquestion: Hello {{ user }}\n" + errs = find_errors_from_string(content, input_file="") + jinja_errs = [e for e in errs if "# use jinja" in e.err_str] + self.assertEqual(jinja_errs, []) + + def test_jinja_block_tag_without_header_yaml_parse_error(self): + # {% %} on its own line is not valid YAML; a regular parse error is expected. + content = "---\n{% if condition %}\nquestion: test\n{% endif %}\n" + errs = find_errors_from_string(content, input_file="") + self.assertGreater(len(errs), 0) + # Should be a YAML parse error, not a Jinja header error. + jinja_header_errs = [e for e in errs if "# use jinja" in e.err_str] + self.assertEqual(jinja_header_errs, []) + + def test_plain_yaml_no_jinja_errors(self): + """Regular YAML without Jinja should not trigger any Jinja-related error.""" + content = "---\nquestion: Hello world\n" + errs = find_errors_from_string(content, input_file="") + jinja_errs = [e for e in errs if "jinja" in e.err_str.lower()] + self.assertEqual(jinja_errs, []) + + def test_leading_whitespace_before_header_not_valid(self): + """Leading space means '# use jinja' is not recognised; file is parsed as + normal YAML and {{ }} in a value is valid — no errors expected.""" + content = " # use jinja\n---\nquestion: Hello {{ user }}\n" + errs = find_errors_from_string(content, input_file="") + jinja_header_errs = [e for e in errs if "# use jinja" in e.err_str] + self.assertEqual(jinja_header_errs, []) + + def test_jinja_syntax_error_returns_error(self): + """A Jinja2 template syntax error (e.g. unclosed block) surfaces as a YAMLError.""" + # '{% if %}' with no condition is a Jinja2 TemplateSyntaxError. + content = "# use jinja\n---\nquestion: {% if %}\n" + errs = find_errors_from_string(content, input_file="") + self.assertGreater(len(errs), 0) + self.assertTrue( + any("jinja2" in e.err_str.lower() for e in errs), + f"Expected a Jinja2 syntax error, got: {errs}", + ) + + def test_jinja_invalid_structure_after_render_returns_error(self): + """After rendering, structure errors in the resulting YAML are still caught.""" + # An unknown top-level key should produce a structure error. + content = "# use jinja\n---\nnot_a_real_key: {{ value }}\n" + errs = find_errors_from_string(content, input_file="") + self.assertGreater(len(errs), 0) + + def test_jinja_error_line_numbers_account_for_header(self): + """Line numbers in errors should reflect the original file, including + the '# use jinja' header on line 1.""" + # The structure error is on line 3 of the original file, but the + # block-level error ("No possible types found") is attributed to the + # document separator on line 2 — consistent with how non-jinja files + # report block errors at the '---' line. + content = "# use jinja\n---\nnot_a_real_key: hello\n" + errs = find_errors_from_string(content, input_file="") + self.assertGreater(len(errs), 0) + # Without the offset fix the error would report line 1 (the stripped + # header). With the fix it should be >= 2 (the '---' separator). + for err in errs: + self.assertGreaterEqual( + err.line_number, + 2, + f"Expected line >= 2 for error after header, got {err.line_number}", + ) + + +class TestPreprocessJinja(unittest.TestCase): + """Unit tests for the preprocess_jinja() function itself.""" + + def test_simple_variable_renders_empty(self): + """Undefined variables render as empty string.""" + rendered, errors = preprocess_jinja( + "# use jinja\n---\nquestion: Hello {{ user }}\n" + ) + self.assertEqual(errors, []) + self.assertIn("question: Hello ", rendered) + self.assertNotIn("{{ user }}", rendered) + + def test_known_literal_renders_correctly(self): + """Hard-coded Jinja expressions render to their value.""" + rendered, errors = preprocess_jinja( + "# use jinja\n---\nquestion: {{ 'world' }}\n" + ) + self.assertEqual(errors, []) + self.assertIn("question: world", rendered) + + def test_conditional_false_omits_block(self): + """An undefined (falsy) condition causes the if-block to be omitted.""" + rendered, errors = preprocess_jinja( + "# use jinja\n{% if show_block %}\nquestion: test\n{% endif %}\n" + ) + self.assertEqual(errors, []) + self.assertNotIn("question: test", rendered) + + def test_conditional_true_includes_block(self): + """A truthy hard-coded condition includes the block.""" + rendered, errors = preprocess_jinja( + "# use jinja\n{% if true %}\nquestion: test\n{% endif %}\n" + ) + self.assertEqual(errors, []) + self.assertIn("question: test", rendered) + + def test_chained_attribute_renders_empty(self): + """Chained attribute access on an undefined variable renders as empty string.""" + rendered, errors = preprocess_jinja( + "# use jinja\n---\nquestion: {{ user.first_name }}\n" + ) + self.assertEqual(errors, []) + self.assertNotIn("{{", rendered) + + def test_syntax_error_returns_error_list(self): + """A Jinja2 template syntax error is captured and returned, not raised.""" + _, errors = preprocess_jinja("# use jinja\n---\nquestion: {% if %}\n") + self.assertGreater(len(errors), 0) + self.assertIsInstance(errors[0], JinjaError) + self.assertEqual(errors[0].code, MessageCode.JINJA2_SYNTAX_ERROR) + self.assertIn("syntax error", errors[0].message.lower()) + + def test_template_runtime_error_returns_error_list(self): + """A TemplateError raised during render() is caught and returned, not raised.""" + with patch.object( + jinja2.Template, + "render", + side_effect=jinja2.exceptions.TemplateRuntimeError("boom"), + ): + content = "# use jinja\n---\nquestion: test\n" + rendered, errors = preprocess_jinja(content) + self.assertEqual(rendered, content) + self.assertEqual(len(errors), 1) + self.assertIsInstance(errors[0], JinjaError) + self.assertEqual(errors[0].code, MessageCode.JINJA2_TEMPLATE_ERROR) + self.assertIn("Jinja2 template error", errors[0].message) + + def test_for_loop_over_undefined_renders_empty(self): + """Iterating over an undefined variable produces no output (covers __iter__).""" + rendered, errors = preprocess_jinja( + "# use jinja\n{% for x in items %}{{ x }}{% endfor %}\n" + ) + self.assertEqual(errors, []) + # 'items' is undefined -> __iter__ returns iter([]) -> loop body never runs + self.assertNotIn("items", rendered) + + def test_length_filter_on_undefined_is_zero(self): + """The 'length' filter on an undefined variable returns 0 (covers __len__).""" + rendered, errors = preprocess_jinja( + "# use jinja\n---\ncount: {{ undef | length }}\n" + ) + self.assertEqual(errors, []) + self.assertIn("count: 0", rendered) + + def test_subscript_on_undefined_renders_empty(self): + """Subscript access on an undefined variable renders empty (covers __getitem__).""" + rendered, errors = preprocess_jinja( + "# use jinja\n---\nquestion: {{ data[0] }}\n" + ) + self.assertEqual(errors, []) + self.assertNotIn("{{", rendered) + + def test_call_on_undefined_renders_empty(self): + """Calling an undefined variable renders empty (covers __call__).""" + rendered, errors = preprocess_jinja( + "# use jinja\n---\nquestion: {{ some_macro() }}\n" + ) + self.assertEqual(errors, []) + self.assertNotIn("{{", rendered) + + def test_header_line_preserved(self): + """The '# use jinja' header line passes through rendering unchanged.""" + rendered, _ = preprocess_jinja("# use jinja\n---\nquestion: test\n") + self.assertTrue(rendered.startswith("# use jinja\n")) + + +class TestSilentUndefined(unittest.TestCase): + """Direct unit tests for _SilentUndefined, which is used by preprocess_jinja + to allow static analysis of Jinja files whose runtime variables are unknown. + """ + + def setUp(self): + self.u = _SilentUndefined() + + def test_str_returns_empty_string(self): + self.assertEqual(str(self.u), "") + + def test_iter_returns_empty_iterator(self): + self.assertEqual(list(self.u), []) + + def test_len_returns_zero(self): + self.assertEqual(len(self.u), 0) + + def test_public_attribute_returns_silent_undefined(self): + result = self.u.some_attribute + self.assertIsInstance(result, _SilentUndefined) + + def test_private_attribute_raises_attribute_error(self): + """Attributes starting with '_' raise AttributeError to avoid confusing + Python's own protocol checks (e.g. __iter__, __len__ lookups).""" + with self.assertRaises(AttributeError): + _ = self.u._private # single leading underscore triggers the guard + + def test_getitem_returns_silent_undefined(self): + result = self.u[0] + self.assertIsInstance(result, _SilentUndefined) + + def test_getitem_string_key_returns_silent_undefined(self): + result = self.u["key"] + self.assertIsInstance(result, _SilentUndefined) + + def test_call_returns_silent_undefined(self): + result = self.u() + self.assertIsInstance(result, _SilentUndefined) + + def test_call_with_args_returns_silent_undefined(self): + result = self.u(1, 2, key="value") + self.assertIsInstance(result, _SilentUndefined) + + +class TestYAMLStr(unittest.TestCase): + """Tests for the YAMLStr validator.""" + + def test_valid_string_no_errors(self): + from dayamlchecker.yaml_structure import YAMLStr + + v = YAMLStr("hello world") + self.assertEqual(v.errors, []) + + def test_non_string_produces_error(self): + from dayamlchecker.yaml_structure import YAMLStr + + v = YAMLStr(42) + self.assertEqual(len(v.errors), 1) + self.assertIn("isn't a string", v.errors[0][0]) + + +class TestMakoText(unittest.TestCase): + """Tests for the MakoText validator.""" + + def test_valid_mako_string_no_errors(self): + from dayamlchecker.yaml_structure import MakoText + + v = MakoText("Hello ${name}") + self.assertEqual(v.errors, []) + + def test_invalid_mako_syntax_error(self): + from dayamlchecker.yaml_structure import MakoText + + # '${' without closing '}' is a Mako SyntaxException + v = MakoText("Hello ${unclosed") + self.assertGreater(len(v.errors), 0) + + +class TestPythonText(unittest.TestCase): + """Tests for the PythonText validator.""" + + def test_non_string_produces_error(self): + from dayamlchecker.yaml_structure import PythonText + + v = PythonText(123) + self.assertEqual(len(v.errors), 1) + self.assertIn("must be a YAML string", v.errors[0][0]) + + def test_valid_python_no_errors(self): + from dayamlchecker.yaml_structure import PythonText + + v = PythonText("x = 1\ny = 2\n") + self.assertEqual(v.errors, []) + + def test_invalid_python_syntax_error(self): + from dayamlchecker.yaml_structure import PythonText + + v = PythonText("if True\n x = 1\n") + self.assertEqual(len(v.errors), 1) + self.assertIn("Python syntax error", v.errors[0][0]) + + +class TestObjectsAttrType(unittest.TestCase): + """Tests for the ObjectsAttrType validator.""" + + def test_valid_list(self): + from dayamlchecker.yaml_structure import ObjectsAttrType + + v = ObjectsAttrType([{"MyObj": "DAObject"}]) + self.assertEqual(v.errors, []) + + def test_valid_dict(self): + from dayamlchecker.yaml_structure import ObjectsAttrType + + v = ObjectsAttrType({"MyObj": "DAObject"}) + self.assertEqual(v.errors, []) + + def test_invalid_scalar_produces_error(self): + from dayamlchecker.yaml_structure import ObjectsAttrType + + v = ObjectsAttrType("MyObj: DAObject") + self.assertGreater(len(v.errors), 0) + + +class TestDAPythonVar(unittest.TestCase): + """Tests for the DAPythonVar validator.""" + + def test_valid_var(self): + from dayamlchecker.yaml_structure import DAPythonVar + + v = DAPythonVar("my_var") + self.assertEqual(v.errors, []) + + def test_non_string_produces_error(self): + from dayamlchecker.yaml_structure import DAPythonVar + + v = DAPythonVar(42) + self.assertEqual(len(v.errors), 1) + self.assertIn("needs to be a YAML string", v.errors[0][0]) + + def test_whitespace_in_var_name_produces_error(self): + from dayamlchecker.yaml_structure import DAPythonVar + + v = DAPythonVar("my var") + self.assertEqual(len(v.errors), 1) + self.assertIn("cannot have whitespace", v.errors[0][0]) + + def test_quoted_string_with_space_is_valid(self): + from dayamlchecker.yaml_structure import DAPythonVar + + # A quoted string with a space is not a plain var name but passes the check + v = DAPythonVar("'my var'") + self.assertEqual(v.errors, []) + + +class TestDAFields(unittest.TestCase): + """Tests for the DAFields validator (dict/non-list/code variants).""" + + def test_fields_dict_with_code_valid(self): + from dayamlchecker.yaml_structure import DAFields + + v = DAFields({"code": "my_fields_list"}) + self.assertEqual(v.errors, []) + + def test_fields_dict_missing_code_key_error(self): + from dayamlchecker.yaml_structure import DAFields + + # A dict with no recognised field keys AND no 'code' key is still an error. + v = DAFields({"not_code": "value"}) + self.assertEqual(len(v.errors), 1) + self.assertIn("code", v.errors[0][0]) + + def test_fields_single_field_dict_shorthand_errors(self): + """fields: bare dict shorthand should be rejected with list guidance.""" + from dayamlchecker.yaml_structure import DAFields + + v = DAFields( + { + "label": "no label", + "field": "venue_type", + "input type": "radio", + "choices": ["admin", "circuit"], + } + ) + self.assertEqual(len(v.errors), 1) + self.assertIn("fields must be a YAML list of field items", v.errors[0][0]) + self.assertIn("single field written as a dict", v.errors[0][0]) + + def test_fields_single_field_dict_with_only_field_key_errors(self): + from dayamlchecker.yaml_structure import DAFields + + v = DAFields({"field": "my_var", "datatype": "text"}) + self.assertEqual(len(v.errors), 1) + self.assertIn("fields must be a YAML list of field items", v.errors[0][0]) + + def test_fields_dict_code_non_string_error(self): + from dayamlchecker.yaml_structure import DAFields + + v = DAFields({"code": 42}) + self.assertEqual(len(v.errors), 1) + self.assertIn("must be a YAML string", v.errors[0][0]) + + def test_fields_non_list_non_dict_error(self): + from dayamlchecker.yaml_structure import DAFields + + v = DAFields("not a list") + self.assertEqual(len(v.errors), 1) + self.assertIn("should be a list or dict", v.errors[0][0]) + + def test_object_style_field_choices_code_dict_errors(self): + from dayamlchecker.yaml_structure import DAFields + + v = DAFields( + [ + { + "no label": "s3_enclosure_object.selected_enclosures", + "datatype": "object_checkboxes", + "choices": {"code": "s3_enclosure_object.choices()"}, + } + ] + ) + + self.assertTrue( + any( + err[2] == MessageCode.OBJECT_FIELD_CHOICES_CODE_DICT for err in v.errors + ), + f"Expected object choices code-dict error, got: {v.errors}", + ) + + def test_object_style_field_choices_direct_expression_valid(self): + from dayamlchecker.yaml_structure import DAFields + + v = DAFields( + [ + { + "no label": "s3_enclosure_object.selected_enclosures", + "datatype": "object_checkboxes", + "choices": "s3_enclosure_object.choices()", + } + ] + ) + + self.assertFalse( + any( + err[2] == MessageCode.OBJECT_FIELD_CHOICES_CODE_DICT for err in v.errors + ), + f"Did not expect object choices code-dict error, got: {v.errors}", + ) + + def test_fields_js_validator_invalid_error_shape_raises_value_error(self): + from dayamlchecker import yaml_structure + + class BrokenJSShowIf: + def __init__(self, *_args, **_kwargs): + self.errors = [("missing code", 1)] + + with patch.object(yaml_structure, "JSShowIf", BrokenJSShowIf): + with self.assertRaisesRegex( + ValueError, "Validator errors must be 3-tuples" + ): + yaml_structure.DAFields( + [ + { + "Favorite fruit": "fruit", + "js show if": 'val("fruit") === "apple"', + } + ] + ) + + def test_fields_python_validator_invalid_error_shape_raises_value_error(self): + from dayamlchecker import yaml_structure + + class BrokenPythonText: + def __init__(self, *_args, **_kwargs): + self.errors = [("missing code", 1)] + + with patch.object(yaml_structure, "PythonText", BrokenPythonText): + with self.assertRaisesRegex( + ValueError, "Validator errors must be 3-tuples" + ): + yaml_structure.DAFields( + [ + { + "Favorite fruit": "fruit", + "show if": {"code": "fruit == 'apple'"}, + } + ] + ) + + +class TestShowIf(unittest.TestCase): + """Tests for the ShowIf validator (non-js variants).""" + + def test_show_if_string_simple_var_no_errors(self): + from dayamlchecker.yaml_structure import ShowIf + + v = ShowIf("my_var") + self.assertEqual(v.errors, []) + + def test_show_if_malformed_string_with_colon_produces_error(self): + from dayamlchecker.yaml_structure import ShowIf + + v = ShowIf("variable: foo") + self.assertEqual(len(v.errors), 1) + self.assertIn("malformed", v.errors[0][0]) + + def test_show_if_dict_variable_key_no_errors(self): + from dayamlchecker.yaml_structure import ShowIf + + v = ShowIf({"variable": "foo", "is": "bar"}) + self.assertEqual(v.errors, []) + + def test_show_if_dict_code_key_valid_python_no_errors(self): + from dayamlchecker.yaml_structure import ShowIf + + v = ShowIf({"code": "x == 1"}) + self.assertEqual(v.errors, []) + + def test_show_if_dict_code_non_string_error(self): + from dayamlchecker.yaml_structure import ShowIf + + v = ShowIf({"code": 123}) + self.assertEqual(len(v.errors), 1) + self.assertIn("must be a YAML string", v.errors[0][0]) + + def test_show_if_dict_missing_both_keys_error(self): + from dayamlchecker.yaml_structure import ShowIf + + v = ShowIf({"some_other_key": "value"}) + self.assertEqual(len(v.errors), 1) + self.assertIn('"variable" key or "code" key', v.errors[0][0]) + + +class TestYAMLError(unittest.TestCase): + """Tests for the YAMLError class.""" + + def test_str_experimental(self): + from dayamlchecker.yaml_structure import YAMLError + + err = YAMLError( + err_str="bad key", + line_number=5, + file_name="test.yml", + code="W999", + ) + result = str(err) + self.assertIn("test.yml", result) + self.assertIn("5", result) + self.assertIn("bad key", result) + self.assertIn("[W999]", result) + self.assertNotIn("REAL ERROR", result) + + def test_str_non_experimental(self): + from dayamlchecker.yaml_structure import YAMLError + + err = YAMLError( + err_str="bad key", + line_number=5, + file_name="test.yml", + experimental=False, + code="E999", + ) + result = str(err) + self.assertIn("REAL ERROR", result) + self.assertIn("[E999]", result) + + def test_default_experimental_true(self): + from dayamlchecker.yaml_structure import YAMLError + + err = YAMLError(err_str="x", line_number=1, file_name="f.yml") + self.assertTrue(err.experimental) + + def test_code_defaults_to_none(self): + from dayamlchecker.yaml_structure import YAMLError + + err = YAMLError(err_str="x", line_number=1, file_name="f.yml") + self.assertIsNone(err.code) + + def test_format_show_experimental_false_omits_real_error(self): + from dayamlchecker.yaml_structure import YAMLError + + err = YAMLError( + err_str="bad key", + line_number=5, + file_name="test.yml", + experimental=False, + code="E999", + ) + result = err.format(show_experimental=False) + self.assertNotIn("REAL ERROR", result) + self.assertIn("[E999]", result) + self.assertIn("bad key", result) + + +class TestMessageRegistry(unittest.TestCase): + def test_message_codes_are_unique(self): + codes = list(MESSAGE_DEFINITIONS) + self.assertEqual(len(codes), len(set(codes))) + + def test_message_codes_follow_expected_pattern(self): + for code in MESSAGE_DEFINITIONS: + # Structural check with precise numeric boundaries included + self.assertRegex(code, MESSAGE_CODE_PATTERN) + kind = code[0] + num = int(code[1:]) + if kind == "E": + # E codes must be in the range 101–599 + self.assertGreaterEqual(num, 101, f"Invalid E-code: {code}") + self.assertLessEqual(num, 599, f"Invalid E-code: {code}") + elif kind == "W": + # W codes must be in the range 101–699 + self.assertGreaterEqual(num, 101, f"Invalid W-code: {code}") + self.assertLessEqual(num, 699, f"Invalid W-code: {code}") + elif kind == "C": + # C codes must be 101 or higher (no upper bound) + self.assertGreaterEqual(num, 101, f"Invalid C-code: {code}") + + def test_messagecode_constants_match_registry(self): + """ + Ensure that all public constants defined on MessageCode: + - have values present in MESSAGE_DEFINITIONS + - follow the same pattern and numeric ranges as registry codes. + """ + for name, value in vars(MessageCode).items(): + # Consider only public, constant-like attributes + if name.startswith("_"): + continue + if not name.isupper(): + continue + + code = value + # The constant value must be a registered message code + self.assertIn( + code, + MESSAGE_DEFINITIONS, + msg=f"MessageCode.{name} value {code!r} not in MESSAGE_DEFINITIONS", + ) + + # Reuse the structural and precision checks + self.assertRegex(code, MESSAGE_CODE_PATTERN) + kind = code[0] + num = int(code[1:]) + if kind == "E": + self.assertGreaterEqual(num, 101, f"Invalid E-code for {name}: {code}") + self.assertLessEqual(num, 599, f"Invalid E-code for {name}: {code}") + elif kind == "W": + self.assertGreaterEqual(num, 101, f"Invalid W-code for {name}: {code}") + self.assertLessEqual(num, 699, f"Invalid W-code for {name}: {code}") + elif kind == "C": + self.assertGreaterEqual(num, 101, f"Invalid C-code for {name}: {code}") + + +class TestFindErrors(unittest.TestCase): + """Tests for find_errors() which reads from a file on disk.""" + + def test_find_errors_valid_file(self): + import tempfile + + from dayamlchecker.yaml_structure import find_errors + + content = "---\nquestion: Hello\nfield: my_var\n" + with tempfile.NamedTemporaryFile( + suffix=".yml", mode="w", delete=False, encoding="utf-8" + ) as f: + f.write(content) + fname = f.name + try: + errs = find_errors(fname) + self.assertEqual(errs, []) + finally: + import os + + os.unlink(fname) + + def test_find_errors_invalid_file(self): + import tempfile + + from dayamlchecker.yaml_structure import find_errors + + content = "---\nnot_a_real_key: hello\n" + with tempfile.NamedTemporaryFile( + suffix=".yml", mode="w", delete=False, encoding="utf-8" + ) as f: + f.write(content) + fname = f.name + try: + errs = find_errors(fname) + self.assertGreater(len(errs), 0) + finally: + import os + + os.unlink(fname) + + def test_find_errors_large_invalid_fixture(self): + from dayamlchecker.yaml_structure import find_errors + + errs = find_errors(str(LARGE_INVALID_INTERVIEW_FIXTURE)) + codes = {err.code for err in errs} + expected_codes = ( + set(MESSAGE_DEFINITIONS) + - { + MessageCode.JINJA2_SYNTAX_ERROR, + MessageCode.JINJA2_TEMPLATE_ERROR, + } + - ACCESSIBILITY_ERROR_CODES + ) + + self.assertEqual( + codes, + expected_codes, + f"Expected fixture to produce {sorted(expected_codes)}, got {sorted(codes)}", + ) + + def test_error_code_fixtures_cover_entire_registry(self): + from dayamlchecker.yaml_structure import find_errors + + covered_codes = set() + for fixture_path in ALL_ERROR_CODE_FIXTURES: + covered_codes.update(err.code for err in find_errors(str(fixture_path))) + + self.assertEqual( + covered_codes, + set(MESSAGE_DEFINITIONS) - ACCESSIBILITY_ERROR_CODES, + f"Expected fixtures to cover {sorted(MESSAGE_DEFINITIONS)}, got {sorted(covered_codes)}", + ) + + def test_find_errors_large_valid_fixture(self): + from dayamlchecker.yaml_structure import find_errors + + errs = find_errors(str(LARGE_VALID_INTERVIEW_FIXTURE)) + self.assertEqual( + errs, [], f"Expected no errors from valid fixture, got: {errs}" + ) + + +class TestFindErrorsEdgeCases(unittest.TestCase): + """Tests for edge cases in find_errors_from_string.""" + + def test_no_input_file_defaults_to_unknown(self): + """When input_file is omitted, errors still have a file_name.""" + errs = find_errors_from_string("---\nnot_real: hello\n") + self.assertGreater(len(errs), 0) + # Should not crash; file_name set to a default like "" + self.assertIsNotNone(errs[0].file_name) + + def test_unrecognised_top_level_key_is_real_error(self): + """Non-DA keys must be flagged as non-experimental (REAL ERROR).""" + errs = find_errors_from_string("---\nbad_key: hello\n", input_file="") + real_errors = [e for e in errs if not e.experimental] + self.assertGreater(len(real_errors), 0) + + def test_multi_document_processes_all_blocks(self): + """Multiple YAML documents in one file are all checked.""" + content = "---\nquestion: Hello\n---\nbad_key: oops\n" + errs = find_errors_from_string(content, input_file="") + # The second block has a bad key + self.assertGreater(len(errs), 0) + + def test_empty_yaml_document_no_crash(self): + """An empty / comment-only document does not crash.""" + errs = find_errors_from_string("# just a comment\n", input_file="") + # No crash and no false positives + self.assertIsInstance(errs, list) + + def test_non_string_top_level_key_flagged(self): + """A top-level key that isn't a string is flagged.""" + content = "---\n123: value\n" + errs = find_errors_from_string(content, input_file="") + # Should produce an error about unexpected keys + self.assertGreater(len(errs), 0) + + def test_invalid_validator_error_shape_raises_value_error(self): + """Malformed validator outputs should fail explicitly, even with assertions disabled.""" + import dayamlchecker.yaml_structure as yaml_structure + + class BrokenValidator: + def __init__(self, _value): + self.errors = [("missing code", 1)] + + content = "---\nquestion: Hello\n" + with patch.dict( + yaml_structure.big_dict, + {"question": {"type": BrokenValidator}}, + clear=False, + ): + with self.assertRaisesRegex( + ValueError, "Validator errors must be 3-tuples" + ): + find_errors_from_string(content, input_file="") + + +class TestProcessFile(unittest.TestCase): + """Tests for process_file() which reads a file from disk.""" + + def _write_temp(self, content: str, suffix: str = ".yml") -> str: + import tempfile + + with tempfile.NamedTemporaryFile( + suffix=suffix, mode="w", delete=False, encoding="utf-8" + ) as f: + f.write(content) + return f.name + + def tearDown(self): + import os + + for attr in ("_tmp_path",): + p = getattr(self, attr, None) + if p: + try: + os.unlink(p) + except OSError: + pass + + def test_process_valid_file_prints_ok(self): + import io + + from dayamlchecker.yaml_structure import process_file + + path = self._write_temp("---\nquestion: Hello\nfield: my_var\n") + out = io.StringIO() + try: + with patch("sys.stdout", out): + process_file(path) + finally: + import os + + os.unlink(path) + self.assertIn("ok", out.getvalue()) + + def test_process_jinja_file_prints_ok_jinja(self): + import io + + from dayamlchecker.yaml_structure import process_file + + path = self._write_temp("# use jinja\n---\nquestion: Hello {{ name }}\n") + out = io.StringIO() + try: + with patch("sys.stdout", out): + process_file(path) + finally: + import os + + os.unlink(path) + self.assertIn("ok (jinja)", out.getvalue()) + + def test_process_file_with_errors_prints_error_count(self): + import io + + from dayamlchecker.yaml_structure import process_file + + path = self._write_temp("---\nbad_key: hello\n") + out = io.StringIO() + try: + with patch("sys.stdout", out): + process_file(path) + finally: + import os + + os.unlink(path) + self.assertIn("errors", out.getvalue()) + + def test_process_ignored_da_filename_skipped(self): + """Files whose basename is in the DA ignore list are silently skipped.""" + import io + + from dayamlchecker.yaml_structure import process_file + + # Write a file with a bad-key error but with an ignored name + path = self._write_temp("---\nbad_key: hello\n") + import os + import shutil + + ignored_path = os.path.join(os.path.dirname(path), "documentation.yml") + shutil.copy(path, ignored_path) + out = io.StringIO() + try: + with patch("sys.stdout", out): + process_file(ignored_path) + finally: + os.unlink(path) + os.unlink(ignored_path) + # Default mode should print "skipped: ", not errors + self.assertIn("skipped", out.getvalue()) + self.assertNotIn("error", out.getvalue().lower()) + + def test_process_ignored_da_filename_uses_line_reporter(self): + from dayamlchecker.yaml_structure import process_file + + messages = [] + result = process_file("/tmp/documentation.yml", line_reporter=messages.append) + + self.assertEqual(result, "skipped") + self.assertEqual(messages, ["skipped: /tmp/documentation.yml"]) + + def test_process_jinja_file_default_mode_prints_ok_status(self): + """Default mode prints the file name and ok (jinja) status per-file.""" + import io + + from dayamlchecker.yaml_structure import process_file + + path = self._write_temp("# use jinja\n---\nquestion: Hello {{ name }}\n") + out = io.StringIO() + try: + with patch("sys.stdout", out): + process_file(path) + finally: + import os + + os.unlink(path) + output = out.getvalue() + self.assertIn("ok (jinja)", output) + self.assertNotIn("Jinja-rendered output", output) + + def test_process_warning_file_uses_progress_line_reporter(self): + import io + + from dayamlchecker.yaml_structure import ( + _ProgressOutput, + YAMLError, + process_file, + ) + + path = self._write_temp("---\nquestion: Hello\nfield: my_var\n") + out = io.StringIO() + progress = _ProgressOutput() + warning = YAMLError( + err_str="Warning: heads up", + line_number=2, + file_name="warning.yml", + ) + try: + with patch("sys.stdout", out): + with patch( + "dayamlchecker.yaml_structure.find_errors_from_string", + return_value=[warning], + ): + progress.dot() + result = process_file(path, line_reporter=progress.line) + finally: + import os + + os.unlink(path) + + self.assertEqual(result, "warning") + output = out.getvalue() + self.assertTrue(output.startswith(".\nwarnings ("), output) + + def test_process_convention_file_uses_line_reporter(self): + from dayamlchecker.yaml_structure import YAMLError, process_file + + path = self._write_temp("---\nquestion: Hello\nfield: my_var\n") + messages = [] + convention = YAMLError( + err_str="Info: style note", + line_number=2, + file_name="convention.yml", + ) + try: + with patch( + "dayamlchecker.yaml_structure.find_errors_from_string", + return_value=[convention], + ): + result = process_file(path, line_reporter=messages.append) + finally: + import os + + os.unlink(path) + + self.assertEqual(result, "warning") + self.assertEqual(len(messages), 1) + self.assertIn("conventions (", messages[0]) + + +class TestDAFieldsDeepPaths(unittest.TestCase): + """Cover the deeper `_validate_field_modifiers` paths in DAFields.""" + + def test_accept_with_unquoted_extension_list_errors(self): + content = """\ +question: | + Sample +fields: + - Upload deed document: deed_upload + datatype: file + accept: | + .pdf,.jpg,.jpeg,.png,.tiff,.tif +""" + errs = find_errors_from_string(content, input_file="") + accept_errs = [ + e + for e in errs + if "accept must be a python string literal" in e.err_str.lower() + ] + self.assertGreaterEqual( + len(accept_errs), + 1, + f"Expected accept syntax error, got: {errs}", + ) + + def test_accept_with_quoted_string_is_valid(self): + content = """\ +question: | + Sample +fields: + - Upload deed document: deed_upload + datatype: file + accept: "'application/pdf,image/jpeg,image/png,image/tiff'" +""" + errs = find_errors_from_string(content, input_file="") + accept_errs = [e for e in errs if "accept" in e.err_str.lower()] + self.assertEqual(accept_errs, [], f"Unexpected accept errors: {accept_errs}") + + def test_accept_with_non_string_value_errors(self): + content = """\ +question: | + Sample +fields: + - Upload deed document: deed_upload + datatype: file + accept: + - .pdf + - .jpg +""" + errs = find_errors_from_string(content, input_file="") + accept_errs = [e for e in errs if e.code == "E121"] + self.assertGreaterEqual( + len(accept_errs), + 1, + f"Expected type error for non-string accept, got: {errs}", + ) + + def test_accept_with_mime_types_is_valid(self): + content = """\ +question: | + Sample +fields: + - Upload deed document: deed_upload + datatype: file + accept: "'application/pdf,image/jpeg,image/png'" +""" + errs = find_errors_from_string(content, input_file="") + accept_errs = [e for e in errs if "accept" in e.err_str.lower()] + self.assertEqual(accept_errs, [], f"Unexpected accept errors: {accept_errs}") + + def test_accept_block_scalar_double_quoted_string_is_valid(self): + """Block scalar with double-quoted Python string literal is valid.""" + content = """\ +question: | + Sample +fields: + - Upload deed document: deed_upload + datatype: file + accept: | + "application/pdf,image/jpeg,image/png,image/tiff" +""" + errs = find_errors_from_string(content, input_file="") + accept_errs = [e for e in errs if "accept" in e.err_str.lower()] + self.assertEqual(accept_errs, [], f"Unexpected accept errors: {accept_errs}") + + def test_accept_unquoted_mime_type_errors(self): + """Bare MIME type without Python quoting (e.g. application/pdf) is caught.""" + content = """\ +question: | + Sample +fields: + - Upload deed document: deed_upload + datatype: file + accept: application/pdf +""" + errs = find_errors_from_string(content, input_file="") + accept_errs = [ + e + for e in errs + if "accept must be a python string literal" in e.err_str.lower() + ] + self.assertGreaterEqual( + len(accept_errs), + 1, + f"Expected non-string-literal error for bare MIME type, got: {errs}", + ) + + def test_show_if_dict_missing_variable_and_code_keys_error(self): + """show if dict without 'variable' or 'code' should trigger an error.""" + content = """\ +question: | + Sample +fields: + - Name: name + - Conditional: other + show if: + neither_key: value +""" + errs = find_errors_from_string(content, input_file="") + self.assertTrue( + any( + '"variable" or "code"' in e.err_str.lower() + or "variable" in e.err_str.lower() + for e in errs + ), + f"Expected show-if dict key error, got: {errs}", + ) + + def test_hide_if_dict_missing_variable_and_code_keys_error(self): + """hide if dict without 'variable' or 'code' should trigger an error.""" + content = """\ +question: | + Sample +fields: + - Name: name + - Conditional: other + hide if: + neither_key: value +""" + errs = find_errors_from_string(content, input_file="") + self.assertTrue( + any( + '"variable" or "code"' in e.err_str.lower() + or "variable" in e.err_str.lower() + for e in errs + ), + f"Expected hide-if dict key error, got: {errs}", + ) + + def test_js_enable_if_valid(self): + """js enable if should work the same as js show if.""" + content = """\ +question: | + Sample +fields: + - Name: name + - Toggled: toggle + js enable if: | + val("name") !== "" +""" + errs = find_errors_from_string(content, input_file="") + enable_if_errs = [e for e in errs if "enable if" in e.err_str.lower()] + self.assertEqual(len(enable_if_errs), 0) + + def test_js_enable_if_no_val_call_error(self): + content = """\ +question: | + Sample +fields: + - Name: name + - Toggled: toggle + js enable if: | + true +""" + errs = find_errors_from_string(content, input_file="") + self.assertTrue( + any("val()" in e.err_str for e in errs), + f"Expected val() error for js enable if, got: {errs}", + ) + + def test_fields_list_with_non_dict_item_no_crash(self): + """A fields list containing a non-dict entry should not crash.""" + content = """\ +question: | + Sample +fields: + - note: just a note +""" + errs = find_errors_from_string(content, input_file="") + # Should either produce no error or a meaningful one, not crash + self.assertIsInstance(errs, list) + + def test_x_alias_in_show_if_on_generic_object_screen(self): + """show if with x. should match children[i]. on screen.""" + content = """\ +question: | + Generic screen +fields: + - Fruit: x.fruit + - Why: x.reason + show if: x.fruit +""" + errs = find_errors_from_string(content, input_file="") + show_if_errs = [ + e + for e in errs + if "show if" in e.err_str.lower() and "not defined" in e.err_str.lower() + ] + self.assertEqual(len(show_if_errs), 0, f"Unexpected: {show_if_errs}") + + +class TestMakoCompileException(unittest.TestCase): + """Cover the CompileException branch of MakoText.""" + + def test_mako_compile_error_produces_error(self): + from dayamlchecker.yaml_structure import MakoText + + # Patch MakoTemplate to raise a generic Exception to simulate a compile + # failure (MakoText only catches SyntaxException and CompileException; + # we test CompileException by using the SyntaxException path here since + # CompileException requires 4 positional args that are hard to construct). + # Instead, trigger a MakoText error by passing a non-string value. + v = MakoText.__new__(MakoText) + v.errors = [] + # We can indirectly test the compile except path by passing markup that + # triggers a Mako SyntaxException — a lone '$' is not valid Mako. + v2 = MakoText("${") + self.assertGreater(len(v2.errors), 0) + + +class TestJSShowIfNonString(unittest.TestCase): + """Cover the non-string branch of JSShowIf.""" + + def test_js_show_if_non_string_produces_error(self): + from dayamlchecker.yaml_structure import JSShowIf + + v = JSShowIf(42, modifier_key="js show if") + self.assertEqual(len(v.errors), 1) + self.assertIn("must be a string", v.errors[0][0]) + + def test_js_show_if_reference_helper_rejects_non_strings(self): + from dayamlchecker.yaml_structure import JSShowIf + + v = JSShowIf('val("field")', modifier_key="js show if") + + self.assertFalse(v._references_screen_variable(42)) + + +class TestShowIfMalformedCodePrefix(unittest.TestCase): + """ShowIf string values that start with 'code:' should be flagged.""" + + def test_show_if_code_colon_prefix_produces_error(self): + from dayamlchecker.yaml_structure import ShowIf + + v = ShowIf("code: x == 1") + self.assertEqual(len(v.errors), 1) + self.assertIn("malformed", v.errors[0][0]) + + def test_show_if_dict_variable_form_is_valid(self): + from dayamlchecker.yaml_structure import ShowIf + + v = ShowIf({"variable": "ready", "is": True}) + + self.assertEqual(v.errors, []) + + def test_interview_order_reference_without_matching_guard_errors(self): + """Error when interview-order style code references a conditionally shown field without guard""" + invalid = """ +question: | + Eviction details +fields: + - Reason: eviction_reason + choices: + - Nonpayment + - Other + - Other details: other_details + show if: + variable: eviction_reason + is: Other +--- +id: interview_order +mandatory: True +code: | + other_details +""" + errs = find_errors_from_string(invalid, input_file="") + self.assertTrue( + any( + 'references "other_details" without a matching guard' + in e.err_str.lower() + for e in errs + ), + f"Expected interview-order guard error, got: {errs}", + ) + + def test_interview_order_reference_with_matching_guard_valid(self): + """No error when interview-order style code guards conditional field usage""" + valid = """ +question: | + Eviction details +fields: + - Reason: eviction_reason + choices: + - Nonpayment + - Other + - Other details: other_details + show if: + variable: eviction_reason + is: Other +--- +id: interview_order +mandatory: True +code: | + if eviction_reason == "Other": + other_details +""" + errs = find_errors_from_string(valid, input_file="") + self.assertFalse( + any("without a matching guard" in e.err_str.lower() for e in errs), + f"Expected no interview-order guard error, got: {errs}", + ) + + def test_interview_order_reference_with_showifdef_guard_valid(self): + """No error when interview-order code uses showifdef('') as guard""" + valid = """ +question: | + Eviction details +fields: + - Reason: eviction_reason + choices: + - Nonpayment + - Other + - Other details: other_details + show if: + variable: eviction_reason + is: Other +--- +id: interview_order +mandatory: True +code: | + if showifdef("other_details") and other_details: + pass +""" + errs = find_errors_from_string(valid, input_file="") + self.assertFalse( + any("without a matching guard" in e.err_str.lower() for e in errs), + f"Expected no interview-order guard error with showifdef guard, got: {errs}", + ) + + def test_interview_order_all_conditional_modifiers_without_guard_error(self): + """Each conditional modifier should trigger interview-order guard mismatch when unguarded""" + cases = [ + ("show if", "eviction_reason == 'Other'"), + ("hide if", "eviction_reason == 'Other'"), + ("enable if", "eviction_reason == 'Other'"), + ("disable if", "eviction_reason == 'Other'"), + ("js show if", 'val("eviction_reason") === "Other"'), + ("js hide if", 'val("eviction_reason") === "Other"'), ("js enable if", 'val("eviction_reason") === "Other"'), ("js disable if", 'val("eviction_reason") === "Other"'), ] @@ -1711,5 +3483,540 @@ def test_show_hide_nesting_depth_two_no_warning(self): ) +class TestYamlStructureHelpers(unittest.TestCase): + def test_normalize_validator_error_requires_tuple(self): + from dayamlchecker.yaml_structure import _normalize_validator_error + + with self.assertRaisesRegex(TypeError, "must be tuples"): + _normalize_validator_error("not-a-tuple") + + def test_normalize_validator_error_requires_string_message(self): + from dayamlchecker.yaml_structure import _normalize_validator_error + + with self.assertRaisesRegex(TypeError, "message must be a string"): + _normalize_validator_error((123, 1, MessageCode.NO_POSSIBLE_TYPES)) + + def test_normalize_validator_error_requires_int_line_number(self): + from dayamlchecker.yaml_structure import _normalize_validator_error + + with self.assertRaisesRegex(TypeError, "line number must be an int"): + _normalize_validator_error(("msg", "1", MessageCode.NO_POSSIBLE_TYPES)) + + def test_normalize_validator_error_requires_string_code(self): + from dayamlchecker.yaml_structure import _normalize_validator_error + + with self.assertRaisesRegex(TypeError, "code must be a string"): + _normalize_validator_error(("msg", 1, 123)) + + def test_format_message_and_experimental_lookup_reject_unknown_codes(self): + with self.assertRaisesRegex(ValueError, "Unknown message code"): + format_message("E999") + + with self.assertRaisesRegex(ValueError, "Unknown message code"): + is_experimental_code("E999") + + def test_validation_code_handles_second_parse_syntax_error(self): + import ast as pyast + + from dayamlchecker.yaml_structure import ValidationCode + + with patch( + "dayamlchecker.yaml_structure.ast.parse", + side_effect=[pyast.parse("x = 1"), SyntaxError("boom")], + ): + validator = ValidationCode("x = 1") + + self.assertEqual(validator.errors, []) + + def test_da_type_accepts_any_value(self): + from dayamlchecker.yaml_structure import DAType + + self.assertEqual(DAType("Individual").errors, []) + + def test_lc_line_defaults_to_one_without_metadata(self): + from dayamlchecker.yaml_structure import _lc_line + + self.assertEqual(_lc_line(object()), 1) + + def test_interview_order_style_detects_id_and_comment_markers(self): + from dayamlchecker.yaml_structure import _is_interview_order_style_block + + self.assertTrue(_is_interview_order_style_block({"id": "Interview Order"})) + self.assertTrue(_is_interview_order_style_block({"comment": "interview_order"})) + self.assertFalse(_is_interview_order_style_block({"id": "plain"})) + + def test_extract_field_var_name_ignores_non_field_inputs(self): + from dayamlchecker.yaml_structure import _extract_field_var_name + + self.assertIsNone(_extract_field_var_name("not a dict")) + self.assertIsNone(_extract_field_var_name({"show if": "ready"})) + + def test_lc_line_with_missing_line_attribute_defaults_to_one(self): + from types import SimpleNamespace + + from dayamlchecker.yaml_structure import _lc_line + + self.assertEqual(_lc_line(SimpleNamespace(lc=SimpleNamespace(line=None))), 1) + + def test_lc_key_line_no_lc_attribute(self): + from dayamlchecker.yaml_structure import _lc_key_line + + self.assertEqual(_lc_key_line(object(), "k"), 1) + + def test_lc_key_line_key_getter_not_callable(self): + from types import SimpleNamespace + + from dayamlchecker.yaml_structure import _lc_key_line + + obj = SimpleNamespace(lc=SimpleNamespace(key="not callable", line=5)) + self.assertEqual(_lc_key_line(obj, "k"), 6) + + def test_lc_key_line_key_getter_raises(self): + from types import SimpleNamespace + + from dayamlchecker.yaml_structure import _lc_key_line + + obj = SimpleNamespace( + lc=SimpleNamespace(key=lambda k: (_ for _ in ()).throw(KeyError(k)), line=3) + ) + self.assertEqual(_lc_key_line(obj, "missing"), 4) + + def test_lc_key_line_non_tuple_line_info(self): + from types import SimpleNamespace + + from dayamlchecker.yaml_structure import _lc_key_line + + obj = SimpleNamespace(lc=SimpleNamespace(key=lambda k: "not a tuple", line=2)) + self.assertEqual(_lc_key_line(obj, "k"), 3) + + def test_lc_key_line_non_int_in_tuple(self): + from types import SimpleNamespace + + from dayamlchecker.yaml_structure import _lc_key_line + + obj = SimpleNamespace(lc=SimpleNamespace(key=lambda k: ("not_int",), line=4)) + self.assertEqual(_lc_key_line(obj, "k"), 5) + + def test_extract_controller_vars_and_js_vars_handle_non_strings(self): + from dayamlchecker.yaml_structure import ( + _extract_controller_vars_for_field_modifier, + _extract_vars_from_js_condition, + ) + + self.assertEqual(_extract_controller_vars_for_field_modifier(None), set()) + self.assertEqual(_extract_vars_from_js_condition(None), set()) + + def test_guard_candidates_cover_blank_and_hide_dict_paths(self): + from dayamlchecker.yaml_structure import _guard_candidates_for_modifier + + self.assertEqual(_guard_candidates_for_modifier("show if", " "), []) + self.assertEqual( + _guard_candidates_for_modifier( + "hide if", {"variable": "status", "is": "done"} + ), + ["status != 'done'", "not (status == 'done')"], + ) + self.assertEqual( + _guard_candidates_for_modifier("hide if", {"code": "ready"}), + ["not (ready)"], + ) + self.assertEqual( + _guard_candidates_for_modifier("hide if", {"variable": "ready"}), + ["not (ready)", "not ready"], + ) + + def test_statement_span_returns_none_without_line_numbers(self): + from dayamlchecker.yaml_structure import _statement_span + + class DummyStmt: + pass + + self.assertIsNone(_statement_span([DummyStmt()])) + + def test_extract_branch_guards_returns_empty_on_syntax_error(self): + from dayamlchecker.yaml_structure import _extract_branch_guards_by_line + + self.assertEqual(_extract_branch_guards_by_line("if :\n pass"), {}) + + def test_extract_branch_guards_skips_when_condition_cannot_be_rendered(self): + import ast + + from dayamlchecker.yaml_structure import _extract_branch_guards_by_line + + if_node = ast.parse("if flag:\n pass\n").body[0] + + with patch("dayamlchecker.yaml_structure.ast.walk", return_value=[if_node]): + with patch( + "dayamlchecker.yaml_structure.ast.get_source_segment", + return_value=None, + ): + with patch( + "dayamlchecker.yaml_structure.ast.unparse", + side_effect=Exception("boom"), + ): + self.assertEqual( + _extract_branch_guards_by_line("if flag:\n pass\n"), {} + ) + + def test_extract_branch_guards_handles_missing_lineno_and_else_only_spans(self): + import ast + + from dayamlchecker.yaml_structure import _extract_branch_guards_by_line + + if_node = ast.parse("if flag:\n pass\nelse:\n other()\n").body[0] + if_node.lineno = None + + with patch("dayamlchecker.yaml_structure.ast.walk", return_value=[if_node]): + with patch( + "dayamlchecker.yaml_structure._statement_span", + side_effect=[None, (3, 3)], + ): + self.assertEqual( + _extract_branch_guards_by_line( + "if flag:\n pass\nelse:\n other()\n" + ), + {3: ["not (flag)"]}, + ) + + def test_has_matching_guard_accepts_empty_expectations(self): + from dayamlchecker.yaml_structure import _has_matching_guard + + self.assertTrue(_has_matching_guard(["anything"], [])) + + def test_has_matching_guard_returns_false_when_nothing_matches(self): + from dayamlchecker.yaml_structure import _has_matching_guard + + self.assertFalse(_has_matching_guard(["x == 1"], ["y == 2"])) + + def test_max_screen_visibility_nesting_depth_handles_edge_cases_and_cycles(self): + from dayamlchecker.yaml_structure import _max_screen_visibility_nesting_depth + + self.assertEqual(_max_screen_visibility_nesting_depth({"fields": []}), 0) + self.assertEqual( + _max_screen_visibility_nesting_depth({"fields": [{"note": "Only note"}]}), + 0, + ) + self.assertEqual( + _max_screen_visibility_nesting_depth({"fields": ["not-a-dict"]}), + 0, + ) + self.assertEqual( + _max_screen_visibility_nesting_depth( + { + "fields": [ + {"First": "a", "show if": "b"}, + {"Second": "b", "show if": "a"}, + ] + } + ), + 2, + ) + + def test_find_screen_variable_references_in_code_handles_invalid_and_indexed_vars( + self, + ): + from dayamlchecker.yaml_structure import DAFields + + validator = DAFields([]) + + self.assertEqual( + validator._find_screen_variable_references_in_code("if :", {"name"}), + set(), + ) + self.assertEqual( + validator._find_screen_variable_references_in_code( + "children[i].name", {"children[i].name"} + ), + {"children[i].name"}, + ) + + def test_javascript_text_accepts_any_value(self): + from dayamlchecker.yaml_structure import JavascriptText + + self.assertEqual(JavascriptText("alert(1)").errors, []) + + def test_js_show_if_handles_non_val_calls_and_missing_arguments(self): + from dayamlchecker.yaml_structure import JSShowIf + + validator = JSShowIf("foo(bar(), val())", modifier_key="js show if") + + self.assertTrue( + any("quoted string" in err[0].lower() for err in validator.errors) + ) + + def test_js_show_if_traverses_dict_and_list_nodes_explicitly(self): + from dayamlchecker.yaml_structure import JSShowIf + + class Parsed: + def toDict(self): + return { + "type": "Program", + "body": [ + { + "type": "CallExpression", + "callee": {"type": "Identifier", "name": "foo"}, + "arguments": [], + }, + { + "type": "CallExpression", + "callee": {"type": "Identifier", "name": "val"}, + "arguments": [], + "loc": {"start": {"line": 1}}, + }, + ], + } + + with patch( + "dayamlchecker.yaml_structure.esprima.parseScript", + return_value=Parsed(), + ): + validator = JSShowIf("ignored", modifier_key="js show if") + + self.assertTrue( + any("quoted string" in err[0].lower() for err in validator.errors) + ) + + def test_fields_detects_dynamic_code_only_entry(self): + from dayamlchecker.yaml_structure import DAFields + + validator = DAFields([{"code": "field_list"}]) + + self.assertTrue(validator.has_dynamic_fields_code) + + def test_fields_skip_non_dict_entries_during_modifier_validation(self): + from dayamlchecker.yaml_structure import DAFields + + validator = DAFields([{"Name": "name"}, "not-a-dict"]) + + self.assertEqual(validator.errors, []) + + def test_show_if_simple_string_form_is_accepted(self): + from dayamlchecker.yaml_structure import ShowIf + + self.assertEqual(ShowIf("ready").errors, []) + + def test_show_if_dict_variable_form_is_accepted_directly(self): + from dayamlchecker.yaml_structure import ShowIf + + self.assertEqual(ShowIf({"variable": "ready"}).errors, []) + + def test_extract_field_name_skips_modifier_keys_before_returning_field(self): + from dayamlchecker.yaml_structure import DAFields + + validator = DAFields([]) + + self.assertEqual( + validator._extract_field_name({"show if": "ready", "Name": "person.name"}), + "person.name", + ) + + def test_extract_field_name_skips_non_string_field_values(self): + from dayamlchecker.yaml_structure import DAFields + + validator = DAFields([]) + + self.assertEqual( + validator._extract_field_name({"Count": 1, "Name": "person.name"}), + "person.name", + ) + + def test_extract_field_var_name_skips_modifier_keys_before_returning_field(self): + from dayamlchecker.yaml_structure import _extract_field_var_name + + self.assertEqual( + _extract_field_var_name({"show if": "ready", "Name": "person.name"}), + "person.name", + ) + + def test_extract_field_var_name_skips_non_string_field_values(self): + from dayamlchecker.yaml_structure import _extract_field_var_name + + self.assertEqual( + _extract_field_var_name({"Count": 1, "Name": "person.name"}), + "person.name", + ) + + def test_guard_candidates_cover_non_hide_variable_without_is(self): + from dayamlchecker.yaml_structure import _guard_candidates_for_modifier + + self.assertEqual( + _guard_candidates_for_modifier("show if", {"variable": "ready"}), + ["ready"], + ) + + def test_validate_python_modifier_allows_known_string_variable(self): + from dayamlchecker.yaml_structure import DAFields + + validator = DAFields([]) + validator._validate_python_modifier( + "show if", + "toggle", + {"label": "Reason"}, + {"toggle"}, + ) + + self.assertEqual(validator.errors, []) + + def test_fields_x_alias_matches_deep_screen_variable(self): + content = """ +question: | + Generic screen +fields: + - Fruit: children[i].fruit + - Reason: reason + show if: x.fruit +""" + + errs = find_errors_from_string(content, input_file="") + + self.assertFalse( + any( + e.code == MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_STRING + for e in errs + ), + errs, + ) + + def test_max_screen_visibility_nesting_depth_skips_non_dict_items_after_building_vars( + self, + ): + from dayamlchecker.yaml_structure import _max_screen_visibility_nesting_depth + + self.assertEqual( + _max_screen_visibility_nesting_depth( + {"fields": [{"Name": "name"}, "not-a-dict"]} + ), + 0, + ) + + def test_fields_string_modifier_known_variable_does_not_error(self): + content = """ +question: | + Sample +fields: + - Toggle: toggle + - Reason: reason + show if: toggle +""" + + errs = find_errors_from_string(content, input_file="") + + self.assertFalse( + any( + e.code == MessageCode.FIELD_MODIFIER_UNKNOWN_VARIABLE_STRING + for e in errs + ), + errs, + ) + + def test_fields_variable_modifier_non_string_reports_type_error(self): + content = """ +question: | + Sample +fields: + - Toggle: toggle + - Reason: reason + show if: + variable: + - toggle +""" + + errs = find_errors_from_string(content, input_file="") + + self.assertTrue( + any(e.code == MessageCode.FIELD_MODIFIER_VARIABLE_TYPE for e in errs) + ) + + def test_extract_field_name_returns_none_for_non_field_items(self): + from dayamlchecker.yaml_structure import DAFields + + validator = DAFields([]) + + self.assertIsNone(validator._extract_field_name("not a dict")) + self.assertIsNone(validator._extract_field_name({"show if": "ready"})) + + def test_find_errors_duplicate_key_without_problem_mark(self): + import dayamlchecker.yaml_structure as yaml_structure + from unittest.mock import Mock + + class DummyDuplicateKeyError(yaml_structure._RuamelDuplicateKeyError): + pass + + err = DummyDuplicateKeyError.__new__(DummyDuplicateKeyError) + err.problem = 'found duplicate key "dup"' + err.problem_mark = None + + loader = Mock() + loader.allow_duplicate_keys = False + loader.load.side_effect = err + + with patch("dayamlchecker.yaml_structure._RuamelYAML", return_value=loader): + errs = yaml_structure.find_errors_from_string( + "---\nquestion: Hello\n", input_file="" + ) + + self.assertTrue(any(e.code == MessageCode.YAML_DUPLICATE_KEY for e in errs)) + self.assertEqual(errs[0].line_number, 1) + + def test_find_errors_marked_yaml_error_without_marks(self): + import dayamlchecker.yaml_structure as yaml_structure + from unittest.mock import Mock + + class DummyMarkedYAMLError(yaml_structure._RuamelMarkedYAMLError): + def __str__(self): + return "marked parse error" + + err = DummyMarkedYAMLError.__new__(DummyMarkedYAMLError) + err.context_mark = None + err.problem_mark = None + + loader = Mock() + loader.allow_duplicate_keys = False + loader.load.side_effect = err + + with patch("dayamlchecker.yaml_structure._RuamelYAML", return_value=loader): + errs = yaml_structure.find_errors_from_string( + "---\nquestion: Hello\n", input_file="" + ) + + self.assertTrue(any(e.code == MessageCode.YAML_PARSE_ERROR for e in errs)) + + def test_find_errors_generic_yaml_exception_uses_parse_error_path(self): + import dayamlchecker.yaml_structure as yaml_structure + from unittest.mock import Mock + + loader = Mock() + loader.allow_duplicate_keys = False + loader.load.side_effect = RuntimeError("boom") + + with patch("dayamlchecker.yaml_structure._RuamelYAML", return_value=loader): + errs = yaml_structure.find_errors_from_string( + "---\nquestion: Hello\n", input_file="" + ) + + self.assertTrue(any(e.code == MessageCode.YAML_PARSE_ERROR for e in errs)) + + def test_partner_block_pair_does_not_trigger_too_many_types(self): + content = """ +question: | + Hello +attachment: + name: Greeting + filename: greeting + content: | + Hello world +""" + + errs = find_errors_from_string(content, input_file="") + + self.assertFalse(any(e.code == MessageCode.TOO_MANY_TYPES for e in errs)) + + def test_variable_candidates_strip_multiple_trailing_indexes(self): + result = _variable_candidates("items[0][1]") + + self.assertIn("items[0]", result) + self.assertIn("items", result) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_yaml_structure_cli.py b/tests/test_yaml_structure_cli.py index c56ad9d..71b2945 100644 --- a/tests/test_yaml_structure_cli.py +++ b/tests/test_yaml_structure_cli.py @@ -1,12 +1,21 @@ import io +import re import sys from contextlib import redirect_stdout from pathlib import Path from tempfile import TemporaryDirectory +from unittest.mock import patch import dayamlchecker.yaml_structure as yaml_structure from dayamlchecker.check_questions_urls import URLCheckResult, URLIssue -from dayamlchecker.yaml_structure import _collect_yaml_files, main +from dayamlchecker._files import _collect_yaml_files +from dayamlchecker.__main__ import main as package_main +from dayamlchecker.yaml_structure import ( + YAMLError, + main, + parse_ignore_codes, + process_file, +) def _write_valid_question(path: Path) -> None: @@ -17,6 +26,14 @@ def _write_valid_question(path: Path) -> None: ) +def _write_valid_code_interview(path: Path, *, code: str = "x=1") -> None: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + f"mandatory: True\ncode: |\n {code}\n", + encoding="utf-8", + ) + + def test_collect_yaml_files_recurses_directories_and_dedupes(): with TemporaryDirectory() as tmp: root = Path(tmp) @@ -108,7 +125,918 @@ def test_collect_yaml_files_can_disable_default_ignores(): ) -def test_main_default_wcag_reports_failures(): +def test_collect_yaml_files_uses_docassemble_when_root_has_pyproject(): + with TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n', + encoding="utf-8", + ) + root_level_yaml = root / "visible.yml" + docassemble_yaml = ( + root / "docassemble" / "pkg" / "data" / "questions" / "test.yml" + ) + docassemble_yaml.parent.mkdir(parents=True) + + root_level_yaml.write_text("question: visible\n", encoding="utf-8") + docassemble_yaml.write_text("question: test\n", encoding="utf-8") + + collected = _collect_yaml_files([root]) + + assert [path.resolve() for path in collected] == [docassemble_yaml.resolve()] + + +def test_collect_yaml_files_reads_yaml_path_from_pyproject(): + with TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + "[tool.dayaml]\n" + 'yaml_path = "interviews"\n', + encoding="utf-8", + ) + configured_yaml = root / "interviews" / "test.yml" + default_yaml = root / "docassemble" / "ignored.yml" + configured_yaml.parent.mkdir(parents=True) + default_yaml.parent.mkdir(parents=True) + + configured_yaml.write_text("question: configured\n", encoding="utf-8") + default_yaml.write_text("question: default\n", encoding="utf-8") + + collected = _collect_yaml_files([root]) + + assert [path.resolve() for path in collected] == [configured_yaml.resolve()] + + +# --------------------------------------------------------------------------- +# CLI (main()) / process_file() integration tests +# --------------------------------------------------------------------------- + + +def test_cli_valid_file_exits_zero(): + with TemporaryDirectory() as tmp: + good = Path(tmp) / "good.yml" + good.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + assert process_file(str(good)) == "ok" + + +def test_cli_no_files_found_exits_nonzero(): + with TemporaryDirectory() as tmp: + # Write a text file, not YAML — _collect_yaml_files skips it, main() returns 1 + txt = Path(tmp) / "readme.txt" + txt.write_text("hello\n", encoding="utf-8") + with patch("sys.argv", ["dayamlchecker", str(txt)]): + assert main() == 1 + + +def test_cli_main_defaults_to_current_directory_when_no_files_passed(monkeypatch): + with TemporaryDirectory() as tmp: + root = Path(tmp) + docassemble_dir = root / "docassemble" + docassemble_dir.mkdir() + good = docassemble_dir / "good.yml" + good.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + + previous_cwd = Path.cwd() + monkeypatch.chdir(root) + try: + buf = io.StringIO() + with redirect_stdout(buf): + result = main([]) + finally: + monkeypatch.chdir(previous_cwd) + + assert result == 0 + output = buf.getvalue() + assert output.startswith(".\n") + assert "1 ok" in output + + +def test_cli_main_no_files_reads_pyproject_args_from_cwd(monkeypatch): + with TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + "[tool.dayaml]\n" + 'args = ["--no-url-check"]\n', + encoding="utf-8", + ) + interview = root / "docassemble" / "Demo" / "data" / "questions" / "test.yml" + _write_valid_question(interview) + + called = False + + def fake_run_url_check(**kwargs): + nonlocal called + called = True + return URLCheckResult(checked_url_count=0, ignored_url_count=0, issues=()) + + monkeypatch.setattr(yaml_structure, "run_url_check", fake_run_url_check) + previous_cwd = Path.cwd() + monkeypatch.chdir(root) + try: + assert main([]) == 0 + finally: + monkeypatch.chdir(previous_cwd) + + assert called is False + + +def test_cli_jinja_file_prints_ok_jinja(): + with TemporaryDirectory() as tmp: + jinja_file = Path(tmp) / "interview.yml" + jinja_file.write_text( + "# use jinja\n---\nquestion: Hello {{ user }}\n", encoding="utf-8" + ) + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(jinja_file)) + assert result == "ok" + assert "ok (jinja)" in buf.getvalue() + + +def test_cli_check_all_flag_includes_git_dirs(): + with TemporaryDirectory() as tmp: + root = Path(tmp) + git_dir = root / ".git" + git_dir.mkdir() + git_file = git_dir / "hidden.yml" + git_file.write_text("---\nquestion: git\nfield: x\n", encoding="utf-8") + # --check-all maps to include_default_ignores=False + collected = _collect_yaml_files([root], include_default_ignores=False) + assert git_file in collected + assert process_file(str(git_file)) == "ok" + + +def test_cli_jinja_file_default_mode_prints_ok_status(): + with TemporaryDirectory() as tmp: + jinja_file = Path(tmp) / "interview.yml" + jinja_file.write_text( + "# use jinja\n---\nquestion: Hello {{ user }}\n", encoding="utf-8" + ) + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(jinja_file)) + assert result == "ok" + output = buf.getvalue() + assert "ok (jinja)" in output + assert "interview.yml" in output + + +def test_cli_file_with_errors_reports_error_status(): + """process_file returns 'error' and prints an errors summary line.""" + with TemporaryDirectory() as tmp: + bad = Path(tmp) / "bad.yml" + bad.write_text("---\nnot_a_real_key: hello\n", encoding="utf-8") + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(bad)) + assert result == "error" + output = buf.getvalue() + assert re.search(r"errors \(\d+\):.*bad\.yml", output) + assert "[E301]" in output + + +def test_cli_file_with_promoted_errors_reports_error_status(): + with TemporaryDirectory() as tmp: + warning_file = Path(tmp) / "warning.yml" + warning_file.write_text( + "---\n" + "question: Hello\n" + "fields:\n" + " - Preferred salutation: preferred_salutation\n" + " - Follow up: follow_up\n" + " show if:\n" + ' code: preferred_salutation == "Ms."\n', + encoding="utf-8", + ) + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(warning_file)) + assert result == "error" + output = buf.getvalue() + assert re.search(r"errors \(\d+\):.*warning\.yml", output) + assert "[E410]" in output + + +def test_parse_ignore_codes_normalizes_comma_separated_codes(): + assert parse_ignore_codes(" e410, E301 ,, c101 ") == frozenset( + {"E410", "E301", "C101"} + ) + + +def test_cli_process_file_can_ignore_promoted_error_codes(): + with TemporaryDirectory() as tmp: + warning_file = Path(tmp) / "warning.yml" + warning_file.write_text( + "---\n" + "question: Hello\n" + "fields:\n" + " - Preferred salutation: preferred_salutation\n" + " - Follow up: follow_up\n" + " show if:\n" + ' code: preferred_salutation == "Ms."\n', + encoding="utf-8", + ) + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(warning_file), ignore_codes=frozenset({"E410"})) + assert result == "ok" + output = buf.getvalue() + assert "ok:" in output + assert "[E410]" not in output + assert "warnings (" not in output + + +def test_cli_file_with_conventions_reports_convention_status(): + with TemporaryDirectory() as tmp: + convention_file = Path(tmp) / "convention.yml" + convention_file.write_text( + "---\n" + "question: Total fruit\n" + "fields:\n" + " - Apples: number_apples\n" + " datatype: integer\n" + " - Oranges: number_oranges\n" + " datatype: integer\n" + "validation code: |\n" + " if number_apples + number_oranges != 10:\n" + " raise Exception('Bad total')\n", + encoding="utf-8", + ) + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(convention_file)) + assert result == "warning" + output = buf.getvalue() + assert re.search(r"conventions \(\d+\):.*convention\.yml", output) + assert "[C101]" in output + assert "warnings (" not in output + assert "errors (" not in output + + +def test_cli_file_with_warnings_reports_warning_status(): + with TemporaryDirectory() as tmp: + warning_file = Path(tmp) / "warning.yml" + warning_file.write_text( + "---\nquestion: Hello\nfield: user_name\n", encoding="utf-8" + ) + buf = io.StringIO() + warning = YAMLError( + err_str="Warning: heads up", + line_number=2, + file_name="warning.yml", + ) + + with patch( + "dayamlchecker.yaml_structure.find_errors_from_string", + return_value=[warning], + ): + with redirect_stdout(buf): + result = process_file(str(warning_file)) + + assert result == "warning" + output = buf.getvalue() + assert re.search(r"warnings \(1\):.*warning\.yml", output) + assert "Warning: heads up" in output + assert "conventions (" not in output + assert "errors (" not in output + + +def test_cli_main_exits_nonzero_when_any_file_has_errors(): + with TemporaryDirectory() as tmp: + bad = Path(tmp) / "bad.yml" + bad.write_text("---\nnot_a_real_key: hello\n", encoding="utf-8") + + with patch("sys.argv", ["dayamlchecker", str(bad)]): + assert main() == 1 + + +def test_cli_main_can_ignore_error_codes_from_flag(): + with TemporaryDirectory() as tmp: + bad = Path(tmp) / "bad.yml" + bad.write_text( + "---\nquestion: Hello\nquestion: Again\nfield: my_var\n", + encoding="utf-8", + ) + buf = io.StringIO() + + with redirect_stdout(buf): + with patch( + "sys.argv", ["dayamlchecker", "--ignore-codes", "e101", str(bad)] + ): + result = main() + + assert result == 0 + output = buf.getvalue() + assert output.startswith(".\n") + assert "[E101]" not in output + assert "0 errors" in output + + +def test_cli_main_exits_nonzero_when_file_has_only_promoted_errors(): + with TemporaryDirectory() as tmp: + warning_file = Path(tmp) / "warning.yml" + warning_file.write_text( + "---\n" + "question: Hello\n" + "fields:\n" + " - Preferred salutation: preferred_salutation\n" + " - Follow up: follow_up\n" + " show if:\n" + ' code: preferred_salutation == "Ms."\n', + encoding="utf-8", + ) + buf = io.StringIO() + + with redirect_stdout(buf): + with patch("sys.argv", ["dayamlchecker", str(warning_file)]): + result = main() + + assert result == 1 + output = buf.getvalue() + assert "1 errors" in output + assert "0 warnings" in output + + +def test_cli_main_reads_ignore_codes_from_parent_pyproject(): + with TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + "[tool.dayaml]\n" + 'ignore_codes = ["E410"]\n', + encoding="utf-8", + ) + warning_file = root / "docassemble" / "warning.yml" + warning_file.parent.mkdir(parents=True) + warning_file.write_text( + "---\n" + "question: Hello\n" + "fields:\n" + " - Preferred salutation: preferred_salutation\n" + " - Follow up: follow_up\n" + " show if:\n" + ' code: preferred_salutation == "Ms."\n', + encoding="utf-8", + ) + buf = io.StringIO() + + with redirect_stdout(buf): + result = main(["--no-url-check", str(warning_file.parent)]) + + assert result == 0 + output = buf.getvalue() + assert output.startswith(".\n") + assert "[E410]" not in output + assert "1 ok" in output + + +def test_cli_main_reads_raw_args_from_pyproject(monkeypatch): + with TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + "[tool.dayaml]\n" + 'args = ["--no-url-check"]\n', + encoding="utf-8", + ) + interview = root / "docassemble" / "Demo" / "data" / "questions" / "test.yml" + _write_valid_question(interview) + + called = False + + def fake_run_url_check(**kwargs): + nonlocal called + called = True + return URLCheckResult(checked_url_count=0, ignored_url_count=0, issues=()) + + monkeypatch.setattr(yaml_structure, "run_url_check", fake_run_url_check) + + assert yaml_structure.main([str(root)]) == 0 + assert called is False + + +def test_cli_args_override_pyproject_raw_args(monkeypatch): + with TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "pyproject.toml").write_text( + '[project]\nname = "demo"\nversion = "0.1.0"\n\n' + "[tool.dayaml]\n" + 'args = ["--no-url-check"]\n', + encoding="utf-8", + ) + interview = root / "docassemble" / "Demo" / "data" / "questions" / "test.yml" + _write_valid_question(interview) + + called = False + + def fake_run_url_check(**kwargs): + nonlocal called + called = True + return URLCheckResult(checked_url_count=0, ignored_url_count=0, issues=()) + + monkeypatch.setattr(yaml_structure, "run_url_check", fake_run_url_check) + + assert yaml_structure.main(["--url-check", str(root)]) == 0 + assert called is True + + +def test_cli_jinja_file_with_bad_key_reports_errors(): + """Errors in the Jinja-rendered content must still be caught and reported. + + This is a companion to test_cli_jinja_file_default_mode_prints_ok_status: + it verifies that the Jinja pre-processing path (preprocess_jinja -> recursive + find_errors_from_string call) doesn't silently discard validation errors. + """ + with TemporaryDirectory() as tmp: + jinja_file = Path(tmp) / "bad_jinja.yml" + jinja_file.write_text( + "# use jinja\n---\nnot_a_real_key: hello\n", encoding="utf-8" + ) + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(jinja_file)) + assert result == "error" + output = buf.getvalue() + assert re.search(r"errors \(\d+\).*bad_jinja\.yml", output) + assert "[E301]" in output + + +def test_cli_process_file_skips_known_da_files(): + """process_file returns 'skipped' for known DA helper files like docstring.yml.""" + with TemporaryDirectory() as tmp: + skipped = Path(tmp) / "docstring.yml" + skipped.write_text( + "this is not valid yaml interview content\n", encoding="utf-8" + ) + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(skipped)) + assert result == "skipped" + assert "skipped" in buf.getvalue() + + +def test_cli_process_file_skipped_uses_line_reporter(): + with TemporaryDirectory() as tmp: + skipped = Path(tmp) / "docstring.yml" + skipped.write_text("ignored\n", encoding="utf-8") + + messages: list[str] = [] + result = process_file(str(skipped), line_reporter=messages.append) + + assert result == "skipped" + assert messages == [f"skipped: {skipped}"] + + +def test_cli_process_file_quiet_skips_no_output(): + """process_file with quiet=True suppresses output for skipped files.""" + with TemporaryDirectory() as tmp: + skipped = Path(tmp) / "docstring.yml" + skipped.write_text("ignored\n", encoding="utf-8") + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(skipped), quiet=True) + assert result == "skipped" + assert buf.getvalue() == "" + + +def test_cli_process_file_quiet_ok_no_output(): + """process_file with quiet=True suppresses output for ok files.""" + with TemporaryDirectory() as tmp: + good = Path(tmp) / "good.yml" + good.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(good), quiet=True) + assert result == "ok" + assert buf.getvalue() == "" + + +def test_process_file_format_on_success_prints_reformatted_without_line_reporter(): + with TemporaryDirectory() as tmp: + interview = Path(tmp) / "format_me.yml" + _write_valid_code_interview(interview) + + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(interview), format_on_success=True) + + assert result == "ok" + assert ( + interview.read_text(encoding="utf-8") + == "mandatory: True\ncode: |\n x = 1\n" + ) + assert buf.getvalue() == f"reformatted: {interview}\n" + + +def test_process_file_format_on_success_leaves_already_formatted_file_unchanged(): + with TemporaryDirectory() as tmp: + interview = Path(tmp) / "already_formatted.yml" + _write_valid_code_interview(interview, code="x = 1") + + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(interview), format_on_success=True) + + assert result == "ok" + assert ( + interview.read_text(encoding="utf-8") + == "mandatory: True\ncode: |\n x = 1\n" + ) + assert buf.getvalue() == f"ok: {interview}\n" + + +def test_process_file_warning_uses_line_reporter(): + with TemporaryDirectory() as tmp: + warning_file = Path(tmp) / "warning.yml" + warning_file.write_text( + "---\nquestion: Hello\nfield: user_name\n", encoding="utf-8" + ) + messages: list[str] = [] + warning = YAMLError( + err_str="Warning: heads up", + line_number=2, + file_name="warning.yml", + ) + + with patch( + "dayamlchecker.yaml_structure.find_errors_from_string", + return_value=[warning], + ): + result = process_file(str(warning_file), line_reporter=messages.append) + + assert result == "warning" + assert messages == [f"warnings (1): {warning_file}"] + + +def test_process_file_format_on_success_warning_prints_reformatted_without_line_reporter(): + with TemporaryDirectory() as tmp: + convention_file = Path(tmp) / "convention.yml" + convention_file.write_text( + "---\n" + "question: Total fruit\n" + "fields:\n" + " - Apples: number_apples\n" + " datatype: integer\n" + " - Oranges: number_oranges\n" + " datatype: integer\n" + "validation code: |\n" + " if number_apples+number_oranges !=10:\n" + " raise Exception('Bad total')\n", + encoding="utf-8", + ) + + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(convention_file), format_on_success=True) + + assert result == "warning" + assert "if number_apples + number_oranges != 10:" in convention_file.read_text( + encoding="utf-8" + ) + output = buf.getvalue() + assert f"conventions (1): {convention_file}" in output + assert f"reformatted: {convention_file}" in output + + +def test_cli_main_no_summary_flag(): + """--no-summary flag suppresses the summary line.""" + with TemporaryDirectory() as tmp: + good = Path(tmp) / "good.yml" + good.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + buf = io.StringIO() + with redirect_stdout(buf): + with patch("sys.argv", ["dayamlchecker", "--no-summary", str(good)]): + result = main() + assert result == 0 + assert "summary" not in buf.getvalue().lower() + + +def test_cli_main_quiet_flag(): + """--quiet flag suppresses all non-error output.""" + with TemporaryDirectory() as tmp: + good = Path(tmp) / "good.yml" + good.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + buf = io.StringIO() + with redirect_stdout(buf): + with patch("sys.argv", ["dayamlchecker", "--quiet", str(good)]): + result = main() + assert result == 0 + assert buf.getvalue().strip() == "" + + +def test_main_format_on_success_reformats_ok_file_without_url_check(monkeypatch): + with TemporaryDirectory() as tmp: + interview = Path(tmp) / "format_me.yml" + _write_valid_code_interview(interview) + + called = False + + def fake_run_url_check(**kwargs): + nonlocal called + called = True + return URLCheckResult(checked_url_count=0, ignored_url_count=0, issues=()) + + monkeypatch.setattr(yaml_structure, "run_url_check", fake_run_url_check) + + buf = io.StringIO() + with redirect_stdout(buf): + result = main(["--format-on-success", "--no-url-check", str(interview)]) + + assert result == 0 + assert called is False + assert ( + interview.read_text(encoding="utf-8") + == "mandatory: True\ncode: |\n x = 1\n" + ) + output = buf.getvalue() + assert "reformatted:" in output + assert "format_me.yml" in output + assert "Summary: 1 ok" in output + + +def test_main_format_on_success_reformats_warning_file(): + with TemporaryDirectory() as tmp: + convention_file = Path(tmp) / "convention.yml" + convention_file.write_text( + "---\n" + "question: Total fruit\n" + "fields:\n" + " - Apples: number_apples\n" + " datatype: integer\n" + " - Oranges: number_oranges\n" + " datatype: integer\n" + "validation code: |\n" + " if number_apples+number_oranges !=10:\n" + " raise Exception('Bad total')\n", + encoding="utf-8", + ) + + buf = io.StringIO() + with redirect_stdout(buf): + result = main( + ["--format-on-success", "--no-url-check", str(convention_file)] + ) + + assert result == 0 + assert "if number_apples + number_oranges != 10:" in convention_file.read_text( + encoding="utf-8" + ) + output = buf.getvalue() + assert result == 0 + assert "conventions (1):" in output + assert "convention.yml" in output + assert "reformatted:" in output + assert "Summary: 0 ok, 1 warnings, 0 errors, 0 skipped" in output + + +def test_main_format_on_success_does_not_write_yaml_error_file(): + with TemporaryDirectory() as tmp: + interview = Path(tmp) / "bad.yml" + original = "mandatory: True\ncode: |\n x=1\nnot_a_real_key: hello\n" + interview.write_text(original, encoding="utf-8") + + buf = io.StringIO() + with redirect_stdout(buf): + result = main(["--format-on-success", "--no-url-check", str(interview)]) + + assert result == 1 + assert interview.read_text(encoding="utf-8") == original + output = buf.getvalue() + assert "errors (1):" in output + assert "bad.yml" in output + assert "reformatted:" not in output + + +def test_main_format_on_success_respects_ignore_codes(): + with TemporaryDirectory() as tmp: + interview = Path(tmp) / "ignored.yml" + interview.write_text( + "mandatory: True\ncode: |\n x=1\nnot_a_real_key: hello\n", + encoding="utf-8", + ) + + buf = io.StringIO() + with redirect_stdout(buf): + result = main( + [ + "--format-on-success", + "--no-url-check", + "--ignore-codes", + "E301", + str(interview), + ] + ) + + assert result == 0 + assert interview.read_text(encoding="utf-8").startswith( + "mandatory: True\ncode: |\n x = 1\n" + ) + output = buf.getvalue() + assert "reformatted:" in output + assert "ignored.yml" in output + assert "[E301]" not in output + + +def test_main_format_on_success_writes_before_url_checker_error(monkeypatch, capsys): + with TemporaryDirectory() as tmp: + root = Path(tmp) + interview = root / "docassemble" / "Demo" / "data" / "questions" / "test.yml" + _write_valid_code_interview(interview) + + def fake_run_url_check(**kwargs): + return URLCheckResult( + checked_url_count=1, + ignored_url_count=0, + issues=( + URLIssue( + severity="error", + category="broken", + source_kind="yaml", + url="https://example.invalid/question", + sources=("docassemble/Demo/data/questions/test.yml",), + status_code=404, + ), + ), + ) + + monkeypatch.setattr(yaml_structure, "run_url_check", fake_run_url_check) + + assert main(["--format-on-success", str(interview)]) == 1 + assert ( + interview.read_text(encoding="utf-8") + == "mandatory: True\ncode: |\n x = 1\n" + ) + + output = capsys.readouterr().out + assert "reformatted:" in output + assert "test.yml" in output + assert "url checker errors:" in output.lower() + + +def test_cli_main_summary_shows_counts(): + """Summary line shows counts for ok, errors, skipped.""" + with TemporaryDirectory() as tmp: + good = Path(tmp) / "good.yml" + good.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + buf = io.StringIO() + with redirect_stdout(buf): + with patch("sys.argv", ["dayamlchecker", str(good)]): + main() + output = buf.getvalue() + assert "Summary:" in output + assert "1 ok" in output + + +def test_cli_main_summary_counts_skipped_files(): + with TemporaryDirectory() as tmp: + skipped = Path(tmp) / "good.yml" + skipped.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + buf = io.StringIO() + with redirect_stdout(buf): + with patch( + "dayamlchecker.yaml_structure.process_file", return_value="skipped" + ): + with patch("sys.argv", ["dayamlchecker", str(skipped)]): + result = main() + + assert result == 0 + assert "1 skipped" in buf.getvalue() + + +def test_cli_main_summary_counts_warning_files(): + with TemporaryDirectory() as tmp: + warning_file = Path(tmp) / "warning.yml" + warning_file.write_text( + "---\nquestion: Hello\nfield: user_name\n", encoding="utf-8" + ) + buf = io.StringIO() + + with redirect_stdout(buf): + with patch( + "dayamlchecker.yaml_structure.process_file", return_value="warning" + ): + result = main(["--no-url-check", str(warning_file)]) + + assert result == 0 + assert "1 warnings" in buf.getvalue() + + +def test_cli_display_falls_back_to_absolute_path_when_not_under_base(): + with TemporaryDirectory() as base_tmp, TemporaryDirectory() as other_tmp: + base = Path(base_tmp) + outside = Path(other_tmp) / "outside.yml" + outside.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + buf = io.StringIO() + + with redirect_stdout(buf): + with patch( + "dayamlchecker.yaml_structure._collect_yaml_files", + return_value=[outside], + ): + with patch("sys.argv", ["dayamlchecker", str(base)]): + result = main() + + assert result == 0 + assert buf.getvalue().startswith(".\n") + + +def test_cli_display_prefers_path_relative_to_cwd(monkeypatch): + with TemporaryDirectory() as tmp: + root = Path(tmp) + scan_dir = root / "docassemble" + interview = scan_dir / "WorkflowDocs" / "data" / "questions" / "test.yml" + _write_valid_question(interview) + buf = io.StringIO() + + previous_cwd = Path.cwd() + monkeypatch.chdir(root) + try: + with redirect_stdout(buf): + result = main(["--no-url-check", str(scan_dir)]) + finally: + monkeypatch.chdir(previous_cwd) + + assert result == 0 + assert buf.getvalue().startswith(".\n") + + +def test_cli_main_no_summary_still_ends_dot_line(): + with TemporaryDirectory() as tmp: + good = Path(tmp) / "good.yml" + good.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + buf = io.StringIO() + + with redirect_stdout(buf): + result = main(["--no-summary", "--no-url-check", str(good)]) + + assert result == 0 + assert buf.getvalue() == ".\n" + + +def test_cli_display_path_used_in_output(): + """process_file uses display_path when provided.""" + with TemporaryDirectory() as tmp: + good = Path(tmp) / "good.yml" + good.write_text("---\nquestion: Hello\nfield: my_var\n", encoding="utf-8") + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(good), display_path="custom/path.yml") + assert result == "ok" + assert "custom/path.yml" in buf.getvalue() + + +def test_cli_default_omits_real_error_prefix(): + """process_file omits the REAL ERROR prefix by default on non-experimental errors.""" + with TemporaryDirectory() as tmp: + bad = Path(tmp) / "bad.yml" + bad.write_text("---\nnot_a_real_key: hello\n", encoding="utf-8") + buf = io.StringIO() + with redirect_stdout(buf): + result = process_file(str(bad)) + assert result == "error" + output = buf.getvalue() + assert "[E301]" in output + assert "REAL ERROR" not in output + + +def test_cli_show_experimental_flag_via_main(): + """--show-experimental adds the REAL ERROR prefix through the main() entry point.""" + with TemporaryDirectory() as tmp: + bad = Path(tmp) / "bad.yml" + bad.write_text("---\nnot_a_real_key: hello\n", encoding="utf-8") + buf = io.StringIO() + with redirect_stdout(buf): + with patch("sys.argv", ["dayamlchecker", "--show-experimental", str(bad)]): + main() + output = buf.getvalue() + assert "[E301]" in output + assert "REAL ERROR" in output + + +def test_cli_no_show_experimental_flag_via_main(): + """--no-show-experimental explicitly suppresses the REAL ERROR prefix.""" + with TemporaryDirectory() as tmp: + bad = Path(tmp) / "bad.yml" + bad.write_text("---\nnot_a_real_key: hello\n", encoding="utf-8") + buf = io.StringIO() + with redirect_stdout(buf): + with patch( + "sys.argv", ["dayamlchecker", "--no-show-experimental", str(bad)] + ): + main() + output = buf.getvalue() + assert "[E301]" in output + assert "REAL ERROR" not in output + + +def test_package_main_aliases_yaml_structure_main(): + """The package entrypoint should directly expose the checker CLI main.""" + assert package_main is main + + +def test_main_default_wcag_reports_errors_and_fails(): with TemporaryDirectory() as tmp: root = Path(tmp) interview = root / "accessibility.yml" @@ -123,11 +1051,12 @@ def test_main_default_wcag_reports_failures(): output = stdout.getvalue().lower() assert exit_code == 1 - assert "found 1 errors" in output + assert "errors (1)" in output + assert "[e505]" in output assert "accessibility: markdown image" in output -def test_main_wcag_info_only_does_not_fail(): +def test_main_wcag_accessibility_error_fails(): with TemporaryDirectory() as tmp: root = Path(tmp) interview = root / "tagged-pdf-warning.yml" @@ -143,11 +1072,13 @@ def test_main_wcag_info_only_does_not_fail(): exit_code = main([str(interview)]) output = stdout.getvalue().lower() - assert exit_code == 0 - assert "info: accessibility: docx attachment detected" in output + assert exit_code == 1 + assert "errors (1)" in output + assert "[e503]" in output + assert "accessibility: docx attachment detected" in output -def test_main_warning_still_fails_outside_info_only_mode(): +def test_main_promoted_error_only_file_fails(): with TemporaryDirectory() as tmp: root = Path(tmp) interview = root / "warning.yml" @@ -173,7 +1104,7 @@ def test_main_warning_still_fails_outside_info_only_mode(): output = stdout.getvalue().lower() assert exit_code == 1 - assert "warning:" in output + assert "errors (" in output def test_main_combobox_widget_check_disabled_by_default(): @@ -191,10 +1122,10 @@ def test_main_combobox_widget_check_disabled_by_default(): output = stdout.getvalue().lower() assert exit_code == 0 - assert "combobox" not in output + assert "screen uses `combobox`" not in output -def test_main_can_enable_combobox_widget_check(): +def test_main_can_enable_combobox_widget_error(): with TemporaryDirectory() as tmp: root = Path(tmp) interview = root / "combobox.yml" @@ -216,6 +1147,8 @@ def test_main_can_enable_combobox_widget_check(): output = stdout.getvalue().lower() assert exit_code == 1 + assert "errors (1)" in output + assert "[e501]" in output assert "screen uses `combobox`" in output @@ -252,9 +1185,9 @@ def fake_run_url_check(**kwargs): ) assert yaml_structure.main() == 0 - assert captured["root"] == root + assert captured["root"] == root.resolve() assert captured["question_files"] == [interview] - assert captured["package_dirs"] == [root / "docassemble" / "Demo"] + assert captured["package_dirs"] == [(root / "docassemble" / "Demo").resolve()] assert captured["timeout"] == 10 assert captured["check_documents"] is True assert captured["ignore_urls"] == set() @@ -360,7 +1293,7 @@ def fake_run_url_check(**kwargs): ) assert yaml_structure.main() == 0 - assert captured["root"] == root + assert captured["root"] == root.resolve() assert captured["timeout"] == 3 assert captured["check_documents"] is False assert captured["ignore_urls"] == {"https://ignore.example/path"}