"""Tests for ``iftypeset.linting``: lint diagnostics, rewrite fixes, and the manual checklist."""

import sys
import tempfile
import unittest
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
sys.path.insert(0, str(SRC))

from iftypeset.linting import lint_paths, manual_checklist  # noqa: E402
from iftypeset.spec_loader import load_spec  # noqa: E402


class LintingTests(unittest.TestCase):
    """Lint small Markdown fixtures with the ``web_pdf`` profile and check the report.

    Each test writes its fixture to ``doc.md`` inside a fresh temporary
    directory, runs ``lint_paths`` on it, and inspects the ``code`` /
    ``rule_id`` fields of the reported diagnostics.
    """

    # ------------------------------------------------------------------
    # Shared plumbing
    # ------------------------------------------------------------------

    def _make_doc(self, text: str) -> Path:
        """Write *text* to ``doc.md`` in a temp dir that lives until the test ends."""
        workdir = tempfile.TemporaryDirectory()
        # Cleanup is deferred so the file still exists while assertions run.
        self.addCleanup(workdir.cleanup)
        doc = Path(workdir.name) / "doc.md"
        doc.write_text(text, encoding="utf-8")
        return doc

    def _lint_doc(self, doc: Path, *, fix: bool = False, fail_on: str = "warn", **extra):
        """Run ``lint_paths`` on *doc* with the options shared by every test."""
        return lint_paths(
            [doc],
            profile_id="web_pdf",
            fix=fix,
            degraded_ok=True,
            fail_on=fail_on,
            **extra,
        )

    def _lint_text(self, text: str, **options):
        """Write *text* to a temporary document and lint it."""
        return self._lint_doc(self._make_doc(text), **options)

    @staticmethod
    def _diagnostics(outcome):
        """Return the ``diagnostics`` list of the lint report (empty if absent)."""
        return outcome.report.get("diagnostics", [])

    @staticmethod
    def _pluck(entries, field):
        """Collect the distinct values of *field* across diagnostic *entries*."""
        return {entry.get(field) for entry in entries}

    def _assert_all_present(self, expected, pool) -> None:
        """Assert, in order, that every item of *expected* is a member of *pool*."""
        for item in expected:
            self.assertIn(item, pool)

    def _assert_all_absent(self, unexpected, pool) -> None:
        """Assert, in order, that no item of *unexpected* is a member of *pool*."""
        for item in unexpected:
            self.assertNotIn(item, pool)

    # ------------------------------------------------------------------
    # Fix modes
    # ------------------------------------------------------------------

    def test_lint_diagnostics_and_fixes(self) -> None:
        outcome = self._lint_text(
            "# Title\n\nLine with trailing spaces \n\nSee [link] ( https://example.com )\n",
            fix=True,
            fix_mode="suggest",
        )
        seen_codes = self._pluck(self._diagnostics(outcome), "code")
        self._assert_all_present(("WS.TRAILING", "LINK.SPACING"), seen_codes)
        self.assertTrue(outcome.report.get("fixes"))

    def test_must_rules_emit_and_rewrite_respects_code(self) -> None:
        # NOTE(review): TYPO.SENTENCE_SPACE is expected below, yet only a single
        # space follows the quoted sentence here; check whether the fixture
        # originally carried a double space.
        doc = self._make_doc(
            "# Title\n\n"
            'He said "Hello". Next sentence.\n\n'
            "Note:This needs a space.\n\n"
            "Em dash -- like this.\n\n"
            "A decimal .5 should be written with a leading zero.\n\n"
            "The 1990's were loud.\n\n"
            "Inline `code:NoSpace` and `x -- y` should stay.\n\n"
            "```python\n"
            "x -- y\n"
            "```\n\n"
            "See file://etc/passwd and javascript:alert(1) and http://localhost/test.\n\n"
            "This has a footnote.[^2] And another.[^1]\n\n"
            "[^1]: One.\n"
            "[^2]: Two.\n"
        )
        outcome = self._lint_doc(doc, fix=True, fix_mode="rewrite", fail_on="must")
        entries = self._diagnostics(outcome)
        seen_codes = self._pluck(entries, "code")
        seen_rules = self._pluck(entries, "rule_id")

        self._assert_all_present(
            (
                "TYPO.SENTENCE_SPACE",
                "PUNCT.COLON_SPACE",
                "PUNCT.EM_DASH",
                "PUNCT.SMART_QUOTES",
                "NUM.LEADING_ZERO",
                "NUM.DECADE_PLURAL",
                "LINK.FILE_URI",
                "LINK.JAVASCRIPT_URI",
                "LINK.LOCALHOST",
                "CITATION.NOTE_SEQUENCE",
            ),
            seen_codes,
        )
        self._assert_all_present(
            (
                "BRING.TYPOGRAPHY.SPACING.SENTENCE_SPACE.SINGLE",
                "CMOS.PUNCTUATION.COLONS.SPACE_AFTER",
                "CMOS.PUNCTUATION.DASHES.EM_DASH.USE_WITHOUT_SPACES_US",
                "CMOS.PUNCTUATION.QUOTES.SMART_QUOTES",
                "CMOS.NUMBERS.DECIMALS.LEADING_ZERO",
                "CMOS.NUMBERS.PLURALS.DECADE.NO_APOSTROPHE",
                "HOUSE.LINKS.DISALLOW.FILE_URIS",
                "HOUSE.LINKS.DISALLOW.JAVASCRIPT_URIS",
                "HOUSE.LINKS.URLS.DISALLOW.LOCALHOST",
                "CMOS.CITATIONS.NOTES.NOTE_MARKERS.SEQUENCE_CONTINUOUS",
            ),
            seen_rules,
        )

        fixed = outcome.fixed_outputs.get(str(doc))
        self.assertIsNotNone(fixed)
        assert fixed is not None  # narrows the Optional for type checkers
        self._assert_all_present(
            (
                "Note: This needs a space.",
                "Em dash—like this.",
                "A decimal 0.5 should be written with a leading zero.",
                "The 1990s were loud.",
                # Inline code and fenced code must survive the rewrite untouched.
                "Inline `code:NoSpace` and `x -- y` should stay.",
                "```python\nx -- y\n```",
            ),
            fixed,
        )
        self.assertNotIn('"', fixed)

    def test_lint_fixed_rewrites_diagnostics(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            'He said "Hello". Next sentence.\n\n'
            "A decimal .5 should be fixed.\n",
            fix=True,
            fix_mode="rewrite",
            lint_fixed=True,
        )
        seen_codes = self._pluck(self._diagnostics(outcome), "code")
        self._assert_all_absent(
            ("PUNCT.SMART_QUOTES", "TYPO.SENTENCE_SPACE", "NUM.LEADING_ZERO"),
            seen_codes,
        )

    # ------------------------------------------------------------------
    # Accessibility
    # ------------------------------------------------------------------

    def test_alt_text_quality_suggests_description(self) -> None:
        outcome = self._lint_text("# A11y\n\n![image](a.png)\n")
        hits = [
            entry
            for entry in self._diagnostics(outcome)
            if entry.get("code") == "A11Y.IMG_ALT_QUALITY"
        ]
        self.assertTrue(hits)

    def test_accessibility_link_text_checks(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            "See [Docs](https://a.example.com) and [Docs](https://b.example.com).\n\n"
            "See [!!](https://example.com).\n"
        )
        seen_codes = self._pluck(self._diagnostics(outcome), "code")
        self._assert_all_present(
            ("A11Y.LINK_TEXT.DUPLICATE_LABELS", "A11Y.LINK_TEXT.SYMBOL_ONLY"),
            seen_codes,
        )

    # ------------------------------------------------------------------
    # Citations
    # ------------------------------------------------------------------

    def test_citation_notes_lints(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            "Bracketed numeric [1] appears in prose.\n\n"
            "## Heading with note[^1] extra\n\n"
            "See note.[^1]\n\n"
            "[^1]: Ibid., 12.\n"
        )
        seen_codes = self._pluck(self._diagnostics(outcome), "code")
        self._assert_all_present(
            (
                "CITATION.BRACKETED_NUMERIC",
                "CITATION.HEADING_NOTE_PLACEMENT",
                "CITATION.IBID",
                "CITATION.BIBLIOGRAPHY_REQUIRED",
            ),
            seen_codes,
        )

    def test_citation_missing_footnote_definition(self) -> None:
        outcome = self._lint_text("# Title\n\nFootnote marker.[^1]\n")
        pairs = {
            (entry.get("code"), entry.get("rule_id"))
            for entry in self._diagnostics(outcome)
        }
        self.assertIn(
            ("CITATION.NOTE_DEFINITION", "HOUSE.CITATIONS.NOTES.DEFINITIONS.REQUIRED"),
            pairs,
        )

    def test_doi_note_placement_and_ordinals(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            "Bare DOI 10.1000/182 should be linked.\n\n"
            "The 11st example is wrong.\n\n"
            "Correct note.[^1]\n"
            "Wrong note[^2].\n\n"
            "[^1]: One.\n"
            "[^2]: Two.\n"
        )
        entries = self._diagnostics(outcome)
        seen_codes = self._pluck(entries, "code")
        seen_rules = self._pluck(entries, "rule_id")
        self._assert_all_present(
            ("CITATION.DOI_PREFERRED", "NUM.ORDINAL_SUFFIX", "CITATION.NOTE_PLACEMENT"),
            seen_codes,
        )
        self._assert_all_present(
            (
                "CMOS.CITATIONS.DOI.PREFERRED_OVER_URL",
                "CMOS.NUMBERS.ORDINALS.SUFFIX.CORRECT",
                "CMOS.CITATIONS.NOTES.NOTE_MARKERS.PLACEMENT_AFTER_PUNCT",
            ),
            seen_rules,
        )

    def test_author_date_year_only_and_reference_year(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            "Smith (2020) established the baseline.\n\n"
            "See (2021) for a later variant.\n\n"
            "## References\n\n"
            "- Smith, John. Title of Work.\n"
            "- Doe, Jane. 2020. Another Title.\n"
        )
        entries = self._diagnostics(outcome)
        seen_codes = self._pluck(entries, "code")
        seen_rules = self._pluck(entries, "rule_id")
        self._assert_all_present(
            ("CITATION.AUTHOR_DATE.PAREN_YEAR_ONLY", "CITATION.AUTHOR_DATE.MISSING_YEAR"),
            seen_codes,
        )
        self._assert_all_present(
            (
                "HOUSE.CITATIONS.AUTHOR_DATE.PAREN_YEAR_ONLY",
                "HOUSE.CITATIONS.AUTHOR_DATE.REFLIST.YEAR_REQUIRED",
            ),
            seen_rules,
        )

    def test_author_date_requires_reference_list(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            "Smith (2020) established the baseline.\n\n"
        )
        entries = self._diagnostics(outcome)
        seen_codes = self._pluck(entries, "code")
        seen_rules = self._pluck(entries, "rule_id")
        self.assertIn("CITATION.AUTHOR_DATE.REFLIST_REQUIRED", seen_codes)
        self.assertIn("HOUSE.CITATIONS.AUTHOR_DATE.REFLIST.REQUIRED", seen_rules)

    # ------------------------------------------------------------------
    # Editorial
    # ------------------------------------------------------------------

    def test_editorial_draft_markers_emit(self) -> None:
        outcome = self._lint_text(
            "# Title\n\nTODO: replace.\n\nThis is TBD.\n\nLorem ipsum dolor sit amet.\n"
        )
        entries = self._diagnostics(outcome)
        seen_codes = self._pluck(entries, "code")
        seen_rules = self._pluck(entries, "rule_id")
        self._assert_all_present(
            ("EDITORIAL.TODO_FIXME", "EDITORIAL.TBD_TKTK", "EDITORIAL.LOREM_IPSUM"),
            seen_codes,
        )
        self._assert_all_present(
            (
                "HOUSE.EDITORIAL.PLACEHOLDERS.NO_TODO_FIXME",
                "HOUSE.EDITORIAL.PLACEHOLDERS.NO_TBD_TKTK",
                "HOUSE.EDITORIAL.PLACEHOLDERS.NO_LOREM_IPSUM",
            ),
            seen_rules,
        )

    def test_editorial_structure_and_heading_rules_emit(self) -> None:
        outcome = self._lint_text(
            "Intro paragraph one.\n\n"
            "Intro paragraph two.\n\n"
            "- bullet one\n"
            "- bullet two\n\n"
            "## ALL CAPS HEADING\n\n"
            "# Late Title\n\n"
            "## Trailing colon:\n\n"
            "- only one item\n\n"
            "# Second Title\n"
        )
        entries = self._diagnostics(outcome)
        seen_codes = self._pluck(entries, "code")
        seen_rules = self._pluck(entries, "rule_id")
        self._assert_all_present(
            (
                "EDITORIAL.HEADING_TRAILING_PUNCT",
                "EDITORIAL.LIST_SINGLE_ITEM",
                "EDITORIAL.HEADING_ALL_CAPS",
                "EDITORIAL.TITLE.MULTIPLE",
                "EDITORIAL.TITLE.LATE",
            ),
            seen_codes,
        )
        self._assert_all_present(
            (
                "HOUSE.EDITORIAL.HEADINGS.NO_TRAILING_PUNCTUATION",
                "HOUSE.EDITORIAL.HEADINGS.AVOID_ALL_CAPS",
                "HOUSE.EDITORIAL.LISTS.AVOID_SINGLE_ITEM",
                "HOUSE.EDITORIAL.STRUCTURE.H1_TITLE.SINGLE",
                "HOUSE.EDITORIAL.STRUCTURE.H1_TITLE.EARLY",
            ),
            seen_rules,
        )

    # ------------------------------------------------------------------
    # Links
    # ------------------------------------------------------------------

    def test_link_hazards_emit(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            "See [proto](//example.com/path).\n\n"
            "See [frag](https://example.com/page#:~:text=alpha).\n\n"
            "See [signed](https://example.com/file?X-Amz-Signature=abc&X-Amz-Expires=3600).\n\n"
            "See [short](https://bit.ly/abc).\n\n"
            "See [ip](http://192.168.1.10/path).\n"
        )
        entries = self._diagnostics(outcome)
        seen_codes = self._pluck(entries, "code")
        seen_rules = self._pluck(entries, "rule_id")
        self._assert_all_present(
            (
                "LINK.PROTOCOL_RELATIVE",
                "LINK.TEXT_FRAGMENT",
                "LINK.EXPIRING_SIGNED",
                "LINK.SHORTENER",
                "LINK.IP_LITERAL",
            ),
            seen_codes,
        )
        self._assert_all_present(
            (
                "HOUSE.LINKS.URLS.NO_PROTOCOL_RELATIVE",
                "HOUSE.LINKS.URLS.AVOID.TEXT_FRAGMENTS",
                "HOUSE.LINKS.URLS.AVOID.EXPIRING_SIGNED",
                "HOUSE.ACCESSIBILITY.LINKS.AVOID_SHORTENERS",
                "HOUSE.LINKS.URLS.AVOID.IP_LITERALS",
            ),
            seen_rules,
        )

    def test_link_auto_checks_emit(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            "See [stage](https://staging.example.com/path).\n\n"
            "See [internal](http://internal.local/path).\n\n"
            "See [ftp](ftp://example.com/file).\n\n"
            "See [track](https://example.com/page?utm_source=x).\n\n"
            "See [session](https://example.com/page?session=abc).\n\n"
            "See [invite](https://example.com/invite?token=abc1234567890).\n\n"
            "See [reset](https://example.com/reset?token=abc).\n\n"
            "See [login](https://example.com/login).\n\n"
            "See [creds](https://user:pass@example.com/).\n\n"
            "See [private](http://10.0.0.1/path).\n"
        )
        seen_codes = self._pluck(self._diagnostics(outcome), "code")
        self._assert_all_present(
            (
                "LINK.STAGING_HOST",
                "LINK.INTERNAL_HOST",
                "LINK.FTP",
                "LINK.TRACKING_PARAMS",
                "LINK.SESSION_PARAMS",
                "LINK.INVITE",
                "LINK.PASSWORD_RESET",
                "LINK.LOGIN",
                "LINK.CREDENTIALS",
                "LINK.PRIVATE_IP",
            ),
            seen_codes,
        )

    def test_link_auto_phase2_emit(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            "See [Report](https://example.com/files/report.pdf).\n\n"
            "This pre[fix](https://example.com)ed sentence.\n\n"
            "See [This link label is far too long for running text in the main body](https://example.com/long).\n\n"
            "Download [Raw dump](https://example.com/static/pack/abc.md).\n"
        )
        entries = self._diagnostics(outcome)
        seen_codes = self._pluck(entries, "code")
        seen_rules = self._pluck(entries, "rule_id")
        self._assert_all_present(
            (
                "LINK.FILETYPE_LABEL",
                "LINK.PARTIAL_WORD",
                "LINK.LABEL_OVERLONG",
                "LINK.HTML_FALLBACK",
            ),
            seen_codes,
        )
        self._assert_all_present(
            (
                "HOUSE.LINKS.AUTO.FILETYPE_LABEL.REQUIRED",
                "HOUSE.LINKS.AUTO.PARTIAL_WORD.AVOID",
                "HOUSE.LINKS.AUTO.LABEL_LENGTH.AVOID_OVERLONG",
                "HOUSE.LINKS.AUTO.HTML_FALLBACK.REQUIRED",
            ),
            seen_rules,
        )

    def test_reference_section_allows_bare_urls(self) -> None:
        outcome = self._lint_text(
            "# Title\n"
            "See http://example.com/body.\n\n"
            "## References\n"
            "http://example.com/ref\n"
        )
        flagged_lines = []
        for entry in self._diagnostics(outcome):
            if entry.get("code") == "LINK.BARE_URL":
                flagged_lines.append(entry.get("line"))
        # Only the body URL (line 2) is reported; the one under "References" is not.
        self.assertEqual(flagged_lines, [2])

    def test_links_angle_wrappers_and_trailing_punct(self) -> None:
        # NOTE(review): the middle fixture piece is blank although
        # LINK.ANGLE_WRAPPER is expected; presumably an angle-wrapped URL
        # belongs there -- confirm the fixture was not stripped.
        outcome = self._lint_text(
            "# Title\n\n"
            "\n\n"
            "https://example.com.\n"
        )
        seen_codes = self._pluck(self._diagnostics(outcome), "code")
        self._assert_all_present(
            ("LINK.ANGLE_WRAPPER", "LINK.URL_ONLY_TRAILING_PUNCT"),
            seen_codes,
        )

    # ------------------------------------------------------------------
    # Suppression and punctuation balance
    # ------------------------------------------------------------------

    def test_ignore_directive_suppresses_rule(self) -> None:
        # NOTE(review): the first fixture line is empty; an inline ignore
        # directive would be expected here given the test name -- confirm the
        # fixture was not stripped.
        outcome = self._lint_text(
            "\n"
            "This sentence has an unbalanced (parenthesis.\n"
        )
        seen_codes = self._pluck(self._diagnostics(outcome), "code")
        self.assertNotIn("PUNCT.BALANCE", seen_codes)

    def test_front_matter_ignore_suppresses_rule(self) -> None:
        outcome = self._lint_text(
            "---\n"
            "iftypeset_ignore:\n"
            " - CMOS.PUNCTUATION.PARENS_BRACKETS.BALANCE\n"
            "---\n"
            "This sentence has an unbalanced (parenthesis.\n"
        )
        seen_codes = self._pluck(self._diagnostics(outcome), "code")
        self.assertNotIn("PUNCT.BALANCE", seen_codes)

    def test_enumeration_parenthesis_not_balanced(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n## 1) Overview\n\n## Appendix A) Data\n\nThis is fine.\n"
        )
        seen_codes = self._pluck(self._diagnostics(outcome), "code")
        self.assertNotIn("PUNCT.BALANCE", seen_codes)

    # ------------------------------------------------------------------
    # Typography, layout, i18n
    # ------------------------------------------------------------------

    def test_typography_emphasis_lints(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            "This has **BOLD TEXT THAT GOES ON FOR MANY WORDS TO TRIGGER A LONG BOLD WARNING**.\n\n"
            "This has *italics that stretch across many words and should be flagged for length in prose and reading comfort in long spans*.\n\n"
            "This has ***bold italics that are also long enough to trigger a warning***.\n\n"
            "This has **ALL CAPS TEXT** in bold.\n"
        )
        seen_codes = self._pluck(self._diagnostics(outcome), "code")
        self._assert_all_present(
            (
                "TYPO.BOLD_LONG",
                "TYPO.ITALIC_LONG",
                "TYPO.BOLD_ITALIC_LONG",
                "TYPO.BOLD_ALL_CAPS",
            ),
            seen_codes,
        )

    def test_layout_spacing_and_empty_sections(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            "First paragraph.\n\n\n\n"
            "Second paragraph.\n\n"
            "## Empty Section\n\n"
            "## Next Section\n\n"
            "Body text.\n"
        )
        seen_codes = self._pluck(self._diagnostics(outcome), "code")
        self._assert_all_present(
            ("LAYOUT.EXTRA_BLANK_LINES", "LAYOUT.EMPTY_SECTION"),
            seen_codes,
        )

    def test_i18n_mixed_decimal_and_quotes(self) -> None:
        outcome = self._lint_text(
            "# Title\n\n"
            "He said \u201cHello\u201d and left.\n\n"
            "Elle a dit \u00abbonjour\u00bb et a quitt\u00e9.\n\n"
            "The value was 1.5 in one system and 1,5 in another.\n"
        )
        entries = self._diagnostics(outcome)
        seen_codes = self._pluck(entries, "code")
        seen_rules = self._pluck(entries, "rule_id")
        self._assert_all_present(
            ("I18N.MIXED_DECIMAL_SEPARATORS", "I18N.MIXED_QUOTE_STYLES"),
            seen_codes,
        )
        self._assert_all_present(
            (
                "HOUSE.I18N.NUMBERS.MIXED_DECIMAL_SEPARATORS",
                "HOUSE.I18N.QUOTES.MIXED_STYLES",
            ),
            seen_rules,
        )

    # ------------------------------------------------------------------
    # Manual checklist
    # ------------------------------------------------------------------

    def test_manual_checklist_emits_rules(self) -> None:
        checklist = manual_checklist(load_spec(ROOT / "spec"))
        self.assertTrue(checklist)
        listed_ids = {entry.get("id") for entry in checklist}
        self._assert_all_present(
            (
                "CMOS.NUMBERS.SPELLING.ONE_TO_ONE_HUNDRED.DEFAULT",
                "HOUSE.EDITORIAL.STRUCTURE.SUMMARY.EARLY",
            ),
            listed_ids,
        )
        self._assert_all_absent(
            (
                "HOUSE.LINKS.URLS.AVOID.INTERNAL_HOSTNAMES",
                "HOUSE.LINKS.URLS.NO_EMBEDDED_CREDENTIALS",
                "HOUSE.LINKS.URLS.AVOID.PASSWORD_RESET",
            ),
            listed_ids,
        )


if __name__ == "__main__":
    unittest.main()