diff --git a/Chicago_Style_18_Notes-Bibliography.docx b/Chicago_Style_18_Notes-Bibliography.docx new file mode 100644 index 0000000000..97d1b17445 Binary files /dev/null and b/Chicago_Style_18_Notes-Bibliography.docx differ diff --git a/Chicago_Style_Author-Date__2025.docx b/Chicago_Style_Author-Date__2025.docx new file mode 100644 index 0000000000..3d70c2a366 Binary files /dev/null and b/Chicago_Style_Author-Date__2025.docx differ diff --git a/Robert Bringhurst – The Elements of Typographic Style.pdf b/Robert Bringhurst – The Elements of Typographic Style.pdf new file mode 100644 index 0000000000..aaaa68d391 Binary files /dev/null and b/Robert Bringhurst – The Elements of Typographic Style.pdf differ diff --git a/pub-style-skeleton.txt b/pub-style-skeleton.txt new file mode 100644 index 0000000000..e15f7c1600 --- /dev/null +++ b/pub-style-skeleton.txt @@ -0,0 +1,1580 @@ +--- FILE: spec/schema/rule.schema.json --- +{ +"$schema": "https://json-schema.org/draft/2020-12/schema", +"$id": "https://example.invalid/pubstyle/spec/schema/rule.schema.json", +"title": "Publication-quality rule record", +"type": "object", +"additionalProperties": false, +"required": [ +"id", +"title", +"source_refs", +"category", +"severity", +"applies_to", +"rule_text", +"rationale", +"enforcement", +"autofix", +"autofix_notes", +"tags", +"keywords", +"dependencies", +"exceptions", +"status" +], +"properties": { +"id": { +"type": "string", +"description": "Stable rule identifier. Prefix must be one of CMOS, BRING, HOUSE.", +"minLength": 6, +"maxLength": 120, +"pattern": "^(CMOS|BRING|HOUSE)\\.[A-Z0-9_]+(?:\\.[A-Z0-9_]+)*$" +}, +"title": { +"type": "string", +"description": "Short human-readable rule title.", +"minLength": 4, +"maxLength": 160 +}, +"source_refs": { +"type": "array", +"description": "Pointers back to sources. Must be pointers, not quotes. 
Prefer: \"CMOS18 §X.Y pN\" / \"BRING §X.Y pN\" / \"HOUSE §X.Y pN\".", +"minItems": 1, +"items": { +"type": "string", +"minLength": 8, +"maxLength": 120, +"pattern": "^(CMOS18|BRING|HOUSE)\\s§[0-9A-Za-z][0-9A-Za-z.\\-]*\\sp[0-9ivxlcdmIVXLCDM]+(?:-[0-9ivxlcdmIVXLCDM]+)?(?:\\s\\(scan p[0-9]+\\))?$" +} +}, +"category": { +"type": "string", +"description": "Primary taxonomy bucket.", +"enum": [ +"editorial", +"typography", +"layout", +"headings", +"citations", +"numbers", +"punctuation", +"abbreviations", +"links", +"tables", +"figures", +"code", +"frontmatter", +"backmatter", +"accessibility", +"i18n" +] +}, +"severity": { +"type": "string", +"description": "Normativity level. MUST blocks release unless downgraded by profile.", +"enum": ["must", "should", "warn"] +}, +"applies_to": { +"type": "string", +"description": "Which pipeline stage(s) the rule targets.", +"enum": ["md", "html", "pdf", "all"] +}, +"rule_text": { +"type": "string", +"description": "Paraphrased rule statement (no long quotes). If exact wording matters, note: \"Exact wording required—refer to pointer\".", +"minLength": 10, +"maxLength": 800 +}, +"rationale": { +"type": "string", +"description": "One-line rationale.", +"minLength": 5, +"maxLength": 200 +}, +"enforcement": { +"type": "string", +"description": "Primary enforcement mechanism.", +"enum": ["lint", "typeset", "postrender", "manual"] +}, +"autofix": { +"type": "string", +"description": "Autofix capability, if any.", +"enum": ["none", "rewrite", "reflow", "suggest"] +}, +"autofix_notes": { +"type": "string", +"description": "Notes describing what can be fixed and how/when. 
Keep short; never include book quotes.", +"maxLength": 400 +}, +"tags": { +"type": "array", +"description": "Compact labels for routing/search/overrides (e.g., 'manual_checklist=true', 'widows_orphans', 'hyphenation').", +"items": { +"type": "string", +"minLength": 1, +"maxLength": 48, +"pattern": "^[a-z0-9][a-z0-9_.:\\-/]*(?:=[a-z0-9_.:\\-/]+)?$" +}, +"maxItems": 64 +}, +"keywords": { +"type": "array", +"description": "Search keywords (human-oriented; not necessarily normalized).", +"items": { +"type": "string", +"minLength": 2, +"maxLength": 48 +}, +"maxItems": 64 +}, +"dependencies": { +"type": "array", +"description": "Rule IDs that should be applied/understood first.", +"items": { +"type": "string", +"pattern": "^(CMOS|BRING|HOUSE)\\.[A-Z0-9_]+(?:\\.[A-Z0-9_]+)*$" +}, +"maxItems": 32 +}, +"exceptions": { +"type": "array", +"description": "Free-text exceptions/caveats. Keep concise.", +"items": { +"type": "string", +"minLength": 3, +"maxLength": 240 +}, +"maxItems": 32 +}, +"examples_ref": { +"type": "array", +"description": "Optional references to separately stored examples (see spec/examples/README.md).", +"items": { +"type": "string", +"minLength": 6, +"maxLength": 80, +"pattern": "^EX\\.[A-Z0-9_]+\\.[A-Z0-9_]+\\.[0-9]{3,}$" +}, +"maxItems": 64 +}, +"implementation_notes": { +"type": "string", +"description": "Optional short notes for implementers (no quotes).", +"minLength": 3, +"maxLength": 600 +}, +"status": { +"type": "string", +"description": "Lifecycle state.", +"enum": ["draft", "active", "deprecated"] +} +}, +"allOf": [ +{ +"if": { +"properties": { +"autofix": { "enum": ["rewrite", "reflow", "suggest"] } +}, +"required": ["autofix"] +}, +"then": { +"properties": { +"autofix_notes": { "minLength": 1 } +} +} +} +] +} +--- END FILE --- + +--- FILE: spec/manifest.yaml --- +version: "0.1.0" +registry_id: "pubstyle" +description: > +Machine-readable style+typesetting rules for a Markdown→HTML→PDF pipeline, +backed by primary references (Chicago / Bringhurst) and 
optional house rules. +Rules are paraphrases only; sources are referenced by pointer strings. + +id_naming: +prefixes: +CMOS: "Editorial/style usage rules derived primarily from Chicago." +BRING: "Typographic/layout rules derived primarily from Bringhurst." +HOUSE: "Project-specific rules not directly sourced to Chicago/Bringhurst." +pattern: "PREFIX.DOMAIN.TOPIC[.SUBTOPIC[.DETAIL...]]" +delimiter: "." +casing: "UPPER_SNAKE for segments" +stability: +rule_ids_are_immutable: true +rename_policy: "Deprecate old id; introduce new id; keep mapping in report diffs." +examples: +- "CMOS.PUNCTUATION.DASHES.EM_DASH" +- "BRING.LAYOUT.WIDOWS_ORPHANS.AVOID" +- "HOUSE.CITATIONS.DOI.PREFER_HTTPS" + +source_pointer_scheme: +goal: "Provide auditable traceability without reproducing sources." +pointer_format_primary: "CMOS18 §<section> p<page>" +pointer_format_secondary: "BRING §<section> p<page>" +pointer_format_house: "HOUSE §<section> p<page>" +optional_scan_hint: "(scan p<n>)" +allowed_page_numbering: ["arabic", "roman"] +notes: +- "Pointers must be sufficient for a reader with the book to locate the guidance." +- "Never store verbatim passages; paraphrase only." +- "If a rule depends on exact wording, rule_text must say: 'Exact wording required—refer to pointer'." + +category_taxonomy: + +* editorial +* typography +* layout +* headings +* citations +* numbers +* punctuation +* abbreviations +* links +* tables +* figures +* code +* frontmatter +* backmatter +* accessibility +* i18n + +profiles: + +* web_pdf +* print_pdf +* dense_tech +* memo +* slide_deck + +planned_rule_counts: +target_total_range: [800, 1500] +target_by_category: +editorial: 120 +typography: 170 +layout: 140 +headings: 70 +citations: 140 +numbers: 90 +punctuation: 120 +abbreviations: 60 +links: 50 +tables: 60 +figures: 50 +code: 70 +frontmatter: 40 +backmatter: 40 +accessibility: 90 +i18n: 60 + +coverage_contract: +must_rules: +enforceability_requirement: > +Every MUST rule must be enforceable by at least one of: lint, typeset, postrender; +otherwise it must be explicitly labeled as a manual checklist item and emitted in +a checklist output artifact. +manual_checklist_tag: "manual_checklist=true" +checklist_artifact: "manual-checklist.md (and JSON mirror)" +should_rules: +policy: "Should rules should be enforceable when practical; otherwise allowed as manual with explicit rationale." +warn_rules: +policy: "Warnings may be non-blocking and advisory; still require source pointers." +enforcement_definitions: +lint: "Static analysis over normalized Markdown/HTML AST. Deterministic." +typeset: "CSS/tokens shaping decisions prior to rendering (pagination, keeps, hyphenation parameters)." +postrender: "PDF/HTML layout inspection (widows/orphans, overflow, keep failures, numbering mismatches)." +manual: "Human review; system must still produce checklist items and traceability pointers." 
+ci_guardrails: +coverage_floor: +must_implemented_min_percent: 95 +overall_implemented_min_percent: 80 +regression_rule: "CI fails if implemented coverage decreases from main branch." + +degraded_mode_contract: +purpose: "Handle badly-structured inputs safely without crashing; still provide useful output." +triggers: +- "Markdown parse errors / invalid UTF-8" +- "Missing heading hierarchy (no H1/H2 etc.)" +- "Garbage extraction (e.g., line breaks every word, excessive hard wraps)" +- "Mixed language with no lang metadata" +behavior: +normalize: +attempt_repairs: +- "Normalize whitespace and line endings" +- "Detect and unwrap hard-wrapped paragraphs heuristically" +- "Infer heading levels from patterns (e.g., '1.', '1.1', ALL CAPS lines) with low confidence" +if_unrecoverable: +- "Fall back to minimal AST: paragraphs + code blocks + raw spans" +- "Mark document structure confidence = low" +enforcement_in_degraded_mode: +lint: +run_subset: ["safety", "sanity", "catastrophic typography (double spaces, broken links)"] +downgrade_some_must_to_warn: true +typeset: +use_fallback_tokens: true +disable_aggressive_hyphenation: true +postrender: +run_core_gates_only: ["overfull_lines", "table_overflow_incidents", "code_overflow_incidents"] +reporting: +always_emit: +- "layout-report.json" +- "coverage-report.json" +- "degraded-mode-report.json (what was inferred and why)" +--- END FILE --- + +--- FILE: spec/profiles/web_pdf.yaml --- +profile_id: "web_pdf" +description: "Screen-first PDF for sharing and reading; conservative pagination and strong accessibility defaults." 
+ +page: +size: "Letter" +orientation: "portrait" +two_sided: false +margins: +top: "22mm" +bottom: "22mm" +inner: "20mm" +outer: "20mm" + +fonts: +body: +family: ["Noto Serif", "STIX Two Text", "Times New Roman", "serif"] +size: "11pt" +line_height: 1.45 +heading: +family: ["Noto Sans", "Source Sans 3", "Arial", "sans-serif"] +mono: +family: ["Noto Sans Mono", "Source Code Pro", "Consolas", "monospace"] +size: "10pt" +line_height: 1.35 + +measure_targets: +columns: 1 +body_chars_per_line: +min: 55 +ideal: 66 +max: 75 +footnote_chars_per_line: +min: 50 +ideal: 60 +max: 70 + +hyphenation: +enabled: true +strategy: "balanced" +language_driven: true +min_left: 2 +min_right: 3 +max_consecutive_hyphenated_lines: 2 +avoid_proper_names_when_possible: true +avoid_after_short_lines: true + +paragraphs: +first_paragraph_indent: "0" +indent: "1em" +block_paragraph_spacing: "0.6em" + +headings: +keep_with_next_lines: 2 +avoid_stranded_headings: true +numbering: +enabled: true +style: "decimal" +require_monotonic_increase: true + +widows_orphans: +widow_lines: 2 +orphan_lines: 2 +balance_facing_pages: false + +code: +inline: +use_mono: true +block: +font_size: "9.5pt" +line_height: 1.35 +wrap: true +max_wrap_penalty: "medium" +overflow_policy: "wrap_then_shrink_minor" +shrink_limit: 0.92 + +tables: +cell_padding: "3pt 6pt" +header_repeat: true +overflow_policy: "shrink_then_wrap" +shrink_limit: 0.9 + +severity_overrides: + +* selector: { category: "layout", tag: "widows_orphans" } + severity: "should" +* selector: { category: "accessibility" } + severity: "must" + +locale_defaults: +primary_language: "en" +fallback_languages: ["fr"] +quotation_style: "us" # curly quotes, US punctuation conventions +date_format: "YYYY-MM-DD" +number_format: +decimal_separator: "." 
+thousands_separator: "," +--- END FILE --- + +--- FILE: spec/profiles/print_pdf.yaml --- +profile_id: "print_pdf" +description: "Print-oriented PDF with stricter pagination, book-like rhythm, and stronger keep constraints." + +page: +size: "6in×9in" +orientation: "portrait" +two_sided: true +margins: +top: "18mm" +bottom: "20mm" +inner: "22mm" +outer: "18mm" + +fonts: +body: +family: ["STIX Two Text", "Noto Serif", "Georgia", "serif"] +size: "10.5pt" +line_height: 1.50 +heading: +family: ["STIX Two Text", "Noto Serif", "serif"] +mono: +family: ["Noto Sans Mono", "Source Code Pro", "Consolas", "monospace"] +size: "9.5pt" +line_height: 1.30 + +measure_targets: +columns: 1 +body_chars_per_line: +min: 55 +ideal: 66 +max: 72 + +hyphenation: +enabled: true +strategy: "print_quality" +min_left: 2 +min_right: 3 +max_consecutive_hyphenated_lines: 2 +avoid_proper_names_when_possible: true + +paragraphs: +first_paragraph_indent: "0" +indent: "1em" + +headings: +keep_with_next_lines: 3 +avoid_stranded_headings: true +numbering: +enabled: true +style: "decimal" +require_monotonic_increase: true + +widows_orphans: +widow_lines: 2 +orphan_lines: 2 +balance_facing_pages: true + +code: +block: +font_size: "9pt" +line_height: 1.25 +wrap: false +overflow_policy: "shrink_then_scroll_indicator" +shrink_limit: 0.90 + +tables: +cell_padding: "2.5pt 5pt" +header_repeat: true +overflow_policy: "shrink_then_rotate_if_allowed" +shrink_limit: 0.88 + +severity_overrides: + +* selector: { category: "layout", tag: "widows_orphans" } + severity: "must" +* selector: { category: "layout", tag: "keep_constraints" } + severity: "must" +* selector: { category: "typography", tag: "spacing_consistency" } + severity: "must" + +locale_defaults: +primary_language: "en" +fallback_languages: ["fr"] +quotation_style: "us" +date_format: "Month D, YYYY" +number_format: +decimal_separator: "." 
+thousands_separator: "," +--- END FILE --- + +--- FILE: spec/profiles/dense_tech.yaml --- +profile_id: "dense_tech" +description: "Technical papers and specs: denser copy, more code/table tolerance, strict numbering and citations." + +page: +size: "A4" +orientation: "portrait" +two_sided: false +margins: +top: "18mm" +bottom: "18mm" +inner: "18mm" +outer: "18mm" + +fonts: +body: +family: ["Noto Serif", "STIX Two Text", "serif"] +size: "10pt" +line_height: 1.35 +heading: +family: ["Noto Sans", "Source Sans 3", "sans-serif"] +mono: +family: ["Noto Sans Mono", "Source Code Pro", "monospace"] +size: "9pt" +line_height: 1.25 + +measure_targets: +columns: 1 +body_chars_per_line: +min: 65 +ideal: 75 +max: 90 + +hyphenation: +enabled: true +strategy: "balanced" +min_left: 2 +min_right: 3 +max_consecutive_hyphenated_lines: 3 +avoid_proper_names_when_possible: true + +headings: +keep_with_next_lines: 2 +avoid_stranded_headings: true +numbering: +enabled: true +style: "decimal" +require_monotonic_increase: true + +widows_orphans: +widow_lines: 2 +orphan_lines: 2 +balance_facing_pages: false + +code: +block: +font_size: "8.8pt" +line_height: 1.20 +wrap: true +overflow_policy: "wrap_then_shrink_minor" +shrink_limit: 0.90 + +tables: +cell_padding: "2pt 4pt" +header_repeat: true +overflow_policy: "shrink_then_wrap" +shrink_limit: 0.85 + +severity_overrides: + +* selector: { category: "citations" } + severity: "must" +* selector: { category: "headings", tag: "numbering" } + severity: "must" +* selector: { category: "layout", tag: "widows_orphans" } + severity: "should" + +locale_defaults: +primary_language: "en" +fallback_languages: ["fr"] +quotation_style: "us" +date_format: "YYYY-MM-DD" +number_format: +decimal_separator: "." +thousands_separator: "," +--- END FILE --- + +--- FILE: spec/profiles/memo.yaml --- +profile_id: "memo" +description: "Short internal documents: lenient pagination, strong clarity, minimal typographic complexity." 
+ +page: +size: "Letter" +orientation: "portrait" +two_sided: false +margins: +top: "1in" +bottom: "1in" +inner: "1in" +outer: "1in" + +fonts: +body: +family: ["Noto Sans", "Source Sans 3", "Arial", "sans-serif"] +size: "11pt" +line_height: 1.40 +heading: +family: ["Noto Sans", "Source Sans 3", "Arial", "sans-serif"] +mono: +family: ["Noto Sans Mono", "Consolas", "monospace"] +size: "10pt" +line_height: 1.30 + +measure_targets: +columns: 1 +body_chars_per_line: +min: 55 +ideal: 70 +max: 85 + +hyphenation: +enabled: false +strategy: "off_for_memos" + +headings: +keep_with_next_lines: 2 +avoid_stranded_headings: true +numbering: +enabled: false + +widows_orphans: +widow_lines: 1 +orphan_lines: 1 +balance_facing_pages: false + +code: +block: +font_size: "9.5pt" +line_height: 1.25 +wrap: true +overflow_policy: "wrap" +shrink_limit: 1.0 + +tables: +cell_padding: "3pt 6pt" +header_repeat: false +overflow_policy: "wrap" +shrink_limit: 1.0 + +severity_overrides: + +* selector: { category: "layout", tag: "widows_orphans" } + severity: "warn" +* selector: { category: "accessibility" } + severity: "must" + +locale_defaults: +primary_language: "en" +fallback_languages: ["fr"] +quotation_style: "us" +date_format: "YYYY-MM-DD" +number_format: +decimal_separator: "." +thousands_separator: "," +--- END FILE --- + +--- FILE: spec/profiles/slide_deck.yaml --- +profile_id: "slide_deck" +description: "Paged slides (16:9). Emphasis on hierarchy, short lines, and avoiding overflows." 
+ +page: +size: "13.333in×7.5in" # 16:9 at common PPT dimensions +orientation: "landscape" +two_sided: false +margins: +top: "0.5in" +bottom: "0.5in" +inner: "0.6in" +outer: "0.6in" + +fonts: +body: +family: ["Noto Sans", "Source Sans 3", "Arial", "sans-serif"] +size: "24pt" +line_height: 1.15 +heading: +family: ["Noto Sans", "Source Sans 3", "Arial", "sans-serif"] +mono: +family: ["Noto Sans Mono", "Consolas", "monospace"] +size: "20pt" +line_height: 1.10 + +measure_targets: +columns: 1 +body_chars_per_line: +min: 25 +ideal: 40 +max: 55 + +hyphenation: +enabled: false +strategy: "off_for_slides" + +headings: +keep_with_next_lines: 1 +avoid_stranded_headings: true +numbering: +enabled: false + +widows_orphans: +widow_lines: 1 +orphan_lines: 1 +balance_facing_pages: false + +code: +block: +font_size: "18pt" +line_height: 1.10 +wrap: true +overflow_policy: "wrap_then_shrink_minor" +shrink_limit: 0.92 + +tables: +cell_padding: "6pt 10pt" +header_repeat: false +overflow_policy: "shrink_then_wrap" +shrink_limit: 0.88 + +severity_overrides: + +* selector: { category: "layout", tag: "overflow" } + severity: "must" +* selector: { category: "accessibility" } + severity: "must" + +locale_defaults: +primary_language: "en" +fallback_languages: ["fr"] +quotation_style: "us" +date_format: "YYYY-MM-DD" +number_format: +decimal_separator: "." +thousands_separator: "," +--- END FILE --- + +--- FILE: spec/quality_gates.yaml --- +version: "0.1.0" +description: > +Post-render QA gates. All thresholds are hard numeric limits used to fail builds +(unless a gate is explicitly marked as "warn-only" by the invoking CLI flags). + +metrics: +max_widows_per_10_pages: "Count of widow lines across any 10 consecutive pages." +max_orphans_per_10_pages: "Count of orphan lines across any 10 consecutive pages." +max_stranded_headings: "Count of headings at page bottom with insufficient following content per keep rule." 
+max_overfull_lines: "Count of lines exceeding measure by overflow threshold (render-time measured)." +max_table_overflow_incidents: "Count of tables that overflow page/column bounds or are clipped." +max_code_overflow_incidents: "Count of code blocks with horizontal overflow or clipping." +max_link_wrap_incidents: "Count of wrapped URLs/DOIs/emails violating link wrap policy." +max_heading_numbering_errors: "Count of numbering sequence/format violations." +max_citation_format_errors: "Count of citations not matching configured style format." + +overflow_detection: +overfull_line_threshold_css_px: 1.0 +consider_clipping_as_overflow: true +ignore_decorative_elements: true + +profiles: +web_pdf: +default: +max_widows_per_10_pages: 1 +max_orphans_per_10_pages: 1 +max_stranded_headings: 0 +max_overfull_lines: 2 +max_table_overflow_incidents: 0 +max_code_overflow_incidents: 1 +max_link_wrap_incidents: 2 +max_heading_numbering_errors: 0 +max_citation_format_errors: 0 +strict: +max_widows_per_10_pages: 0 +max_orphans_per_10_pages: 0 +max_stranded_headings: 0 +max_overfull_lines: 0 +max_table_overflow_incidents: 0 +max_code_overflow_incidents: 0 +max_link_wrap_incidents: 0 +max_heading_numbering_errors: 0 +max_citation_format_errors: 0 + +print_pdf: +default: +max_widows_per_10_pages: 0 +max_orphans_per_10_pages: 0 +max_stranded_headings: 0 +max_overfull_lines: 0 +max_table_overflow_incidents: 0 +max_code_overflow_incidents: 0 +max_link_wrap_incidents: 0 +max_heading_numbering_errors: 0 +max_citation_format_errors: 0 +strict: +max_widows_per_10_pages: 0 +max_orphans_per_10_pages: 0 +max_stranded_headings: 0 +max_overfull_lines: 0 +max_table_overflow_incidents: 0 +max_code_overflow_incidents: 0 +max_link_wrap_incidents: 0 +max_heading_numbering_errors: 0 +max_citation_format_errors: 0 + +dense_tech: +default: +max_widows_per_10_pages: 1 +max_orphans_per_10_pages: 1 +max_stranded_headings: 0 +max_overfull_lines: 3 +max_table_overflow_incidents: 1 
+max_code_overflow_incidents: 2 +max_link_wrap_incidents: 3 +max_heading_numbering_errors: 0 +max_citation_format_errors: 0 +strict: +max_widows_per_10_pages: 0 +max_orphans_per_10_pages: 0 +max_stranded_headings: 0 +max_overfull_lines: 1 +max_table_overflow_incidents: 0 +max_code_overflow_incidents: 0 +max_link_wrap_incidents: 1 +max_heading_numbering_errors: 0 +max_citation_format_errors: 0 + +memo: +default: +max_widows_per_10_pages: 3 +max_orphans_per_10_pages: 3 +max_stranded_headings: 0 +max_overfull_lines: 2 +max_table_overflow_incidents: 1 +max_code_overflow_incidents: 1 +max_link_wrap_incidents: 4 +max_heading_numbering_errors: 1 +max_citation_format_errors: 1 +strict: +max_widows_per_10_pages: 1 +max_orphans_per_10_pages: 1 +max_stranded_headings: 0 +max_overfull_lines: 0 +max_table_overflow_incidents: 0 +max_code_overflow_incidents: 0 +max_link_wrap_incidents: 2 +max_heading_numbering_errors: 0 +max_citation_format_errors: 0 + +slide_deck: +default: +max_widows_per_10_pages: 5 +max_orphans_per_10_pages: 5 +max_stranded_headings: 0 +max_overfull_lines: 0 +max_table_overflow_incidents: 0 +max_code_overflow_incidents: 0 +max_link_wrap_incidents: 0 +max_heading_numbering_errors: 0 +max_citation_format_errors: 1 +strict: +max_widows_per_10_pages: 2 +max_orphans_per_10_pages: 2 +max_stranded_headings: 0 +max_overfull_lines: 0 +max_table_overflow_incidents: 0 +max_code_overflow_incidents: 0 +max_link_wrap_incidents: 0 +max_heading_numbering_errors: 0 +max_citation_format_errors: 0 +--- END FILE --- + +--- FILE: spec/indexes/README.md --- + +# Indexes + +This project builds small, fast indexes so the runtime can answer questions like: + +* “Which rules mention *en dash*?” +* “Which rules cite *CMOS18 §6.88 p412*?” +* “Which rules apply to `postrender` QA?” +* “What rules are overridden by the `print_pdf` profile?” + +Indexes are derived artifacts (rebuildable) and should not be hand-edited. 
+ +## Indexes the app will build + +### 1) keyword → rule IDs + +**Purpose:** fast search/autocomplete and lint explanations. + +* **Path:** `spec/indexes/keywords_all.json` and per-category deltas: + + * `spec/indexes/keywords_<category>.json` +* **Format (JSON):** + + * keys: normalized keyword (lowercased) + * values: array of rule IDs in stable lexicographic order + +Normalization (default): + +* Unicode NFKC +* lowercase +* collapse whitespace +* strip surrounding punctuation + +### 2) source_ref → rule IDs + +**Purpose:** audit trail back to references without embedding book text. + +* **Path:** `spec/indexes/source_refs_all.json` and per-category deltas: + + * `spec/indexes/source_refs_<category>.json` +* **Format (JSON):** + + * keys: exact `source_ref` pointer strings + * values: array of rule IDs + +### 3) category → rule IDs + +**Purpose:** batch reporting, extraction coverage, profile scoping. + +* **Path:** `spec/indexes/category.json` +* **Format (JSON):** + + * keys: category name + * values: array of rule IDs + +### 4) enforcement → rule IDs + +**Purpose:** quickly decide which engine (lint/typeset/postrender/manual) handles which rules. + +* **Path:** `spec/indexes/enforcement.json` + +### 5) profile overrides + +**Purpose:** allow profiles to override severity or token parameters without editing rules. + +* **Path:** `spec/indexes/profile_overrides.json` +* **Format (JSON):** + + * per profile: list of override objects (selector + action) + * selectors may match category, tags, applies_to, or explicit rule IDs + +## Build guarantees + +* Index builds are deterministic from: + + * `spec/rules/**.ndjson` + * `spec/profiles/*.yaml` + * `spec/manifest.yaml` + +* The runtime must treat indexes as **cacheable**: + + * if an index is missing/outdated → rebuild or fall back to scanning rule files. + +--- END FILE --- + +--- FILE: spec/examples/README.md --- + +# Examples + +Rules stay compact and machine-enforceable; examples live separately to avoid bloating the rule registry. 
+ +## Goals + +* Provide **concrete fixtures** for: + + * unit tests (lint, autofix, typeset transforms) + * integration tests (render + QA gates) + * documentation (human-readable “why this matters”) + +* Keep examples **small** (a few lines) and **targeted** (each example triggers a known set of rules). + +## Example ID format + +`EX.<CATEGORY>.<TOPIC>.<NNN>` + +* `CATEGORY` must match the category taxonomy (e.g., `PUNCTUATION`, `NUMBERS`, `CITATIONS`) +* `TOPIC` is an uppercase short slug +* `NNN` is a zero-padded integer (000–999+) + +Example: + +* `EX.PUNCTUATION.DASHES.001` + +## Suggested on-disk layout + +* `spec/examples/<category>/EX.<CATEGORY>.<TOPIC>.<NNN>.yaml` +* `spec/examples/<category>/fixtures/<fixture>.md` (optional) + +## Example YAML format (recommended) + +Fields: + +* `id` (required): example ID +* `rules` (required): list of rule IDs the example is meant to exercise +* `before` (required): inline Markdown or a reference to a fixture file +* `after` (optional): expected Markdown after autofix (if autofix exists) +* `expected` (optional): expected diagnostics/gates + + * `lint_errors`: array of rule IDs expected as errors + * `lint_warnings`: array of rule IDs expected as warnings + * `qa_failures`: array of gate keys expected to fail +* `notes` (optional): short human explanation (no book quotes) + +Minimal example skeleton: + +* id: EX.PUNCTUATION.DASHES.001 + rules: + + * CMOS.PUNCTUATION.DASHES.EM_DASH + before: | + ... + after: | + ... + expected: + lint_errors: [CMOS.PUNCTUATION.DASHES.EM_DASH] + +## Test corpus strategy + +Maintain a small, curated corpus that triggers: + +1. Lint-only issues (AST-level) + +* punctuation spacing +* numeral formatting +* heading numbering patterns +* link normalization / unsafe URLs +* citation field completeness + +2. Typeset-only issues (token/CSS decisions) + +* paragraph indentation patterns +* code block wrapping rules +* table overflow strategies + +3. 
Post-render QA issues (PDF/HTML layout) + +* widows/orphans +* stranded headings (keep-with-next) +* overfull lines (especially monospace/code) +* table/caption overflow and clipping + +Recommended corpus sizing: + +* 30–80 fixtures total +* each fixture should target 3–10 rules max +* include “degraded mode” fixtures (intentionally malformed Markdown) + +--- END FILE --- + +--- FILE: app/ARCHITECTURE.md --- + +# Runtime Architecture + +This is a thin, deterministic runtime that: + +A) ingests Markdown → normalizes a document AST → applies editorial lint (Chicago-derived) +B) applies typeset tokens/profile (Bringhurst-derived) +C) renders HTML and PDF deterministically +D) runs post-render QA gates (widows/orphans, heading keeps, overflow) +E) generates `layout-report.json` and fails builds when thresholds are exceeded + +Primary reference PDFs provided to the system (for pointer-based rules and traceability only): + +* The Chicago Manual of Style (18th ed).pdf +* Robert Bringhurst – The Elements of Typographic Style.pdf + +No bulk transcription is performed; rules are paraphrases and cite sources only by pointer. + +## Components + +### 1) Registry Loader + +Inputs: + +* `spec/rules/**.ndjson` (Phase 2 output) +* `spec/schema/rule.schema.json` +* `spec/manifest.yaml` +* `spec/profiles/*.yaml` +* `spec/quality_gates.yaml` + +Responsibilities: + +* validate each rule against JSON Schema +* enforce ID uniqueness and stable sorting +* build or load indexes in `spec/indexes/*.json` +* compute coverage (implemented vs unimplemented; by enforcement) + +Output (in-memory): + +* `RuleStore` (rules + indexes + profile overrides + gate thresholds) + +### 2) Markdown Ingest + AST Normalization + +Steps: + +1. Parse Markdown to an AST (mdast or equivalent). +2. 
Normalize to a stable internal schema: + + * heading levels and numbering metadata + * lists and list tight/loose semantics + * code spans/blocks with language tags + * tables (GFM) to a consistent representation + * links normalized (url, title, text) + * citations normalized (if present as syntax/extensions) +3. Produce `normalized-doc.json` for debugging reproducibility. + +Degraded mode: + +* If parsing fails or structure is missing, switch to minimal node set and mark `structure_confidence: low`. +* Run the “degraded mode contract” from `spec/manifest.yaml`. + +### 3) Editorial Lint Engine + +What it does: + +* Runs `lint`-enforced rules against normalized AST. +* Emits diagnostics: + + * `severity` (must/should/warn after profile overrides) + * `rule_id` + * location (source span) and node path + * message (generated from rule metadata + implementation hints) + +Autofix: + +* If a rule’s `autofix` is `rewrite` or `suggest`, produce: + + * patched Markdown (rewrite) OR + * suggestion blocks with exact spans to edit (suggest) +* Autofix must be deterministic and reversible (keep a patch log). + +Artifacts: + +* `lint-report.json` +* `lint-report.sarif` (optional for CI UIs) +* `lint-fixed.md` (optional, if autofix applied) + +### 4) Typeset Profile Engine + +Goal: + +* Convert “typographic intent” into deterministic render inputs: + + * CSS tokens (variables) + * layout policies (widows/orphans strategy, keeps, hyphenation params) + * code/table overflow strategies + +Inputs: + +* normalized AST +* profile tokens from `spec/profiles/.yaml` + +Outputs: + +* `render.css` (tokenized CSS + paged-media rules) +* `render.html` (deterministic HTML with stable classnames/data attributes) +* `typeset-report.json` (what tokens were used, resolved font stack, measure targets) + +Design principle: + +* “Soft rules” are tokens; “hard rules” are lint/QA gates. + +### 5) Deterministic Rendering + +The runtime should treat rendering as an adapter layer. 
+ +Minimum requirement: + +* Deterministic HTML generation (stable DOM order, stable IDs, stable whitespace). +* Deterministic PDF generation with pinned renderer/version and embedded fonts when possible. + +Adapter concept: + +* `RendererHTML`: emits HTML+CSS. +* `RendererPDF`: converts HTML+CSS to PDF using a configured engine. + +Recommended renderer capabilities: + +* CSS Paged Media support (page size/margins, running headers, footnotes if used) +* hyphenation dictionaries +* font embedding/subsetting + +Artifacts: + +* `out/.html` +* `out/.pdf` +* `out/render-log.json` (versions, timings, warnings) + +### 6) Post-render QA Analyzer + +Runs on: + +* PDF (preferred for final layout truth) and optionally HTML. + +Detects: + +* widows/orphans (by paragraph line runs across pages) +* stranded headings (heading at bottom violating keep-with-next) +* overfull lines (glyph boxes exceed text block) +* table overflow/clipping +* code overflow/clipping +* link wrap incidents (URLs/DOIs split against policy) +* heading numbering errors (cross-check against AST numbering) +* citation format errors (cross-check against configured citation style) + +Artifacts: + +* `layout-report.json` (the canonical QA report) +* `qa-report.json` (gate evaluation + failures + excerpts as coordinates, not text) + +Fail behavior: + +* Compare measured metrics to `spec/quality_gates.yaml` for the chosen profile. +* Exit non-zero if any MUST-equivalent gate fails (or if `--strict` chosen, strict thresholds apply). 
+ +## Coverage Reporting and CI Guardrails + +Coverage is computed from: + +* total active rules +* rules with an implemented enforcement handler: + + * lint implemented if rule_id has an evaluator in lint engine + * typeset implemented if token/policy exists and is applied deterministically + * postrender implemented if analyzer has a detector for that rule/tag + * manual implemented if checklist output includes it + +Artifacts: + +* `coverage-report.json` (counts by category, enforcement, severity, profile) +* `coverage-diff.json` (compares to baseline on main branch) + +CI policy (from manifest): + +* fail if MUST coverage drops +* fail if overall implemented coverage drops +* fail if rule IDs changed without deprecation mapping + +## Assumptions (Phase 1 defaults) + +1. The pipeline targets a CSS-based HTML→PDF engine (paged media capable). +2. Default language is `en` with optional `fr` fallback. +3. Citation style defaults to a Chicago-aligned style, but the registry will encode the exact variant in rules (Notes/Bibliography vs Author-Date) during extraction. +4. Fonts default to Noto/STIX families for broad coverage and consistent embedding, but can be overridden per profile. + +No questions are strictly required to proceed with Phase 2 extraction; these assumptions can be adjusted via profiles and house rules. + +--- END FILE --- + +--- FILE: app/CLI_SPEC.md --- + +# CLI Specification + +The CLI is designed for CI use: deterministic outputs, stable exit codes, and JSON artifacts for tooling. + +## Common flags (all commands) + +* `--input <path>`: Markdown file or directory. +* `--out <dir>`: Output directory (default: `out/`). +* `--rules <dir>`: Rules root directory (default: `spec/rules/`). +* `--profile <name>`: One of: `web_pdf`, `print_pdf`, `dense_tech`, `memo`, `slide_deck`. +* `--strict`: Use strict thresholds in `spec/quality_gates.yaml`. +* `--format <format>`: Diagnostic output format (where applicable).
+* `--fail-on <severity>`: Lowest severity that fails the command (default: `must`). +* `--degraded-ok`: Allow degraded mode without failing (still emits degraded-mode report). +* `--version`: Print tool + renderer versions. + +## Command: `lint` + +Purpose: + +* Parse Markdown → normalize AST → run lint rules. +* Optionally apply autofixes. + +Args: + +* `--fix`: Apply autofix where `autofix != none` and safe. +* `--fix-mode <rewrite|suggest>`: Whether to rewrite output Markdown or emit suggestions only. +* `--baseline <path>`: Compare diagnostics to an existing lint report and show diff. + +Outputs: + +* `out/lint-report.json` +* `out/lint-report.sarif` (if `--format sarif`) +* `out/lint-fixed.md` (if `--fix` and `--fix-mode rewrite`) +* `out/manual-checklist.md` (includes manual rules tagged `manual_checklist=true`) + +Exit codes: + +* `0`: no failing diagnostics +* `1`: lint failures at or above `--fail-on` +* `4`: config/schema error +* `5`: internal error + +## Command: `render-html` + +Purpose: + +* Generate deterministic HTML + CSS from normalized AST + profile tokens. + +Args: + +* `--emit-normalized`: also write `normalized-doc.json` +* `--assets <dir>`: static assets dir (images, fonts, etc.) +* `--self-contained`: embed assets in HTML where possible + +Outputs: + +* `out/render.html` +* `out/render.css` +* `out/typeset-report.json` +* `out/normalized-doc.json` (if `--emit-normalized`) + +Exit codes: + +* `0`: success +* `3`: render error +* `4`: config/schema error +* `5`: internal error + +## Command: `render-pdf` + +Purpose: + +* Render PDF deterministically from HTML + CSS + assets.
+ +Args: + +* `--engine <name>`: renderer adapter selection (implementation-defined) +* `--engine-opts <opts>`: pass-through engine options +* `--keep-html`: keep intermediate HTML/CSS even if PDF fails + +Outputs: + +* `out/render.pdf` +* `out/render.html` + `out/render.css` (always or if `--keep-html`) +* `out/render-log.json` + +Exit codes: + +* `0`: success +* `3`: render error +* `4`: config/schema error +* `5`: internal error + +## Command: `qa` + +Purpose: + +* Run post-render QA analysis and evaluate quality gates. + +Args: + +* `--pdf <path>`: PDF to analyze (default: `out/render.pdf`) +* `--html <path>`: optional HTML for cross-checks +* `--gates <path>`: override gates file (default: `spec/quality_gates.yaml`) + +Outputs: + +* `out/layout-report.json` +* `out/qa-report.json` + +Exit codes: + +* `0`: all gates pass +* `2`: gates failed (at or above `--fail-on` / strictness) +* `4`: config/schema error +* `5`: internal error + +## Command: `report` + +Purpose: + +* Produce a consolidated report: + + * coverage (implemented vs unimplemented) + * diffs vs baseline + * per-category enforcement breakdown + +Args: + +* `--baseline <path>`: baseline coverage report to diff against +* `--since <ref>`: optionally compute diffs since a git ref (implementation-defined) + +Outputs: + +* `out/coverage-report.json` +* `out/coverage-diff.json` (if baseline provided) +* `out/coverage-summary.md` + +Exit codes: + +* `0`: report built and coverage passes configured floors +* `2`: coverage floor violated +* `4`: config/schema error +* `5`: internal error + +--- END FILE --- + +--- FILE: spec/extraction_plan.md --- + +# Phase 2 Extraction Plan + +This plan defines how rules will be produced in controlled batches without reproducing the books. + +## Non-negotiables (carried into Phase 2) + +* No full-book OCR/transcription. +* No long verbatim passages. +* Rules are paraphrased and capped (`rule_text` ≤ 800 chars). +* Every rule includes at least one source pointer in `source_refs`.
+* If a rule depends on exact wording, the rule still paraphrases but must include: + + * `rule_text`: “Exact wording required—refer to pointer” + * plus a usable pointer. + +Primary reference PDFs for pointer extraction: + +* The Chicago Manual of Style (18th ed).pdf +* Robert Bringhurst – The Elements of Typographic Style.pdf + +## Output batching format + +When you say: `EXTRACT <category> [<scope>]` + +I will output a bundle that includes: + +1. **Rules NDJSON** (150–250 rule records) + +* Path: `spec/rules/<category>/<batch_id>.ndjson` +* One JSON object per line, validated against `spec/schema/rule.schema.json`. + +2. **Index deltas** for that category + +* `spec/indexes/keywords_<category>.json` +* `spec/indexes/source_refs_<category>.json` +* `spec/indexes/coverage_delta_<category>.json` + +3. **Coverage notes** report + +* A short Markdown report describing enforcement split: + + * lint vs typeset vs postrender vs manual + * plus any known gaps or “manual-only” areas + +## Batch naming + +`<batch_id>` format: + +* `v1_<category>_<seq>` + + * e.g., `v1_punctuation_001` + +Batches are append-only: + +* If rules need revision, mark old rule `deprecated`, add a new rule ID (or new version segment) and keep both records. + +## Pointer scheme details + +Pointer strings live in `source_refs[]` and are **not** quotes. + +Preferred pointer format: + +* `CMOS18 §<section> p<book_page>` +* `BRING §<section> p<book_page>` +* Optional disambiguation: `(scan p<pdf_page>)` + +Example pattern (not a quote): + +* `CMOS18 §6.1 p377 (scan p10)` + +Notes: + +* “book_page” uses the printed page number in the book when present (arabic or roman). +* “scan p” uses the PDF page index when printed page numbers are ambiguous. + +## Recommended extraction order (high-impact first) + +1. numbers +2. punctuation +3. citations +4. headings +5. tables +6. figures +7. links +8. code +9. layout (widows/orphans, keeps, overflow) +10. front/back matter +11. accessibility +12. i18n + +Rationale: + +* Numbers/punctuation/citations most directly affect correctness, consistency, and auditability. +* Layout rules benefit from having structure and tokens in place. + +## Scope parameter + +`[<scope>]` can constrain extraction, e.g.: + +* `EXTRACT punctuation basic` +* `EXTRACT citations notes_bibliography` +* `EXTRACT numbers en_only` +* `EXTRACT layout widows_orphans` + +If scope is omitted: + +* extract the most generally applicable rules for that category first. + +## Enforcement mapping guidelines (honest labeling) + +* `lint`: detectable from AST or text normalization (e.g., spacing, punctuation patterns, citation fields). +* `typeset`: enforced via CSS/tokens/paged-media decisions (e.g., indent policy, measure targets, hyphenation params). +* `postrender`: requires layout inspection after rendering (e.g., widows/orphans, overfull lines, table clipping). +* `manual`: cannot be reliably automated; must include `tags: ["manual_checklist=true"]` and be emitted into checklist outputs. + +If a concept spans multiple enforcement layers: + +* Prefer splitting into two rules: + + * one lint rule (source cleanliness) + * one postrender rule (layout outcome) +* Use `dependencies` to link them. + +## Extraction workflow per category (repeatable) + +For each category: + +1. Build a topic map (subtopics, recurring failure modes). +2.
Extract rules in clusters: + + * MUST rules first (enforceable or checklist) + * SHOULD rules next + * WARN rules last +3. For each rule: + + * add `keywords` for searchability + * add tags for overrides/routing + * add minimal `exceptions` when needed (avoid overfitting) + +## “Degraded mode” considerations during extraction + +For each category batch, include some rules that specifically target degraded inputs: + +* hard-wrap repair suggestions +* heading inference warnings +* link sanitation and encoding fixes +* Unicode normalization notes + +These rules should generally be `warn` or `should`, unless they prevent corruption (then `must`). + +--- END FILE ---