---
# Registry manifest for the "pubstyle" rule set: identifier conventions,
# source-pointer format, category/profile vocabularies, planned rule counts,
# and the coverage contract that rules must satisfy.
#
# NOTE(review): this section was rebuilt from a whitespace-collapsed copy.
# Nesting was inferred from key names; confirm against the consumer's schema.

version: "0.1.0"
registry_id: "pubstyle"
description: >
  Machine-readable style+typesetting rules for a Markdown→HTML→PDF pipeline,
  backed by primary references (Chicago / Bringhurst) and optional house rules.
  Rules are paraphrases only; sources are referenced by pointer strings.

# How rule identifiers are built and how they may evolve.
id_naming:
  # Allowed leading segment of a rule id, mapped to what it signifies.
  prefixes:
    CMOS: "Editorial/style usage rules derived primarily from Chicago."
    BRING: "Typographic/layout rules derived primarily from Bringhurst."
    HOUSE: "Project-specific rules not directly sourced to Chicago/Bringhurst."
  pattern: "PREFIX.DOMAIN.TOPIC[.SUBTOPIC[.DETAIL...]]"
  delimiter: "."
  casing: "UPPER_SNAKE for segments"
  stability:
    rule_ids_are_immutable: true
    rename_policy: "Deprecate old id; introduce new id; keep mapping in report diffs."
  examples:
    - "CMOS.PUNCTUATION.DASHES.EM_DASH"
    - "BRING.LAYOUT.WIDOWS_ORPHANS.AVOID"
    - "HOUSE.CITATIONS.DOI.PREFER_HTTPS"

# Format of the pointer strings that tie each rule back to its source.
source_pointer_scheme:
  goal: "Provide auditable traceability without reproducing sources."
  # NOTE(review): the collapsed copy had lost the placeholder tokens in the
  # four format strings below (they read "... §" / "p" with a stray line break,
  # and "(scan p)"). <section>, <page> and <scan_page> are reconstructions;
  # confirm the exact token names expected by the pointer parser.
  pointer_format_primary: "CMOS18 §<section> p<page>"
  pointer_format_secondary: "BRING §<section> p<page>"
  pointer_format_house: "HOUSE §<section> p<page>"
  optional_scan_hint: "(scan p<scan_page>)"
  allowed_page_numbering:
    - arabic
    - roman
  notes:
    - "Pointers must be sufficient for a reader with the book to locate the guidance."
    - "Never store verbatim passages; paraphrase only."
    - "If a rule depends on exact wording, rule_text must say: Exact wording required—refer to pointer."

# Closed vocabulary of rule categories (same keys as target_by_category).
category_taxonomy:
  - editorial
  - typography
  - layout
  - headings
  - citations
  - numbers
  - punctuation
  - abbreviations
  - links
  - tables
  - figures
  - code
  - frontmatter
  - backmatter
  - accessibility
  - i18n

# Output profiles.
profiles:
  - web_pdf
  - print_pdf
  - dense_tech
  - memo
  - slide_deck

# Planning targets; the per-category figures sum to 1370, inside the range.
planned_rule_counts:
  target_total_range: [800, 1500]
  target_by_category:
    editorial: 120
    typography: 170
    layout: 140
    headings: 70
    citations: 140
    numbers: 90
    punctuation: 120
    abbreviations: 60
    links: 50
    tables: 60
    figures: 50
    code: 70
    frontmatter: 40
    backmatter: 40
    accessibility: 90
    i18n: 60

# Per-severity obligations on how rules must be enforced or tracked.
coverage_contract:
  must_rules:
    enforceability_requirement: >
      Every MUST rule must be enforceable by at least one of: lint, typeset,
      postrender; otherwise it must be explicitly labeled as a manual checklist
      item and emitted in a checklist output artifact.
    manual_checklist_tag: "manual_checklist=true"
    checklist_artifact: "manual-checklist.md (and JSON mirror)"
  should_rules:
    policy: "Should rules should be enforceable when practical; otherwise allowed as manual with explicit rationale."
  warn_rules:
    policy: "Warnings may be non-blocking and advisory; still require source pointers."

# Meaning of each enforcement mode named in the coverage contract.
# NOTE(review): placed at top level; it may instead belong under
# coverage_contract — the collapsed copy does not show which. Confirm.
enforcement_definitions:
  lint: "Static analysis over normalized Markdown/HTML AST. Deterministic."
  typeset: "CSS/tokens shaping decisions prior to rendering (pagination, keeps, hyphenation parameters)."
  postrender: "PDF/HTML layout inspection (widows/orphans, overflow, keep failures, numbering mismatches)."
  manual: "Human review; system must still produce checklist items and traceability pointers."
# CI thresholds on implemented-rule coverage.
# NOTE(review): this section was rebuilt from a whitespace-collapsed copy.
# Nesting was inferred from key names; confirm against the consumer's schema.
ci_guardrails:
  coverage_floor:
    must_implemented_min_percent: 95
    overall_implemented_min_percent: 80
  # Presumably a sibling of coverage_floor rather than a child — confirm.
  regression_rule: "CI fails if implemented coverage decreases from main branch."

# What the pipeline does when the input document is badly structured.
degraded_mode_contract:
  purpose: "Handle badly-structured inputs safely without crashing; still provide useful output."
  # Conditions that switch the pipeline into degraded mode.
  triggers:
    - "Markdown parse errors / invalid UTF-8"
    - "Missing heading hierarchy (no H1/H2 etc.)"
    - "Garbage extraction (e.g., line breaks every word, excessive hard wraps)"
    - "Mixed language with no lang metadata"
  behavior:
    # Repair attempts first; minimal-AST fallback if they fail.
    normalize:
      attempt_repairs:
        - "Normalize whitespace and line endings"
        - "Detect and unwrap hard-wrapped paragraphs heuristically"
        - "Infer heading levels from patterns (e.g., 1., 1.1, ALL CAPS lines) with low confidence"
      if_unrecoverable:
        - "Fall back to minimal AST: paragraphs + code blocks + raw spans"
        - "Mark document structure confidence = low"
    # Reduced enforcement applied per mode while degraded.
    enforcement_in_degraded_mode:
      lint:
        run_subset:
          - "safety"
          - "sanity"
          - "catastrophic typography (double spaces, broken links)"
        downgrade_some_must_to_warn: true
      typeset:
        use_fallback_tokens: true
        disable_aggressive_hyphenation: true
      postrender:
        run_core_gates_only:
          - "overfull_lines"
          - "table_overflow_incidents"
          - "code_overflow_incidents"
    # NOTE(review): reporting is nested under behavior here; it may instead be
    # a direct child of degraded_mode_contract — confirm.
    reporting:
      always_emit:
        - "layout-report.json"
        - "coverage-report.json"
        - "degraded-mode-report.json (what was inferred and why)"