# iftypeset/spec/manifest.yaml
#
# Registry manifest header: version, registry id, and a prose description.
# The path label above is kept as a comment: as a bare scalar ahead of the
# first key it would stop the document from parsing as a mapping.
---
version: "0.1.0"
registry_id: "pubstyle"
# Folded scalar (`>`): the lines join into one paragraph with a trailing newline.
description: >
  Machine-readable style+typesetting rules for a Markdown→HTML→PDF pipeline,
  backed by primary references (Chicago / Bringhurst) and optional house rules.
  Rules are paraphrases only; sources are referenced by pointer strings.

# Naming scheme for rule identifiers.
id_naming:
  # Leading id segment -> the kind of rule it marks.
  prefixes:
    CMOS: 'Editorial/style usage rules derived primarily from Chicago.'
    BRING: 'Typographic/layout rules derived primarily from Bringhurst.'
    HOUSE: 'Project-specific rules not directly sourced to Chicago/Bringhurst.'
  # Segments are joined by `delimiter`; the square brackets presumably mark
  # optional trailing segments.
  pattern: 'PREFIX.DOMAIN.TOPIC[.SUBTOPIC[.DETAIL...]]'
  delimiter: '.'
  casing: 'UPPER_SNAKE for segments'
  stability:
    rule_ids_are_immutable: true
    rename_policy: 'Deprecate old id; introduce new id; keep mapping in report diffs.'
  # One sample id per prefix.
  examples:
    - 'CMOS.PUNCTUATION.DASHES.EM_DASH'
    - 'BRING.LAYOUT.WIDOWS_ORPHANS.AVOID'
    - 'HOUSE.CITATIONS.DOI.PREFER_HTTPS'

# Format of the pointer strings that cite where a rule's guidance comes from.
source_pointer_scheme:
  goal: 'Provide auditable traceability without reproducing sources.'
  # One template per source; <...> are placeholders to fill in.
  pointer_format_primary: 'CMOS18 §<section> p<book_page>'
  pointer_format_secondary: 'BRING §<section> p<book_page>'
  pointer_format_house: 'HOUSE §<section> p<doc_page>'
  # Optional hint carrying a PDF page index.
  optional_scan_hint: '(scan p<pdf_page_index>)'
  allowed_page_numbering:
    - arabic
    - roman
  notes:
    - 'Pointers must be sufficient for a reader with the book to locate the guidance.'
    - 'Never store verbatim passages; paraphrase only.'
    - 'If a rule depends on exact wording, rule_text must say: Exact wording required—refer to pointer.'

# Rule categories. The same names, in the same order, are the keys of
# planned_rule_counts.target_by_category.
category_taxonomy:
  - editorial
  - typography
  - layout
  - headings
  - citations
  - numbers
  - punctuation
  - abbreviations
  - links
  - tables
  - figures
  - code
  - frontmatter
  - backmatter
  - accessibility
  - i18n

# Profile names only; what each profile configures is not defined in this file.
profiles:
  - web_pdf
  - print_pdf
  - dense_tech
  - memo
  - slide_deck
  - webtypography_nc
  - cv_onepage

# Planning targets for the number of rules in the registry.
planned_rule_counts:
  target_total_range: [800, 1500]  # presumably [min, max] — confirm inclusivity
  # Per-category targets, keyed by category_taxonomy name. These values sum
  # to 1370, which lies inside target_total_range.
  target_by_category:
    editorial: 120
    typography: 170
    layout: 140
    headings: 70
    citations: 140
    numbers: 90
    punctuation: 120
    abbreviations: 60
    links: 50
    tables: 60
    figures: 50
    code: 70
    frontmatter: 40
    backmatter: 40
    accessibility: 90
    i18n: 60

# Per-severity enforcement policy and the CI coverage gates.
coverage_contract:
  must_rules:
    # Folded scalar (`>`): lines join into one paragraph, trailing newline kept.
    enforceability_requirement: >
      Every MUST rule must be enforceable by at least one of: lint, typeset,
      postrender; otherwise it must be explicitly labeled as a manual checklist
      item and emitted in a checklist output artifact.
    manual_checklist_tag: 'manual_checklist=true'
    checklist_artifact: 'manual-checklist.md (and JSON mirror)'
  should_rules:
    policy: >-
      Should rules should be enforceable when practical; otherwise allowed as
      manual with explicit rationale.
  warn_rules:
    policy: 'Warnings may be non-blocking and advisory; still require source pointers.'
  # Definitions of the three automated enforcement modes and of manual review.
  enforcement_definitions:
    lint: 'Static analysis over normalized Markdown/HTML AST. Deterministic.'
    typeset: >-
      CSS/tokens shaping decisions prior to rendering (pagination, keeps,
      hyphenation parameters).
    postrender: >-
      PDF/HTML layout inspection (widows/orphans, overflow, keep failures,
      numbering mismatches).
    manual: 'Human review; system must still produce checklist items and traceability pointers.'
  ci_guardrails:
    # Minimum implemented-coverage percentages.
    coverage_floor:
      must_implemented_min_percent: 95
      overall_implemented_min_percent: 80
    regression_rule: 'CI fails if implemented coverage decreases from main branch.'

# Contract for processing badly-structured input (see `purpose`).
degraded_mode_contract:
  purpose: 'Handle badly-structured inputs safely without crashing; still provide useful output.'
  # Input conditions listed as degraded-mode triggers.
  triggers:
    - 'Markdown parse errors / invalid UTF-8'
    - 'Missing heading hierarchy (no H1/H2 etc.)'
    - 'Garbage extraction (e.g., line breaks every word, excessive hard wraps)'
    - 'Mixed language with no lang metadata'
  behavior:
    # Repairs to attempt; `if_unrecoverable` below lists the fallback steps.
    normalize:
      attempt_repairs:
        - 'Normalize whitespace and line endings'
        - 'Detect and unwrap hard-wrapped paragraphs heuristically'
        - 'Infer heading levels from patterns (e.g., 1., 1.1, ALL CAPS lines) with low confidence'
    if_unrecoverable:
      - 'Fall back to minimal AST: paragraphs + code blocks + raw spans'
      - 'Mark document structure confidence = low'
  # Reduced settings for each enforcement mode while in degraded mode.
  enforcement_in_degraded_mode:
    lint:
      run_subset:
        - 'safety'
        - 'sanity'
        - 'catastrophic typography (double spaces, broken links)'
      downgrade_some_must_to_warn: true
    typeset:
      use_fallback_tokens: true
      disable_aggressive_hyphenation: true
    postrender:
      run_core_gates_only:
        - 'overfull_lines'
        - 'table_overflow_incidents'
        - 'code_overflow_incidents'
  reporting:
    # Report artifacts that are always produced.
    always_emit:
      - 'layout-report.json'
      - 'coverage-report.json'
      - 'degraded-mode-report.json (what was inferred and why)'