# iftypeset/spec/manifest.yaml
# 2026-01-03 20:29:35 +00:00
#
# 142 lines
# 4.7 KiB
# YAML

---
# Registry identity and summary.
version: "0.1.0"  # quoted so it stays a string, never a number
registry_id: "pubstyle"
# Folded scalar (`>`): the indented lines are joined with single spaces into
# one paragraph that ends with one newline. The body must be indented deeper
# than the key, otherwise it is not part of the scalar.
description: >
  Machine-readable style+typesetting rules for a Markdown→HTML→PDF pipeline,
  backed by primary references (Chicago / Bringhurst) and optional house rules.
  Rules are paraphrases only; sources are referenced by pointer strings.
# Naming convention for rule identifiers.
# NOTE(review): nesting was reconstructed from a copy that had lost its
# indentation. `stability` and `examples` are placed as direct children of
# `id_naming` — confirm against the consumer's schema.
id_naming:
  # Leading id segment -> what that prefix denotes.
  prefixes:
    CMOS: "Editorial/style usage rules derived primarily from Chicago."
    BRING: "Typographic/layout rules derived primarily from Bringhurst."
    HOUSE: "Project-specific rules not directly sourced to Chicago/Bringhurst."
  # Shape of an id; bracketed parts are optional trailing segments.
  pattern: "PREFIX.DOMAIN.TOPIC[.SUBTOPIC[.DETAIL...]]"
  delimiter: "."
  casing: "UPPER_SNAKE for segments"
  stability:
    rule_ids_are_immutable: true
    rename_policy: "Deprecate old id; introduce new id; keep mapping in report diffs."
  # One sample id per prefix.
  examples:
    - "CMOS.PUNCTUATION.DASHES.EM_DASH"
    - "BRING.LAYOUT.WIDOWS_ORPHANS.AVOID"
    - "HOUSE.CITATIONS.DOI.PREFER_HTTPS"
# How rules point back to their sources without quoting them.
# Angle-bracket tokens (<section>, <book_page>, ...) are placeholders.
source_pointer_scheme:
  goal: "Provide auditable traceability without reproducing sources."
  pointer_format_primary: "CMOS18 §<section> p<book_page>"
  pointer_format_secondary: "BRING §<section> p<book_page>"
  pointer_format_house: "HOUSE §<section> p<doc_page>"
  optional_scan_hint: "(scan p<pdf_page_index>)"
  allowed_page_numbering:
    - arabic
    - roman
  notes:
    - "Pointers must be sufficient for a reader with the book to locate the guidance."
    - "Never store verbatim passages; paraphrase only."
    - "If a rule depends on exact wording, rule_text must say: Exact wording required—refer to pointer."
# Rule categories. The same sixteen names are used as the keys listed under
# `target_by_category`.
category_taxonomy:
  - editorial
  - typography
  - layout
  - headings
  - citations
  - numbers
  - punctuation
  - abbreviations
  - links
  - tables
  - figures
  - code
  - frontmatter
  - backmatter
  - accessibility
  - i18n
# Profile identifiers known to this registry.
# NOTE(review): presumably output/rendering targets — confirm with the consumer.
profiles:
  - web_pdf
  - print_pdf
  - dense_tech
  - memo
  - slide_deck
# Planned size of the rule set.
# The per-category targets below sum to 1370, which lies inside
# `target_total_range`.
planned_rule_counts:
  target_total_range: [800, 1500]
  target_by_category:
    editorial: 120
    typography: 170
    layout: 140
    headings: 70
    citations: 140
    numbers: 90
    punctuation: 120
    abbreviations: 60
    links: 50
    tables: 60
    figures: 50
    code: 70
    frontmatter: 40
    backmatter: 40
    accessibility: 90
    i18n: 60
# Enforcement expectations per rule severity, the meaning of each enforcement
# channel, and the CI thresholds for implemented coverage.
# NOTE(review): nesting was reconstructed from a copy that had lost its
# indentation. `enforcement_definitions` and `ci_guardrails` are placed under
# `coverage_contract`; they may instead be top-level keys — confirm against the
# consumer's schema.
coverage_contract:
  must_rules:
    # Folded scalar: the three lines are joined into one paragraph.
    enforceability_requirement: >
      Every MUST rule must be enforceable by at least one of: lint, typeset, postrender;
      otherwise it must be explicitly labeled as a manual checklist item and emitted in
      a checklist output artifact.
    manual_checklist_tag: "manual_checklist=true"
    checklist_artifact: "manual-checklist.md (and JSON mirror)"
  # `policy` appears once per severity; each must stay nested under its own
  # parent so the two entries do not collide as duplicate keys.
  should_rules:
    policy: "Should rules should be enforceable when practical; otherwise allowed as manual with explicit rationale."
  warn_rules:
    policy: "Warnings may be non-blocking and advisory; still require source pointers."
  # Enforcement channel name -> what it covers.
  enforcement_definitions:
    lint: "Static analysis over normalized Markdown/HTML AST. Deterministic."
    typeset: "CSS/tokens shaping decisions prior to rendering (pagination, keeps, hyphenation parameters)."
    postrender: "PDF/HTML layout inspection (widows/orphans, overflow, keep failures, numbering mismatches)."
    manual: "Human review; system must still produce checklist items and traceability pointers."
  ci_guardrails:
    # Minimum implemented coverage, in percent.
    coverage_floor:
      must_implemented_min_percent: 95
      overall_implemented_min_percent: 80
    regression_rule: "CI fails if implemented coverage decreases from main branch."
# What the pipeline does when the input is too malformed for normal processing.
# NOTE(review): nesting was reconstructed from a copy that had lost its
# indentation. `enforcement_in_degraded_mode` and `reporting` are placed under
# `behavior`; they may instead be siblings of it — confirm against the
# consumer's schema.
degraded_mode_contract:
  purpose: "Handle badly-structured inputs safely without crashing; still provide useful output."
  # Input conditions that switch the pipeline into degraded mode.
  triggers:
    - "Markdown parse errors / invalid UTF-8"
    - "Missing heading hierarchy (no H1/H2 etc.)"
    - "Garbage extraction (e.g., line breaks every word, excessive hard wraps)"
    - "Mixed language with no lang metadata"
  behavior:
    normalize:
      attempt_repairs:
        - "Normalize whitespace and line endings"
        - "Detect and unwrap hard-wrapped paragraphs heuristically"
        - "Infer heading levels from patterns (e.g., 1., 1.1, ALL CAPS lines) with low confidence"
      if_unrecoverable:
        - "Fall back to minimal AST: paragraphs + code blocks + raw spans"
        - "Mark document structure confidence = low"
    # Reduced enforcement per channel. These keys reuse the channel names
    # lint / typeset / postrender, so they must stay nested here to avoid
    # colliding with same-named keys elsewhere in the file.
    enforcement_in_degraded_mode:
      lint:
        run_subset:
          - "safety"
          - "sanity"
          - "catastrophic typography (double spaces, broken links)"
        downgrade_some_must_to_warn: true
      typeset:
        use_fallback_tokens: true
        disable_aggressive_hyphenation: true
      postrender:
        run_core_gates_only:
          - "overfull_lines"
          - "table_overflow_incidents"
          - "code_overflow_incidents"
    reporting:
      always_emit:
        - "layout-report.json"
        - "coverage-report.json"
        - "degraded-mode-report.json (what was inferred and why)"