Add VoiceDNA profile from manual replies

This commit is contained in:
danny 2025-12-24 12:09:08 +00:00
parent 817995ac6b
commit 25e306d4b0
3 changed files with 951 additions and 0 deletions

View file

@ -141,6 +141,18 @@ Generate the deeper “no raw quotes” report directly from an Instagram export
- `python3 -m sergio_instagram_messaging.generate_dm_report_detailed --export-input /path/to/export-root --out /root/tmp/dm_history_report_en_detailed.md`
## VoiceDNA (manual reply style)
`voice_dna/socialmediatorr.voice-dna.json` is a **safe-to-store** style fingerprint generated from the last 6 months of **manual (non-template) DM replies** (no raw DM quotes are included).
It also encodes a hard rule for the bot:
- Always reply in the **user's input language** (English / Spanish / French / Catalan), with a short clarification if the user's message is too short to detect.
Regenerate from a local Instagram export folder:
- `python3 -m sergio_instagram_messaging.generate_voice_dna --export-input /path/to/export-root --out voice_dna/socialmediatorr.voice-dna.json --owner-name "Sergio de Vocht" --window-months 6`
## Webhooks (new messages → auto-reply)
Meta webhooks are two steps:

View file

@ -0,0 +1,471 @@
from __future__ import annotations

import argparse
import calendar
import json
import re
import statistics
import sys
from collections import Counter, defaultdict
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Iterable, Literal

from .analyze_instagram_export import canonicalize_text
from .generate_dm_report_detailed import _infer_owner_name, _load_events, _resolve_export_root
# Closed set of language labels the detector can produce; "Too short to tell"
# is the fallback when a message carries too little text to classify.
VoiceLang = Literal["English", "Spanish", "French", "Catalan", "Too short to tell"]

# Lexical feature detectors, compiled once at import time:
_RE_URL = re.compile(r"(?i)\b(?:https?://|www\.)\S+")  # http(s):// or www.-style links
_RE_HANDLE = re.compile(r"(?<!\w)@[\w._]{2,}")  # @mentions not preceded by a word char
_RE_DIGIT = re.compile(r"\d")  # any digit anywhere in the text
def _now_utc_iso() -> str:
return datetime.now(timezone.utc).replace(microsecond=0).isoformat()
def _parse_iso_utc(s: str) -> datetime:
"""
Accept an ISO timestamp like:
- 2025-12-24T12:34:56Z
- 2025-12-24T12:34:56+00:00
- 2025-12-24T12:34:56
Defaults to UTC if tzinfo is missing.
"""
raw = (s or "").strip()
if not raw:
raise ValueError("empty datetime")
if raw.endswith("Z"):
raw = raw[:-1] + "+00:00"
dt = datetime.fromisoformat(raw)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return dt.astimezone(timezone.utc).replace(microsecond=0)
def _days_in_month(year: int, month: int) -> int:
if month in (1, 3, 5, 7, 8, 10, 12):
return 31
if month in (4, 6, 9, 11):
return 30
# February
leap = (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0)
return 29 if leap else 28
def _subtract_months(dt: datetime, months: int) -> datetime:
    """Return *dt* shifted back by *months* calendar months, clamping the day of month."""
    if months <= 0:
        return dt
    # Zero-based month arithmetic: the year borrow falls out of divmod.
    total_months = dt.year * 12 + (dt.month - 1) - months
    year, month_index = divmod(total_months, 12)
    month = month_index + 1
    # Clamp e.g. Mar 31 - 1 month -> Feb 28/29.
    day = min(dt.day, _days_in_month(year, month))
    return dt.replace(year=year, month=month, day=day)
def _tokenize(text: str) -> list[str]:
return re.findall(r"\b[\w']+\b", (text or "").lower(), flags=re.UNICODE)
def _guess_lang(text: str) -> VoiceLang:
    """Classify the language of *text*, mirroring generate_dm_report_detailed's heuristic."""
    # Deferred import keeps a single source of truth for the detector and
    # avoids any import-cycle risk at module load time.
    from .generate_dm_report_detailed import _guess_lang as _detect

    return _detect(text)
def _is_system_new_follower_message(text: str) -> bool:
s = (text or "").strip().lower()
return s.startswith("you messaged") and "followed your account" in s
def _iter_emojis(text: str) -> Iterable[str]:
# Heuristic: treat unicode "Symbol, other" as emoji-like and ignore punctuation/formatting.
# This avoids counting typographic quotes (e.g., “ ”) and ellipses (…) as emojis.
import unicodedata
for ch in (text or ""):
if ch.isspace():
continue
cat = unicodedata.category(ch)
if cat != "So":
continue
yield ch
def _quantile(values: list[int], q: float) -> int | None:
if not values:
return None
if q <= 0:
return min(values)
if q >= 1:
return max(values)
xs = sorted(values)
idx = int(round((len(xs) - 1) * q))
return xs[max(0, min(len(xs) - 1, idx))]
def _pct(num: int, den: int) -> float:
return 0.0 if den <= 0 else (num / den) * 100.0
# Lightweight lexical cues per language, matched against the lower-cased word
# tokens produced by _tokenize(). Every VoiceLang value is present as a key so
# lookups via .get(lang, set()) never miss; "Too short to tell" deliberately
# matches nothing.
_GREETING_WORDS: dict[VoiceLang, set[str]] = {
    "English": {"hi", "hey", "hello"},
    "Spanish": {"hola", "buenas", "buenos"},
    "French": {"salut", "bonjour"},
    "Catalan": {"hola", "bon"},
    "Too short to tell": set(),
}
# Thanks markers; both accented and unaccented Catalan spellings are listed.
_THANKS_WORDS: dict[VoiceLang, set[str]] = {
    "English": {"thanks", "thank"},
    "Spanish": {"gracias"},
    "French": {"merci"},
    "Catalan": {"gracies", "gràcies"},
    "Too short to tell": set(),
}
# Call-to-action vocabulary (links, calls, books, pricing) per language.
_CTA_WORDS: dict[VoiceLang, set[str]] = {
    "English": {"link", "call", "book", "ebook", "price"},
    "Spanish": {"enlace", "link", "llamada", "libro", "precio"},
    "French": {"lien", "appel", "livre", "prix"},
    "Catalan": {"enllaç", "enllac", "trucada", "llibre", "preu"},
    "Too short to tell": set(),
}
@dataclass(frozen=True)
class OutgoingSample:
    """
    Immutable feature snapshot of one outgoing (owner-sent) DM.

    Built by _sample_from_text(); stores derived style flags plus the
    canonicalized text used for scripted-template detection.
    """

    ts_ms: int  # message timestamp, epoch milliseconds
    canon: str  # canonicalized text (see canonicalize_text); used for template matching
    lang: VoiceLang  # detected language of the message
    chars: int  # length of the stripped text, in characters
    words: int  # number of word tokens (see _tokenize)
    has_emoji: bool  # at least one emoji-like char (Unicode category "So") present
    emoji: tuple[str, ...]  # the emoji-like characters, in order of appearance
    has_question: bool  # contains "?" or "¿"
    ends_with_question: bool  # stripped text ends with "?"
    has_exclamation: bool  # contains "!" or "¡"
    has_linebreak: bool  # contains a newline
    has_url: bool  # matches _RE_URL (http(s):// or www. link)
    has_handle: bool  # matches _RE_HANDLE (@mention)
    has_number: bool  # contains any digit
    starts_with_greeting: bool  # first token is a greeting word for the detected language
    contains_thanks: bool  # any token is a thanks word for the detected language
    contains_cta_terms: bool  # any token is a CTA word for the detected language
def _sample_from_text(*, ts_ms: int, canon: str, text: str) -> OutgoingSample:
    """Extract style features from raw message text and wrap them in an OutgoingSample."""
    stripped = (text or "").strip()
    language = _guess_lang(stripped)
    tokens = _tokenize(stripped)
    token_set = set(tokens)
    found_emojis = tuple(_iter_emojis(stripped))
    lead_token = tokens[0] if tokens else ""
    return OutgoingSample(
        ts_ms=int(ts_ms),
        canon=canon,
        lang=language,
        chars=len(stripped),
        words=len(tokens),
        has_emoji=bool(found_emojis),
        emoji=found_emojis,
        has_question=("?" in stripped) or ("¿" in stripped),
        ends_with_question=stripped.endswith("?"),
        has_exclamation=("!" in stripped) or ("¡" in stripped),
        has_linebreak="\n" in stripped,
        has_url=_RE_URL.search(stripped) is not None,
        has_handle=_RE_HANDLE.search(stripped) is not None,
        has_number=_RE_DIGIT.search(stripped) is not None,
        starts_with_greeting=bool(lead_token) and lead_token in _GREETING_WORDS.get(language, set()),
        contains_thanks=bool(token_set & _THANKS_WORDS.get(language, set())),
        contains_cta_terms=bool(token_set & _CTA_WORDS.get(language, set())),
    )
def _lang_bucket(samples: list[OutgoingSample]) -> dict[VoiceLang, list[OutgoingSample]]:
by: dict[VoiceLang, list[OutgoingSample]] = defaultdict(list)
for s in samples:
by[s.lang].append(s)
# Ensure stable order in output (even if empty).
for lang in ("English", "Spanish", "French", "Catalan", "Too short to tell"):
by.setdefault(lang, [])
return dict(by)
def _summarize_samples(samples: list[OutgoingSample]) -> dict[str, Any]:
if not samples:
return {
"count": 0,
"length": {"chars": {}, "words": {}},
"rates": {},
"top_emojis": [],
}
chars = [s.chars for s in samples]
words = [s.words for s in samples]
emoji_counter: Counter[str] = Counter()
for s in samples:
emoji_counter.update(s.emoji)
def rates() -> dict[str, float]:
n = len(samples)
return {
"emoji_messages_pct": _pct(sum(1 for s in samples if s.has_emoji), n),
"question_messages_pct": _pct(sum(1 for s in samples if s.has_question), n),
"ends_with_question_pct": _pct(sum(1 for s in samples if s.ends_with_question), n),
"exclamation_messages_pct": _pct(sum(1 for s in samples if s.has_exclamation), n),
"linebreak_messages_pct": _pct(sum(1 for s in samples if s.has_linebreak), n),
"url_messages_pct": _pct(sum(1 for s in samples if s.has_url), n),
"handle_messages_pct": _pct(sum(1 for s in samples if s.has_handle), n),
"number_messages_pct": _pct(sum(1 for s in samples if s.has_number), n),
"starts_with_greeting_pct": _pct(sum(1 for s in samples if s.starts_with_greeting), n),
"contains_thanks_pct": _pct(sum(1 for s in samples if s.contains_thanks), n),
"contains_cta_terms_pct": _pct(sum(1 for s in samples if s.contains_cta_terms), n),
}
def length_summary(values: list[int]) -> dict[str, int | float | None]:
return {
"min": min(values) if values else None,
"p10": _quantile(values, 0.10),
"median": int(statistics.median(values)) if values else None,
"p90": _quantile(values, 0.90),
"max": max(values) if values else None,
"mean": (sum(values) / len(values)) if values else None,
}
return {
"count": len(samples),
"length": {"chars": length_summary(chars), "words": length_summary(words)},
"rates": rates(),
"top_emojis": [{"emoji": e, "count": c} for e, c in emoji_counter.most_common(20)],
}
def _choose_language_policy(supported_from_inbox: Counter[VoiceLang], supported_from_owner: Counter[VoiceLang]) -> dict[str, Any]:
supported = []
for lang in ("English", "Spanish", "French", "Catalan"):
if supported_from_inbox.get(lang, 0) > 0 or supported_from_owner.get(lang, 0) > 0:
supported.append(lang)
if not supported:
supported = ["English", "Spanish", "French", "Catalan"]
return {
"mode": "mirror_user_input_language",
"supported_languages": supported,
"rules": [
"Reply in the same language as the user's most recent message that contains enough text to classify.",
"Do not translate the user's message unless they explicitly ask for a translation.",
"Do not mix languages inside a single reply unless the user mixes languages first.",
"If the user's message is too short to classify, reuse the last confidently detected language in the same thread.",
"If there is still no signal, ask a 1-line clarification asking which language they prefer (keep it short).",
],
}
def generate_voice_dna(
    *,
    export_input: Path,
    out_path: Path,
    owner_name: str | None,
    window_months: int,
    response_window_hours: float,
    scripted_min_count: int,
    as_of_utc: datetime,
) -> Path:
    """
    Build and write a VoiceDNA JSON profile from a local Instagram export.

    A "manual reply" is an owner text message sent within
    ``response_window_hours`` of the most recent inbound message in the same
    conversation, whose canonicalized text is NOT a repeated template (i.e.
    sent >= ``scripted_min_count`` times across the FULL export). Only replies
    inside the last ``window_months`` ending at ``as_of_utc`` contribute
    style samples.

    Args:
        export_input: Instagram export root (anything _resolve_export_root accepts).
        out_path: destination for the JSON profile; parent dirs are created.
        owner_name: owner's sender_name; inferred from the export when falsy.
        window_months: size of the recent analysis window, in months.
        response_window_hours: max inbound→outgoing gap to count as a response.
        scripted_min_count: repetition threshold for scripted/template detection.
        as_of_utc: end of the analysis window (normalized to UTC).

    Returns:
        The path the profile was written to (same object as ``out_path``).

    Raises:
        RuntimeError: when the owner name cannot be inferred.
    """
    export_root = _resolve_export_root(export_input)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    if not owner_name or not owner_name.strip():
        owner_name = _infer_owner_name(export_root)
    if not owner_name:
        raise RuntimeError("Could not infer owner name. Pass --owner-name.")
    as_of_utc = as_of_utc.astimezone(timezone.utc).replace(microsecond=0)
    start_utc = _subtract_months(as_of_utc, int(window_months))
    # Export timestamps are epoch milliseconds.
    start_ts_ms = int(start_utc.timestamp() * 1000)
    end_ts_ms = int(as_of_utc.timestamp() * 1000)
    response_window_ms = int(float(response_window_hours) * 3600 * 1000)
    # Canonicalized owner texts counted across the FULL export (not only the
    # recent window) — the basis for scripted-template detection.
    template_counts: Counter[str] = Counter()
    inbound_lang_counts_window: Counter[VoiceLang] = Counter()
    candidate_responses: list[OutgoingSample] = []
    scanned_conversations = 0
    scanned_message_files = 0
    inbox_root = export_root / "messages" / "inbox"
    for conv_dir in inbox_root.iterdir():
        if not conv_dir.is_dir():
            continue
        scanned_conversations += 1
        # Instagram splits long threads into message_1.json, message_2.json, ...
        parts = sorted(conv_dir.glob("message*.json"), key=lambda p: p.name)
        if not parts:
            continue
        scanned_message_files += len(parts)
        events = _load_events(parts, owner_name=owner_name)
        if not events:
            continue
        # Timestamp of the most recent inbound message seen so far in this
        # thread; used as the "response clock" for owner messages.
        last_inbound_ts: int | None = None
        for e in events:
            # Track inbound language (for mirroring policy) for the same recent window.
            if not e.is_owner and isinstance(e.text, str) and e.text.strip():
                if start_ts_ms <= e.ts_ms <= end_ts_ms:
                    inbound_lang_counts_window[_guess_lang(e.text)] += 1
                last_inbound_ts = e.ts_ms
                continue
            if not e.is_owner:
                # Inbound without usable text (media, reactions, ...) still
                # resets the response clock.
                last_inbound_ts = e.ts_ms
                continue
            if not isinstance(e.text, str) or not e.text.strip():
                continue
            canon = canonicalize_text(e.text)
            if canon:
                template_counts[canon] += 1
            # Style samples: only use manual "responses" in the recent window.
            if _is_system_new_follower_message(e.text):
                continue
            if not (start_ts_ms <= e.ts_ms <= end_ts_ms):
                continue
            if last_inbound_ts is None:
                continue
            if e.ts_ms < last_inbound_ts:
                # Guards against out-of-order events in the export.
                continue
            if (e.ts_ms - last_inbound_ts) > response_window_ms:
                continue
            if not canon:
                continue
            candidate_responses.append(_sample_from_text(ts_ms=e.ts_ms, canon=canon, text=e.text))
    # Drop repeated templates; whatever survives is treated as manual style.
    scripted_templates = {canon for canon, cnt in template_counts.items() if cnt >= int(scripted_min_count)}
    manual_responses = [s for s in candidate_responses if s.canon not in scripted_templates]
    owner_lang_counts = Counter([s.lang for s in manual_responses])
    policy = _choose_language_policy(inbound_lang_counts_window, owner_lang_counts)
    by_lang = _lang_bucket(manual_responses)
    per_lang_summary = {lang: _summarize_samples(by_lang[lang]) for lang in by_lang}
    # Assembled profile: only aggregate statistics and policy text — no raw DM
    # quotes are written out.
    voice_dna: dict[str, Any] = {
        "schema_version": "voice_dna/v1",
        "created_at_utc": _now_utc_iso(),
        "subject": {
            "account": "@socialmediatorr",
            "owner_name": owner_name,
            "scope": "Instagram DMs",
        },
        "source": {
            "type": "instagram_export",
            "window": {
                "months": int(window_months),
                "start_utc": start_utc.isoformat(),
                "end_utc": as_of_utc.isoformat(),
                "response_window_hours": float(response_window_hours),
            },
            "classification": {
                "manual_reply_definition": "owner text message within response_window_hours of the most recent inbound message, excluding repeated templates",
                "scripted_template_definition": f"owner canonicalized text sent >= {int(scripted_min_count)} times across full export",
                "system_messages_excluded": ["you messaged <user> because they followed your account"],
            },
            "scan": {
                "export_root_hint": export_root.name,
                "scanned_conversations": scanned_conversations,
                "scanned_message_files": scanned_message_files,
                "candidate_responses_in_window": len(candidate_responses),
                "manual_responses_in_window": len(manual_responses),
                "scripted_template_count": len(scripted_templates),
            },
        },
        "policies": {
            "language": policy,
        },
        "language_observed": {
            "inbound_last_window_counts": dict(inbound_lang_counts_window),
            "manual_reply_counts": dict(owner_lang_counts),
        },
        "style": {
            "manual_replies": {
                "overall": _summarize_samples(manual_responses),
                "by_language": per_lang_summary,
            }
        },
    }
    out_path.write_text(json.dumps(voice_dna, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
    return out_path
def main(argv: list[str] | None = None) -> int:
    """
    CLI entry point.

    Parses arguments, runs generate_voice_dna(), and prints a one-line JSON
    status object to stdout. Returns 0 on success, 2 when the export path is
    missing, and 1 for any other failure.

    Fix: error messages now go to stderr instead of stdout, so stdout stays
    machine-parseable (only ever the {"ok": true, ...} JSON line).
    """
    ap = argparse.ArgumentParser(
        description="Generate a VoiceDNA JSON file for recent manual DM replies (style fingerprint + language mirroring policy)."
    )
    ap.add_argument("--export-input", required=True, help="Instagram export root (contains messages/inbox)")
    ap.add_argument("--out", required=True, help="output path (JSON)")
    ap.add_argument("--owner-name", default=None, help='owner sender_name (e.g., "Sergio de Vocht")')
    ap.add_argument("--window-months", type=int, default=6, help="how many recent months to use")
    ap.add_argument(
        "--response-window-hours",
        type=float,
        default=72.0,
        help="max hours after inbound message for an outgoing message to count as a response",
    )
    ap.add_argument(
        "--scripted-min-count",
        type=int,
        default=50,
        help="owner canonicalized text sent >= this count is treated as scripted/templated",
    )
    ap.add_argument(
        "--as-of-utc",
        default=None,
        help="analysis end time (UTC ISO); default: now",
    )
    args = ap.parse_args(argv)
    as_of = _parse_iso_utc(args.as_of_utc) if args.as_of_utc else datetime.now(timezone.utc).replace(microsecond=0)
    try:
        out = generate_voice_dna(
            export_input=Path(args.export_input),
            out_path=Path(args.out),
            owner_name=(args.owner_name.strip() if args.owner_name else None),
            window_months=int(args.window_months),
            response_window_hours=float(args.response_window_hours),
            scripted_min_count=int(args.scripted_min_count),
            as_of_utc=as_of,
        )
    except FileNotFoundError as e:
        # Missing export path: distinct exit code so callers can detect it.
        print(str(e), file=sys.stderr)
        return 2
    except Exception as e:  # top-level CLI boundary: report and exit non-zero
        print(f"VoiceDNA generation failed: {e}", file=sys.stderr)
        return 1
    print(json.dumps({"ok": True, "out": str(out)}, ensure_ascii=False))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -0,0 +1,468 @@
{
"schema_version": "voice_dna/v1",
"created_at_utc": "2025-12-24T12:08:24+00:00",
"subject": {
"account": "@socialmediatorr",
"owner_name": "Sergio de Vocht",
"scope": "Instagram DMs"
},
"source": {
"type": "instagram_export",
"window": {
"months": 6,
"start_utc": "2025-06-24T12:08:20+00:00",
"end_utc": "2025-12-24T12:08:20+00:00",
"response_window_hours": 72.0
},
"classification": {
"manual_reply_definition": "owner text message within response_window_hours of the most recent inbound message, excluding repeated templates",
"scripted_template_definition": "owner canonicalized text sent >= 50 times across full export",
"system_messages_excluded": [
"you messaged <user> because they followed your account"
]
},
"scan": {
"export_root_hint": "socialmediatorr-ig-export-raw-20251224",
"scanned_conversations": 10100,
"scanned_message_files": 10061,
"candidate_responses_in_window": 18934,
"manual_responses_in_window": 825,
"scripted_template_count": 24
}
},
"policies": {
"language": {
"mode": "mirror_user_input_language",
"supported_languages": [
"English",
"Spanish",
"French",
"Catalan"
],
"rules": [
"Reply in the same language as the user's most recent message that contains enough text to classify.",
"Do not translate the user's message unless they explicitly ask for a translation.",
"Do not mix languages inside a single reply unless the user mixes languages first.",
"If the user's message is too short to classify, reuse the last confidently detected language in the same thread.",
"If there is still no signal, ask a 1-line clarification asking which language they prefer (keep it short)."
]
}
},
"language_observed": {
"inbound_last_window_counts": {
"English": 2828,
"Too short to tell": 5024,
"Spanish": 2176,
"French": 191,
"Catalan": 16
},
"manual_reply_counts": {
"Spanish": 541,
"Too short to tell": 255,
"English": 17,
"French": 1,
"Catalan": 11
}
},
"style": {
"manual_replies": {
"overall": {
"count": 825,
"length": {
"chars": {
"min": 2,
"p10": 13,
"median": 54,
"p90": 199,
"max": 928,
"mean": 87.47878787878788
},
"words": {
"min": 1,
"p10": 2,
"median": 11,
"p90": 34,
"max": 169,
"mean": 15.436363636363636
}
},
"rates": {
"emoji_messages_pct": 16.363636363636363,
"question_messages_pct": 32.484848484848484,
"ends_with_question_pct": 27.151515151515156,
"exclamation_messages_pct": 18.303030303030305,
"linebreak_messages_pct": 9.212121212121211,
"url_messages_pct": 4.121212121212121,
"handle_messages_pct": 0.0,
"number_messages_pct": 9.454545454545455,
"starts_with_greeting_pct": 4.96969696969697,
"contains_thanks_pct": 4.484848484848484,
"contains_cta_terms_pct": 4.848484848484849
},
"top_emojis": [
{
"emoji": "🙌",
"count": 41
},
{
"emoji": "🫶",
"count": 23
},
{
"emoji": "👋",
"count": 19
},
{
"emoji": "🎁",
"count": 17
},
{
"emoji": "💪",
"count": 14
},
{
"emoji": "👇",
"count": 12
},
{
"emoji": "😜",
"count": 7
},
{
"emoji": "😉",
"count": 4
},
{
"emoji": "🤣",
"count": 2
},
{
"emoji": "🙏",
"count": 2
},
{
"emoji": "⬆",
"count": 2
},
{
"emoji": "👀",
"count": 2
},
{
"emoji": "¦",
"count": 1
},
{
"emoji": "😅",
"count": 1
},
{
"emoji": "🤝",
"count": 1
},
{
"emoji": "☺",
"count": 1
},
{
"emoji": "🫡",
"count": 1
},
{
"emoji": "😁",
"count": 1
},
{
"emoji": "👌",
"count": 1
},
{
"emoji": "🚀",
"count": 1
}
]
},
"by_language": {
"Spanish": {
"count": 541,
"length": {
"chars": {
"min": 5,
"p10": 35,
"median": 99,
"p90": 211,
"max": 928,
"mean": 116.06469500924214
},
"words": {
"min": 1,
"p10": 6,
"median": 18,
"p90": 38,
"max": 169,
"mean": 20.478743068391868
}
},
"rates": {
"emoji_messages_pct": 20.70240295748614,
"question_messages_pct": 39.55637707948244,
"ends_with_question_pct": 32.34750462107209,
"exclamation_messages_pct": 16.266173752310536,
"linebreak_messages_pct": 13.67837338262477,
"url_messages_pct": 2.2181146025878005,
"handle_messages_pct": 0.0,
"number_messages_pct": 9.242144177449168,
"starts_with_greeting_pct": 7.578558225508318,
"contains_thanks_pct": 6.839186691312385,
"contains_cta_terms_pct": 7.024029574861368
},
"top_emojis": [
{
"emoji": "🙌",
"count": 24
},
{
"emoji": "🫶",
"count": 23
},
{
"emoji": "👋",
"count": 19
},
{
"emoji": "🎁",
"count": 17
},
{
"emoji": "👇",
"count": 12
},
{
"emoji": "💪",
"count": 10
},
{
"emoji": "😜",
"count": 7
},
{
"emoji": "😉",
"count": 4
},
{
"emoji": "🤣",
"count": 2
},
{
"emoji": "🙏",
"count": 2
},
{
"emoji": "⬆",
"count": 2
},
{
"emoji": "👀",
"count": 2
},
{
"emoji": "😅",
"count": 1
},
{
"emoji": "🤝",
"count": 1
},
{
"emoji": "☺",
"count": 1
},
{
"emoji": "😁",
"count": 1
},
{
"emoji": "👌",
"count": 1
},
{
"emoji": "🚀",
"count": 1
},
{
"emoji": "⏱",
"count": 1
},
{
"emoji": "🫂",
"count": 1
}
]
},
"Too short to tell": {
"count": 255,
"length": {
"chars": {
"min": 2,
"p10": 10,
"median": 30,
"p90": 53,
"max": 151,
"mean": 32.01176470588236
},
"words": {
"min": 1,
"p10": 1,
"median": 4,
"p90": 11,
"max": 29,
"mean": 5.749019607843137
}
},
"rates": {
"emoji_messages_pct": 7.8431372549019605,
"question_messages_pct": 19.607843137254903,
"ends_with_question_pct": 17.647058823529413,
"exclamation_messages_pct": 23.52941176470588,
"linebreak_messages_pct": 0.39215686274509803,
"url_messages_pct": 4.705882352941177,
"handle_messages_pct": 0.0,
"number_messages_pct": 7.0588235294117645,
"starts_with_greeting_pct": 0.0,
"contains_thanks_pct": 0.0,
"contains_cta_terms_pct": 0.0
},
"top_emojis": [
{
"emoji": "🙌",
"count": 15
},
{
"emoji": "💪",
"count": 4
},
{
"emoji": "🫡",
"count": 1
}
]
},
"English": {
"count": 17,
"length": {
"chars": {
"min": 4,
"p10": 11,
"median": 23,
"p90": 38,
"max": 40,
"mean": 23.176470588235293
},
"words": {
"min": 2,
"p10": 3,
"median": 4,
"p90": 6,
"max": 8,
"mean": 4.588235294117647
}
},
"rates": {
"emoji_messages_pct": 11.76470588235294,
"question_messages_pct": 23.52941176470588,
"ends_with_question_pct": 23.52941176470588,
"exclamation_messages_pct": 17.647058823529413,
"linebreak_messages_pct": 0.0,
"url_messages_pct": 0.0,
"handle_messages_pct": 0.0,
"number_messages_pct": 0.0,
"starts_with_greeting_pct": 0.0,
"contains_thanks_pct": 0.0,
"contains_cta_terms_pct": 11.76470588235294
},
"top_emojis": [
{
"emoji": "🙌",
"count": 2
}
]
},
"French": {
"count": 1,
"length": {
"chars": {
"min": 29,
"p10": 29,
"median": 29,
"p90": 29,
"max": 29,
"mean": 29.0
},
"words": {
"min": 5,
"p10": 5,
"median": 5,
"p90": 5,
"max": 5,
"mean": 5.0
}
},
"rates": {
"emoji_messages_pct": 100.0,
"question_messages_pct": 0.0,
"ends_with_question_pct": 0.0,
"exclamation_messages_pct": 0.0,
"linebreak_messages_pct": 0.0,
"url_messages_pct": 0.0,
"handle_messages_pct": 0.0,
"number_messages_pct": 0.0,
"starts_with_greeting_pct": 0.0,
"contains_thanks_pct": 0.0,
"contains_cta_terms_pct": 0.0
},
"top_emojis": [
{
"emoji": "¦",
"count": 1
}
]
},
"Catalan": {
"count": 11,
"length": {
"chars": {
"min": 26,
"p10": 48,
"median": 52,
"p90": 99,
"max": 205,
"mean": 72.0909090909091
},
"words": {
"min": 5,
"p10": 5,
"median": 8,
"p90": 10,
"max": 34,
"mean": 9.727272727272727
}
},
"rates": {
"emoji_messages_pct": 0.0,
"question_messages_pct": 0.0,
"ends_with_question_pct": 0.0,
"exclamation_messages_pct": 0.0,
"linebreak_messages_pct": 9.090909090909092,
"url_messages_pct": 90.9090909090909,
"handle_messages_pct": 0.0,
"number_messages_pct": 90.9090909090909,
"starts_with_greeting_pct": 0.0,
"contains_thanks_pct": 0.0,
"contains_cta_terms_pct": 0.0
},
"top_emojis": []
}
}
}
}
}