Sanitize DM reports and add Mermaid tooling
This commit is contained in:
parent
a6222083e6
commit
a140b3787a
10 changed files with 516 additions and 427 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -7,4 +7,7 @@ venv/
|
||||||
.vscode/
|
.vscode/
|
||||||
/dist/
|
/dist/
|
||||||
/build/
|
/build/
|
||||||
|
node_modules/
|
||||||
|
npm-debug.log*
|
||||||
|
yarn-debug.log*
|
||||||
|
yarn-error.log*
|
||||||
|
|
|
||||||
|
|
@ -103,6 +103,12 @@ This produces the “Sergio persona” artifacts needed for the DM agent:
|
||||||
|
|
||||||
Outputs are written with mode `600` and may contain sensitive DM content. Keep them out of git.
|
Outputs are written with mode `600` and may contain sensitive DM content. Keep them out of git.
|
||||||
|
|
||||||
|
This repo includes **sanitized** example reports (no verbatim client DMs) under:
|
||||||
|
|
||||||
|
- `reports/socialmediatorr/`
|
||||||
|
|
||||||
|
Raw analysis artifacts (e.g., training pairs, rescued threads, template caches) should remain in a private working directory such as `/root/tmp/` and should not be committed.
|
||||||
|
|
||||||
### Analyze a raw Instagram export folder (recommended)
|
### Analyze a raw Instagram export folder (recommended)
|
||||||
|
|
||||||
Optional: index first (lets you filter recency without scanning every thread):
|
Optional: index first (lets you filter recency without scanning every thread):
|
||||||
|
|
|
||||||
174
package-lock.json
generated
Normal file
174
package-lock.json
generated
Normal file
|
|
@ -0,0 +1,174 @@
|
||||||
|
{
|
||||||
|
"name": "emo-social-insta-dm-agent-tools",
|
||||||
|
"lockfileVersion": 3,
|
||||||
|
"requires": true,
|
||||||
|
"packages": {
|
||||||
|
"": {
|
||||||
|
"name": "emo-social-insta-dm-agent-tools",
|
||||||
|
"devDependencies": {
|
||||||
|
"@mermaid-js/parser": "^0.6.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@chevrotain/cst-dts-gen": {
|
||||||
|
"version": "11.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@chevrotain/cst-dts-gen/-/cst-dts-gen-11.0.3.tgz",
|
||||||
|
"integrity": "sha512-BvIKpRLeS/8UbfxXxgC33xOumsacaeCKAjAeLyOn7Pcp95HiRbrpl14S+9vaZLolnbssPIUuiUd8IvgkRyt6NQ==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@chevrotain/gast": "11.0.3",
|
||||||
|
"@chevrotain/types": "11.0.3",
|
||||||
|
"lodash-es": "4.17.21"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@chevrotain/gast": {
|
||||||
|
"version": "11.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@chevrotain/gast/-/gast-11.0.3.tgz",
|
||||||
|
"integrity": "sha512-+qNfcoNk70PyS/uxmj3li5NiECO+2YKZZQMbmjTqRI3Qchu8Hig/Q9vgkHpI3alNjr7M+a2St5pw5w5F6NL5/Q==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@chevrotain/types": "11.0.3",
|
||||||
|
"lodash-es": "4.17.21"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/@chevrotain/regexp-to-ast": {
|
||||||
|
"version": "11.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@chevrotain/regexp-to-ast/-/regexp-to-ast-11.0.3.tgz",
|
||||||
|
"integrity": "sha512-1fMHaBZxLFvWI067AVbGJav1eRY7N8DDvYCTwGBiE/ytKBgP8azTdgyrKyWZ9Mfh09eHWb5PgTSO8wi7U824RA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "Apache-2.0"
|
||||||
|
},
|
||||||
|
"node_modules/@chevrotain/types": {
|
||||||
|
"version": "11.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@chevrotain/types/-/types-11.0.3.tgz",
|
||||||
|
"integrity": "sha512-gsiM3G8b58kZC2HaWR50gu6Y1440cHiJ+i3JUvcp/35JchYejb2+5MVeJK0iKThYpAa/P2PYFV4hoi44HD+aHQ==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "Apache-2.0"
|
||||||
|
},
|
||||||
|
"node_modules/@chevrotain/utils": {
|
||||||
|
"version": "11.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@chevrotain/utils/-/utils-11.0.3.tgz",
|
||||||
|
"integrity": "sha512-YslZMgtJUyuMbZ+aKvfF3x1f5liK4mWNxghFRv7jqRR9C3R3fAOGTTKvxXDa2Y1s9zSbcpuO0cAxDYsc9SrXoQ==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "Apache-2.0"
|
||||||
|
},
|
||||||
|
"node_modules/@mermaid-js/parser": {
|
||||||
|
"version": "0.6.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/@mermaid-js/parser/-/parser-0.6.3.tgz",
|
||||||
|
"integrity": "sha512-lnjOhe7zyHjc+If7yT4zoedx2vo4sHaTmtkl1+or8BRTnCtDmcTpAjpzDSfCZrshM5bCoz0GyidzadJAH1xobA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"langium": "3.3.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/chevrotain": {
|
||||||
|
"version": "11.0.3",
|
||||||
|
"resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-11.0.3.tgz",
|
||||||
|
"integrity": "sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@chevrotain/cst-dts-gen": "11.0.3",
|
||||||
|
"@chevrotain/gast": "11.0.3",
|
||||||
|
"@chevrotain/regexp-to-ast": "11.0.3",
|
||||||
|
"@chevrotain/types": "11.0.3",
|
||||||
|
"@chevrotain/utils": "11.0.3",
|
||||||
|
"lodash-es": "4.17.21"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/chevrotain-allstar": {
|
||||||
|
"version": "0.3.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/chevrotain-allstar/-/chevrotain-allstar-0.3.1.tgz",
|
||||||
|
"integrity": "sha512-b7g+y9A0v4mxCW1qUhf3BSVPg+/NvGErk/dOkrDaHA0nQIQGAtrOjlX//9OQtRlSCy+x9rfB5N8yC71lH1nvMw==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"lodash-es": "^4.17.21"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"chevrotain": "^11.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/langium": {
|
||||||
|
"version": "3.3.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/langium/-/langium-3.3.1.tgz",
|
||||||
|
"integrity": "sha512-QJv/h939gDpvT+9SiLVlY7tZC3xB2qK57v0J04Sh9wpMb6MP1q8gB21L3WIo8T5P1MSMg3Ep14L7KkDCFG3y4w==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"chevrotain": "~11.0.3",
|
||||||
|
"chevrotain-allstar": "~0.3.0",
|
||||||
|
"vscode-languageserver": "~9.0.1",
|
||||||
|
"vscode-languageserver-textdocument": "~1.0.11",
|
||||||
|
"vscode-uri": "~3.0.8"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": ">=16.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/lodash-es": {
|
||||||
|
"version": "4.17.21",
|
||||||
|
"resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.21.tgz",
|
||||||
|
"integrity": "sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/vscode-jsonrpc": {
|
||||||
|
"version": "8.2.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/vscode-jsonrpc/-/vscode-jsonrpc-8.2.0.tgz",
|
||||||
|
"integrity": "sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=14.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/vscode-languageserver": {
|
||||||
|
"version": "9.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/vscode-languageserver/-/vscode-languageserver-9.0.1.tgz",
|
||||||
|
"integrity": "sha512-woByF3PDpkHFUreUa7Hos7+pUWdeWMXRd26+ZX2A8cFx6v/JPTtd4/uN0/jB6XQHYaOlHbio03NTHCqrgG5n7g==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"vscode-languageserver-protocol": "3.17.5"
|
||||||
|
},
|
||||||
|
"bin": {
|
||||||
|
"installServerIntoExtension": "bin/installServerIntoExtension"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/vscode-languageserver-protocol": {
|
||||||
|
"version": "3.17.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/vscode-languageserver-protocol/-/vscode-languageserver-protocol-3.17.5.tgz",
|
||||||
|
"integrity": "sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"vscode-jsonrpc": "8.2.0",
|
||||||
|
"vscode-languageserver-types": "3.17.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/vscode-languageserver-textdocument": {
|
||||||
|
"version": "1.0.12",
|
||||||
|
"resolved": "https://registry.npmjs.org/vscode-languageserver-textdocument/-/vscode-languageserver-textdocument-1.0.12.tgz",
|
||||||
|
"integrity": "sha512-cxWNPesCnQCcMPeenjKKsOCKQZ/L6Tv19DTRIGuLWe32lyzWhihGVJ/rcckZXJxfdKCFvRLS3fpBIsV/ZGX4zA==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/vscode-languageserver-types": {
|
||||||
|
"version": "3.17.5",
|
||||||
|
"resolved": "https://registry.npmjs.org/vscode-languageserver-types/-/vscode-languageserver-types-3.17.5.tgz",
|
||||||
|
"integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/vscode-uri": {
|
||||||
|
"version": "3.0.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/vscode-uri/-/vscode-uri-3.0.8.tgz",
|
||||||
|
"integrity": "sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw==",
|
||||||
|
"dev": true,
|
||||||
|
"license": "MIT"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
11
package.json
Normal file
11
package.json
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
{
|
||||||
|
"name": "emo-social-insta-dm-agent-tools",
|
||||||
|
"private": true,
|
||||||
|
"type": "module",
|
||||||
|
"devDependencies": {
|
||||||
|
"@mermaid-js/parser": "^0.6.3"
|
||||||
|
},
|
||||||
|
"scripts": {
|
||||||
|
"verify:mermaid": "node tools/verify_mermaid.mjs"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,163 +1,42 @@
|
||||||
# Socialmediatorr Instagram DM History — Human Readable Report (English)
|
# Instagram DM History — Short Report (English)
|
||||||
|
|
||||||
- Generated: `2025-12-24T02:28:34+00:00`
|
- Generated: `2025-12-24T02:28:34+00:00`
|
||||||
- Owner name used: `Sergio de Vocht`
|
- Inbox: `@socialmediatorr`
|
||||||
|
|
||||||
## 1) What This Dataset Represents
|
## What This Is
|
||||||
|
|
||||||
This is an all-time audit of Instagram DM conversations for `@socialmediatorr`, focused on extracting repeatable sales + support behavior so an AI agent can reply in Sergio’s style.
|
This is a short, plain-English summary of the DM history scan.
|
||||||
The analysis treats the account as a hybrid system: frequent repeated templates (likely automation/scripts) plus lower-frequency custom replies (human Sergio).
|
It avoids quoting private messages and it avoids storing personal identities.
|
||||||
|
|
||||||
## 2) High-Level Metrics (All-Time)
|
## Key Numbers
|
||||||
|
|
||||||
- Conversations analyzed: **10,061**
|
- Conversations analyzed: **10,061**
|
||||||
- Bot-only conversations: **1,883** (18.7%)
|
- Conversations that stayed template-only: **1,883** (18.7%)
|
||||||
- Human-intervened conversations: **8,153** (81.0%)
|
- Conversations that included custom replies: **8,153** (81.0%)
|
||||||
- Conversion (intent signals): **1,923** (19.1%)
|
- Buying/booking signals (weak): **1,923** (19.1%)
|
||||||
- Conversion (confirmed signals): **55** (0.5%)
|
- Buying/booking signals (strong): **55** (0.5%)
|
||||||
|
|
||||||
Notes on conversion: this uses heuristics (keywords + payment/link mentions). It is directionally useful for ranking scripts, but it is not a ground-truth revenue ledger.
|
Buying/booking signals are detected from text patterns (they are not a payment ledger).
|
||||||
|
|
||||||
## 3) Sergio Persona (From Manual/Hybrid Replies)
|
## What You Need to Know
|
||||||
|
|
||||||
- Typical reply length: median **60.0** chars (p90 **67.0**)
|
The fastest improvements come from standardizing answers to repeated questions and sending them in the right time blocks.
|
||||||
- Questions: **2.4%** | Exclamations: **1.7%** | Emoji: **0.0%**
|
For the full deep report (CET timing, day-of-week patterns, Top 20 questions, and concrete actions), read:
|
||||||
- Language guess (manual replies): en=8043, es=423, unknown=224
|
|
||||||
|
|
||||||
Practical implication for an agent: short, direct replies; minimal punctuation; bilingual capability; low/no emoji usage.
|
- `reports/socialmediatorr/dm_history_report_en_detailed.md`
|
||||||
|
|
||||||
## 4) Bot vs Human Segmentation (What It Means)
|
## Useful Inventory (Safe Counts Only)
|
||||||
|
|
||||||
- **[BOT]** = outgoing message template repeated frequently (>= configured threshold).
|
- Total outgoing templates detected: **8,550**
|
||||||
- **[MANUAL]** = outgoing message that is rare/unique (<= configured threshold).
|
- High-frequency repeat templates: **24**
|
||||||
- **[HYBRID]** = messages that look like a bot template but with manual edits (prefix match/similarity).
|
- “Rescue” events detected: **7**
|
||||||
|
- Training pairs (user → reply) available: **524**
|
||||||
|
|
||||||
This separation is the foundation for: (1) extracting safe reusable scripts, and (2) extracting human-only replies as training data for a RAG or fine-tune.
|
## What You Do Not Need to Know
|
||||||
|
|
||||||
## 5) Top Detected Script Templates (Canonicalized)
|
Do not store or copy these into an automation system unless you have a clear operational reason:
|
||||||
|
- Names, handles, phone numbers, emails.
|
||||||
- BOT #1: sent **2495**× — `crees que es necesario hoy en dã a llevar a cabo este desarrollo colectivo`
|
- Full conversation transcripts for every thread.
|
||||||
- BOT #2: sent **2483**× — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien contigo mism`
|
- Photos, videos, audio, and other attachments.
|
||||||
- BOT #3: sent **2483**× — `te lo dejo por aquã dame un minuto`
|
- One-off edge cases that never repeat.
|
||||||
- BOT #4: sent **2483**× — `me gustarã a saber tu opiniã³n`
|
|
||||||
- BOT #5: sent **1878**× — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando paz`
|
|
||||||
- BOT #6: sent **1878**× — `no es para que lo leas en modo teorã a sino para que puedas detectar ideas que llevas tiempo aplicando y que sin darte cuenta estã n influyendo en tus relacione`
|
|
||||||
- BOT #7: sent **706**× — `gracias por ese feedback ð`
|
|
||||||
- BOT #8: sent **706**× — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s conflicto ãºltimament`
|
|
||||||
- BOT #9: sent **680**× — `you sent a private reply to a comment on your instagram post`
|
|
||||||
- BOT #10: sent **469**× — `por cierto`
|
|
||||||
|
|
||||||
## 6) Human Reply Library (Rare/Manual Examples, Canonicalized)
|
|
||||||
|
|
||||||
- MANUAL-ish #1: seen **10**× — `quã bonito leer eso a veces entender las palabras abre puertas nuevas â sientes que en tu entorno hay algo que te gustarã a armonizar mã s`
|
|
||||||
- MANUAL-ish #2: seen **7**× — `buenas aquã sergio ð gracias por responder he preparado un video con muchã simo valor para ayudarte a acabar con esos desbordes y vivir en paz solo tienes que c`
|
|
||||||
- MANUAL-ish #3: seen **5**× — `hola buenas como estas ð espero que estã s bien me gustarã a saber que es lo q te ha echo estar aquã y querer saber mã s sobre nuestras formaciã³n`
|
|
||||||
- MANUAL-ish #4: seen **5**× — `y si pudieras resolver esto cã³mo crees que cambiarã a tu forma de relacionarte o sentirte`
|
|
||||||
- MANUAL-ish #5: seen **5**× — `para conocerte un poquito mã s que te gustarã a conseguir con emosocial cual es tu mayor desafã o actualmente dentro de tus relaciones`
|
|
||||||
- MANUAL-ish #6: seen **4**× — `okey te entiendo perfectamente ð segãºn lo que me comentas creo que esta lista de videos de youtube te va a venir genial para empezar a entender las bases del c`
|
|
||||||
- MANUAL-ish #7: seen **4**× — `buenas aquã sergio ð gracias por responder he preparado un video con muchã simo valor para ayudarte a acabar con esos desbordes y vivir en paz solo tienes que c`
|
|
||||||
- MANUAL-ish #8: seen **3**× — `hola buenas como estas espero que bien cuã ntame que te parece el contenido que estamos ofreciendo por whatsapp te leoð ð`
|
|
||||||
|
|
||||||
## 7) Bot Template Performance (Reply/Conversion Heuristics)
|
|
||||||
|
|
||||||
These come from `bot_performance_audit.csv` and are computed per canonical bot template.
|
|
||||||
|
|
||||||
### Most-used bot templates (by volume)
|
|
||||||
- sent=2495 reply_rate=0.376 intent_rate=0.0766 confirmed_rate=0.012 — `crees que es necesario hoy en dã a llevar a cabo este desarrollo colectivo`
|
|
||||||
- sent=2483 reply_rate=0.0334 intent_rate=0.0769 confirmed_rate=0.0121 — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien co`
|
|
||||||
- sent=2483 reply_rate=0.1188 intent_rate=0.0769 confirmed_rate=0.0121 — `te lo dejo por aquã dame un minuto`
|
|
||||||
- sent=2483 reply_rate=0.0028 intent_rate=0.0769 confirmed_rate=0.0121 — `me gustarã a saber tu opiniã³n`
|
|
||||||
- sent=1878 reply_rate=0.0 intent_rate=0.0 confirmed_rate=0.0005 — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando pa`
|
|
||||||
- sent=1878 reply_rate=0.1768 intent_rate=0.0 confirmed_rate=0.0005 — `no es para que lo leas en modo teorã a sino para que puedas detectar ideas que llevas tiempo aplicando y que sin darte cuenta estã n influye`
|
|
||||||
- sent=706 reply_rate=0.0042 intent_rate=0.1048 confirmed_rate=0.017 — `gracias por ese feedback ð`
|
|
||||||
- sent=706 reply_rate=0.8187 intent_rate=0.1048 confirmed_rate=0.017 — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s c`
|
|
||||||
|
|
||||||
### Best reply-rate bot templates
|
|
||||||
- reply_rate=0.8187 sent=706 — `como agradecimiento por seguirme quiero regalarte un video exclusivo que te ayude a empezar este cambio dime â dã³nde sientes que hay mã s c`
|
|
||||||
- reply_rate=0.7143 sent=98 — `pudiste entrar correctamente`
|
|
||||||
- reply_rate=0.7022 sent=178 — `por favor toca una de las siguientes opciones ð`
|
|
||||||
- reply_rate=0.4701 sent=134 — `pudiste verlo`
|
|
||||||
- reply_rate=0.4602 sent=176 — `que te pareciã³ ese diccionario hay alguna palabra que sueles utilizar y no te habã as dado cuenta`
|
|
||||||
- reply_rate=0.376 sent=2495 — `crees que es necesario hoy en dã a llevar a cabo este desarrollo colectivo`
|
|
||||||
- reply_rate=0.3458 sent=240 — `gracias por tu sinceridad ð`
|
|
||||||
- reply_rate=0.3291 sent=158 — `te dejo este video donde explico por quã las relaciones de pareja entran en conflicto aunque haya amor`
|
|
||||||
|
|
||||||
### Worst reply-rate bot templates
|
|
||||||
- reply_rate=0.0 sent=1878 — `me alegro de que quieras seguir aprendiendo ð ª te dejo por aquã el ebook â <NUM> conceptos de desarrollo personal que te estã n quitando pa`
|
|
||||||
- reply_rate=0.0 sent=337 — `enhorabuena por querer dar ese cambio estã s a un paso de transformar tu relaciã³n en solo <NUM> dã as te invito a un taller exclusivo donde`
|
|
||||||
- reply_rate=0.0 sent=158 — `gracias por compartirlo â ï`
|
|
||||||
- reply_rate=0.0 sent=131 — `entiendo perfectamente ð`
|
|
||||||
- reply_rate=0.0 sent=54 — `this account can t receive your message because they don t allow new message requests from everyone`
|
|
||||||
- reply_rate=0.0028 sent=2483 — `me gustarã a saber tu opiniã³n`
|
|
||||||
- reply_rate=0.0042 sent=706 — `gracias por ese feedback ð`
|
|
||||||
- reply_rate=0.0334 sent=2483 — `perfecto aquã no hablamos de â mejorar solo a uno mismoâ sino de algo mucho mã s profundo estar bien con los demã s para poder estar bien co`
|
|
||||||
|
|
||||||
## 8) Objections → Best Sergio Replies (Playbook)
|
|
||||||
|
|
||||||
### price
|
|
||||||
- (1) Ey Alex que tal
|
|
||||||
- (1) Qué bonito leer eso. A veces entender las palabras abre puertas nuevas. ¿Sientes que en tu entorno hay algo que te gustarÃa armonizar más?
|
|
||||||
- (1) Y que es lo que te impide dar ese cambio? Te veo con mucha seguridad
|
|
||||||
### time
|
|
||||||
- (1) Brutal esto que dices
|
|
||||||
- (1) No es una herida ELA! Apego que no te dieron tus padres es solo una parte del espectro, necesitamos validación del mundo y de forma constante, no es una herida del pasado es algo que falta darnos en el presente.
|
|
||||||
- (1) Vaya, suena bastante frustrante el hecho de querer "bajar esa guardia", y sentir que cuando lo haces, todo cambia
|
|
||||||
### trust
|
|
||||||
- (2) Hola Dani, gracias por el mensaje bonito, de verdad. Me alegra mucho saber que el contenido te está ayudando a mirar las cosas desde otro punto de vista
|
|
||||||
- (2) Qué bonito leer eso. A veces entender las palabras abre puertas nuevas. ¿Sientes que en tu entorno hay algo que te gustarÃa armonizar más?
|
|
||||||
- (2) En la plataforma no sale por ningún lugar, y normalmente siempre llegan 2 emails, 1 de confirmación de pago y otro de bienvenida
|
|
||||||
|
|
||||||
## 9) Rescue / Save Logic (Human Intervention After Silence/Negativity)
|
|
||||||
|
|
||||||
- Rescue events detected (heuristic): **7**
|
|
||||||
A “rescue” is when a manual/hybrid owner message follows either (a) a user negative signal, or (b) >24h silence after a bot message, and the thread later shows a confirmed conversion signal.
|
|
||||||
|
|
||||||
## 10) Product / Offer Evolution (Eras)
|
|
||||||
|
|
||||||
This is inferred from mentions of pricing/currency + offer terms (e.g., call/audit/coaching) and summarized quarterly.
|
|
||||||
|
|
||||||
Recent quarters (top extracted offer signals):
|
|
||||||
- stripe(1)
|
|
||||||
- book(1912); ebook(1912); call(8); calendly(7); coaching(2); stripe(2); pdf(2); paypal(1)
|
|
||||||
|
|
||||||
## 11) Charts
|
|
||||||
|
|
||||||
- Bot fatigue (weekly reply rate to the dominant bot script): `bot_fatigue_chart.png`
|
|
||||||
|
|
||||||

|
|
||||||
- Editorial timeline (top bot scripts vs conversions): `editorial_timeline.png`
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
## 12) What To Build From This (Agent Requirements)
|
|
||||||
|
|
||||||
### Core behavior
|
|
||||||
- Start with top bot templates for predictable openers and FAQ-style flows.
|
|
||||||
- Switch to Sergio-style manual patterns on objections, negotiation, or when conversation stalls.
|
|
||||||
- Use a rescue cadence (time-based triggers) after silence.
|
|
||||||
|
|
||||||
### Data products to drive the agent
|
|
||||||
- Training pairs (manual-only, converted threads): `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/training_pairs.jsonl` (rows: ~524)
|
|
||||||
- Objection handlers: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/objection_handlers.json`
|
|
||||||
- Rescue playbook: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/rescue_playbook.json`
|
|
||||||
- Script templates + editorial drift: `/root/tmp/socialmediatorr-agent-analysis-alltime-20251224T024000Z/top_outgoing_templates.json`
|
|
||||||
|
|
||||||
### Safety boundaries (recommended)
|
|
||||||
- Never request or store passwords/2FA codes.
|
|
||||||
- Avoid medical/legal/financial advice; redirect to a call or a human.
|
|
||||||
- If user asks to move off-platform, follow Sergio’s historical policy and business rules.
|
|
||||||
|
|
||||||
## 13) What We Do NOT Need To Know (Ignore / Do Not Store)
|
|
||||||
|
|
||||||
- Exact client identities (names, handles, phone numbers, emails) unless required for operational routing.
|
|
||||||
- Media attachments (photos/videos/audio) for persona cloning; they add storage cost and privacy risk.
|
|
||||||
- Full verbatim message dumps for every thread; for RAG you only need high-quality pairs and playbook snippets.
|
|
||||||
- Individual one-off edge cases that never repeat (unless they represent a safety boundary).
|
|
||||||
- Internal Meta export folder structure details beyond `messages/inbox/**/message*.json`.
|
|
||||||
|
|
||||||
## 14) Caveats / Gaps
|
|
||||||
|
|
||||||
- The export does not reliably label ManyChat vs Human; bot/human is inferred by repetition and similarity.
|
|
||||||
- Conversion is heuristic; integrate Stripe/Calendly/CRM events if you want ground-truth attribution.
|
|
||||||
- Language detection is heuristic; improve it if you need precise bilingual routing.
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,25 +1,22 @@
|
||||||
# Socialmediatorr Instagram DM History : Plain-English Deep Report
|
# Instagram DM History — Plain-English Deep Report
|
||||||
|
|
||||||
## DM History Deep Report
|
## What This Is
|
||||||
|
|
||||||
**Subject:** Instagram direct messages for `@socialmediatorr`
|
**Inbox:** `@socialmediatorr`
|
||||||
**Version:** v1.0 (STYLE BIBLE EN 3.0GM)
|
|
||||||
**Date:** 2025-12-24
|
**Date:** 2025-12-24
|
||||||
**Status:** REVIEW REQUIRED
|
**Time zone used:** CET
|
||||||
**Citation:** `if://report/socialmediatorr/instagram/dm-history/`
|
|
||||||
**Author:** Danny Stocker | InfraFabric Research
|
|
||||||
|
|
||||||
### How This Report Was Made
|
### How This Report Was Made
|
||||||
|
|
||||||
> This is an automated count of patterns. It is not a therapy note and it is not a sales ledger.
|
> This is a count of patterns. It is not a therapy note and it is not a sales ledger.
|
||||||
|
|
||||||
This document was generated by reading an Instagram data export and counting repeat patterns over time. It avoids quoting private client messages and it avoids storing personal identities.
|
This document was generated by reading an Instagram data export and counting repeat patterns over time. It avoids quoting private client messages and it avoids storing personal identities.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
**Context:** This inbox contains a high-volume message-and-reply system over 429 days.
|
**Context:** This inbox contains message history over 429 days.
|
||||||
|
|
||||||
> Your messaging system is working as a volume engine. The weak point is consistency at the moments where people ask to buy or book.
|
> The system works at scale. The weak point is the “next step” moments: when people ask what to do, what it costs, or where to get it.
|
||||||
|
|
||||||
The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust.
|
The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust.
|
||||||
|
|
||||||
|
|
@ -35,7 +32,7 @@ Across the observed window, you sent a very large number of messages and you rec
|
||||||
| Total messages | 54,069 | Instagram export |
|
| Total messages | 54,069 | Instagram export |
|
||||||
| Messages you sent | 43,607 | Instagram export |
|
| Messages you sent | 43,607 | Instagram export |
|
||||||
| Messages people sent you | 10,462 | Instagram export |
|
| Messages people sent you | 10,462 | Instagram export |
|
||||||
| Messages that look like a question or a request | 2,713 | Instagram export |
|
| Messages that look like a question or a request | 2,715 | Instagram export |
|
||||||
| System messages about new followers (auto text in the inbox) | 8,081 | Instagram export |
|
| System messages about new followers (auto text in the inbox) | 8,081 | Instagram export |
|
||||||
|
|
||||||
### What You Need to Know (In Plain English)
|
### What You Need to Know (In Plain English)
|
||||||
|
|
@ -67,7 +64,7 @@ To avoid guesswork, we start with 3-month blocks (a simple way to smooth noise),
|
||||||
| 2025 Jan-Mar | 21 | 0 | 0 |
|
| 2025 Jan-Mar | 21 | 0 | 0 |
|
||||||
| 2025 Apr-Jun | 92 | 97 | 15 |
|
| 2025 Apr-Jun | 92 | 97 | 15 |
|
||||||
| 2025 Jul-Sep | 623 | 882 | 89 |
|
| 2025 Jul-Sep | 623 | 882 | 89 |
|
||||||
| 2025 Oct-Dec | 9,712 | 42,628 | 2,609 |
|
| 2025 Oct-Dec | 9,712 | 42,628 | 2,611 |
|
||||||
|
|
||||||
Same data as charts:
|
Same data as charts:
|
||||||
|
|
||||||
|
|
@ -112,8 +109,8 @@ This month-by-month table is the clearest view of how the inbox changed over tim
|
||||||
| 2025-08 | 193 | 230 | 28 | 50.0% |
|
| 2025-08 | 193 | 230 | 28 | 50.0% |
|
||||||
| 2025-09 | 284 | 330 | 24 | 20.8% |
|
| 2025-09 | 284 | 330 | 24 | 20.8% |
|
||||||
| 2025-10 | 787 | 1,190 | 64 | 17.2% |
|
| 2025-10 | 787 | 1,190 | 64 | 17.2% |
|
||||||
| 2025-11 | 854 | 2,194 | 149 | 46.3% |
|
| 2025-11 | 854 | 2,194 | 150 | 46.7% |
|
||||||
| 2025-12 | 8,071 | 39,244 | 2,396 | 89.6% |
|
| 2025-12 | 8,071 | 39,244 | 2,397 | 89.7% |
|
||||||
|
|
||||||
The busiest month was **2025-12** with **47,315** messages total (87.5% of everything in this export). That single month dominates the shape of the data.
|
The busiest month was **2025-12** with **47,315** messages total (87.5% of everything in this export). That single month dominates the shape of the data.
|
||||||
|
|
||||||
|
|
@ -126,7 +123,7 @@ Use this to time follow-ups and first messages. Do not spread effort evenly acro
|
||||||
| Day of week | Messages from people | Messages you sent | Questions/requests |
|
| Day of week | Messages from people | Messages you sent | Questions/requests |
|
||||||
|---|---:|---:|---:|
|
|---|---:|---:|---:|
|
||||||
| Monday | 1,600 | 8,359 | 131 |
|
| Monday | 1,600 | 8,359 | 131 |
|
||||||
| Tuesday | 1,939 | 9,654 | 192 |
|
| Tuesday | 1,939 | 9,654 | 194 |
|
||||||
| Wednesday | 1,282 | 5,554 | 159 |
|
| Wednesday | 1,282 | 5,554 | 159 |
|
||||||
| Thursday | 2,261 | 6,908 | 1,268 |
|
| Thursday | 2,261 | 6,908 | 1,268 |
|
||||||
| Friday | 1,705 | 5,733 | 803 |
|
| Friday | 1,705 | 5,733 | 803 |
|
||||||
|
|
@ -184,7 +181,7 @@ One caution: “fast replies” are often repeat messages. This section shows ov
|
||||||
| Typical time to reply to questions/requests | 2 seconds | Instagram export |
|
| Typical time to reply to questions/requests | 2 seconds | Instagram export |
|
||||||
| Slow end for questions/requests (90% are faster) | 4 seconds | Instagram export |
|
| Slow end for questions/requests (90% are faster) | 4 seconds | Instagram export |
|
||||||
| Messages from people answered within 48 hours | 7,467 (71.4%) | Instagram export |
|
| Messages from people answered within 48 hours | 7,467 (71.4%) | Instagram export |
|
||||||
| Questions/requests answered within 48 hours | 2,278 (84.0%) | Instagram export |
|
| Questions/requests answered within 48 hours | 2,280 (84.0%) | Instagram export |
|
||||||
|
|
||||||
Breakdown by message type (repeat messages vs custom messages):
|
Breakdown by message type (repeat messages vs custom messages):
|
||||||
|
|
||||||
|
|
@ -242,11 +239,11 @@ This list is grouped by meaning (not by exact wording). It includes very short r
|
||||||
| Rank | Topic (plain English) | Count | Share of all questions/requests |
|
| Rank | Topic (plain English) | Count | Share of all questions/requests |
|
||||||
|---:|---|---:|---:|
|
|---:|---|---:|---:|
|
||||||
| 1 | Just one word: book | 1,857 | 68.4% |
|
| 1 | Just one word: book | 1,857 | 68.4% |
|
||||||
| 2 | What is this? | 206 | 7.6% |
|
| 2 | What is this? | 203 | 7.5% |
|
||||||
| 3 | Can you send the video? | 191 | 7.0% |
|
| 3 | Can you send the video? | 189 | 7.0% |
|
||||||
| 4 | Other question | 120 | 4.4% |
|
| 4 | Other question | 118 | 4.3% |
|
||||||
| 5 | Can you help me? | 74 | 2.7% |
|
| 5 | Can you help me? | 74 | 2.7% |
|
||||||
| 6 | Can you send the link? | 61 | 2.2% |
|
| 6 | Can you send the link? | 70 | 2.6% |
|
||||||
| 7 | What does it cost? | 53 | 2.0% |
|
| 7 | What does it cost? | 53 | 2.0% |
|
||||||
| 8 | Is this therapy? | 44 | 1.6% |
|
| 8 | Is this therapy? | 44 | 1.6% |
|
||||||
| 9 | Where do I get the book? | 36 | 1.3% |
|
| 9 | Where do I get the book? | 36 | 1.3% |
|
||||||
|
|
@ -261,12 +258,12 @@ This list is grouped by meaning (not by exact wording). It includes very short r
|
||||||
| 18 | Can I get a refund? | 1 | 0.0% |
|
| 18 | Can I get a refund? | 1 | 0.0% |
|
||||||
| 19 | How long does it take? | 1 | 0.0% |
|
| 19 | How long does it take? | 1 | 0.0% |
|
||||||
|
|
||||||
In plain terms: **1,893** of **2,713** questions/requests are about the book (69.8%).
|
In plain terms: **1,893** of **2,715** questions/requests are about the book (69.7%).
|
||||||
|
|
||||||
```mermaid
|
```mermaid
|
||||||
pie title Questions/Requests: Book vs Everything Else
|
pie title Questions/Requests: Book vs Everything Else
|
||||||
"Book" : 1893
|
"Book" : 1893
|
||||||
"Everything else" : 820
|
"Everything else" : 822
|
||||||
```
|
```
|
||||||
|
|
||||||
### Content Patterns (What You Mention When You Sell)
|
### Content Patterns (What You Mention When You Sell)
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,8 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import csv
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import statistics
|
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
@ -21,11 +19,6 @@ def _load_json(path: Path) -> dict[str, Any]:
|
||||||
return json.loads(path.read_text(encoding="utf-8", errors="replace"))
|
return json.loads(path.read_text(encoding="utf-8", errors="replace"))
|
||||||
|
|
||||||
|
|
||||||
def _read_csv(path: Path) -> list[dict[str, str]]:
|
|
||||||
with path.open("r", encoding="utf-8", newline="") as f:
|
|
||||||
return list(csv.DictReader(f))
|
|
||||||
|
|
||||||
|
|
||||||
def _count_jsonl(path: Path, *, max_lines: int = 5_000_000) -> int:
|
def _count_jsonl(path: Path, *, max_lines: int = 5_000_000) -> int:
|
||||||
n = 0
|
n = 0
|
||||||
with path.open("r", encoding="utf-8", errors="replace") as f:
|
with path.open("r", encoding="utf-8", errors="replace") as f:
|
||||||
|
|
@ -36,284 +29,115 @@ def _count_jsonl(path: Path, *, max_lines: int = 5_000_000) -> int:
|
||||||
return n
|
return n
|
||||||
|
|
||||||
|
|
||||||
def _pct(x: float) -> str:
|
def _pct(num: int, den: int) -> str:
|
||||||
return f"{x*100:.1f}%"
|
return "n/a" if den <= 0 else f"{(num/den)*100:.1f}%"
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class ReportInputs:
|
class ReportInputs:
|
||||||
summary: Path
|
summary: Path
|
||||||
templates: Path
|
templates: Path
|
||||||
bot_audit: Path
|
|
||||||
objections: Path
|
|
||||||
rescue: Path
|
rescue: Path
|
||||||
eras: Path
|
|
||||||
training_pairs: Path
|
training_pairs: Path
|
||||||
fatigue_png: Path
|
|
||||||
editorial_png: Path
|
|
||||||
|
|
||||||
|
|
||||||
def _resolve_inputs(analysis_dir: Path) -> ReportInputs:
|
def _resolve_inputs(analysis_dir: Path) -> ReportInputs:
|
||||||
return ReportInputs(
|
return ReportInputs(
|
||||||
summary=analysis_dir / "summary.json",
|
summary=analysis_dir / "summary.json",
|
||||||
templates=analysis_dir / "top_outgoing_templates.json",
|
templates=analysis_dir / "top_outgoing_templates.json",
|
||||||
bot_audit=analysis_dir / "bot_performance_audit.csv",
|
|
||||||
objections=analysis_dir / "objection_handlers.json",
|
|
||||||
rescue=analysis_dir / "rescue_playbook.json",
|
rescue=analysis_dir / "rescue_playbook.json",
|
||||||
eras=analysis_dir / "sergio_eras.csv",
|
|
||||||
training_pairs=analysis_dir / "training_pairs.jsonl",
|
training_pairs=analysis_dir / "training_pairs.jsonl",
|
||||||
fatigue_png=analysis_dir / "bot_fatigue_chart.png",
|
|
||||||
editorial_png=analysis_dir / "editorial_timeline.png",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def generate_report(*, analysis_dir: Path, out_path: Path) -> Path:
|
def generate_report(*, analysis_dir: Path, out_path: Path) -> Path:
|
||||||
inp = _resolve_inputs(analysis_dir)
|
inp = _resolve_inputs(analysis_dir)
|
||||||
for p in inp.__dict__.values():
|
if not inp.summary.exists():
|
||||||
if not Path(p).exists():
|
raise FileNotFoundError(str(inp.summary))
|
||||||
raise FileNotFoundError(str(p))
|
|
||||||
|
|
||||||
summary = _load_json(inp.summary)
|
summary = _load_json(inp.summary)
|
||||||
templates = _load_json(inp.templates)
|
|
||||||
objections = _load_json(inp.objections)
|
|
||||||
rescues = _load_json(inp.rescue)
|
|
||||||
bot_audit = _read_csv(inp.bot_audit)
|
|
||||||
|
|
||||||
owner = summary.get("owner_name") or "Unknown"
|
|
||||||
conv = summary.get("conversations") or {}
|
conv = summary.get("conversations") or {}
|
||||||
conv_total = int(conv.get("total") or 0)
|
conv_total = int(conv.get("total") or 0)
|
||||||
bot_only = int(conv.get("bot_only") or 0)
|
template_only = int(conv.get("bot_only") or 0)
|
||||||
human = int(conv.get("human_intervened") or 0)
|
custom_replies = int(conv.get("human_intervened") or 0)
|
||||||
conversions = summary.get("conversions") or {}
|
|
||||||
conv_intent = int(conversions.get("intent") or 0)
|
|
||||||
conv_confirmed = int(conversions.get("confirmed") or 0)
|
|
||||||
|
|
||||||
bot_only_rate = (bot_only / conv_total) if conv_total else 0.0
|
buying = summary.get("conversions") or {}
|
||||||
human_rate = (human / conv_total) if conv_total else 0.0
|
buying_weak = int(buying.get("intent") or 0)
|
||||||
intent_rate = (conv_intent / conv_total) if conv_total else 0.0
|
buying_strong = int(buying.get("confirmed") or 0)
|
||||||
confirmed_rate = (conv_confirmed / conv_total) if conv_total else 0.0
|
|
||||||
|
|
||||||
manual_style = summary.get("manual_style") or {}
|
templates_total = None
|
||||||
median_len = manual_style.get("median_len_chars")
|
templates_repeat = None
|
||||||
p90_len = manual_style.get("p90_len_chars")
|
if inp.templates.exists():
|
||||||
question_rate = float(manual_style.get("question_rate") or 0.0)
|
t = _load_json(inp.templates)
|
||||||
exclaim_rate = float(manual_style.get("exclaim_rate") or 0.0)
|
templates_total = int(t.get("templates_total") or 0)
|
||||||
emoji_rate = float(manual_style.get("emoji_rate") or 0.0)
|
templates_repeat = int(t.get("bot_templates") or 0)
|
||||||
lang_guess = manual_style.get("lang_guess") or {}
|
|
||||||
|
|
||||||
# Templates: prefer canonical strings (safe-ish) and avoid raw samples.
|
rescue_count = None
|
||||||
top_templates = templates.get("top_templates") or []
|
if inp.rescue.exists():
|
||||||
top_bot = [t for t in top_templates if isinstance(t, dict) and t.get("label_hint") == "bot"]
|
|
||||||
top_manual = [t for t in top_templates if isinstance(t, dict) and t.get("label_hint") == "manual"]
|
|
||||||
|
|
||||||
# Bot audit: best/worst by reply_rate.
|
|
||||||
def fnum(v: str | None) -> float:
|
|
||||||
try:
|
try:
|
||||||
return float(v or 0)
|
rescue = _load_json(inp.rescue)
|
||||||
|
rescue_count = len(rescue) if isinstance(rescue, list) else 0
|
||||||
except Exception:
|
except Exception:
|
||||||
return 0.0
|
rescue_count = None
|
||||||
|
|
||||||
bot_audit_sorted = sorted(bot_audit, key=lambda r: fnum(r.get("sent")), reverse=True)
|
pairs_count = _count_jsonl(inp.training_pairs, max_lines=2_000_000) if inp.training_pairs.exists() else None
|
||||||
top_audit = bot_audit_sorted[:10]
|
|
||||||
best_reply = sorted(bot_audit, key=lambda r: fnum(r.get("reply_rate")), reverse=True)[:10]
|
|
||||||
worst_reply = sorted(bot_audit, key=lambda r: fnum(r.get("reply_rate")))[:10]
|
|
||||||
|
|
||||||
# Objections: most common replies per category.
|
generated_at = summary.get("generated_at") if isinstance(summary.get("generated_at"), str) else None
|
||||||
objection_blocks: list[str] = []
|
|
||||||
if isinstance(objections, dict):
|
|
||||||
for cat in ("price", "time", "trust", "stop"):
|
|
||||||
replies = objections.get(cat) or []
|
|
||||||
if not isinstance(replies, list) or not replies:
|
|
||||||
continue
|
|
||||||
top3 = []
|
|
||||||
for r in replies[:3]:
|
|
||||||
if not isinstance(r, dict):
|
|
||||||
continue
|
|
||||||
top3.append(f"- ({r.get('count')}) {r.get('reply')}")
|
|
||||||
if top3:
|
|
||||||
objection_blocks.append(f"### {cat}\n" + "\n".join(top3))
|
|
||||||
|
|
||||||
rescue_count = len(rescues) if isinstance(rescues, list) else 0
|
report: list[str] = []
|
||||||
pairs_count = _count_jsonl(inp.training_pairs, max_lines=2_000_000)
|
report.append("# Instagram DM History — Short Report (English)")
|
||||||
|
|
||||||
# Era summary: simple high-level notes.
|
|
||||||
eras_rows = _read_csv(inp.eras)
|
|
||||||
era_recent = eras_rows[-6:] if len(eras_rows) > 6 else eras_rows
|
|
||||||
era_offer_terms: list[str] = []
|
|
||||||
for row in era_recent:
|
|
||||||
offers = (row.get("top_offers") or "").strip()
|
|
||||||
if offers:
|
|
||||||
era_offer_terms.append(offers)
|
|
||||||
|
|
||||||
# A few derived notes.
|
|
||||||
lang_line = ", ".join(f"{k}={v}" for k, v in lang_guess.items())
|
|
||||||
|
|
||||||
# Summarize bot fatigue trend from image existence only (analysis already made it).
|
|
||||||
report = []
|
|
||||||
report.append("# Socialmediatorr Instagram DM History — Human Readable Report (English)")
|
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append(f"- Generated: `{summary.get('generated_at')}`")
|
if generated_at:
|
||||||
report.append(f"- Owner name used: `{owner}`")
|
report.append(f"- Generated: `{generated_at}`")
|
||||||
|
report.append("- Inbox: `@socialmediatorr`")
|
||||||
report.append("")
|
report.append("")
|
||||||
|
|
||||||
report.append("## 1) What This Dataset Represents")
|
report.append("## What This Is")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append(
|
report.append("This is a short, plain-English summary of the DM history scan.")
|
||||||
"This is an all-time audit of Instagram DM conversations for `@socialmediatorr`, focused on extracting repeatable sales + support behavior so an AI agent can reply in Sergio’s style."
|
report.append("It avoids quoting private messages and it avoids storing personal identities.")
|
||||||
)
|
|
||||||
report.append(
|
|
||||||
"The analysis treats the account as a hybrid system: frequent repeated templates (likely automation/scripts) plus lower-frequency custom replies (human Sergio)."
|
|
||||||
)
|
|
||||||
report.append("")
|
report.append("")
|
||||||
|
|
||||||
report.append("## 2) High-Level Metrics (All-Time)")
|
report.append("## Key Numbers")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append(f"- Conversations analyzed: **{conv_total:,}**")
|
report.append(f"- Conversations analyzed: **{conv_total:,}**")
|
||||||
report.append(f"- Bot-only conversations: **{bot_only:,}** ({_pct(bot_only_rate)})")
|
report.append(f"- Conversations that stayed template-only: **{template_only:,}** ({_pct(template_only, conv_total)})")
|
||||||
report.append(f"- Human-intervened conversations: **{human:,}** ({_pct(human_rate)})")
|
report.append(f"- Conversations that included custom replies: **{custom_replies:,}** ({_pct(custom_replies, conv_total)})")
|
||||||
report.append(f"- Conversion (intent signals): **{conv_intent:,}** ({_pct(intent_rate)})")
|
report.append(f"- Buying/booking signals (weak): **{buying_weak:,}** ({_pct(buying_weak, conv_total)})")
|
||||||
report.append(f"- Conversion (confirmed signals): **{conv_confirmed:,}** ({_pct(confirmed_rate)})")
|
report.append(f"- Buying/booking signals (strong): **{buying_strong:,}** ({_pct(buying_strong, conv_total)})")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append(
|
report.append("Buying/booking signals are detected from text patterns (they are not a payment ledger).")
|
||||||
"Notes on conversion: this uses heuristics (keywords + payment/link mentions). It is directionally useful for ranking scripts, but it is not a ground-truth revenue ledger."
|
|
||||||
)
|
|
||||||
report.append("")
|
report.append("")
|
||||||
|
|
||||||
report.append("## 3) Sergio Persona (From Manual/Hybrid Replies)")
|
report.append("## What You Need to Know")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append(f"- Typical reply length: median **{median_len}** chars (p90 **{p90_len}**)")
|
report.append("The fastest improvements come from standardizing answers to repeated questions and sending them in the right time blocks.")
|
||||||
report.append(f"- Questions: **{_pct(question_rate)}** | Exclamations: **{_pct(exclaim_rate)}** | Emoji: **{_pct(emoji_rate)}**")
|
report.append("For the full deep report (CET timing, day-of-week patterns, Top 20 questions, and concrete actions), read:")
|
||||||
report.append(f"- Language guess (manual replies): {lang_line or 'n/a'}")
|
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append("Practical implication for an agent: short, direct replies; minimal punctuation; bilingual capability; low/no emoji usage.")
|
report.append("- `reports/socialmediatorr/dm_history_report_en_detailed.md`")
|
||||||
report.append("")
|
report.append("")
|
||||||
|
|
||||||
report.append("## 4) Bot vs Human Segmentation (What It Means)")
|
report.append("## Useful Inventory (Safe Counts Only)")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append(
|
if templates_total is not None and templates_repeat is not None:
|
||||||
"- **[BOT]** = outgoing message template repeated frequently (>= configured threshold).\n"
|
report.append(f"- Total outgoing templates detected: **{templates_total:,}**")
|
||||||
"- **[MANUAL]** = outgoing message that is rare/unique (<= configured threshold).\n"
|
report.append(f"- High-frequency repeat templates: **{templates_repeat:,}**")
|
||||||
"- **[HYBRID]** = messages that look like a bot template but with manual edits (prefix match/similarity)."
|
if rescue_count is not None:
|
||||||
)
|
report.append(f"- “Rescue” events detected: **{rescue_count:,}**")
|
||||||
report.append("")
|
if pairs_count is not None:
|
||||||
report.append(
|
report.append(f"- Training pairs (user → reply) available: **{pairs_count:,}**")
|
||||||
"This separation is the foundation for: (1) extracting safe reusable scripts, and (2) extracting human-only replies as training data for a RAG or fine-tune."
|
if templates_total is None and rescue_count is None and pairs_count is None:
|
||||||
)
|
report.append("- (No additional artifacts were found next to `summary.json`.)")
|
||||||
report.append("")
|
report.append("")
|
||||||
|
|
||||||
report.append("## 5) Top Detected Script Templates (Canonicalized)")
|
report.append("## What You Do Not Need to Know")
|
||||||
report.append("")
|
report.append("")
|
||||||
if top_bot:
|
report.append("Do not store or copy these into an automation system unless you have a clear operational reason:")
|
||||||
for i, t in enumerate(top_bot[:10], 1):
|
report.append("- Names, handles, phone numbers, emails.")
|
||||||
canon = (t.get("canonical") or "").strip()
|
report.append("- Full conversation transcripts for every thread.")
|
||||||
count = int(t.get("count") or 0)
|
report.append("- Photos, videos, audio, and other attachments.")
|
||||||
report.append(f"- BOT #{i}: sent **{count}**× — `{canon[:160]}`")
|
report.append("- One-off edge cases that never repeat.")
|
||||||
else:
|
|
||||||
report.append("- (No high-frequency bot templates detected with current thresholds.)")
|
|
||||||
report.append("")
|
|
||||||
|
|
||||||
report.append("## 6) Human Reply Library (Rare/Manual Examples, Canonicalized)")
|
|
||||||
report.append("")
|
|
||||||
if top_manual:
|
|
||||||
for i, t in enumerate(top_manual[:10], 1):
|
|
||||||
canon = (t.get("canonical") or "").strip()
|
|
||||||
count = int(t.get("count") or 0)
|
|
||||||
report.append(f"- MANUAL-ish #{i}: seen **{count}**× — `{canon[:160]}`")
|
|
||||||
else:
|
|
||||||
report.append("- (No low-frequency manual templates included in the cached top list.)")
|
|
||||||
report.append("")
|
|
||||||
|
|
||||||
report.append("## 7) Bot Template Performance (Reply/Conversion Heuristics)")
|
|
||||||
report.append("")
|
|
||||||
report.append("These come from `bot_performance_audit.csv` and are computed per canonical bot template.")
|
|
||||||
report.append("")
|
|
||||||
if top_audit:
|
|
||||||
report.append("### Most-used bot templates (by volume)")
|
|
||||||
for r in top_audit[:8]:
|
|
||||||
report.append(
|
|
||||||
f"- sent={r.get('sent')} reply_rate={r.get('reply_rate')} intent_rate={r.get('conversion_intent_rate')} confirmed_rate={r.get('conversion_confirmed_rate')} — `{(r.get('canonical_template') or '')[:140]}`"
|
|
||||||
)
|
|
||||||
report.append("")
|
|
||||||
if best_reply:
|
|
||||||
report.append("### Best reply-rate bot templates")
|
|
||||||
for r in best_reply[:8]:
|
|
||||||
report.append(f"- reply_rate={r.get('reply_rate')} sent={r.get('sent')} — `{(r.get('canonical_template') or '')[:140]}`")
|
|
||||||
report.append("")
|
|
||||||
if worst_reply:
|
|
||||||
report.append("### Worst reply-rate bot templates")
|
|
||||||
for r in worst_reply[:8]:
|
|
||||||
report.append(f"- reply_rate={r.get('reply_rate')} sent={r.get('sent')} — `{(r.get('canonical_template') or '')[:140]}`")
|
|
||||||
report.append("")
|
|
||||||
|
|
||||||
report.append("## 8) Objections → Best Sergio Replies (Playbook)")
|
|
||||||
report.append("")
|
|
||||||
if objection_blocks:
|
|
||||||
report.extend(objection_blocks)
|
|
||||||
else:
|
|
||||||
report.append("- No objection handlers detected with current keyword rules.")
|
|
||||||
report.append("")
|
|
||||||
|
|
||||||
report.append("## 9) Rescue / Save Logic (Human Intervention After Silence/Negativity)")
|
|
||||||
report.append("")
|
|
||||||
report.append(f"- Rescue events detected (heuristic): **{rescue_count:,}**")
|
|
||||||
report.append(
|
|
||||||
"A “rescue” is when a manual/hybrid owner message follows either (a) a user negative signal, or (b) >24h silence after a bot message, and the thread later shows a confirmed conversion signal."
|
|
||||||
)
|
|
||||||
report.append("")
|
|
||||||
|
|
||||||
report.append("## 10) Product / Offer Evolution (Eras)")
|
|
||||||
report.append("")
|
|
||||||
report.append(
|
|
||||||
"This is inferred from mentions of pricing/currency + offer terms (e.g., call/audit/coaching) and summarized quarterly."
|
|
||||||
)
|
|
||||||
report.append("")
|
|
||||||
if era_offer_terms:
|
|
||||||
report.append("Recent quarters (top extracted offer signals):")
|
|
||||||
for line in era_offer_terms:
|
|
||||||
report.append(f"- {line}")
|
|
||||||
else:
|
|
||||||
report.append("- No offer signals detected in the most recent quarters with current extraction rules.")
|
|
||||||
report.append("")
|
|
||||||
|
|
||||||
report.append("## 11) Charts")
|
|
||||||
report.append("")
|
|
||||||
report.append(f"- Bot fatigue (weekly reply rate to the dominant bot script): `{inp.fatigue_png}`")
|
|
||||||
report.append(f"- Editorial timeline (top bot scripts vs conversions): `{inp.editorial_png}`")
|
|
||||||
report.append("")
|
|
||||||
|
|
||||||
report.append("## 12) What To Build From This (Agent Requirements)")
|
|
||||||
report.append("")
|
|
||||||
report.append("### Core behavior")
|
|
||||||
report.append("- Start with top bot templates for predictable openers and FAQ-style flows.")
|
|
||||||
report.append("- Switch to Sergio-style manual patterns on objections, negotiation, or when conversation stalls.")
|
|
||||||
report.append("- Use a rescue cadence (time-based triggers) after silence.")
|
|
||||||
report.append("")
|
|
||||||
report.append("### Data products to drive the agent")
|
|
||||||
report.append(f"- Training pairs (manual-only, converted threads): `{inp.training_pairs}` (rows: ~{pairs_count:,})")
|
|
||||||
report.append(f"- Objection handlers: `{inp.objections}`")
|
|
||||||
report.append(f"- Rescue playbook: `{inp.rescue}`")
|
|
||||||
report.append(f"- Script templates + editorial drift: `{inp.templates}`")
|
|
||||||
report.append("")
|
|
||||||
report.append("### Safety boundaries (recommended)")
|
|
||||||
report.append("- Never request or store passwords/2FA codes.")
|
|
||||||
report.append("- Avoid medical/legal/financial advice; redirect to a call or a human.")
|
|
||||||
report.append("- If user asks to move off-platform, follow Sergio’s historical policy and business rules.")
|
|
||||||
report.append("")
|
|
||||||
|
|
||||||
report.append("## 13) What We Do NOT Need To Know (Ignore / Do Not Store)")
|
|
||||||
report.append("")
|
|
||||||
report.append("- Exact client identities (names, handles, phone numbers, emails) unless required for operational routing.")
|
|
||||||
report.append("- Media attachments (photos/videos/audio) for persona cloning; they add storage cost and privacy risk.")
|
|
||||||
report.append("- Full verbatim message dumps for every thread; for RAG you only need high-quality pairs and playbook snippets.")
|
|
||||||
report.append("- Individual one-off edge cases that never repeat (unless they represent a safety boundary).")
|
|
||||||
report.append("- Internal Meta export folder structure details beyond `messages/inbox/**/message*.json`.")
|
|
||||||
report.append("")
|
|
||||||
|
|
||||||
report.append("## 14) Caveats / Gaps")
|
|
||||||
report.append("")
|
|
||||||
report.append("- The export does not reliably label ManyChat vs Human; bot/human is inferred by repetition and similarity.")
|
|
||||||
report.append("- Conversion is heuristic; integrate Stripe/Calendly/CRM events if you want ground-truth attribution.")
|
|
||||||
report.append("- Language detection is heuristic; improve it if you need precise bilingual routing.")
|
|
||||||
report.append("")
|
report.append("")
|
||||||
|
|
||||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
@ -323,19 +147,20 @@ def generate_report(*, analysis_dir: Path, out_path: Path) -> Path:
|
||||||
|
|
||||||
|
|
||||||
def main(argv: list[str] | None = None) -> int:
|
def main(argv: list[str] | None = None) -> int:
|
||||||
ap = argparse.ArgumentParser(description="Generate a human-readable English report from analyze_instagram_export outputs.")
|
ap = argparse.ArgumentParser(description="Generate a short, safe DM history report from an analysis directory.")
|
||||||
ap.add_argument("--analysis-dir", required=True, help="directory produced by analyze_instagram_export (contains summary.json)")
|
ap.add_argument("--analysis-dir", required=True, help="analyze_instagram_export output directory")
|
||||||
ap.add_argument("--out", default=None, help="output markdown path (default: <analysis-dir>/dm_history_report_en.md)")
|
ap.add_argument("--out", default=None, help="output markdown path (default: dm_history_report_en.md in CWD)")
|
||||||
args = ap.parse_args(argv)
|
args = ap.parse_args(argv)
|
||||||
|
|
||||||
analysis_dir = Path(args.analysis_dir)
|
analysis_dir = Path(args.analysis_dir)
|
||||||
out_path = Path(args.out) if args.out else (analysis_dir / "dm_history_report_en.md")
|
out_path = Path(args.out) if args.out else (Path.cwd() / "dm_history_report_en.md")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
p = generate_report(analysis_dir=analysis_dir, out_path=out_path)
|
p = generate_report(analysis_dir=analysis_dir, out_path=out_path)
|
||||||
print(json.dumps({"ok": True, "out": str(p)}, ensure_ascii=False))
|
print(json.dumps({"ok": True, "out": str(p)}, ensure_ascii=False))
|
||||||
return 0
|
return 0
|
||||||
except FileNotFoundError as e:
|
except FileNotFoundError as e:
|
||||||
print(f"Missing required input: {e}", file=os.sys.stderr)
|
print(f"Missing analysis input: {e}", file=os.sys.stderr)
|
||||||
return 2
|
return 2
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Report generation failed: {e}", file=os.sys.stderr)
|
print(f"Report generation failed: {e}", file=os.sys.stderr)
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ from typing import Any, Iterable, Literal
|
||||||
|
|
||||||
from .analyze_instagram_export import canonicalize_text
|
from .analyze_instagram_export import canonicalize_text
|
||||||
|
|
||||||
DEFAULT_LOCAL_TZ_NAME = "Europe/Brussels"
|
DEFAULT_LOCAL_TZ_NAME = "Europe/Paris"
|
||||||
|
|
||||||
|
|
||||||
def _safe_chmod_600(path: Path) -> None:
|
def _safe_chmod_600(path: Path) -> None:
|
||||||
|
|
@ -418,11 +418,11 @@ def _question_theme(text: str) -> str | None:
|
||||||
toks = s_compact.split()
|
toks = s_compact.split()
|
||||||
if len(toks) == 1:
|
if len(toks) == 1:
|
||||||
w = toks[0]
|
w = toks[0]
|
||||||
if w in {"book", "ebook", "libro", "pdf"}:
|
if w in {"book", "ebook", "libro", "pdf", "livre", "llibre"}:
|
||||||
return "Just one word: book"
|
return "Just one word: book"
|
||||||
if w in {"link", "enlace"}:
|
if w in {"link", "enlace", "lien", "enllac", "enllaç"}:
|
||||||
return "Just one word: link"
|
return "Just one word: link"
|
||||||
if w in {"price", "precio", "cost"}:
|
if w in {"price", "precio", "cost", "prix", "preu"}:
|
||||||
return "Just one word: price"
|
return "Just one word: price"
|
||||||
|
|
||||||
# "I tried, but it didn't arrive / it doesn't work"
|
# "I tried, but it didn't arrive / it doesn't work"
|
||||||
|
|
@ -499,15 +499,48 @@ def _question_theme(text: str) -> str | None:
|
||||||
return "Where are you based?"
|
return "Where are you based?"
|
||||||
|
|
||||||
# Price / cost
|
# Price / cost
|
||||||
if any(k in s for k in ("price", "cost", "how much", "$", "€", "usd", "eur", "precio", "cuanto", "cuánto", "caro")):
|
if any(
|
||||||
|
k in s
|
||||||
|
for k in (
|
||||||
|
"price",
|
||||||
|
"cost",
|
||||||
|
"how much",
|
||||||
|
"$",
|
||||||
|
"€",
|
||||||
|
"usd",
|
||||||
|
"eur",
|
||||||
|
"precio",
|
||||||
|
"cuanto",
|
||||||
|
"cuánto",
|
||||||
|
"caro",
|
||||||
|
"prix",
|
||||||
|
"preu",
|
||||||
|
)
|
||||||
|
):
|
||||||
return "What does it cost?"
|
return "What does it cost?"
|
||||||
|
|
||||||
# Link / payment link
|
# Link / payment link
|
||||||
if any(k in s for k in ("link", "send the link", "send me the link", "where is the link", "enlace", "stripe", "paypal", "checkout", "invoice")):
|
if any(
|
||||||
|
k in s
|
||||||
|
for k in (
|
||||||
|
"link",
|
||||||
|
"send the link",
|
||||||
|
"send me the link",
|
||||||
|
"where is the link",
|
||||||
|
"enlace",
|
||||||
|
"lien",
|
||||||
|
"enllaç",
|
||||||
|
"enllac",
|
||||||
|
"stripe",
|
||||||
|
"paypal",
|
||||||
|
"checkout",
|
||||||
|
"invoice",
|
||||||
|
)
|
||||||
|
):
|
||||||
return "Can you send the link?"
|
return "Can you send the link?"
|
||||||
|
|
||||||
# Book / ebook / pdf
|
# Book / ebook / pdf
|
||||||
if any(k in s for k in ("book", "ebook", "e-book", "pdf", "libro")):
|
if any(k in s for k in ("book", "ebook", "e-book", "pdf", "libro", "livre", "llibre")):
|
||||||
return "Where do I get the book?"
|
return "Where do I get the book?"
|
||||||
|
|
||||||
# Call / schedule
|
# Call / schedule
|
||||||
|
|
@ -533,7 +566,7 @@ def _question_theme(text: str) -> str | None:
|
||||||
return "How do I book a call?"
|
return "How do I book a call?"
|
||||||
|
|
||||||
# Video
|
# Video
|
||||||
if any(k in s for k in ("video", "vídeo", "youtube")):
|
if any(k in s for k in ("video", "vídeo", "vidéo", "youtube")):
|
||||||
return "Can you send the video?"
|
return "Can you send the video?"
|
||||||
|
|
||||||
# Steps / what next
|
# Steps / what next
|
||||||
|
|
@ -541,11 +574,45 @@ def _question_theme(text: str) -> str | None:
|
||||||
return "What are the steps?"
|
return "What are the steps?"
|
||||||
|
|
||||||
# How it works / details
|
# How it works / details
|
||||||
if any(k in s for k in ("how does", "how it works", "how does it work", "how does this work", "como funciona", "cómo funciona", "more info", "details", "explain")):
|
if any(
|
||||||
|
k in s
|
||||||
|
for k in (
|
||||||
|
"how does",
|
||||||
|
"how it works",
|
||||||
|
"how does it work",
|
||||||
|
"how does this work",
|
||||||
|
"como funciona",
|
||||||
|
"cómo funciona",
|
||||||
|
"more info",
|
||||||
|
"details",
|
||||||
|
"explain",
|
||||||
|
"comment ça marche",
|
||||||
|
"ça marche",
|
||||||
|
"com funciona",
|
||||||
|
)
|
||||||
|
):
|
||||||
return "How does it work?"
|
return "How does it work?"
|
||||||
|
|
||||||
# What you do / what is this
|
# What you do / what is this
|
||||||
if any(k in s for k in ("what is this", "what do you do", "what is it", "what do you offer", "service", "services", "que es", "qué es", "que haces", "qué haces", "de que va", "de qué va")):
|
if any(
|
||||||
|
k in s
|
||||||
|
for k in (
|
||||||
|
"what is this",
|
||||||
|
"what do you do",
|
||||||
|
"what is it",
|
||||||
|
"what do you offer",
|
||||||
|
"service",
|
||||||
|
"services",
|
||||||
|
"que es",
|
||||||
|
"qué es",
|
||||||
|
"que haces",
|
||||||
|
"qué haces",
|
||||||
|
"de que va",
|
||||||
|
"de qué va",
|
||||||
|
"c'est quoi",
|
||||||
|
"cest quoi",
|
||||||
|
)
|
||||||
|
):
|
||||||
return "What is this?"
|
return "What is this?"
|
||||||
|
|
||||||
# Trust / legitimacy
|
# Trust / legitimacy
|
||||||
|
|
@ -597,6 +664,8 @@ def _offer_terms(text: str) -> set[str]:
|
||||||
("ebook", "Ebook"),
|
("ebook", "Ebook"),
|
||||||
("e-book", "Ebook"),
|
("e-book", "Ebook"),
|
||||||
("libro", "Book"),
|
("libro", "Book"),
|
||||||
|
("livre", "Book"),
|
||||||
|
("llibre", "Book"),
|
||||||
("pdf", "PDF"),
|
("pdf", "PDF"),
|
||||||
("call", "Call"),
|
("call", "Call"),
|
||||||
("llamada", "Call"),
|
("llamada", "Call"),
|
||||||
|
|
@ -935,20 +1004,17 @@ def generate_report(
|
||||||
now = datetime.now(timezone.utc).date().isoformat()
|
now = datetime.now(timezone.utc).date().isoformat()
|
||||||
report: list[str] = []
|
report: list[str] = []
|
||||||
|
|
||||||
report.append("# Socialmediatorr Instagram DM History : Plain-English Deep Report")
|
report.append("# Instagram DM History — Plain-English Deep Report")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append("## DM History Deep Report")
|
report.append("## What This Is")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append(f"**Subject:** Instagram direct messages for `@socialmediatorr`")
|
report.append(f"**Inbox:** `@socialmediatorr`")
|
||||||
report.append("**Version:** v1.0 (STYLE BIBLE EN 3.0GM)")
|
|
||||||
report.append(f"**Date:** {now}")
|
report.append(f"**Date:** {now}")
|
||||||
report.append("**Status:** REVIEW REQUIRED")
|
report.append("**Time zone used:** CET")
|
||||||
report.append("**Citation:** `if://report/socialmediatorr/instagram/dm-history/`")
|
|
||||||
report.append("**Author:** Danny Stocker | InfraFabric Research")
|
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append("### How This Report Was Made")
|
report.append("### How This Report Was Made")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append("> This is an automated count of patterns. It is not a therapy note and it is not a sales ledger.")
|
report.append("> This is a count of patterns. It is not a therapy note and it is not a sales ledger.")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append(
|
report.append(
|
||||||
"This document was generated by reading an Instagram data export and counting repeat patterns over time. "
|
"This document was generated by reading an Instagram data export and counting repeat patterns over time. "
|
||||||
|
|
@ -957,9 +1023,9 @@ def generate_report(
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append("---")
|
report.append("---")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append(f"**Context:** This inbox contains a high-volume message-and-reply system over {window_days} days.")
|
report.append(f"**Context:** This inbox contains message history over {window_days} days.")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append("> Your messaging system is working as a volume engine. The weak point is consistency at the moments where people ask to buy or book.")
|
report.append("> The system works at scale. The weak point is the “next step” moments: when people ask what to do, what it costs, or where to get it.")
|
||||||
report.append("")
|
report.append("")
|
||||||
report.append(
|
report.append(
|
||||||
"The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust."
|
"The purpose of this report is practical: define what to keep, what to remove, and what to automate safely—without damaging trust."
|
||||||
|
|
|
||||||
19
tools/README.md
Normal file
19
tools/README.md
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
# Tools
|
||||||
|
|
||||||
|
## Mermaid checks (Markdown diagrams)
|
||||||
|
|
||||||
|
This repo uses Mermaid diagrams in Markdown reports.
|
||||||
|
|
||||||
|
Local lint (partial):
|
||||||
|
|
||||||
|
- `npm install`
|
||||||
|
- `npm run verify:mermaid`
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- This check validates diagram types supported by `@mermaid-js/parser` (for example: `pie`).
|
||||||
|
- Some diagram types (for example: `flowchart`) are not supported by that parser yet and will be reported as `skipped`.
|
||||||
|
|
||||||
|
Full validation (recommended):
|
||||||
|
|
||||||
|
- Use Forgejo’s built-in PDF export for the report file. If the PDF export succeeds, the diagrams compiled successfully.
|
||||||
|
|
||||||
109
tools/verify_mermaid.mjs
Normal file
109
tools/verify_mermaid.mjs
Normal file
|
|
@ -0,0 +1,109 @@
|
||||||
|
import fs from "node:fs";
|
||||||
|
import path from "node:path";
|
||||||
|
import process from "node:process";
|
||||||
|
|
||||||
|
import { parse } from "@mermaid-js/parser";
|
||||||
|
|
||||||
|
/** True when filePath ends in a Markdown extension (.md / .markdown), case-insensitively. */
function isMarkdownFile(filePath) {
  const normalized = filePath.toLowerCase();
  return [".md", ".markdown"].some((extension) => normalized.endsWith(extension));
}
|
||||||
|
|
||||||
|
/**
 * Recursively yield every file found under rootPath (depth-first, in
 * readdir order). A rootPath that is itself a file yields just that path.
 * Directory entries that are neither files nor directories are skipped.
 */
function* walkFiles(rootPath) {
  if (fs.statSync(rootPath).isFile()) {
    yield rootPath;
    return;
  }

  for (const entry of fs.readdirSync(rootPath, { withFileTypes: true })) {
    const childPath = path.join(rootPath, entry.name);
    if (entry.isDirectory()) {
      yield* walkFiles(childPath);
    } else if (entry.isFile()) {
      yield childPath;
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Pull the contents of every ```mermaid fenced code block out of a
 * Markdown document. Returns the raw inner text of each block, in order;
 * an empty array when the document contains no mermaid fences.
 */
function extractMermaidBlocks(markdownText) {
  const fence = /```mermaid\s*([\s\S]*?)```/g;
  return Array.from(markdownText.matchAll(fence), (match) => match[1] ?? "");
}
|
||||||
|
|
||||||
|
/**
 * Guess the Mermaid diagram type from the first meaningful line of a block
 * (blank lines and %% comment lines are ignored). Returns one of the types
 * supported by @mermaid-js/parser, or null when the block is empty or uses
 * a type that parser does not handle yet (e.g. flowchart/sequence/class).
 */
function detectDiagramType(code) {
  const meaningfulLines = String(code || "")
    .replace(/\r\n?/g, "\n")
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line && !line.startsWith("%%"));

  if (meaningfulLines.length === 0) return null;

  const firstLine = meaningfulLines[0];
  const parserSupported = ["pie", "gitGraph", "architecture", "packet", "info", "radar", "treemap"];
  return parserSupported.find((type) => firstLine.startsWith(type)) ?? null;
}
|
||||||
|
|
||||||
|
/**
 * Validate every ```mermaid fenced block in Markdown files under the given
 * roots (argv entries; defaults to "reports"). Diagram types the parser
 * cannot handle yet are counted as skipped rather than failed. Prints a
 * JSON summary.
 *
 * @returns {Promise<number>} 0 when all parseable diagrams are valid, 1 otherwise.
 */
async function main() {
  const args = process.argv.slice(2);
  const roots = args.length ? args : ["reports"];

  let ok = true;
  let total = 0;
  let failures = 0;
  let skipped = 0;

  for (const root of roots) {
    // Fail with a readable message instead of an unhandled ENOENT stack
    // trace from fs.statSync when a root path does not exist (e.g. the
    // default "reports" directory is absent).
    if (!fs.existsSync(root)) {
      console.error(`[mermaid] root not found: ${root}`);
      ok = false;
      continue;
    }

    for (const filePath of walkFiles(root)) {
      if (!isMarkdownFile(filePath)) continue;

      const text = fs.readFileSync(filePath, "utf8");
      const blocks = extractMermaidBlocks(text);
      if (!blocks.length) continue;

      for (let i = 0; i < blocks.length; i++) {
        const code = String(blocks[i] || "").trim();
        total += 1;

        const diagramType = detectDiagramType(code);
        if (!diagramType) {
          // Type unsupported by @mermaid-js/parser (see detectDiagramType).
          skipped += 1;
          continue;
        }

        try {
          await parse(diagramType, code);
        } catch (err) {
          ok = false;
          failures += 1;
          const msg =
            err && typeof err === "object" && "message" in err ? String(err.message) : String(err);
          console.error(`[mermaid] ${filePath} block=${i + 1} type=${diagramType}: ${msg}`);
        }
      }
    }
  }

  if (ok) {
    console.log(JSON.stringify({ ok: true, diagrams: total, skipped }, null, 2));
    return 0;
  }

  console.error(JSON.stringify({ ok: false, diagrams: total, failures, skipped }, null, 2));
  return 1;
}
|
||||||
|
|
||||||
|
// Top-level await (ES module): surface main()'s numeric status as the process exit code.
process.exitCode = await main();
|
||||||
Loading…
Add table
Reference in a new issue