From 9286a313f09c57ab9358394c23bf25085ac58d42 Mon Sep 17 00:00:00 2001 From: danny Date: Wed, 24 Dec 2025 10:56:56 +0000 Subject: [PATCH] Use CET for time buckets --- .../dm_history_report_en_detailed.md | 66 +++++++++---------- .../generate_dm_report_detailed.py | 42 ++++++++---- 2 files changed, 64 insertions(+), 44 deletions(-) diff --git a/reports/socialmediatorr/dm_history_report_en_detailed.md b/reports/socialmediatorr/dm_history_report_en_detailed.md index e8902c1..d0fcd3f 100644 --- a/reports/socialmediatorr/dm_history_report_en_detailed.md +++ b/reports/socialmediatorr/dm_history_report_en_detailed.md @@ -49,7 +49,7 @@ This inbox has a few dominant patterns. They tell you what an auto-reply system | Most activity happened in **2025-12** | 47,315 messages in one month (87.5% of all messages in this export) | | The #1 question/topic is **Just one word: book** | 1,857 times (68.4% of all questions/requests) | | Questions/requests cluster on **Thursday, Friday** | Those two days contain most of the asking in this export | -| Most messages arrive in **18:00-23:59, 12:00-17:59 (UTC)** | If you are present in those blocks, reply rates usually improve | +| Most messages arrive in **18:00-23:59, 12:00-17:59 (CET)** | If you are present in those blocks, reply rates usually improve | | Repeat messages make up **67.6%** of your text messages | Fast replies are often repeats; custom replies are where delays happen | | Language used by people (approx) | Spanish 25.7%, English 18.1%, Unknown 56.2% | | Language used in your messages (approx) | Spanish 63.8%, English 29.2%, Unknown 7.0% | @@ -65,8 +65,8 @@ To avoid guesswork, we start with 3-month blocks (a simple way to smooth noise), |---|---:|---:|---:| | 2024 Oct-Dec | 14 | 0 | 0 | | 2025 Jan-Mar | 21 | 0 | 0 | -| 2025 Apr-Jun | 93 | 100 | 16 | -| 2025 Jul-Sep | 622 | 879 | 88 | +| 2025 Apr-Jun | 92 | 97 | 15 | +| 2025 Jul-Sep | 623 | 882 | 89 | | 2025 Oct-Dec | 9,712 | 42,628 | 2,609 | Same data as charts: @@ -75,8 +75,8 @@ Same data as charts: pie title Messages From People by 3-Month Block "2024 Oct-Dec" : 14 "2025 Jan-Mar" : 21 - "2025 Apr-Jun" : 93 - "2025 Jul-Sep" : 622 + "2025 Apr-Jun" : 92 + "2025 Jul-Sep" : 623 "2025 Oct-Dec" : 9712 ``` @@ -84,8 +84,8 @@ This shows when people replied most. A spike here usually means you posted somet ```mermaid pie title Messages You Sent by 3-Month Block - "2025 Apr-Jun" : 100 - "2025 Jul-Sep" : 879 + "2025 Apr-Jun" : 97 + "2025 Jul-Sep" : 882 "2025 Oct-Dec" : 42628 ``` @@ -107,8 +107,8 @@ This month-by-month table is the clearest view of how the inbox changed over tim | 2025-03 | 5 | 0 | 0 | n/a | | 2025-04 | 8 | 5 | 0 | n/a | | 2025-05 | 48 | 28 | 8 | 12.5% | -| 2025-06 | 37 | 67 | 8 | 87.5% | -| 2025-07 | 145 | 319 | 36 | 63.9% | +| 2025-06 | 36 | 64 | 7 | 85.7% | +| 2025-07 | 146 | 322 | 37 | 64.9% | | 2025-08 | 193 | 230 | 28 | 50.0% | | 2025-09 | 284 | 330 | 24 | 20.8% | | 2025-10 | 787 | 1,190 | 64 | 17.2% | @@ -125,24 +125,24 @@ Use this to time follow-ups and first messages. Do not spread effort evenly acro | Day of week | Messages from people | Messages you sent | Questions/requests | |---|---:|---:|---:| -| Monday | 1,627 | 8,547 | 140 | -| Tuesday | 1,952 | 9,622 | 189 | -| Wednesday | 1,242 | 5,396 | 155 | -| Thursday | 2,349 | 7,126 | 1,340 | -| Friday | 1,610 | 5,494 | 728 | -| Saturday | 840 | 3,579 | 88 | -| Sunday | 842 | 3,843 | 73 | +| Monday | 1,600 | 8,359 | 131 | +| Tuesday | 1,939 | 9,654 | 192 | +| Wednesday | 1,282 | 5,554 | 159 | +| Thursday | 2,261 | 6,908 | 1,268 | +| Friday | 1,705 | 5,733 | 803 | +| Saturday | 833 | 3,602 | 87 | +| Sunday | 842 | 3,797 | 73 | Same data as a chart: ```mermaid pie title Messages From People by Day of Week - "Monday" : 1627 - "Tuesday" : 1952 - "Wednesday" : 1242 - "Thursday" : 2349 - "Friday" : 1610 - "Saturday" : 840 + "Monday" : 1600 + "Tuesday" : 1939 + "Wednesday" : 1282 + "Thursday" : 2261 + "Friday" : 1705 + "Saturday" : 833 "Sunday" : 842 ``` @@ -150,23 +150,23 @@ pie title Messages From People by Day of Week > Most replies happen in a few time blocks. -Time zone here is UTC (a standard clock). If you work in another time zone, shift the blocks before you schedule. +Time zone here is CET (Central Europe). If you work in another time zone, shift the blocks before you schedule. -| Time of day (UTC) | Messages from people | Messages you sent | +| Time of day (CET) | Messages from people | Messages you sent | |---|---:|---:| -| 00:00-05:59 | 1,885 | 8,304 | -| 06:00-11:59 | 1,374 | 6,889 | -| 12:00-17:59 | 3,092 | 12,937 | -| 18:00-23:59 | 4,111 | 15,477 | +| 00:00-05:59 | 2,113 | 8,907 | +| 06:00-11:59 | 1,274 | 6,637 | +| 12:00-17:59 | 2,333 | 10,883 | +| 18:00-23:59 | 4,742 | 17,180 | Same data as a chart: ```mermaid -pie title Messages From People by Time of Day (UTC) - "00:00-05:59" : 1885 - "06:00-11:59" : 1374 - "12:00-17:59" : 3092 - "18:00-23:59" : 4111 +pie title Messages From People by Time of Day (CET) + "00:00-05:59" : 2113 + "06:00-11:59" : 1274 + "12:00-17:59" : 2333 + "18:00-23:59" : 4742 ``` ### Reply Speed (Why It Matters) diff --git a/sergio_instagram_messaging/generate_dm_report_detailed.py b/sergio_instagram_messaging/generate_dm_report_detailed.py index 9e6f307..7df9b04 100644 --- a/sergio_instagram_messaging/generate_dm_report_detailed.py +++ b/sergio_instagram_messaging/generate_dm_report_detailed.py @@ -13,6 +13,8 @@ from typing import Any, Iterable, Literal from .analyze_instagram_export import canonicalize_text +DEFAULT_LOCAL_TZ_NAME = "Europe/Brussels" + def _safe_chmod_600(path: Path) -> None: try: @@ -25,6 +27,17 @@ def _dt_from_ts_ms(ts_ms: int) -> datetime: return datetime.fromtimestamp(ts_ms / 1000.0, tz=timezone.utc) +def _load_tz(name: str | None) -> timezone: + tz_name = (name or "").strip() or DEFAULT_LOCAL_TZ_NAME + try: + from zoneinfo import ZoneInfo # type: ignore + + return ZoneInfo(tz_name) # type: ignore[return-value] + except Exception: + # Fallback: fixed CET (UTC+1). DST is not represented in this mode. + return timezone(timedelta(hours=1)) + + def _iso(ts_ms: int | None) -> str | None: if not ts_ms: return None @@ -488,8 +501,11 @@ def generate_report( owner_name: str | None, reply_window_hours: float, scripted_min_count: int, + local_tz_name: str | None, ) -> Path: export_root = _resolve_export_root(export_input) + local_tz = _load_tz(local_tz_name) + time_zone_label = "CET" summary: dict[str, Any] | None = None if analysis_dir: @@ -572,11 +588,13 @@ def generate_report( min_ts = e.ts_ms if min_ts is None or e.ts_ms < min_ts else min_ts max_ts = e.ts_ms if max_ts is None or e.ts_ms > max_ts else max_ts - month = _month_key(e.ts_ms) - quarter = _quarter_key(e.ts_ms) - weekday = _weekday_name(e.ts_ms) - hb = _hour_bucket(_hour(e.ts_ms)) - day = _date_key(e.ts_ms) + dt_local = _dt_from_ts_ms(e.ts_ms).astimezone(local_tz) + month = f"{dt_local.year:04d}-{dt_local.month:02d}" + q = (dt_local.month - 1) // 3 + 1 + quarter = f"{dt_local.year:04d}-Q{q}" + weekday = dt_local.strftime("%A") + hb = _hour_bucket(int(dt_local.hour)) + day = dt_local.date().isoformat() if e.is_owner: outbound_msgs += 1 @@ -691,8 +709,8 @@ def generate_report( if min_ts is None or max_ts is None: raise RuntimeError("No messages found in export.") - window_start = _dt_from_ts_ms(min_ts) - window_end = _dt_from_ts_ms(max_ts) + window_start = _dt_from_ts_ms(min_ts).astimezone(local_tz) + window_end = _dt_from_ts_ms(max_ts).astimezone(local_tz) window_days = max(1, int((window_end - window_start).total_seconds() / 86400) + 1) def median_seconds(values: list[int]) -> int | None: @@ -835,7 +853,7 @@ def generate_report( report.append(f"| Most activity happened in **{m}** | {m_total:,} messages in one month ({_pct(m_total, total_msgs)} of all messages in this export) |") report.append(f"| The #1 question/topic is **{top_theme}** | {top_theme_cnt:,} times ({_pct(top_theme_cnt, inbound_questions)} of all questions/requests) |") report.append(f"| Questions/requests cluster on **{top_question_days_s}** | Those two days contain most of the asking in this export |") - report.append(f"| Most messages arrive in **{top_time_blocks_s} (UTC)** | If you are present in those blocks, reply rates usually improve |") + report.append(f"| Most messages arrive in **{top_time_blocks_s} ({time_zone_label})** | If you are present in those blocks, reply rates usually improve |") report.append(f"| Repeat messages make up **{_pct(scripted_outbound_with_text, outbound_with_text)}** of your text messages | Fast replies are often repeats; custom replies are where delays happen |") report.append(f"| Language used by people (approx) | {in_lang_summary} |") report.append(f"| Language used in your messages (approx) | {out_lang_summary} |") @@ -935,11 +953,11 @@ def generate_report( report.append("> Most replies happen in a few time blocks.") report.append("") report.append( - "Time zone here is UTC (a standard clock). If you work in another time zone, shift the blocks before you schedule." + f"Time zone here is {time_zone_label} (Central Europe). If you work in another time zone, shift the blocks before you schedule." ) report.append("") hb_order = ["00:00-05:59", "06:00-11:59", "12:00-17:59", "18:00-23:59"] - report.append("| Time of day (UTC) | Messages from people | Messages you sent |") + report.append(f"| Time of day ({time_zone_label}) | Messages from people | Messages you sent |") report.append("|---|---:|---:|") for hb in hb_order: st = by_hour_bucket.get(hb, TimeBucketStats()) @@ -948,7 +966,7 @@ def generate_report( report.append("Same data as a chart:") report.append("") report.append("```mermaid") - report.append('pie title Messages From People by Time of Day (UTC)') + report.append(f'pie title Messages From People by Time of Day ({time_zone_label})') for hb in hb_order: v = by_hour_bucket.get(hb, TimeBucketStats()).inbound if v: @@ -1208,6 +1226,7 @@ def main(argv: list[str] | None = None) -> int: ap.add_argument("--analysis-dir", default=None, help="optional analyze_instagram_export output dir (for summary.json)") ap.add_argument("--out", default=None, help="output markdown path") ap.add_argument("--owner-name", default=None, help="owner sender_name (default: infer)") + ap.add_argument("--local-tz", default=DEFAULT_LOCAL_TZ_NAME, help="IANA time zone name (default: Europe/Brussels)") ap.add_argument("--reply-window-hours", type=float, default=48.0, help="how long counts as a response") ap.add_argument( "--scripted-min-count", @@ -1229,6 +1248,7 @@ def main(argv: list[str] | None = None) -> int: owner_name=(args.owner_name.strip() if args.owner_name else None), reply_window_hours=float(args.reply_window_hours), scripted_min_count=int(args.scripted_min_count), + local_tz_name=(args.local_tz.strip() if args.local_tz else None), ) print(json.dumps({"ok": True, "out": str(p)}, ensure_ascii=False)) return 0