From 9f36091ec91419271b5289c4baefdc00cf573b01 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 29 Jun 2026 17:27:05 +0000 Subject: [PATCH 1/2] feat(dump): migrate /dump status message to Bot API 10.1 rich messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The /dump status message (the initial "Firmware Dump Queued" post plus every progress/completion/failure edit the worker makes to it) now uses the Bot API 10.1 rich-text endpoints — sendRichMessage and editMessageText(rich_message) — instead of the legacy send_message/edit_message_text + parse_mode path. python-telegram-bot 22.7 (pinned <23.0) does not yet expose these methods, so they are called via the raw Bot API over httpx. Telegram error responses are translated back into the same telegram.error exceptions PTB raises (RetryAfter, BadRequest, Forbidden, TelegramError, NetworkError), so the queue's existing retry / dead-letter / "message is not modified" handling and the verify_telegram_context probe all keep working unchanged. InputRichMessage carries the content as a single Rich Markdown string. Rich Markdown reuses the legacy syntax with one breaking difference: *x* now means italic and bold needs **x**. Rather than rewrite every status builder (and its tests), a small pure converter, legacy_markdown_to_rich_markdown(), rewrites the already-rendered legacy text at the send boundary — doubling bold asterisks while leaving italics, code spans, fenced blocks, links and backslash escapes intact. A per-job _rich_status flag scopes this to the direct /dump message: the parallel moderated-request flow and all other legacy-Markdown messages are untouched. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_015VNyKjukMtUni9gjcKoeLN --- dumpyarabot/arq_jobs.py | 19 ++++- dumpyarabot/config.py | 6 ++ dumpyarabot/handlers.py | 8 +- dumpyarabot/message_queue.py | 153 ++++++++++++++++++++++++++++++++--- dumpyarabot/utils.py | 51 ++++++++++++ tests/test_rich_markdown.py | 75 +++++++++++++++++ 6 files changed, 298 insertions(+), 14 deletions(-) create mode 100644 tests/test_rich_markdown.py diff --git a/dumpyarabot/arq_jobs.py b/dumpyarabot/arq_jobs.py index 25f467d..ae3cad5 100644 --- a/dumpyarabot/arq_jobs.py +++ b/dumpyarabot/arq_jobs.py @@ -15,7 +15,7 @@ from rich.console import Console -from dumpyarabot.config import settings +from dumpyarabot.config import settings, RICH_MARKDOWN_PARSE_MODE from dumpyarabot.firmware_downloader import FirmwareDownloader from dumpyarabot.firmware_extractor import FirmwareExtractor from dumpyarabot.gitlab_manager import GitLabManager @@ -158,6 +158,19 @@ def _status_update_sequence(progress: Optional[Dict[str, Any]]) -> float: return 0.0 +def _status_parse_mode(job_data: Dict[str, Any]) -> str: + """Pick the parse mode for a job's status edits. + + Direct ``/dump`` jobs post their status message via sendRichMessage and set + ``_rich_status``; their edits must stay on the rich-text endpoints. Moderated + requests (which create a legacy-Markdown status message elsewhere) keep using + the legacy parse mode. + """ + if job_data.get("_rich_status"): + return RICH_MARKDOWN_PARSE_MODE + return settings.DEFAULT_PARSE_MODE + + async def _send_status_update( job_data: Dict[str, Any], message: str, @@ -219,7 +232,7 @@ async def _send_status_update( chat_id=chat_id, text=formatted_message, edit_message_id=initial_message_id, - parse_mode=settings.DEFAULT_PARSE_MODE, + parse_mode=_status_parse_mode(job_data), context={ "job_id": job_data["job_id"], "worker_id": "arq_worker", @@ -337,7 +350,7 @@ async def _send_failure_notification(job_data: Dict[str, Any], error_details: st chat_id=chat_id, text=formatted_message, edit_message_id=initial_message_id, - parse_mode=settings.DEFAULT_PARSE_MODE, + parse_mode=_status_parse_mode(job_data), context={"job_id": job_data.get("job_id", "unknown"), "type": "failure"} ) diff --git a/dumpyarabot/config.py b/dumpyarabot/config.py index ba857ae..7330597 100644 --- a/dumpyarabot/config.py +++ b/dumpyarabot/config.py @@ -44,6 +44,12 @@ class Settings(BaseSettings): settings = Settings() +# Sentinel parse mode that routes a queued/edited message through the Bot API +# 10.1 rich-text endpoints (sendRichMessage / editMessageText with rich_message) +# instead of the legacy send_message/edit_message_text + parse_mode path. +RICH_MARKDOWN_PARSE_MODE = "RichMarkdown" + + # Callback data prefixes CALLBACK_ACCEPT = "accept_" CALLBACK_REJECT = "reject_" diff --git a/dumpyarabot/handlers.py b/dumpyarabot/handlers.py index a69d391..b1dd48d 100644 --- a/dumpyarabot/handlers.py +++ b/dumpyarabot/handlers.py @@ -122,8 +122,10 @@ async def dump( initial_text += "*Elapsed:* 0s\n" initial_text += " *Worker:* Waiting for assignment...\n" - # Send initial message directly to get real Telegram message ID - initial_message = await message_queue.send_immediate_message( + # Send initial message directly to get real Telegram message ID. + # Uses the Bot API 10.1 rich-message endpoint so the whole /dump status + # message (this send plus every worker edit) renders as rich text. + initial_message = await message_queue.send_immediate_rich_message( chat_id=chat.id, text=initial_text, reply_to_message_id=None if use_privdump else message.message_id @@ -137,6 +139,8 @@ async def dump( enhanced_job_data = job.model_dump() # Store initial text so the worker can re-edit it during Telegram context verification enhanced_job_data["_queued_text"] = initial_text + # Flag the job so its status edits keep using the rich-message endpoints. + enhanced_job_data["_rich_status"] = True enhanced_job_data["metadata"] = { "telegram_context": { "chat_id": chat.id, diff --git a/dumpyarabot/message_queue.py b/dumpyarabot/message_queue.py index 3d6df9a..541cbbd 100644 --- a/dumpyarabot/message_queue.py +++ b/dumpyarabot/message_queue.py @@ -5,15 +5,17 @@ from enum import Enum from typing import Any, Dict, Optional, List +import httpx import redis.asyncio as redis from pydantic import BaseModel, Field, model_validator from rich.console import Console from telegram import Bot -from telegram.error import RetryAfter, TelegramError, NetworkError, BadRequest +from telegram.error import Forbidden, RetryAfter, TelegramError, NetworkError, BadRequest import telegram -from dumpyarabot.config import settings +from dumpyarabot.config import settings, RICH_MARKDOWN_PARSE_MODE from dumpyarabot.schemas import DumpArguments, DumpJob, JobCancelResult, JobProgress, JobStatus +from dumpyarabot.utils import legacy_markdown_to_rich_markdown console = Console() @@ -388,6 +390,120 @@ async def send_immediate_message( console.print(f"[green]Sent immediate message {message.message_id} to chat {chat_id}[/green]") return message + # ========== BOT API 10.1 RICH MESSAGE SUPPORT ========== + # python-telegram-bot (<=22.7, pinned <23.0) does not yet expose + # sendRichMessage / editMessageText(rich_message), so these are called via the + # raw Bot API. Telegram error responses are translated into the same + # telegram.error exceptions PTB raises, so the existing retry / dead-letter / + # "message is not modified" handling in _process_message works unchanged. + + def _telegram_api_base(self) -> str: + """Return the ``.../bot`` prefix for raw Bot API requests.""" + if settings.TELEGRAM_API_BASE_URL: + base = settings.TELEGRAM_API_BASE_URL.rstrip("/") + else: + base = "https://api.telegram.org" + return f"{base}/bot{settings.TELEGRAM_BOT_TOKEN}" + + async def _call_rich_api(self, method: str, payload: Dict[str, Any]) -> Dict[str, Any]: + """Call a raw Bot API method, returning ``result`` or raising a telegram error.""" + url = f"{self._telegram_api_base()}/{method}" + timeout = httpx.Timeout( + settings.TELEGRAM_TEXT_WRITE_TIMEOUT, + read=settings.TELEGRAM_TEXT_READ_TIMEOUT, + ) + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post(url, json=payload) + data = response.json() + except httpx.HTTPError as e: + raise NetworkError(f"{method} transport error: {e}") from e + except ValueError as e: # non-JSON body + raise TelegramError(f"{method} returned a non-JSON response: {e}") from e + + if data.get("ok"): + return data.get("result", {}) + + error_code = data.get("error_code") + description = data.get("description", "unknown error") + if error_code == 429: + retry_after = int((data.get("parameters") or {}).get("retry_after", 1)) + raise RetryAfter(retry_after) + if error_code == 403: + raise Forbidden(description) + if error_code == 400: + raise BadRequest(description) + raise TelegramError(f"{method} failed ({error_code}): {description}") + + @staticmethod + def _build_input_rich_message(text: str) -> Dict[str, Any]: + """Wrap legacy-Markdown ``text`` into an InputRichMessage payload.""" + return {"markdown": legacy_markdown_to_rich_markdown(text)} + + async def send_immediate_rich_message( + self, + chat_id: int, + text: str, + reply_to_message_id: Optional[int] = None, + ) -> "telegram.Message": + """Send a rich message directly via sendRichMessage and return the Message. + + Mirrors send_immediate_message but uses the Bot API 10.1 rich-text endpoint + so the returned message_id can be edited later with editMessageText(rich_message). + """ + bot = await self._ensure_bot() + + payload: Dict[str, Any] = { + "chat_id": chat_id, + "rich_message": self._build_input_rich_message(text), + } + if reply_to_message_id: + payload["reply_parameters"] = {"message_id": reply_to_message_id} + + console.print(f"[blue]Sending immediate rich message to chat {chat_id}[/blue]") + result = await self._call_rich_api("sendRichMessage", payload) + message = telegram.Message.de_json(result, bot) + console.print(f"[green]Sent immediate rich message {message.message_id} to chat {chat_id}[/green]") + return message + + async def _process_rich_message(self, message: "QueuedMessage", text: str) -> None: + """Dispatch a queued message through the rich-text endpoints. + + Raises the same telegram.error exceptions as the legacy path, so the + caller's retry / dead-letter / no-op-edit handling is reused verbatim. + """ + rich_message = self._build_input_rich_message(text) + + if message.edit_message_id: + await self._call_rich_api( + "editMessageText", + { + "chat_id": message.chat_id, + "message_id": message.edit_message_id, + "rich_message": rich_message, + }, + ) + return + + payload: Dict[str, Any] = { + "chat_id": message.chat_id, + "rich_message": rich_message, + } + if message.reply_parameters: + payload["reply_parameters"] = { + "message_id": message.reply_parameters["message_id"], + "chat_id": message.reply_parameters["chat_id"], + } + elif message.reply_to_message_id: + payload["reply_parameters"] = {"message_id": message.reply_to_message_id} + + result = await self._call_rich_api("sendRichMessage", payload) + + if message.delete_after: + asyncio.create_task( + self._auto_delete_message(message.chat_id, result["message_id"], message.delete_after) + ) + async def send_immediate_status_update( self, chat_id: int, @@ -607,6 +723,13 @@ async def _process_message(self, message: QueuedMessage) -> bool: if latest: text = latest + # Bot API 10.1 rich messages take a separate code path: the text is + # delivered as an InputRichMessage rather than text + parse_mode. + if message.parse_mode == RICH_MARKDOWN_PARSE_MODE: + await self._process_rich_message(message, text) + console.print(f"[green]Successfully processed {message.type.value} message[/green]") + return True + # Prepare common parameters kwargs = { "chat_id": message.chat_id, @@ -882,13 +1005,25 @@ async def verify_telegram_context(self, job_data: Dict[str, Any]) -> None: probe_text = job_data.get("_queued_text", f"\u23f3 Job `{job_id}` starting...") try: - await bot.edit_message_text( - chat_id=initial_chat_id, - message_id=initial_message_id, - text=probe_text, - parse_mode=settings.DEFAULT_PARSE_MODE, - disable_web_page_preview=True, - ) + if job_data.get("_rich_status"): + # The status message was sent via sendRichMessage, so it must be + # probed/edited via editMessageText(rich_message) to stay rich. + await self._call_rich_api( + "editMessageText", + { + "chat_id": initial_chat_id, + "message_id": initial_message_id, + "rich_message": self._build_input_rich_message(probe_text), + }, + ) + else: + await bot.edit_message_text( + chat_id=initial_chat_id, + message_id=initial_message_id, + text=probe_text, + parse_mode=settings.DEFAULT_PARSE_MODE, + disable_web_page_preview=True, + ) except Forbidden as e: raise RuntimeError(f"Telegram context invalid (bot blocked/forbidden): {e}") from e except BadRequest as e: diff --git a/dumpyarabot/utils.py b/dumpyarabot/utils.py index 7e59482..6f03c77 100644 --- a/dumpyarabot/utils.py +++ b/dumpyarabot/utils.py @@ -1,3 +1,4 @@ +import re import secrets import asyncio from datetime import datetime @@ -80,3 +81,53 @@ def escape_markdown(text: str) -> str: def generate_request_id() -> str: """Generate a unique request ID.""" return secrets.token_hex(4) # 8-character hex string + + +# Sentinels used to shield code spans, fenced blocks and backslash escapes while +# rewriting emphasis markers. Control characters won't occur in our status text. +_RICH_STASH_OPEN = "\x00" +_RICH_STASH_CLOSE = "\x01" +_RICH_STASH_RE = re.compile(r"\x00(\d+)\x01") +_RICH_PROTECT_RE = re.compile( + r"```.*?```" # fenced code block (pre) + r"|`[^`]*`" # inline code span + r"|\\.", # backslash escape, e.g. \* \_ \[ \` + re.DOTALL, +) + + +def legacy_markdown_to_rich_markdown(text: str) -> str: + """Convert Telegram legacy ``Markdown`` text into Bot API 10.1 Rich Markdown. + + The bot builds every status string in legacy ``Markdown`` (``*bold*``, + ``_italic_``, ``code spans`` and ``[links](url)``). Rich Markdown reuses the + same syntax with one breaking difference: a single ``*x*`` now means *italic*, + while **bold** requires ``**x**``. Italic (``_x_``), code spans, fenced blocks, + links and backslash escapes are identical in both dialects, so the only + rewrite needed is doubling the bold asterisks. + + In valid legacy Markdown every literal asterisk is backslash-escaped (``\\*``), + so once escapes, code spans and fenced blocks are shielded, every remaining + ``*`` is a bold delimiter and can be safely doubled. + + Args: + text: A string formatted for the legacy ``Markdown`` parse mode. + + Returns: + The equivalent string formatted for the Rich Markdown parse mode. + """ + if not text: + return text + + stash: List[str] = [] + + def _protect(match: "re.Match[str]") -> str: + stash.append(match.group(0)) + return f"{_RICH_STASH_OPEN}{len(stash) - 1}{_RICH_STASH_CLOSE}" + + shielded = _RICH_PROTECT_RE.sub(_protect, text) + + # Every surviving asterisk delimits legacy bold; ``**`` is Rich Markdown bold. + shielded = shielded.replace("*", "**") + + return _RICH_STASH_RE.sub(lambda m: stash[int(m.group(1))], shielded) diff --git a/tests/test_rich_markdown.py b/tests/test_rich_markdown.py new file mode 100644 index 0000000..e4224e0 --- /dev/null +++ b/tests/test_rich_markdown.py @@ -0,0 +1,75 @@ +"""Tests for the legacy-Markdown -> Bot API 10.1 Rich Markdown converter. + +The bot builds every /dump status string in Telegram's legacy ``Markdown`` +dialect, where ``*x*`` means bold. Rich Markdown reuses the same syntax but +reads ``*x*`` as *italic* and needs ``**x**`` for bold. The converter must flip +exactly the bold markers while leaving italics, code spans, fenced blocks, +links and backslash escapes untouched. +""" + +from dumpyarabot.message_formatting import format_comprehensive_progress_message +from dumpyarabot.utils import legacy_markdown_to_rich_markdown as to_rich + + +def test_bold_markers_are_doubled(): + assert to_rich("*Job ID:* done") == "**Job ID:** done" + + +def test_multiple_bold_spans_on_one_line(): + assert to_rich("*URL:* `x` *Options:* a") == "**URL:** `x` **Options:** a" + + +def test_italic_underscores_are_preserved(): + # ``_x_`` is italic in both dialects and must not change. + assert to_rich("_italic_ and *bold*") == "_italic_ and **bold**" + + +def test_asterisks_inside_code_spans_are_not_touched(): + assert to_rich("`a*b*c` *bold*") == "`a*b*c` **bold**" + + +def test_escaped_asterisk_stays_a_literal_and_is_not_doubled(): + # ``\*`` is a literal asterisk in legacy Markdown; it stays escaped in Rich + # Markdown and must NOT be turned into a bold delimiter. + assert to_rich(r"a \* b *bold*") == r"a \* b **bold**" + + +def test_escaped_underscore_inside_a_bare_url_is_preserved(): + src = r"*Repository:* https://example.com/tree/lagos\_g-user" + assert to_rich(src) == r"**Repository:** https://example.com/tree/lagos\_g-user" + + +def test_fenced_code_block_is_left_intact(): + src = "```\n*not bold*\n``` *bold*" + assert to_rich(src) == "```\n*not bold*\n``` **bold**" + + +def test_links_are_preserved(): + assert to_rich("[repo](https://t.me/) *bold*") == "[repo](https://t.me/) **bold**" + + +def test_empty_and_plain_text(): + assert to_rich("") == "" + assert to_rich("no markup here") == "no markup here" + + +async def test_converted_progress_message_has_only_double_asterisk_bold(): + """A real status message must contain no lone ``*`` bold delimiters once + converted (every bold becomes ``**``), so it renders bold under Rich Markdown.""" + job_data = { + "job_id": "abc123", + "dump_args": {"url": "https://example.com/fw.zip", "use_alt_dumper": False}, + "worker_id": "arq_worker", + } + progress = {"percentage": 45, "current_step_number": 4, "total_steps": 8} + + legacy = await format_comprehensive_progress_message(job_data, "Downloading", progress) + rich = to_rich(legacy) + + # No single-asterisk bold pairs should survive: every '*' must be part of '**'. + # Strip code spans first (asterisks there are literal and allowed). + import re + stripped = re.sub(r"`[^`]*`", "", rich) + stripped = re.sub(r"\\.", "", stripped) + assert "**" in stripped # bold labels are present + assert "*" not in stripped.replace("**", "") # nothing but doubled asterisks From 356a1c6cd54f7a4230e7ba38faf17467d3699783 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 29 Jun 2026 18:59:19 +0000 Subject: [PATCH 2/2] chore(deps): bump python-telegram-bot 22.7 -> 22.8 Moves to the latest PTB within the existing >=22.7,<23.0 pin, bringing Bot API 10.0 typed support to the rest of the bot. Note: this does NOT remove the raw Bot API calls used for the /dump rich-message migration. Rich Messages are a Bot API 10.1 feature and no released PTB exposes them yet (22.8 only reaches Bot API 10.0; upstream support is tracked in python-telegram-bot#5261). The raw sendRichMessage / editMessageText(rich_message) calls remain until PTB ships 10.1, with a TODO marking the swap. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_015VNyKjukMtUni9gjcKoeLN --- dumpyarabot/message_queue.py | 13 +++++++++---- uv.lock | 6 +++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/dumpyarabot/message_queue.py b/dumpyarabot/message_queue.py index 541cbbd..cb7523c 100644 --- a/dumpyarabot/message_queue.py +++ b/dumpyarabot/message_queue.py @@ -391,10 +391,15 @@ async def send_immediate_message( return message # ========== BOT API 10.1 RICH MESSAGE SUPPORT ========== - # python-telegram-bot (<=22.7, pinned <23.0) does not yet expose - # sendRichMessage / editMessageText(rich_message), so these are called via the - # raw Bot API. Telegram error responses are translated into the same - # telegram.error exceptions PTB raises, so the existing retry / dead-letter / + # Rich Messages landed in Bot API 10.1, but no released python-telegram-bot + # exposes them yet: 22.8 (latest, the version pinned here) only reaches Bot + # API 10.0, and upstream support is still open + # (https://github.com/python-telegram-bot/python-telegram-bot/issues/5261). + # Until a PTB release ships sendRichMessage / editMessageText(rich_message) / + # InputRichMessage, they are called here via the raw Bot API. + # TODO: replace _call_rich_api with PTB's typed methods once PTB supports 10.1. + # Telegram error responses are translated into the same telegram.error + # exceptions PTB raises, so the existing retry / dead-letter / # "message is not modified" handling in _process_message works unchanged. def _telegram_api_base(self) -> str: diff --git a/uv.lock b/uv.lock index 7b6cc87..6c41720 100644 --- a/uv.lock +++ b/uv.lock @@ -1271,15 +1271,15 @@ wheels = [ [[package]] name = "python-telegram-bot" -version = "22.7" +version = "22.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpcore", marker = "python_full_version >= '3.14'" }, { name = "httpx" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e4/25/2258161b1069e66d6c39c0a602dbe57461d4767dc0012539970ea40bc9d6/python_telegram_bot-22.7.tar.gz", hash = "sha256:784b59ea3852fe4616ad63b4a0264c755637f5d725e87755ecdee28300febf61", size = 1516454, upload-time = "2026-03-16T09:36:03.174Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/77/153517bb1ac1bba670c6fb1dbf09e1fd0730494b1705934e715391413a0d/python_telegram_bot-22.8.tar.gz", hash = "sha256:f9d3847fcb23ee603477e442800b33bb4adf851a73e0619d2050be879decf1ef", size = 1551700, upload-time = "2026-06-12T08:10:29.1Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/94/f7/0e2f89dd62f45d46d4ea0d8aec5893ce5b37389638db010c117f46f11450/python_telegram_bot-22.7-py3-none-any.whl", hash = "sha256:d72eed532cf763758cd9331b57a6d790aff0bb4d37d8f4e92149436fe21c6475", size = 745365, upload-time = "2026-03-16T09:36:01.498Z" }, + { url = "https://files.pythonhosted.org/packages/60/7c/ed7d4dd94280bd434173cae9f7a7aedaaab9af128ae4f494423a5687c820/python_telegram_bot-22.8-py3-none-any.whl", hash = "sha256:42373918097f1b837cc4e717d588c19ea79651497ec712bb5b0c76e5e63c50e1", size = 769397, upload-time = "2026-06-12T08:10:27.066Z" }, ] [package.optional-dependencies]