from __future__ import annotations

import base64
import json
import os
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable
from urllib.parse import urlsplit, urlunsplit
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen

from packages.pet_package_schema import DEFAULT_FRAMES_PER_ACTION


class QwenConfigError(RuntimeError):
    """Raised when the Qwen configuration is missing a required value."""


class QwenRequestError(RuntimeError):
    """Raised when a Qwen request fails or its response cannot be used."""


@dataclass
class QwenConfig:
    """Connection settings for the Qwen API.

    ``repr()`` shows the API key only in the redacted form produced by
    ``mask_secret`` so the object can be logged without leaking the secret.
    """

    api_key: str
    base_url: str
    model: str

    @property
    def masked_api_key(self) -> str:
        """Return the API key redacted by ``mask_secret``."""
        redacted = mask_secret(self.api_key)
        return redacted

    def __repr__(self) -> str:
        """Return a debug representation that exposes only the masked key."""
        rendered_fields = ", ".join(
            (
                f"api_key={self.masked_api_key!r}",
                f"base_url={self.base_url!r}",
                f"model={self.model!r}",
            )
        )
        return f"QwenConfig({rendered_fields})"


def mask_secret(value: str) -> str:
    """Return a redacted form of *value* suitable for logs.

    Values shorter than 12 characters are hidden entirely as ``"***"``;
    longer values keep only their first four and last four characters.
    """
    too_short_to_reveal = len(value) < 12
    if too_short_to_reveal:
        return "***"
    head, tail = value[:4], value[-4:]
    return head + "..." + tail


def load_qwen_config(
    env_path: str | Path = ".env.local",
    *,
    model_keys: tuple[str, ...] | None = None,
) -> QwenConfig:
    """Build a ``QwenConfig`` from an env file overlaid with process variables.

    Values read from *env_path* are overridden by any ``QWEN_``-prefixed
    variable in ``os.environ``. The model is the first non-blank value found
    under *model_keys*, tried in order; when *model_keys* is empty or ``None``
    the order is ``QWEN_MODEL_REVIEW``, ``QWEN_MODEL_VISION``,
    ``QWEN_MODEL_DEFAULT``.

    Raises:
        QwenConfigError: If the API key, base URL, or model is missing/blank.
    """
    settings = dict(_read_env_file(Path(env_path)))
    # Process environment wins over the file, but only for QWEN_* names.
    for name, value in os.environ.items():
        if name.startswith("QWEN_"):
            settings[name] = value

    if model_keys:
        candidate_keys = model_keys
    else:
        candidate_keys = ("QWEN_MODEL_REVIEW", "QWEN_MODEL_VISION", "QWEN_MODEL_DEFAULT")

    api_key = settings.get("QWEN_API_KEY", "").strip()
    base_url = settings.get("QWEN_BASE_URL", "").strip().rstrip("/")

    model = ""
    for candidate_key in candidate_keys:
        candidate = settings.get(candidate_key, "").strip()
        if candidate:
            model = candidate
            break

    if not (api_key and base_url and model):
        raise QwenConfigError(
            "Qwen config is incomplete. Set QWEN_API_KEY, QWEN_BASE_URL, and one model key from: "
            + ", ".join(candidate_keys)
            + "."
        )
    return QwenConfig(api_key=api_key, base_url=base_url, model=model)


class QwenReviewClient:
    """Client that asks a Qwen vision model to review generated pet assets.

    Each ``review_*`` method sends one image plus instructions to the chat
    completions endpoint and returns a dict with the keys ``status``,
    ``provider``, ``model``, ``response_id`` and ``review`` (the parsed model
    answer).
    """

    def __init__(
        self,
        *,
        api_key: str,
        base_url: str,
        model: str,
        opener: Callable[..., Any] = urlopen,
    ) -> None:
        """Store the credentials and endpoint.

        Args:
            api_key: Bearer token sent in the ``Authorization`` header.
            base_url: API base URL; trailing slashes are removed.
            model: Model name placed in every request payload.
            opener: Callable used like ``urlopen(request, timeout=...)``;
                injectable so the HTTP layer can be replaced.
        """
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.opener = opener

    @classmethod
    def from_env(cls, env_path: str | Path = ".env.local") -> "QwenReviewClient":
        """Create a client from *env_path* and ``QWEN_*`` environment variables.

        The model is taken from ``QWEN_MODEL_REVIEW``, then
        ``QWEN_MODEL_DEFAULT``, then ``QWEN_MODEL_VISION``.
        """
        config = load_qwen_config(
            env_path,
            model_keys=("QWEN_MODEL_REVIEW", "QWEN_MODEL_DEFAULT", "QWEN_MODEL_VISION"),
        )
        return cls(api_key=config.api_key, base_url=config.base_url, model=config.model)

    def review_pet_package(
        self,
        *,
        pet_name: str,
        description: str,
        image_bytes: bytes,
        image_mime: str,
    ) -> dict[str, Any]:
        """Review a generated pet asset for likeness and product positioning.

        Args:
            pet_name: Display name inserted into the prompt.
            description: Package description inserted into the prompt.
            image_bytes: Image to review.
            image_mime: MIME type used when encoding the image as a data URL.

        Raises:
            QwenRequestError: If the request fails or the response is unusable.
        """
        data_url = _to_data_url(image_bytes, image_mime)
        system_prompt = (
            "You review a memorial desktop pet asset. Return compact JSON only. "
            "Assess high-fidelity likeness to the authorized pet reference, recognizable pet traits, "
            "calm low-disturbance desktop use, safe preview framing, and whether the asset avoids "
            "pixelization, blocky downsampling, therapy, revival, chat, or productivity positioning. "
            "The website shell may use pixel styling, but generated desktop pet assets must not be pixelized."
        )
        user_prompt = (
            "Review this generated pet asset. "
            f"Pet display name: {pet_name}. "
            f"Package description: {description}. "
            "The generated desktop pet asset must preserve high-fidelity likeness and must not be pixelized. "
            "Return JSON with keys score (0-1), passed (boolean), notes (short), risks (array)."
        )
        return self._run_review(system_prompt=system_prompt, user_prompt=user_prompt, data_url=data_url)

    def review_action_frames(
        self,
        *,
        pet_name: str,
        action: str,
        image_bytes: bytes,
        image_mime: str,
    ) -> dict[str, Any]:
        """Review one generated action row for identity and action semantics.

        Args:
            pet_name: Display name inserted into the prompt.
            action: Action name; also selects the action-specific hard rules.
            image_bytes: Image of the action row.
            image_mime: MIME type used when encoding the image as a data URL.

        Raises:
            QwenRequestError: If the request fails or the response is unusable.
        """
        data_url = _to_data_url(image_bytes, image_mime)
        action_requirements = _action_visual_review_requirements(action)
        system_prompt = (
            "You review one generated action row for a memorial desktop pet. Return compact JSON only. "
            "Check same pet identity, recognizable pet traits, full-body visibility, clean background removal readiness, "
            "and action semantics. Reject if the row changes species, becomes a simplified unrelated animal, "
            "contains cropped or broken bodies, or does not visibly perform the requested action."
        )
        user_prompt = (
            "Please review one generated action row. "
            f"Pet display name: {pet_name}. "
            f"Action to verify: {action}. "
            f"The {DEFAULT_FRAMES_PER_ACTION} frames must look like the same pet identity and must visibly match the action semantics. "
            f"{action_requirements} "
            "Return JSON with keys score (0-1), passed (boolean), action_ok (boolean), identity_ok (boolean), "
            f"notes (short), risks (array). For walk, also return front_paw_sequence ({DEFAULT_FRAMES_PER_ACTION} short strings), "
            f"hind_paw_sequence ({DEFAULT_FRAMES_PER_ACTION} short strings), gait_cycle_ok (boolean), two_frame_loop_ok (boolean), "
            "and bad_frames (array of 1-based frame numbers)."
        )
        return self._run_review(system_prompt=system_prompt, user_prompt=user_prompt, data_url=data_url)

    def review_contact_sheet(
        self,
        *,
        pet_name: str,
        image_bytes: bytes,
        image_mime: str,
    ) -> dict[str, Any]:
        """Review the final contact sheet before installation.

        Args:
            pet_name: Display name inserted into the prompt.
            image_bytes: Image of the contact sheet.
            image_mime: MIME type used when encoding the image as a data URL.

        Raises:
            QwenRequestError: If the request fails or the response is unusable.
        """
        data_url = _to_data_url(image_bytes, image_mime)
        system_prompt = (
            "You review the final 6-row contact sheet for a memorial desktop pet. Return compact JSON only. "
            "Reject packages with wrong-facing frames or action rows that do not match hard required labels. "
            "Tail_wag may be a subtle low-disturbance row; do not reject solely because tail_wag resembles idle or look."
        )
        user_prompt = (
            "Please review the final 6-row contact sheet before installation. "
            f"Pet display name: {pet_name}. "
            "Rows are fixed: Row 1 idle, Row 2 sleep, Row 3 walk, Row 4 look, Row 5 sit, "
            "Row 6 tail_wag. "
            f"Row 2 sleep: all {DEFAULT_FRAMES_PER_ACTION} frames must keep the same sleeping direction/orientation; reject any flipped, "
            "opposite-facing, upright, or non-sleeping frame. "
            "Row 3 walk: this project intentionally uses a two-frame walk loop. "
            f"All {DEFAULT_FRAMES_PER_ACTION} frames must face and walk right in side view; reject any left-facing, front-facing, sitting, "
            "or loafing frame. Row 3 frames 1 and 2 must be two distinct walking key poses; "
            f"Row 3 frames 3 through {DEFAULT_FRAMES_PER_ACTION} intentionally repeat Row 3 frames 1 and 2 for looping. "
            "Reject if frames 1 and 2 are the same paw stance or only body translation. "
            "Row 4 look should use a stable body with head/eye direction changes. "
            "Row 6 tail_wag may be subtle; a small visible tail or rear-body variation is acceptable for a calm desktop pet. "
            "Do not reject solely because tail_wag resembles idle or look when identity, full-body visibility, "
            "walk direction, and sleep direction are acceptable. "
            "Also check same pet identity, full-body visibility, calm low-disturbance tone, and no text/scenery. "
            "Return JSON with keys score (0-1), passed (boolean), identity_ok (boolean), "
            "row_semantics_ok (boolean), direction_ok (boolean), row_distinct_ok (boolean), "
            "gait_cycle_ok (boolean for Row 3 walk), two_frame_loop_ok (boolean for Row 3 walk), "
            "repair_actions (array of action names needing regeneration), "
            "notes (short), risks (array)."
        )
        return self._run_review(system_prompt=system_prompt, user_prompt=user_prompt, data_url=data_url)

    def _run_review(self, *, system_prompt: str, user_prompt: str, data_url: str) -> dict[str, Any]:
        """Send one system + user (text and image) exchange and wrap the parsed review."""
        payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": system_prompt},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_prompt},
                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
                },
            ],
            "temperature": 0,
        }
        raw = self._post_chat_completions(payload)
        parsed_review = _parse_review_content(self._message_content(raw))
        return {
            "status": "ok",
            "provider": "qwen",
            "model": self.model,
            "response_id": raw.get("id"),
            "review": parsed_review,
        }

    @staticmethod
    def _message_content(raw: Any) -> Any:
        """Return the first choice's message content, or ``""`` if it is absent.

        An empty ``choices`` list, a missing key, a ``null`` content, or an
        unexpected container type all yield ``""`` instead of raising.
        """
        choices = raw.get("choices") if isinstance(raw, dict) else None
        first_choice = choices[0] if isinstance(choices, list) and choices else None
        message = first_choice.get("message") if isinstance(first_choice, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        return "" if content is None else content

    def _post_chat_completions(self, payload: dict[str, Any]) -> dict[str, Any]:
        """POST *payload* to the chat completions endpoint and return the JSON object.

        Raises:
            QwenRequestError: On an HTTP error status, a transport failure
                (including read timeouts), or a body that is not a JSON object.
        """
        body = json.dumps(payload).encode("utf-8")
        request = Request(
            _chat_completions_url(self.base_url),
            data=body,
            method="POST",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
        )
        try:
            with self.opener(request, timeout=60) as response:
                parsed = json.loads(response.read().decode("utf-8"))
        except HTTPError as exc:
            raise QwenRequestError(f"Qwen request failed with HTTP {exc.code}") from exc
        except URLError as exc:
            raise QwenRequestError("Qwen request failed before receiving a response") from exc
        except OSError as exc:
            # urllib does not wrap timeouts/resets that happen while waiting for
            # or reading the response, so they surface as plain OSError.
            raise QwenRequestError("Qwen request timed out or the connection failed") from exc
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            raise QwenRequestError("Qwen response was not valid JSON") from exc
        if not isinstance(parsed, dict):
            raise QwenRequestError("Qwen response was not a JSON object")
        return parsed


def _read_env_file(path: Path) -> dict[str, str]:
    """Parse a dotenv-style file into a dict of ``KEY=value`` pairs.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. A leading ``export `` is dropped from the key, and surrounding
    double then single quote characters are stripped from the value.

    Returns:
        The parsed pairs, or an empty dict when *path* is not an existing
        regular file.
    """
    # is_file() also rejects directories, which exists() would let through
    # and read_text() would then fail on.
    if not path.is_file():
        return {}
    values: dict[str, str] = {}
    # utf-8-sig drops a leading BOM so it cannot become part of the first key.
    for raw_line in path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        if key.startswith("export "):
            key = key[len("export "):].strip()
        values[key] = value.strip().strip('"').strip("'")
    return values


def _env_int(values: dict[str, str], key: str, default: int) -> int:
    """Read *key* from *values* as an int, falling back to *default*.

    The fallback is returned when the key is absent or its stripped text is
    not a valid integer literal.
    """
    raw_value = values.get(key, default)
    try:
        parsed = int(str(raw_value).strip())
    except (TypeError, ValueError):
        return default
    return parsed


def _env_float(values: dict[str, str], key: str, default: float) -> float:
    """Read *key* from *values* as a finite float, falling back to *default*.

    The fallback is returned when the key is absent, when its stripped text
    is not a valid float literal, or when it parses to ``nan``/``inf``.
    """
    import math

    try:
        parsed = float(str(values.get(key, default)).strip())
    except (TypeError, ValueError):
        return default
    # float() accepts "nan" and "inf"; neither is a usable delay or timeout.
    if not math.isfinite(parsed):
        return default
    return parsed


def _retry_delay_seconds(exc: Exception, attempt: int, retry_base_seconds: float) -> float:
    """Return how many seconds to wait before the next retry.

    A numeric ``Retry-After`` value found on ``exc.headers`` takes priority
    and is clamped to the range 0-300. Otherwise the delay grows linearly as
    ``retry_base_seconds * (attempt + 1)``, with a negative base treated as 0
    and the result capped at 300.
    """
    ceiling = 300.0

    header_source = getattr(exc, "headers", None)
    try:
        hinted = None if header_source is None else header_source.get("Retry-After")
    except AttributeError:
        # ``headers`` exists but does not behave like a mapping.
        hinted = None

    if hinted is not None:
        try:
            requested = float(str(hinted).strip())
        except ValueError:
            # Non-numeric hint: fall through to the computed backoff.
            pass
        else:
            return max(0.0, min(requested, ceiling))

    non_negative_base = max(retry_base_seconds, 0.0)
    return min(non_negative_base * (attempt + 1), ceiling)


class QwenPhotoAnalyzer:
    """Client that asks a Qwen vision model to extract visual traits from pet photos."""

    def __init__(
        self,
        *,
        api_key: str,
        base_url: str,
        model: str,
        opener: Callable[..., Any] = urlopen,
    ) -> None:
        """Store the credentials and endpoint.

        Args:
            api_key: Bearer token sent in the ``Authorization`` header.
            base_url: API base URL; trailing slashes are removed.
            model: Model name placed in the request payload.
            opener: Callable used like ``urlopen(request, timeout=...)``;
                injectable so the HTTP layer can be replaced.
        """
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.opener = opener

    @classmethod
    def from_env(cls, env_path: str | Path = ".env.local") -> "QwenPhotoAnalyzer":
        """Create an analyzer from *env_path* and ``QWEN_*`` environment variables.

        The model is taken from ``QWEN_MODEL_VISION``, then
        ``QWEN_MODEL_DEFAULT``, then ``QWEN_MODEL_REVIEW``.
        """
        config = load_qwen_config(
            env_path,
            model_keys=("QWEN_MODEL_VISION", "QWEN_MODEL_DEFAULT", "QWEN_MODEL_REVIEW"),
        )
        return cls(api_key=config.api_key, base_url=config.base_url, model=config.model)

    def analyze_pet_photos(self, *, pet_name: str, notes: str, images: list[dict[str, Any]]) -> dict[str, Any]:
        """Send the reference photos to the model and return the normalized analysis.

        Args:
            pet_name: Display name inserted into the prompt.
            notes: Optional owner notes; only the first 240 characters are
                sent, and a falsy value is treated as empty.
            images: Dicts that each provide ``"bytes"`` and ``"mime"`` keys.

        Raises:
            QwenRequestError: If the request fails or the response is unusable.
        """
        # Tolerate notes=None instead of failing on the slice.
        trimmed_notes = (notes or "")[:240]
        content: list[dict[str, Any]] = [
            {
                "type": "text",
                "text": (
                    "Analyze these authorized pet reference images for a memorial desktop pet generator. "
                    "Return compact JSON only with keys: species, base_color, accent_color, eye_color, "
                    "body_shape, distinctive_marks, confidence, safe_for_generation, user_visible_summary. "
                    "Do not include raw image details about people, home, location, file paths, or private memory text. "
                    f"Pet display name: {pet_name}. Optional owner notes for visual/action cues only: {trimmed_notes}"
                ),
            }
        ]
        content.extend(
            {
                "type": "image_url",
                "image_url": {"url": _to_data_url(image["bytes"], image["mime"])},
            }
            for image in images
        )

        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "system",
                    "content": (
                        "You extract safe visual traits for a high-fidelity, low-disturbance memorial "
                        "desktop pet asset. "
                        "Preserve natural proportions, coat texture, markings, eye color, and overall body type; "
                        "do not recommend pixel art, pixelization, or blocky downsampling. "
                        "Never frame the pet as revived, conscious, therapeutic, chatty, or productivity-related."
                    ),
                },
                {"role": "user", "content": content},
            ],
            "temperature": 0,
        }
        raw = self._post_chat_completions(payload)
        parsed = _parse_review_content(self._message_content(raw))
        return _normalize_photo_analysis(parsed)

    @staticmethod
    def _message_content(raw: Any) -> Any:
        """Return the first choice's message content, or ``""`` if it is absent.

        An empty ``choices`` list, a missing key, a ``null`` content, or an
        unexpected container type all yield ``""`` instead of raising.
        """
        choices = raw.get("choices") if isinstance(raw, dict) else None
        first_choice = choices[0] if isinstance(choices, list) and choices else None
        message = first_choice.get("message") if isinstance(first_choice, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        return "" if content is None else content

    def _post_chat_completions(self, payload: dict[str, Any]) -> dict[str, Any]:
        """POST *payload* to the chat completions endpoint and return the JSON object.

        Raises:
            QwenRequestError: On an HTTP error status, a transport failure
                (including read timeouts), or a body that is not a JSON object.
        """
        body = json.dumps(payload).encode("utf-8")
        request = Request(
            _chat_completions_url(self.base_url),
            data=body,
            method="POST",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
        )
        try:
            with self.opener(request, timeout=60) as response:
                parsed = json.loads(response.read().decode("utf-8"))
        except HTTPError as exc:
            raise QwenRequestError(f"Qwen request failed with HTTP {exc.code}") from exc
        except URLError as exc:
            raise QwenRequestError("Qwen request failed before receiving a response") from exc
        except OSError as exc:
            # urllib does not wrap timeouts/resets that happen while waiting for
            # or reading the response, so they surface as plain OSError.
            raise QwenRequestError("Qwen request timed out or the connection failed") from exc
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            raise QwenRequestError("Qwen response was not valid JSON") from exc
        if not isinstance(parsed, dict):
            raise QwenRequestError("Qwen response was not a JSON object")
        return parsed


def _chat_completions_url(base_url: str) -> str:
    """Return the chat completions endpoint for *base_url*.

    Trailing slashes are dropped, and ``/chat/completions`` is appended
    unless the URL already ends with it.
    """
    endpoint_suffix = "/chat/completions"
    trimmed = base_url.rstrip("/")
    return trimmed if trimmed.endswith(endpoint_suffix) else trimmed + endpoint_suffix


class QwenImageGenerationClient:
    """Client for Qwen image generation and image-edit requests.

    Every ``generate_*`` method posts one multimodal generation request,
    downloads the resulting image and returns a dict with the keys
    ``status``, ``provider``, ``model``, ``request_id``, ``image_bytes``,
    ``image_mime``, ``usage`` and ``prompt_sha256``.
    """

    provider = "qwen"

    def __init__(
        self,
        *,
        api_key: str,
        base_url: str,
        model: str,
        edit_model: str | None = None,
        opener: Callable[..., Any] = urlopen,
        sleep: Callable[[float], Any] = time.sleep,
        max_retries: int = 4,
        retry_base_seconds: float = 30.0,
        request_delay_seconds: float = 0.0,
        timeout_seconds: float = 180.0,
        canonical_reference_limit: int = 3,
        action_reference_limit: int = 1,
    ) -> None:
        """Store settings, clamping the numeric ones to safe ranges.

        Args:
            api_key: Bearer token sent in the ``Authorization`` header.
            base_url: API base URL; trailing slashes are removed.
            model: Model used for generation from reference photos.
            edit_model: Model used for action generation; a falsy value
                selects ``"qwen-image-edit"``.
            opener: Callable used like ``urlopen(target, timeout=...)``.
            sleep: Callable used for retry and pacing delays.
            max_retries: Retries after the first attempt; negatives become 0.
            retry_base_seconds: Base for the linear retry backoff (>= 0).
            request_delay_seconds: Pause after each generation request (>= 0).
            timeout_seconds: Per-request timeout; raised to at least 10.
            canonical_reference_limit: Reference photos sent, clamped to 1-3.
            action_reference_limit: Clamped to exactly 1.
        """
        self.api_key = api_key
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.edit_model = edit_model or "qwen-image-edit"
        self.opener = opener
        self.sleep = sleep
        self.max_retries = max(0, int(max_retries))
        self.retry_base_seconds = max(0.0, float(retry_base_seconds))
        self.request_delay_seconds = max(0.0, float(request_delay_seconds))
        self.timeout_seconds = max(10.0, float(timeout_seconds))
        self.canonical_reference_limit = max(1, min(int(canonical_reference_limit), 3))
        # Lower and upper bound are both 1, so this is always 1.
        self.action_reference_limit = max(1, min(int(action_reference_limit), 1))

    @classmethod
    def from_env(
        cls,
        env_path: str | Path = ".env.local",
        *,
        opener: Callable[..., Any] = urlopen,
    ) -> "QwenImageGenerationClient":
        """Create a client from *env_path* and ``QWEN_*`` environment variables.

        The model comes from ``QWEN_MODEL_IMAGE`` then ``QWEN_MODEL_DEFAULT``;
        the edit model from ``QWEN_MODEL_IMAGE_EDIT`` (default
        ``"qwen-image-edit"``). Retry, delay, timeout and reference limits are
        read from the ``QWEN_IMAGE_*`` keys, keeping the constructor defaults
        when a value is absent or unparsable.
        """
        env_values = _read_env_file(Path(env_path))
        merged = {**env_values, **{key: value for key, value in os.environ.items() if key.startswith("QWEN_")}}
        config = load_qwen_config(
            env_path,
            model_keys=("QWEN_MODEL_IMAGE", "QWEN_MODEL_DEFAULT"),
        )
        # Strip so a whitespace-only value falls back to the default model.
        edit_model = (merged.get("QWEN_MODEL_IMAGE_EDIT") or "").strip() or "qwen-image-edit"
        return cls(
            api_key=config.api_key,
            base_url=config.base_url,
            model=config.model,
            edit_model=edit_model,
            opener=opener,
            max_retries=_env_int(merged, "QWEN_IMAGE_MAX_RETRIES", 4),
            retry_base_seconds=_env_float(merged, "QWEN_IMAGE_RETRY_BASE_SECONDS", 30.0),
            request_delay_seconds=_env_float(merged, "QWEN_IMAGE_REQUEST_DELAY_SECONDS", 0.0),
            timeout_seconds=_env_float(merged, "QWEN_IMAGE_TIMEOUT_SECONDS", 180.0),
            canonical_reference_limit=_env_int(merged, "QWEN_IMAGE_CANONICAL_REFERENCE_LIMIT", 3),
            action_reference_limit=_env_int(merged, "QWEN_IMAGE_ACTION_REFERENCE_LIMIT", 1),
        )

    def generate_pet_spritesheet(
        self,
        *,
        pet_name: str,
        notes: str,
        analysis: dict[str, Any],
        images: list[dict[str, Any]],
        size: str = "1024*1024",
    ) -> dict[str, Any]:
        """Generate a spritesheet from the reference photos with ``self.model``.

        Raises:
            ValueError: If *images* is empty.
            QwenRequestError: If the request or the image download fails.
        """
        if not images:
            raise ValueError("at least one image is required for Qwen image generation")
        _ensure_qwen_image_model(self.model)

        prompt = _build_pet_image_prompt(pet_name=pet_name, notes=notes, analysis=analysis)
        content = self._reference_content(images, prompt)
        return self._generate_image(
            content=content,
            prompt=prompt,
            size=size,
            negative_prompt=(
                "pixel art, blocky pixels, 8-bit, text, logo, speech bubble, human, room background, "
                "medical claim, therapy claim, resurrection, chatbot, productivity assistant, transparent background, "
                "alpha transparency, checkerboard transparency, black cutout holes"
            ),
        )

    def generate_canonical_pet(
        self,
        *,
        pet_name: str,
        notes: str,
        analysis: dict[str, Any],
        images: list[dict[str, Any]],
        size: str = "1024*1024",
    ) -> dict[str, Any]:
        """Generate the canonical pet image from the reference photos with ``self.model``.

        Raises:
            ValueError: If *images* is empty.
            QwenRequestError: If the request or the image download fails.
        """
        if not images:
            raise ValueError("at least one image is required for Qwen image generation")
        _ensure_qwen_image_model(self.model)
        prompt = _build_canonical_pet_prompt(pet_name=pet_name, notes=notes, analysis=analysis)
        content = self._reference_content(images, prompt)
        return self._generate_image(content=content, prompt=prompt, size=size)

    def generate_action_frames(
        self,
        *,
        action: str,
        pet_name: str,
        notes: str,
        analysis: dict[str, Any],
        images: list[dict[str, Any]],
        canonical_image_bytes: bytes,
        size: str = "1024*1024",
    ) -> dict[str, Any]:
        """Generate the frames for *action* from the canonical image with the edit model.

        *images* is accepted for signature compatibility and is not sent.

        Raises:
            ValueError: If *canonical_image_bytes* is empty.
            QwenRequestError: If the request or the image download fails.
        """
        if not canonical_image_bytes:
            raise ValueError("canonical image is required for action generation")
        _ensure_qwen_image_model(self.edit_model)
        prompt = _build_action_frames_prompt(action=action, pet_name=pet_name, notes=notes, analysis=analysis)
        content = [
            {"image": _to_data_url(canonical_image_bytes, "image/png")},
            {"text": prompt},
        ]
        return self._generate_image(content=content, prompt=prompt, size=size, model=self.edit_model)

    def generate_action_strip(
        self,
        *,
        action: str,
        pet_name: str,
        notes: str,
        analysis: dict[str, Any],
        images: list[dict[str, Any]],
        canonical_image_bytes: bytes,
        layout_guide_image_bytes: bytes,
        size: str = "1280*720",
    ) -> dict[str, Any]:
        """Generate an action strip from the layout guide and the canonical image.

        The layout guide is sent first, then the canonical image, then the
        prompt. *images* is accepted for signature compatibility and is not sent.

        Raises:
            ValueError: If either required image is empty.
            QwenRequestError: If the request or the image download fails.
        """
        if not canonical_image_bytes:
            raise ValueError("canonical image is required for action generation")
        if not layout_guide_image_bytes:
            raise ValueError("layout guide image is required for action strip generation")
        _ensure_qwen_image_model(self.edit_model)
        prompt = _build_action_strip_prompt(action=action, pet_name=pet_name, notes=notes, analysis=analysis)
        content = [
            {"image": _to_data_url(layout_guide_image_bytes, "image/png")},
            {"image": _to_data_url(canonical_image_bytes, "image/png")},
            {"text": prompt},
        ]
        return self._generate_image(content=content, prompt=prompt, size=size, model=self.edit_model)

    def generate_action_frame(
        self,
        *,
        action: str,
        frame_index: int,
        candidate_index: int = 0,
        pet_name: str,
        notes: str,
        analysis: dict[str, Any],
        images: list[dict[str, Any]],
        canonical_image_bytes: bytes,
        previous_frame_image_bytes: bytes | None = None,
        size: str = "1024*1024",
    ) -> dict[str, Any]:
        """Generate a single frame of *action* with the edit model.

        The previous frame is attached as a second image only when it is
        provided and *action* is not ``"walk"``. *images* is accepted for
        signature compatibility and is not sent.

        Raises:
            ValueError: If the canonical image is empty or *frame_index* is
                outside ``0 .. DEFAULT_FRAMES_PER_ACTION - 1``.
            QwenRequestError: If the request or the image download fails.
        """
        if not canonical_image_bytes:
            raise ValueError("canonical image is required for action generation")
        self._check_frame_index(frame_index)
        _ensure_qwen_image_model(self.edit_model)
        attach_previous_frame = bool(previous_frame_image_bytes) and action != "walk"
        prompt = _build_action_frame_prompt(
            action=action,
            frame_index=frame_index,
            candidate_index=candidate_index,
            pet_name=pet_name,
            notes=notes,
            analysis=analysis,
            has_previous_frame=attach_previous_frame,
        )
        content: list[dict[str, Any]] = [{"image": _to_data_url(canonical_image_bytes, "image/png")}]
        if attach_previous_frame:
            content.append({"image": _to_data_url(previous_frame_image_bytes, "image/png")})
        content.append({"text": prompt})
        return self._generate_image(content=content, prompt=prompt, size=size, model=self.edit_model)

    def generate_action_frame_minimal(
        self,
        *,
        action: str,
        frame_index: int,
        candidate_index: int = 0,
        pet_name: str,
        notes: str,
        analysis: dict[str, Any],
        canonical_image_bytes: bytes | None = None,
        size: str = "1024*1024",
    ) -> dict[str, Any]:
        """Generate a single frame using the minimal prompt and a shorter negative prompt.

        The canonical image is optional here; when absent only the prompt is sent.

        Raises:
            ValueError: If *frame_index* is outside
                ``0 .. DEFAULT_FRAMES_PER_ACTION - 1``.
            QwenRequestError: If the request or the image download fails.
        """
        self._check_frame_index(frame_index)
        _ensure_qwen_image_model(self.edit_model)
        prompt = _build_minimal_action_frame_prompt(
            action=action,
            frame_index=frame_index,
            candidate_index=candidate_index,
            pet_name=pet_name,
            notes=notes,
            analysis=analysis,
        )
        content: list[dict[str, Any]] = []
        if canonical_image_bytes:
            content.append({"image": _to_data_url(canonical_image_bytes, "image/png")})
        content.append({"text": prompt})
        return self._generate_image(
            content=content,
            prompt=prompt,
            size=size,
            model=self.edit_model,
            negative_prompt=(
                "pixel art, text, logo, human, scenery, speech bubble, multiple copies, cropped body, "
                "transparent background, alpha transparency, checkerboard transparency, black cutout holes"
            ),
        )

    @staticmethod
    def _check_frame_index(frame_index: int) -> None:
        """Raise ``ValueError`` unless *frame_index* addresses a valid frame."""
        if frame_index < 0 or frame_index >= DEFAULT_FRAMES_PER_ACTION:
            raise ValueError(f"frame_index must be between 0 and {DEFAULT_FRAMES_PER_ACTION - 1}")

    def _reference_content(self, images: list[dict[str, Any]], prompt: str) -> list[dict[str, Any]]:
        """Return message content: up to ``canonical_reference_limit`` photos, then the prompt."""
        content: list[dict[str, Any]] = [
            {"image": _to_data_url(image["bytes"], image["mime"])}
            for image in images[: self.canonical_reference_limit]
        ]
        content.append({"text": prompt})
        return content

    def _generate_image(
        self,
        *,
        content: list[dict[str, Any]],
        prompt: str,
        size: str,
        model: str | None = None,
        negative_prompt: str | None = None,
    ) -> dict[str, Any]:
        """Post one generation request, download the image and build the result dict.

        Args:
            content: Message content parts (images and text) for the user turn.
            prompt: Prompt text; only its SHA-256 is recorded in the result.
            size: Size string forwarded in the request parameters.
            model: Model to use; falls back to ``self.model`` when falsy.
            negative_prompt: Overrides the default negative prompt when given.
        """
        if negative_prompt is None:
            negative_prompt = (
                "pixel art, blocky pixels, 8-bit, text, logo, speech bubble, human, room background, "
                "medical claim, therapy claim, resurrection, chatbot, productivity assistant, duplicate identity drift, "
                "transparent background, alpha transparency, checkerboard transparency, transparent holes inside body, "
                "black cutout holes, missing fur patches, broken alpha mask"
            )
        model = model or self.model
        payload = {
            "model": model,
            "input": {"messages": [{"role": "user", "content": content}]},
            "parameters": {
                "n": 1,
                "size": size,
                "watermark": False,
                "prompt_extend": False,
                "negative_prompt": negative_prompt,
            },
        }
        raw = self._post_multimodal_generation(payload)
        if self.request_delay_seconds:
            self.sleep(self.request_delay_seconds)
        image_url = _extract_generated_image_url(raw)
        image_bytes = self._download_generated_image(image_url)
        return {
            "status": "ok",
            "provider": "qwen",
            "model": model,
            "request_id": raw.get("request_id"),
            "image_bytes": image_bytes,
            "image_mime": "image/png",
            "usage": raw.get("usage", {}),
            "prompt_sha256": _sha256_text(prompt),
        }

    def _post_multimodal_generation(self, payload: dict[str, Any]) -> dict[str, Any]:
        """POST *payload* to the multimodal generation endpoint with retries.

        HTTP 429 and transport failures (``URLError`` and any other
        ``OSError``, which includes read timeouts) are retried up to
        ``max_retries`` times, sleeping for the delay computed by
        ``_retry_delay_seconds`` between attempts.

        Raises:
            QwenRequestError: On a non-retried HTTP error, exhausted retries,
                or a body that is not a JSON object.
        """
        body = json.dumps(payload).encode("utf-8")
        request = Request(
            _multimodal_generation_url(self.base_url),
            data=body,
            method="POST",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
        )
        for attempt in range(self.max_retries + 1):
            try:
                with self.opener(request, timeout=self.timeout_seconds) as response:
                    parsed = json.loads(response.read().decode("utf-8"))
            except HTTPError as exc:
                if exc.code == 429 and attempt < self.max_retries:
                    self.sleep(_retry_delay_seconds(exc, attempt, self.retry_base_seconds))
                    continue
                detail = _http_error_detail(exc)
                raise QwenRequestError(f"Qwen image generation failed with HTTP {exc.code}{detail}") from exc
            except (URLError, OSError) as exc:
                # Plain OSError covers timeouts/resets raised while waiting for or
                # reading the response, which urllib does not wrap in URLError.
                if attempt < self.max_retries:
                    self.sleep(_retry_delay_seconds(exc, attempt, self.retry_base_seconds))
                    continue
                raise QwenRequestError("Qwen image generation failed before receiving a response") from exc
            except (json.JSONDecodeError, UnicodeDecodeError) as exc:
                raise QwenRequestError("Qwen image generation response was not valid JSON") from exc
            if not isinstance(parsed, dict):
                raise QwenRequestError("Qwen image generation response was not a JSON object")
            return parsed
        raise QwenRequestError("Qwen image generation failed after retries")

    def _download_generated_image(self, image_url: str) -> bytes:
        """Download the generated image and return its bytes.

        Only ``http``, ``https`` and ``data`` URLs are fetched: the URL comes
        from a remote response and ``urlopen`` would otherwise also honour
        schemes such as ``file:``.

        Raises:
            QwenRequestError: If the scheme is not allowed, the download
                fails, or the body is empty.
        """
        scheme = urlsplit(image_url).scheme.lower() if isinstance(image_url, str) else ""
        if scheme not in ("http", "https", "data"):
            raise QwenRequestError("Qwen generated image URL must use http, https, or data scheme")
        try:
            with self.opener(image_url, timeout=self.timeout_seconds) as response:
                image_bytes = response.read()
        except HTTPError as exc:
            raise QwenRequestError(f"Qwen generated image download failed with HTTP {exc.code}") from exc
        except URLError as exc:
            raise QwenRequestError("Qwen generated image download failed before receiving a response") from exc
        except OSError as exc:
            # Read timeouts and resets are not wrapped in URLError by urllib.
            raise QwenRequestError("Qwen generated image download timed out or the connection failed") from exc
        if not image_bytes:
            raise QwenRequestError("Qwen generated image download was empty")
        return image_bytes


def _multimodal_generation_url(base_url: str) -> str:
    clean = base_url.rstrip("/")
    target_path = "/api/v1/services/aigc/multimodal-generation/generation"
    if clean.endswith(target_path):
        return clean
    parts = urlsplit(clean)
    if parts.scheme and parts.netloc:
        return urlunsplit((parts.scheme, parts.netloc, target_path, "", ""))
    return f"{clean}{target_path}"


def _action_visual_review_requirements(action: str) -> str:
    """Return the hard visual-review rules for one action row.

    Dedicated rule text exists for ``sleep``, ``walk``, ``look``, ``sit`` and
    ``tail_wag``; every other action gets a generic identity/semantics rule.
    Frame counts in the text come from ``DEFAULT_FRAMES_PER_ACTION``.

    Args:
        action: Action name to look up.

    Returns:
        The rule text for ``action``.
    """
    fallback_rules = "Hard rules: preserve the same pet identity and reject wrong action semantics."
    sleep_rules = (
        f"Hard rules for sleep: all {DEFAULT_FRAMES_PER_ACTION} frames must be low lying, curled, or clearly sleeping; "
        "reject upright sitting, standing, walking, or any flipped opposite-facing sleep direction."
    )
    # The walk row is a deliberate two-frame loop, so its rules describe both
    # the key poses and the repeated frames.
    walk_rules = (
        "Hard rules for walk: this project intentionally uses a two-frame walk loop. "
        f"All {DEFAULT_FRAMES_PER_ACTION} frames must face and walk right in side view; "
        f"frames 1 and 2 must be two distinct walking key poses, and frames 3 through {DEFAULT_FRAMES_PER_ACTION} intentionally repeat frames 1 and 2. "
        "reject any left-facing, front-facing, sitting, or loafing frame; "
        "reject if frames 1 and 2 are the same paw stance or only body translation. "
        f"Set two_frame_loop_ok=true when frames 1 and 2 form a readable two-pose walk cycle and frames 3 through {DEFAULT_FRAMES_PER_ACTION} correctly copy them. "
        "Set gait_cycle_ok=true when the two-frame loop is acceptable for a small desktop pet."
    )
    look_rules = (
        f"Hard rules for look: keep one stable body orientation across all {DEFAULT_FRAMES_PER_ACTION} frames; "
        "only the head or eyes may move gently; reject full-body turns or body-direction changes within the row."
    )
    sit_rules = (
        f"Hard rules for sit: all {DEFAULT_FRAMES_PER_ACTION} frames must remain seated with rear and hind legs on the ground; "
        "reject standing, walking, or four-paw support poses."
    )
    tail_wag_rules = (
        "Hard rules for tail_wag: tail motion may be subtle, but the tail or rear-body variation must be visible; "
        "do not reject solely because the row is calm or low-disturbance."
    )
    rules_by_action = {
        "sleep": sleep_rules,
        "walk": walk_rules,
        "look": look_rules,
        "sit": sit_rules,
        "tail_wag": tail_wag_rules,
    }
    return rules_by_action.get(action, fallback_rules)


def _http_error_detail(exc: HTTPError) -> str:
    try:
        raw = exc.read()
    except Exception:
        raw = b""
    if not raw:
        return ""
    try:
        text = raw.decode("utf-8", errors="replace")
    except Exception:
        text = repr(raw)
    text = " ".join(text.split())
    if not text:
        return ""
    return f": {text[:300]}"


def _extract_generated_image_url(raw: dict[str, Any]) -> str:
    if raw.get("code") or raw.get("message"):
        raise QwenRequestError(
            "Qwen image generation returned an error: "
            f"{str(raw.get('code', 'UnknownError'))[:80]} - {str(raw.get('message', ''))[:220]}"
        )
    output = raw.get("output", {})
    choices = output.get("choices", [])
    for choice in choices:
        content = choice.get("message", {}).get("content", [])
        if isinstance(content, dict):
            content = [content]
        if isinstance(content, list):
            for item in content:
                if isinstance(item, dict) and item.get("image"):
                    return str(item["image"])
    results = output.get("results", [])
    if isinstance(results, list):
        for item in results:
            if isinstance(item, dict) and item.get("url"):
                return str(item["url"])
    raise QwenRequestError("Qwen image generation response did not include an image URL")


def _ensure_qwen_image_model(model: str) -> None:
    if not model.startswith("qwen-image"):
        raise QwenConfigError(
            "QWEN_MODEL_IMAGE must be a Qwen image generation/edit model such as "
            "qwen-image-2.0-pro, qwen-image-2.0, qwen-image-max, or qwen-image-plus; "
            f"got {model!r}."
        )


# Background instruction (English prompt text) asking for a flat pure magenta
# #FF00FF chroma-key background with no transparent pixels and no magenta
# inside the pet. Interpolated into the prompts built by
# _build_pet_image_prompt, _build_action_frames_prompt and
# _build_action_strip_prompt. The trailing space lets it be concatenated
# directly with the next sentence.
CHROMA_KEY_BACKGROUND_INSTRUCTION = (
    "Use a flat pure magenta #FF00FF chroma-key background, not transparency. "
    "Do not generate transparent pixels anywhere in the output. "
    "The pet must have a fully opaque pet body with no transparent pixels, black holes, cutout holes, missing fur patches, or broken alpha mask. "
    "Keep #FF00FF and similar magenta colors completely out of the pet, fur, eyes, markings, highlights, shadows, and effects. "
)

# Background instruction (Chinese prompt text) for single-frame generation:
# a solid pure blue #0000FF background that fills the whole image to its
# edges, with no white/transparent/checkerboard background, scenery, ground,
# shadows, text, numbering, borders or dividers; the blue is only there for
# later local background removal and must not tint the pet. Interpolated into
# the prompts built by _build_action_frame_prompt and
# _build_minimal_action_frame_prompt.
ACTION_FRAME_BACKGROUND_INSTRUCTION = (
    "使用纯蓝 #0000FF 单色背景，不要白底，不要透明背景，不要棋盘格，不要场景、地面、阴影、文字、编号、边框或分割线。"
    "蓝色背景必须铺满整张图片直到四周边缘，不要画成蓝色卡片、蓝色方块或局部背景板。"
    "角色不要站在地面上，不要接触阴影、落地阴影、投影、彩色反光或暗色渐变。"
    "蓝色背景只用于后续本地抠图，宠物身体、毛发、眼睛、花纹和边缘不要被背景颜色污染。"
)

# Background instruction (English prompt text) for the canonical reference
# image: a plain white background, no transparency and no chroma-key color.
# Interpolated into the prompt built by _build_canonical_pet_prompt. The
# trailing space lets it be concatenated directly with the next sentence.
CANONICAL_BACKGROUND_INSTRUCTION = (
    "Use a plain white clean background, not transparency and not chroma-key color. "
    "Do not generate transparent pixels anywhere in the output. "
    "Avoid floor shadows, colored halos, glow, scenery, checkerboard, text, labels, borders, or props. "
    "The white background is only for later local background removal; keep the pet body, fur, eyes, markings, and edges clean. "
)


def _build_pet_image_prompt(*, pet_name: str, notes: str, analysis: dict[str, Any]) -> str:
    """Compose the prompt for a single desktop-pet atlas preview image.

    Args:
        pet_name: Display name; only the first 80 characters are used.
        notes: Owner notes; whitespace is collapsed and the result is cut to
            180 characters.
        analysis: Visual analysis mapping. ``species``, ``base_color``,
            ``accent_color``, ``eye_color``, ``body_shape`` and
            ``distinctive_marks`` are read, each with a fallback value.

    Returns:
        The complete prompt text.
    """
    raw_marks = analysis.get("distinctive_marks", [])
    mark_list = raw_marks if isinstance(raw_marks, list) else [str(raw_marks)]
    # At most six marks are listed; an empty list falls back to a neutral phrase.
    marks_text = ", ".join(str(mark) for mark in mark_list[:6])
    if not marks_text:
        marks_text = "authorized reference markings"
    owner_cues = " ".join(notes.split())[:180]

    species = analysis.get("species", "pet")
    base_color = analysis.get("base_color", "unknown")
    accent_color = analysis.get("accent_color", "unknown")
    eye_color = analysis.get("eye_color", "unknown")
    body_shape = analysis.get("body_shape", "unknown")

    segments = [
        "Create a high-fidelity memorial desktop pet visual asset from the authorized reference photos. ",
        "The website UI may be soft 2D pixel-inspired, but the generated desktop pet asset must not be pixel art, ",
        "must not be blocky, and must preserve natural likeness. ",
        "Generate one clean 2D desktop pet atlas preview. ",
        CHROMA_KEY_BACKGROUND_INSTRUCTION,
        "Keep the same face, coat texture, fur markings, eye color, body proportions, and familiar gentle presence. ",
        "Use low-disturbance memorial actions as design intent: idle, sleep, walk, look, sit, tail_wag. ",
        "Do not add text, logos, humans, room scenery, speech bubbles, tears, halos, medical claims, therapy claims, resurrection framing, ",
        "chatbot framing, or productivity-assistant elements. ",
        f"Pet display name: {pet_name[:80]}. ",
        f"Visual analysis: species={species}, base_color={base_color}, ",
        f"accent_color={accent_color}, eye_color={eye_color}, ",
        f"body_shape={body_shape}, distinctive_marks={marks_text}. ",
        f"Owner-provided action cues only, not text to render: {owner_cues}.",
    ]
    return "".join(segments)


def _build_canonical_pet_prompt(*, pet_name: str, notes: str, analysis: dict[str, Any]) -> str:
    """Compose the prompt for the canonical full-body identity reference image.

    Args:
        pet_name: Display name; only the first 80 characters are used.
        notes: Owner notes; whitespace is collapsed and the result is cut to
            160 characters.
        analysis: Visual analysis mapping. ``species``, ``base_color``,
            ``accent_color``, ``eye_color`` and ``body_shape`` are read, each
            with a fallback value.

    Returns:
        The complete prompt text.
    """
    owner_cues = " ".join(notes.split())[:160]
    species = analysis.get("species", "pet")
    base_color = analysis.get("base_color", "unknown")
    accent_color = analysis.get("accent_color", "unknown")
    eye_color = analysis.get("eye_color", "unknown")
    body_shape = analysis.get("body_shape", "unknown")

    segments = [
        "Create one canonical high-fidelity 2D memorial desktop pet reference from the authorized raw pet photos. ",
        "Generate exactly one zoomed-out full-body pet, centered, no background scenery. ",
        "The whole body must fit comfortably inside the image with safe padding; all four paws and tail visible when anatomically present. ",
        "Use a neutral standing or relaxed side-three-quarter full-body pose suitable as an identity reference for later animation; ",
        "not a lying, sleeping, curled, or loafing pose. ",
        "Do not create a headshot, bust, close-up portrait, cropped body, or half-body image. ",
        CANONICAL_BACKGROUND_INSTRUCTION,
        "Preserve real likeness: face, coat texture, fur markings, eye color, body proportions, and calm visual identity. ",
        "This is not pixel art; do not create blocky pixels or an 8-bit style. ",
        "Do not add text, logos, humans, speech bubbles, halos, tears, medical claims, therapy claims, resurrection framing, ",
        "chatbot framing, or productivity-assistant elements. ",
        f"Pet display name: {pet_name[:80]}. ",
        f"Visual traits: species={species}, base_color={base_color}, ",
        f"accent_color={accent_color}, eye_color={eye_color}, ",
        f"body_shape={body_shape}. ",
        f"Owner action cues only, not text to render: {owner_cues}.",
    ]
    return "".join(segments)


def _build_action_frames_prompt(*, action: str, pet_name: str, notes: str, analysis: dict[str, Any]) -> str:
    """Compose the prompt for one row of frames of a single action.

    Args:
        action: Action name; unknown actions get a generic pose description.
        pet_name: Display name; only the first 80 characters are used.
        notes: Owner notes; whitespace is collapsed and the result is cut to
            140 characters.
        analysis: Accepted for signature parity with the other prompt
            builders; not read by this function.

    Returns:
        The complete prompt text.
    """
    pose_descriptions = {
        "idle": "subtle idle poses with tiny breathing or head angle variation",
        "sleep": "calm sleeping or drowsy poses, very low disturbance",
        "walk": "gentle walking poses, side-oriented, no speed lines or dust",
        "look": "calm looking poses with stable body direction and small head or eye changes, not talking",
        "sit": "seated poses with small posture variation",
        "tail_wag": "visible tail-motion poses where the tail is the main action and the body stays stable",
    }
    # Every description is prefixed with the frame count.
    action_meaning = f"{DEFAULT_FRAMES_PER_ACTION} {pose_descriptions.get(action, 'low-disturbance poses')}"
    owner_cues = " ".join(notes.split())[:140]

    segments = [
        f"Using the canonical reference image as the only pet identity reference, generate exactly {DEFAULT_FRAMES_PER_ACTION} separated full-body desktop pet frames ",
        f"for the action '{action}'. ",
        f"Action meaning: {action_meaning}. ",
        f"Place the {DEFAULT_FRAMES_PER_ACTION} frames in a single horizontal row, evenly spaced. ",
        CHROMA_KEY_BACKGROUND_INSTRUCTION,
        "Keep the same pet identity in every frame: same face, fur markings, body proportions, eye color, and calm memorial tone. ",
        "Do not change species, do not add text, logos, humans, scenery, detached effects, speed lines, speech bubbles, tears, halos, ",
        "medical/therapy claims, resurrection framing, chatbot framing, or productivity-assistant elements. ",
        "The result must be high-fidelity 2D desktop pet art, not pixel art and not blocky. ",
        f"Pet display name: {pet_name[:80]}. ",
        f"Owner action cues only, not text to render: {owner_cues}.",
    ]
    return "".join(segments)


def _build_action_strip_prompt(*, action: str, pet_name: str, notes: str, analysis: dict[str, Any]) -> str:
    """Compose the prompt for one horizontal animation strip of an action.

    The action description and its requirement bullets come from
    ``_action_strip_requirements``.

    Args:
        action: Action (state) name.
        pet_name: Display name; only the first 80 characters are used.
        notes: Owner notes; whitespace is collapsed and the result is cut to
            120 characters.
        analysis: Visual analysis mapping. ``species``, ``base_color``,
            ``accent_color`` and ``eye_color`` are read, each with a fallback
            value.

    Returns:
        The complete prompt text.
    """
    state_action, state_requirements = _action_strip_requirements(action)
    # Requirements are rendered as "- item" bullets on a single line.
    bullet_text = " ".join(map("- {}".format, state_requirements))
    owner_cues = " ".join(notes.split())[:120]
    species = analysis.get("species", "pet")
    base_color = analysis.get("base_color", "unknown")
    accent_color = analysis.get("accent_color", "unknown")
    eye_color = analysis.get("eye_color", "unknown")

    segments = [
        f"Create one horizontal animation strip for memorial desktop pet `{pet_name[:80]}`, state `{action}`. ",
        "Treat this request as exactly one action-row job in a hatch-pet-style production pipeline. ",
        "Use the attached canonical base as the only pet identity reference: markings, body type, coat texture, face, eye color, ",
        "tail visibility, and familiar expression must come from that canonical base. ",
        "Use the attached layout guide only for slot count, spacing, centering, and padding. ",
        "Do not copy any visible guide pixels, guide colors, borders, center marks, labels, or background into the result. ",
        "Do not generate a single large animal portrait. Do not generate one close-up. ",
        f"Output exactly {DEFAULT_FRAMES_PER_ACTION} separate full-body versions of the same pet in one left-to-right horizontal row. ",
        CHROMA_KEY_BACKGROUND_INSTRUCTION,
        f"Treat the row as {DEFAULT_FRAMES_PER_ACTION} invisible equal-width slots: one centered complete pose per slot, evenly spaced, with no overlap, ",
        f"clipping, empty slots, labels, borders, or merged bodies. Leave clear empty space between the {DEFAULT_FRAMES_PER_ACTION} pet copies. ",
        f"Use a zoomed-out sprite-sheet camera so all {DEFAULT_FRAMES_PER_ACTION} complete bodies fit; never fill the canvas with one animal. ",
        "Identity must stay the same in every frame: preserve the same face, coat texture, fur markings, body proportions, ",
        "eye color, calm memorial tone, and high-fidelity 2D look from the canonical base. ",
        "Animation continuity: keep apparent pet scale stable within the row and change pose within each slot. ",
        f"State action: {state_action}. ",
        f"State requirements: {bullet_text} ",
        "Clean extraction: crisp opaque pet edges, safe padding, no scenery, text, guide marks, checkerboard, shadows, glows, ",
        "motion blur, speed lines, dust, detached effects, stray pixels, halos, speech bubbles, therapy claims, resurrection framing, ",
        "chatbot framing, productivity-assistant framing, pixel art, blocky pixels, or 8-bit style. ",
        f"Visual traits from analysis: species={species}, base_color={base_color}, ",
        f"accent_color={accent_color}, eye_color={eye_color}. ",
        f"Owner action cues only, not text to render: {owner_cues}.",
    ]
    return "".join(segments)


def _build_action_frame_prompt(
    *,
    action: str,
    frame_index: int,
    candidate_index: int,
    pet_name: str,
    notes: str,
    analysis: dict[str, Any],
    has_previous_frame: bool,
) -> str:
    """Compose the (Chinese) prompt for one single frame of an action.

    The prompt is assembled from a per-frame pose description, a per-action
    row constraint, optional repair hints, fixed identity/framing sentences,
    the shared blue-background instruction, and an optional note about the
    previous-frame reference image.

    Args:
        action: Action name; unknown actions fall back to generic text.
        frame_index: Zero-based frame index; wrapped with modulo over the
            number of pose descriptions available for the action.
        candidate_index: Values above 0 add a repair hint; values of 10 or
            more additionally add a final sheet-level QA repair hint.
        pet_name: Accepted for signature parity; not read by this function.
        notes: Accepted for signature parity; not read by this function.
        analysis: Accepted for signature parity; not read by this function.
        has_previous_frame: Whether a second reference image (the previous
            frame) accompanies the request.

    Returns:
        The complete prompt text.
    """
    phases_by_action = {
        "idle": [
            "拖动结束后的短暂停留第 1 帧：四脚站立静止，四只脚自然支撑，背部基本水平，臀部离地；不要坐姿，屁股不要贴地。",
            "拖动结束后的短暂停留第 2 帧：仍然四脚站立静止，只允许轻微呼吸、眨眼或尾尖小变化；不要坐姿，不要走路，屁股不要贴地。",
            "拖动结束后的短暂停留第 3 帧：四脚站立静止，身体只做很小的放松变化，臀部仍然离地；不要画成 sit 行的稳定坐姿。",
            "拖动结束后的短暂停留第 4 帧：回到接近第 1 帧的四脚站立静止，准备 20 秒后才会切换到 sit；不要坐姿，屁股不要贴地。",
        ],
        "sleep": [
            "同一睡姿方向的低伏侧躺睡觉，头部固定在画面右侧，身体在画面左侧，鼻尖朝画面右下或右侧；眼睛半闭或闭合，不是坐姿，不要翻转方向。",
            "同一睡姿方向，头部仍固定在画面右侧，身体仍在画面左侧，完全睡着，眼睛闭合；只允许轻微呼吸变化，不要翻转方向。",
            "同一睡姿方向，头部仍固定在画面右侧，头枕在前爪上，后腿和尾巴只有小变化，眼睛闭合；不要换到另一侧睡，不要翻转方向。",
            "同一睡姿方向，头部仍固定在画面右侧，回到接近第 1 帧的低伏侧躺睡姿，耳朵或前爪有小变化，准备循环；不要翻转方向。",
        ],
        "walk": [
            "右侧侧面正在往右走路。身体横向，头在身体右端，鼻尖、脸和前进方向都朝画面右侧，不要回头看左。画面右侧最前方那只可见前脚大幅向右前方伸出，另一只前脚竖直支撑在肩膀下方；画面左侧可见后脚向左后方伸出，另一只后脚在臀部下方支撑；四只脚尽量可见。",
            "右侧侧面正在往右走路。身体横向，头在身体右端，鼻尖、脸和前进方向都朝画面右侧，不要回头看左。第 1 帧向前伸出的前脚现在竖直落地支撑在肩膀下方；另一只前脚在胸口下方清楚抬离地面并向右前方摆动；后脚在身体下方交替，其中一只后脚也要抬离地面；脚位必须和第 1 帧不同。",
            "本帧由本地流程复制第 1 帧，不会请求模型生成。",
            "本帧由本地流程复制第 2 帧，不会请求模型生成。",
        ],
        "look": [
            "安静站立或坐着，保持同一身体方向，眼睛自然看前方。",
            "只轻微转动头部和视线，眼睛略微向上看，身体方向保持不变，四只脚保持原地不走路。",
            "只轻微转动头部和视线，眼睛略微向前或向下看，身体方向保持不变，不要整只猫转身。",
            "头部回到接近第 1 帧的自然观察姿态，保持同一身体方向。",
        ],
        "sit": [
            "同一个坐姿的第 1 帧：坐在地上，屁股和后腿贴地，前爪竖直在身体前方，绝对不是站立。",
            "同一个坐姿的第 2 帧：仍然坐在地上，屁股和后腿贴地，只允许眨眼、耳朵或头部有很小变化，前爪保持竖直。",
            "同一个坐姿的第 3 帧：仍然坐在地上，身体不要移动，不要站起来，只允许眨眼、耳朵或头部有很小变化。",
            "同一个坐姿的第 4 帧：回到接近第 1 帧的稳定坐姿，屁股和后腿仍然贴地，准备循环。",
        ],
        "tail_wag": [
            "同一侧面或侧后方站姿，身体尽量保持不变，尾巴清楚可见，尾巴尖放在一侧；轻微摆尾即可，不能用头动或身体动代替。",
            "同一侧面或侧后方站姿，身体尽量保持不变，尾巴从一侧轻微移动到中间或上方；尾巴有可见小变化即可。",
            "同一侧面或侧后方站姿，身体尽量保持不变，尾巴轻微摆到另一侧；尾巴有可见小变化即可。",
            "同一侧面或侧后方站姿，身体尽量保持不变，尾巴回到接近第 1 帧的一侧，准备循环；轻微变化即可。",
        ],
    }
    generic_phases = ["low-disturbance pose"] * DEFAULT_FRAMES_PER_ACTION
    phases = phases_by_action.get(action, generic_phases)
    # Modulo keeps out-of-range frame indices inside the available descriptions.
    phase_line = phases[frame_index % len(phases)]

    titles = {
        "idle": "安静待机",
        "sleep": "蜷缩或侧躺睡觉",
        "walk": "右侧侧面正在往右走路",
        "look": "安静观察",
        "sit": "稳定坐姿",
        "tail_wag": "尾巴轻微摆动",
    }
    title = titles.get(action, action)

    # The third walk frame gets a differently worded note about the second
    # reference image than every other frame does.
    if not has_previous_frame:
        continuity_line = ""
    elif action == "walk" and frame_index == 2:
        continuity_line = "第 2 张参考图只用于保持画风、比例和角色连续性；当前姿态仍按本帧描述生成。 "
    else:
        continuity_line = "第 2 张参考图是同一动作上一帧，只用来保持画风、比例和角色连续性；当前姿态仍按本帧描述生成。 "

    row_constraints = {
        "idle": "idle 是拖动结束后到坐下前的过渡常驻，必须和 sit 明显不同：四脚站立静止，背部基本水平，臀部离地，20 秒后才会切换到 sit；不要坐姿，不要屁股贴地，不要画成正面稳定坐着。",
        "sleep": f"{DEFAULT_FRAMES_PER_ACTION} 帧都必须保持同一睡姿方向，只允许呼吸、耳朵、前爪的小变化；任何一帧都不要翻转方向，不要换成另一侧躺。",
        "walk": "本流程只向模型生成第 1、2 帧，后两帧由本地复制第 1、2 帧形成两帧循环。鼻尖和前进方向始终朝右，任何一帧都不能朝左；第 1、2 帧必须是两个不同的右行脚位，至少一只脚的位置明显不同；不能只平移身体，不能正面坐着，不能趴着不走。",
        "sit": f"{DEFAULT_FRAMES_PER_ACTION} 帧都必须保持同一个坐姿，只允许眨眼、耳朵或头部有很小变化；不要站立，不要四脚撑地走路，不要画成从站立到坐下的过渡。",
        "look": f"{DEFAULT_FRAMES_PER_ACTION} 帧都必须保持同一身体方向，只允许头部和眼神小幅变化；不要整只猫转身，不要在同一行动画里左右翻转。",
        "tail_wag": f"{DEFAULT_FRAMES_PER_ACTION} 帧保留同一只猫的身份一致性，尾巴清楚可见并有轻微位置变化即可；不要为了夸张摆尾而改变脸、体型、毛色或整体风格。",
    }
    row_constraint = row_constraints.get(action, "")

    # Any retry candidate gets an action-specific repair hint.
    repair_line = ""
    if candidate_index > 0:
        repair_hints = {
            "sleep": "这是修复候选：上一轮可能出现方向翻转或不像睡觉，本帧必须仍然低伏睡觉且保持同一睡姿方向。",
            "walk": "这是修复候选：上一轮可能出现第 1、2 帧脚步太像或没有抬脚，本帧必须侧面朝右走路，鼻尖和脚步方向都朝右；必须有一只脚清楚抬离地面或明显蹬地。",
            "look": "这是修复候选：上一轮可能出现整只猫转身或方向变化，本帧必须保持同一身体方向，只允许头部和眼神小幅变化。",
            "sit": "这是修复候选：上一轮可能出现站立或走路，本帧必须明确坐在地上，屁股和后腿贴地，四只脚不能形成站立支撑。",
            "tail_wag": "这是修复候选：上一轮可能尾巴没有变化，本帧让尾巴有轻微可见变化即可，身体尽量保持同一站姿，不要牺牲身份一致性。",
        }
        repair_line = repair_hints.get(action, "这是修复候选：本帧必须更清楚地表现指定动作。")

    # Candidates numbered 10 and up additionally get the final QA repair hint.
    final_repair_line = ""
    if candidate_index >= 10:
        final_repair_hints = {
            "idle": "这是最终整表 QA 修复候选：必须明显不同于 sit；保持站立或半站立放松，不要坐姿，不要屁股贴地。",
            "sleep": f"这是最终整表 QA 修复候选：{DEFAULT_FRAMES_PER_ACTION} 帧睡觉必须保持同一方向，头部都在画面右侧，身体都在画面左侧；不要出现任何镜像、反向或坐姿帧。",
            "walk": "这是最终整表 QA 修复候选：只生成第 1、2 帧关键脚位，后两帧由本地复制；本帧必须右行，头必须在身体右端，鼻尖必须朝画面右侧；第 1、2 帧必须明显不同；不要出现任何朝左、回头看左、正面或坐趴帧。",
            "look": "这是最终整表 QA 修复候选：必须明显不同于 idle；本帧保持同一身体方向，只允许头部和眼神变化，不要整只猫转身。",
            "tail_wag": "这是最终整表 QA 修复候选：轻微尾巴变化也可以，保留同一只猫的脸、毛色、体型和站姿，不要牺牲身份一致性。",
        }
        final_repair_line = final_repair_hints.get(
            action,
            "这是最终整表 QA 修复候选：本帧必须更清楚地表现指定动作，并和其他动作行区分开。",
        )

    frame_number = frame_index + 1
    segments = [
        f"基于第 1 张参考图，生成同一角色的{title}动画第 {frame_number} / {DEFAULT_FRAMES_PER_ACTION} 帧。",
        phase_line,
        row_constraint,
        repair_line,
        final_repair_line,
        "保留主要外貌特征：脸、毛色、花纹、体型、眼睛颜色、真实或半写实毛发质感。",
        "不要换成另一只猫，不要改成卡通、线稿、矢量插画、贴纸、玩具或吉祥物。",
        "保持相同镜头距离、相同视觉体量和相近安全边距，不要让角色突然变大或变小。",
        ACTION_FRAME_BACKGROUND_INSTRUCTION,
        "只输出一个完整单帧，只出现一个角色；不要动作条、拼图、网格、多个角色、文字或编号。",
        "完整身体可见，居中，有安全边距。不要生成透明像素。",
        continuity_line,
        "不要场景、地面、阴影、速度线、特效、对白框、医疗/治疗/复活/聊天/效率工具元素。",
    ]
    return "".join(segments)


def _build_minimal_action_frame_prompt(
    *,
    action: str,
    frame_index: int,
    candidate_index: int,
    pet_name: str,
    notes: str,
    analysis: dict[str, Any],
) -> str:
    """Compose a short (Chinese) single-frame prompt for one action.

    Only ``action`` and ``frame_index`` influence the text.

    Args:
        action: Action name; unknown actions get a generic pose phrase.
        frame_index: Zero-based frame index, shown one-based in the prompt.
        candidate_index: Accepted for signature parity; not read here.
        pet_name: Accepted for signature parity; not read here.
        notes: Accepted for signature parity; not read here.
        analysis: Accepted for signature parity; not read here.

    Returns:
        The complete prompt text.
    """
    pose_phrases = {
        "idle": "安静待机，轻微呼吸起伏",
        "sleep": "蜷缩或侧躺睡觉，身体低伏",
        "walk": "右侧侧面正在往右走路，身体横向、面部朝右",
        "look": "安静观察，保持同一身体方向，只有头部和眼神轻微变化",
        "sit": "坐在地上的稳定坐姿，屁股和后腿贴地，前爪竖直，绝对不是站立",
        "tail_wag": "尾巴清楚可见，轻微摆动",
    }
    pose_phrase = pose_phrases.get(action, "quiet low-disturbance pose")
    frame_number = frame_index + 1
    segments = [
        f"基于这张参考图，生成同一角色的{pose_phrase}动画第 {frame_number} / {DEFAULT_FRAMES_PER_ACTION} 帧。",
        "保留主要外貌特征和真实或半写实毛发质感。",
        "不要换成另一只猫，不要改成卡通、线稿、矢量插画、贴纸、玩具或吉祥物。",
        ACTION_FRAME_BACKGROUND_INSTRUCTION,
        "只输出一个完整单帧，只出现一个角色；不要文字、编号、动作条、拼图、网格、场景、地面、阴影或透明像素。",
    ]
    return "".join(segments)


def _action_strip_requirements(action: str) -> tuple[str, list[str]]:
    """Return the strip description and requirement bullets for an action.

    Args:
        action: Action name; unknown actions get a generic entry.

    Returns:
        A ``(description, requirements)`` pair: a one-line description of the
        animation loop and a list of requirement sentences.
    """
    generic_entry = (
        "Low-disturbance memorial desktop pet action loop",
        [f"Preserve identity and generate {DEFAULT_FRAMES_PER_ACTION} clearly separated full-body frames."],
    )
    # The table is rebuilt on every call, so callers always get fresh lists.
    table: dict[str, tuple[str, list[str]]] = {}
    table["idle"] = (
        "Calm low-distraction resting loop with subtle breathing, tiny blink, and slight head or body bob",
        [
            f"Keep the pet essentially in the same calm baseline pose across all {DEFAULT_FRAMES_PER_ACTION} frames.",
            "Do not show walking, running, jumping, talking, or emotional drama.",
        ],
    )
    table["sleep"] = (
        "Quiet sleeping loop with lying, curled, or clearly low sleeping body posture",
        [
            "Every frame must read as sleep: lying, curled, or low relaxed body, not sitting upright.",
            "Eyes should be closed or drowsy and the body should stay low with tiny breathing or ear/paw variation.",
        ],
    )
    table["walk"] = (
        "Gentle walking loop in side-oriented body posture",
        [
            "Every frame must read as walking, with side-oriented body and alternating paw positions.",
            "Do not reuse the canonical front-facing sitting or standing pose.",
            "No speed lines, dust, ground shadows, or scenery.",
        ],
    )
    table["look"] = (
        "Gentle looking-around loop with stable body direction and head or eye angle changes",
        [
            "Keep the same body orientation in every frame; only the head and eyes may shift gently.",
            "Do not turn the whole body around or change body direction within the row.",
            "Do not make the pet talk, emote dramatically, or add symbols.",
        ],
    )
    table["sit"] = (
        "Settled seated loop with small posture and head variation",
        [
            "A seated pose is allowed here, but frames must still vary gently.",
            "Preserve the same body proportions and markings.",
        ],
    )
    table["tail_wag"] = (
        "Visible tail small-motion loop, turning the body if needed so the tail can be seen",
        [
            "The tail must be visible in most frames unless the real animal has no visible tail.",
            "Show left, center, right, and return tail positions through pose only.",
            "Do not add motion marks or detached effects.",
        ],
    )
    return table.get(action, generic_entry)


def _sha256_text(value: str) -> str:
    import hashlib

    return hashlib.sha256(value.encode("utf-8")).hexdigest()


def _to_data_url(image_bytes: bytes, image_mime: str) -> str:
    encoded = base64.b64encode(image_bytes).decode("ascii")
    return f"data:{image_mime};base64,{encoded}"


def _parse_review_content(content: str) -> dict[str, Any]:
    """Parse a model review reply into a dict that always has a ``risks`` key.

    The reply is first reduced to its JSON text by ``_extract_json_text``.

    Args:
        content: Raw text returned by the model.

    Returns:
        The decoded JSON object, with ``risks`` defaulted to ``[]``. When the
        text is not valid JSON, or decodes to something other than an object,
        a placeholder dict is returned instead: ``score`` and ``passed`` are
        ``None``, ``notes`` holds the first 240 characters of the text or
        value, and ``risks`` names the problem.
    """
    json_text = _extract_json_text(content)
    try:
        decoded = json.loads(json_text)
    except json.JSONDecodeError:
        return {"score": None, "passed": None, "notes": json_text[:240], "risks": ["non_json_review"]}
    if isinstance(decoded, dict):
        decoded.setdefault("risks", [])
        return decoded
    return {"score": None, "passed": None, "notes": str(decoded)[:240], "risks": ["unexpected_review_shape"]}


def _extract_json_text(content: str) -> str:
    stripped = content.strip()
    if stripped.startswith("```"):
        lines = stripped.splitlines()
        if lines and lines[0].startswith("```"):
            lines = lines[1:]
        if lines and lines[-1].strip().startswith("```"):
            lines = lines[:-1]
        stripped = "\n".join(lines).strip()
    if "{" in stripped and "}" in stripped:
        start = stripped.find("{")
        end = stripped.rfind("}") + 1
        return stripped[start:end]
    return stripped


def _normalize_photo_analysis(parsed: dict[str, Any]) -> dict[str, Any]:
    confidence = parsed.get("confidence", 0.5)
    try:
        confidence = float(confidence)
    except (TypeError, ValueError):
        confidence = 0.5
    marks = parsed.get("distinctive_marks", [])
    if not isinstance(marks, list):
        marks = [str(marks)]
    return {
        "species": str(parsed.get("species", "pet"))[:40],
        "base_color": str(parsed.get("base_color", "gray"))[:40],
        "accent_color": str(parsed.get("accent_color", "white"))[:40],
        "eye_color": str(parsed.get("eye_color", "green"))[:40],
        "body_shape": str(parsed.get("body_shape", "rounded"))[:80],
        "distinctive_marks": [str(mark)[:80] for mark in marks[:6]],
        "confidence": max(0.0, min(1.0, confidence)),
        "safe_for_generation": bool(parsed.get("safe_for_generation", True)),
        "user_visible_summary": str(parsed.get("user_visible_summary", "已提取适合生成低打扰桌宠的视觉特征。"))[:220],
    }