zh-plus
diff --git a/‎.github/workflows/ci.yml
+14-33 b/‎.github/workflows/ci.yml
+14-33
diff --git a/‎README.md
+24 b/‎README.md
+24
diff --git a/‎openlrc/agents.py
+10-7 b/‎openlrc/agents.py
+10-7
diff --git a/‎openlrc/chatbot.py
+73-43 b/‎openlrc/chatbot.py
+73-43
diff --git a/‎openlrc/context.py
+3-2 b/‎openlrc/context.py
+3-2
diff --git a/‎openlrc/evaluate.py
+2-2 b/‎openlrc/evaluate.py
+2-2
@@ -23,42 +23,23 @@ jobs:
 
     runs-on: ${{ matrix.os }}
     steps:
-      - name: Delete huge unnecessary tools folder
-        run: rm -rf /opt/hostedtoolcache
-
       - uses: actions/checkout@v4
 
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+      - name: Install the latest version of uv and set the python version
+        uses: astral-sh/setup-uv@v6
         with:
           python-version: ${{ matrix.python-version }}
-
-      - name: Cache Poetry dependencies
-        uses: actions/cache@v3
-        with:
-          path: |
-            ~/.cache/pypoetry
-            ~/.venv
-          key: ${{ runner.os }}-poetry-${{ matrix.python-version }}-${{ hashFiles('**/poetry.lock') }}
-          restore-keys: |
-            ${{ runner.os }}-poetry-${{ matrix.python-version }}-
-            ${{ runner.os }}-poetry-
-            ${{ runner.os }}-
-
-      - name: Install Poetry
-        uses: snok/install-poetry@v1
-        with:
-          version: 1.8.4
-      - name: Install dependencies
-        run: |
-          poetry lock --no-update
-          poetry install
-          poetry run pip uninstall -y faster-whisper
-          poetry run pip install "faster-whisper @ https://github.com/SYSTRAN/faster-whisper/archive/8327d8cc647266ed66f6cd878cf97eccface7351.tar.gz"
-      - name: Install extra dependencies
-        run: |
-          poetry run pip install --force-reinstall torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
-          poetry run pip install -U typing-extensions
+          activate-environment: true
+      #      - name: Install dependencies
+      #        run: |
+      #          poetry lock --no-update
+      #          poetry install
+      #          poetry run pip uninstall -y faster-whisper
+      #          poetry run pip install "faster-whisper @ https://github.com/SYSTRAN/faster-whisper/archive/8327d8cc647266ed66f6cd878cf97eccface7351.tar.gz"
+      #      - name: Install extra dependencies
+      #        run: |
+      #          poetry run pip install --force-reinstall torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
+      #          poetry run pip install -U typing-extensions
 
       - name: Install ffmpeg
         uses: FedericoCarboni/setup-ffmpeg@v2
@@ -76,4 +57,4 @@ jobs:
       - name: Test with unittest
         working-directory: ./tests
         run: |
-          poetry run python -m unittest discover -s . -p 'test_*.py'
+          uv run python -m unittest discover -s . -p 'test_*.py'
@@ -263,6 +263,30 @@ To maintain context between translation segments, the process is sequential for
 
 [//]: # (## Comparison to https://microsoft.github.io/autogen/docs/notebooks/agentchat_video_transcript_translate_with_whisper/)
 
+## Development Guide
+
+This project uses [uv](https://github.com/astral-sh/uv) for package management.
+Install uv with its standalone installers:
+
+### On macOS and Linux
+
+```shell
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
+
+### On Windows
+
+```shell
+powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
+```
+
+### Install dependencies
+
+```shell
+uv venv
+uv sync
+```
+
 ## Todo
 
 - [x] [Efficiency] Batched translate/polish for GPT request (enable contextual ability).
 
@@ -43,7 +43,7 @@ def _initialize_chatbot(self, chatbot_model: Union[str, ModelConfig], fee_limit:
         if isinstance(chatbot_model, str):
             chatbot_cls: Union[Type[ClaudeBot], Type[GPTBot], Type[GeminiBot]]
             chatbot_cls, model_name = route_chatbot(chatbot_model)
-            return chatbot_cls(model_name=model_name, fee_limit=fee_limit, proxy=proxy, retry=2,
+            return chatbot_cls(model_name=model_name, fee_limit=fee_limit, proxy=proxy, retry=4,
                                temperature=self.TEMPERATURE, base_url_config=base_url_config)
         elif isinstance(chatbot_model, ModelConfig):
             chatbot_cls = provider2chatbot[chatbot_model.provider]
@@ -58,9 +58,11 @@ def _initialize_chatbot(self, chatbot_model: Union[str, ModelConfig], fee_limit:
                     base_url_config = None
                     logger.warning(f'Unsupported base_url configuration for provider: {chatbot_model.provider}')
 
-            return chatbot_cls(model_name=chatbot_model.name, fee_limit=fee_limit, proxy=proxy, retry=2,
+            return chatbot_cls(model_name=chatbot_model.name, fee_limit=fee_limit, proxy=proxy, retry=4,
                                temperature=self.TEMPERATURE, base_url_config=base_url_config,
                                api_key=chatbot_model.api_key)
+        else:
+            raise ValueError(f'Invalid chatbot model type: {type(chatbot_model)}. Expected str or ModelConfig.')
 
 
 class ChunkedTranslatorAgent(Agent):
@@ -76,7 +78,7 @@ class ChunkedTranslatorAgent(Agent):
     TEMPERATURE = 1.0
 
     def __init__(self, src_lang, target_lang, info: TranslateInfo = TranslateInfo(),
-                 chatbot_model: Union[str, ModelConfig] = 'gpt-4o-mini', fee_limit: float = 0.8, proxy: str = None,
+                 chatbot_model: Union[str, ModelConfig] = 'gpt-4.1-nano', fee_limit: float = 0.8, proxy: str = None,
                  base_url_config: Optional[dict] = None):
         """
         Initialize the ChunkedTranslatorAgent.
@@ -190,7 +192,8 @@ def translate_chunk(self, chunk_id: int, chunk: List[Tuple[int, str]],
         guideline = context.guideline if use_glossary else context.non_glossary_guideline
         messages_list = [
             {'role': 'system', 'content': self.prompter.system()},
-            {'role': 'user', 'content': self.prompter.user(chunk_id, user_input, context.summary, guideline)},
+            {'role': 'user',
+             'content': self.prompter.user(chunk_id, user_input, context.previous_summaries, guideline)},
         ]
         resp = self.chatbot.message(messages_list, output_checker=self.prompter.check_format)[0]
         translations, summary, scene = self._parse_responses(resp)
@@ -213,7 +216,7 @@ class ContextReviewerAgent(Agent):
     TEMPERATURE = 0.6
 
     def __init__(self, src_lang, target_lang, info: TranslateInfo = TranslateInfo(),
-                 chatbot_model: Union[str, ModelConfig] = 'gpt-4o-mini',
+                 chatbot_model: Union[str, ModelConfig] = 'gpt-4.1-nano',
                  retry_model: Optional[Union[str, ModelConfig]] = None, fee_limit: float = 0.8, proxy: str = None,
                  base_url_config: Optional[dict] = None):
         """
@@ -359,7 +362,7 @@ class ProofreaderAgent(Agent):
     TEMPERATURE = 0.8
 
     def __init__(self, src_lang, target_lang, info: TranslateInfo = TranslateInfo(),
-                 chatbot_model: Union[str, ModelConfig] = 'gpt-4o-mini', fee_limit: float = 0.8, proxy: str = None,
+                 chatbot_model: Union[str, ModelConfig] = 'gpt-4.1-nano', fee_limit: float = 0.8, proxy: str = None,
                  base_url_config: Optional[dict] = None):
         """
         Initialize the ProofreaderAgent.
@@ -432,7 +435,7 @@ class TranslationEvaluatorAgent(Agent):
 
     TEMPERATURE = 0.95
 
-    def __init__(self, chatbot_model: Union[str, ModelConfig] = 'gpt-4o-mini', fee_limit: float = 0.8,
+    def __init__(self, chatbot_model: Union[str, ModelConfig] = 'gpt-4.1-nano', fee_limit: float = 0.8,
                  proxy: str = None,
                  base_url_config: Optional[dict] = None):
         """
 
@@ -2,6 +2,7 @@
 #  All rights reserved.
 
 import asyncio
+import json
 import os
 import random
 import re
@@ -10,15 +11,14 @@
 from typing import List, Union, Dict, Callable, Optional
 
 import anthropic
-import google.generativeai as genai
 import httpx
 import openai
 from anthropic import AsyncAnthropic
 from anthropic._types import NOT_GIVEN
 from anthropic.types import Message
-from google.generativeai import GenerationConfig
-from google.generativeai.types import AsyncGenerateContentResponse, GenerateContentResponse, HarmCategory, \
-    HarmBlockThreshold
+from google import genai
+from google.genai import types
+from google.genai.types import HarmCategory, HarmBlockThreshold
 from openai import AsyncClient as AsyncGPTClient
 from openai.types.chat import ChatCompletion
 
@@ -57,10 +57,7 @@ def route_chatbot(model: str) -> (type, str):
         chatbot_type, chatbot_model = re.match(r'(.+):(.+)', model).groups()
         chatbot_type, chatbot_model = chatbot_type.strip().lower(), chatbot_model.strip()
 
-        try:
-            Models.get_model(chatbot_model)
-        except ValueError:
-            raise ValueError(f'Invalid model {chatbot_model}.')
+        Models.get_model(chatbot_model)
 
         if chatbot_type == 'openai':
             return GPTBot, chatbot_model
@@ -174,7 +171,7 @@ def __str__(self):
 
 @_register_chatbot
 class GPTBot(ChatBot):
-    def __init__(self, model_name='gpt-4o-mini', temperature=1, top_p=1, retry=8, max_async=16, json_mode=False,
+    def __init__(self, model_name='gpt-4.1-nano', temperature=1, top_p=1, retry=8, max_async=16, json_mode=False,
                  fee_limit=0.05, proxy=None, base_url_config=None, api_key=None):
 
         # clamp temperature to 0-2
@@ -235,7 +232,8 @@ async def _create_achat(self, messages: List[Dict], stop_sequences: Optional[Lis
                     continue
 
                 break
-            except (openai.RateLimitError, openai.APITimeoutError, openai.APIConnectionError, openai.APIError) as e:
+            except (openai.RateLimitError, openai.APITimeoutError, openai.APIConnectionError, openai.APIError,
+                    json.decoder.JSONDecodeError) as e:
                 sleep_time = self._get_sleep_time(e)
                 logger.warning(f'{type(e).__name__}: {e}. Wait {sleep_time}s before retry. Retry num: {i + 1}.')
                 time.sleep(sleep_time)
@@ -251,6 +249,8 @@ def _get_sleep_time(error):
             return random.randint(30, 60)
         elif isinstance(error, openai.APITimeoutError):
             return 3
+        elif isinstance(error, json.decoder.JSONDecodeError):
+            return 1
         else:
             return 15
 
@@ -341,34 +341,54 @@ def _get_sleep_time(self, error):
 
 @_register_chatbot
 class GeminiBot(ChatBot):
-    def __init__(self, model_name='gemini-2.0-flash-exp', temperature=1, top_p=1, retry=8, max_async=16, fee_limit=0.8,
+    def __init__(self, model_name='gemini-2.5-flash-preview-04-17', temperature=1, top_p=1, retry=8, max_async=16,
+                 fee_limit=0.8,
                  proxy=None, base_url_config=None, api_key=None):
         self.temperature = max(0, min(1, temperature))
 
         super().__init__(model_name, temperature, top_p, retry, max_async, fee_limit)
 
         self.model_name = model_name
 
-        genai.configure(api_key=api_key or os.environ['GOOGLE_API_KEY'])
-        self.config = GenerationConfig(temperature=self.temperature, top_p=self.top_p)
+        # genai.configure(api_key=api_key or os.environ['GOOGLE_API_KEY'])
+        self.client = genai.Client(
+            api_key=api_key or os.environ['GOOGLE_API_KEY']
+        )
+
         # Should not block any translation-related content.
-        self.safety_settings = {
-            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
-        }
+        # self.safety_settings = {
+        #     HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+        #     HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+        #     HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+        #     HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
+        # }
+        self.safety_settings = [
+            types.SafetySetting(
+                category=HarmCategory.HARM_CATEGORY_HATE_SPEECH, threshold=HarmBlockThreshold.BLOCK_NONE
+            ),
+            types.SafetySetting(
+                category=HarmCategory.HARM_CATEGORY_HARASSMENT, threshold=HarmBlockThreshold.BLOCK_NONE
+            ),
+            types.SafetySetting(
+                category=HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, threshold=HarmBlockThreshold.BLOCK_NONE
+            ),
+            types.SafetySetting(
+                category=HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, threshold=HarmBlockThreshold.BLOCK_NONE
+            )
+        ]
+        self.config = types.GenerateContentConfig(temperature=self.temperature, top_p=self.top_p,
+                                                  safety_settings=self.safety_settings)
 
         if proxy:
             logger.warning('Google Gemini SDK does not support proxy, try using the system-level proxy if needed.')
 
         if base_url_config:
             logger.warning('Google Gemini SDK does not support changing base_url.')
 
-    def update_fee(self, response: Union[GenerateContentResponse, AsyncGenerateContentResponse]):
+    def update_fee(self, response: types.GenerateContentResponse):
         model_info = self.model_info
         prompt_tokens = response.usage_metadata.prompt_token_count
-        completion_tokens = response.usage_metadata.candidates_token_count
+        completion_tokens = response.usage_metadata.candidates_token_count or 0
 
         self.api_fees[-1] += (prompt_tokens * model_info.input_price +
                               completion_tokens * model_info.output_price) / 1000000
@@ -401,31 +421,41 @@ async def _create_achat(self, messages: List[Dict], stop_sequences: Optional[Lis
             history_messages[i]['parts'] = [{'text': content}]
 
         self.config.stop_sequences = stop_sequences
-        generative_model = genai.GenerativeModel(model_name=self.model_name, generation_config=self.config,
-                                                 safety_settings=self.safety_settings, system_instruction=system_msg)
-        client = genai.ChatSession(generative_model, history=history_messages)
+        # generative_model = genai.GenerativeModel(model_name=self.model_name, generation_config=self.config,
+        #                                          safety_settings=self.safety_settings, system_instruction=system_msg)
+        # client = genai.ChatSession(generative_model, history=history_messages)
+        self.config.system_instruction = system_msg
 
         response = None
         for i in range(self.retry):
-            try:
-                # send_message_async is buggy, so we use send_message instead as a workaround
-                response = client.send_message(user_msg, safety_settings=self.safety_settings)
-                self.update_fee(response)
-                if not output_checker(user_msg, response.text):
-                    logger.warning(f'Invalid response format. Retry num: {i + 1}.')
-                    continue
-
-                if not response._done:
-                    logger.warning(f'Failed to get a complete response. Retry num: {i + 1}.')
-                    continue
-
-                break
-            except (genai.types.BrokenResponseError, genai.types.IncompleteIterationError,
-                    genai.types.StopCandidateException) as e:
-                logger.warning(f'{type(e).__name__}: {e}. Retry num: {i + 1}.')
-            except genai.types.generation_types.BlockedPromptException as e:
-                logger.warning(f'Prompt blocked: {e}.\n Retry in 30s.')
-                time.sleep(30)
+            # try:
+            # send_message_async is buggy, so we use send_message instead as a workaround
+            # response = client.send_message(user_msg, safety_settings=self.safety_settings)
+            response = await self.client.aio.models.generate_content(
+                model=self.model_name,
+                contents=user_msg,
+                config=self.config,
+            )
+            self.update_fee(response)
+            if not response.text:
+                logger.warning(f'Get None response. Wait 15s. Retry num: {i + 1}.')
+                time.sleep(15)
+                continue
+
+            if not output_checker(user_msg, response.text):
+                logger.warning(f'Invalid response format. Retry num: {i + 1}.')
+                continue
+
+            if not response:
+                logger.warning(f'Failed to get a complete response. Retry num: {i + 1}.')
+                continue
+
+            break
+            # except Exception as e:
+            #     logger.warning(f'{type(e).__name__}: {e}. Retry num: {i + 1}.')
+            #     time.sleep(3)
+            # except genai.types.generation_types.BlockedPromptException as e:
+            #     logger.warning(f'Prompt blocked: {e}.\n Retry in 30s.')
 
         if not response:
             raise ChatBotException('Failed to create a chat.')
 
@@ -1,14 +1,15 @@
-#  Copyright (C) 2024. Hao Zheng
+#  Copyright (C) 2025. Hao Zheng
 #  All rights reserved.
 import re
-from typing import Optional, Union
+from typing import Optional, Union, List
 
 from pydantic import BaseModel
 
 from openlrc import ModelConfig
 
 
 class TranslationContext(BaseModel):
+    previous_summaries: Optional[List[str]] = None
     summary: Optional[str] = ''
     scene: Optional[str] = ''
     model: Optional[Union[str, ModelConfig]] = None
 
@@ -1,4 +1,4 @@
-#  Copyright (C) 2024. Hao Zheng
+#  Copyright (C) 2025. Hao Zheng
 #  All rights reserved.
 import abc
 from typing import Union
@@ -26,7 +26,7 @@ class LLMTranslationEvaluator(TranslationEvaluator):
     Evaluate the translated texts using large language models.
     """
 
-    def __init__(self, chatbot_model: Union[str, ModelConfig] = 'gpt-4o-mini'):
+    def __init__(self, chatbot_model: Union[str, ModelConfig] = 'gpt-4.1-nano'):
         self.agenet = TranslationEvaluatorAgent(chatbot_model=chatbot_model)
 
     def evaluate(self, src_texts, target_texts, src_lang=None, target_lang=None):