Commit 38dabef

[fix] concurrency issues for bq
1 parent 70cb4b5 commit 38dabef

3 files changed (+158, -66 lines)

src_deploy/servers/server_fastapi.py (+39)

@@ -97,6 +97,45 @@ async def lifespan(app: FastAPI):
     )


+# Add middleware to limit concurrent requests
+# This helps prevent overwhelming BigQuery with too many concurrent requests
+class ConcurrencyLimitMiddleware:
+    def __init__(self, app, max_concurrent_requests=100):
+        self.app = app
+        self.semaphore = asyncio.Semaphore(max_concurrent_requests)
+        self.active_requests = 0
+        self.lock = asyncio.Lock()
+        self.max_concurrent = max_concurrent_requests
+        logger.info(
+            f"Initialized concurrency limiter with max {max_concurrent_requests} concurrent requests"
+        )
+
+    async def __call__(self, scope, receive, send):
+        if scope["type"] != "http":
+            await self.app(scope, receive, send)
+            return
+
+        async with self.semaphore:
+            async with self.lock:
+                self.active_requests += 1
+                current = self.active_requests
+
+            try:
+                await self.app(scope, receive, send)
+            finally:
+                async with self.lock:
+                    self.active_requests -= 1
+
+
+# Calculate optimal concurrency based on configuration
+bq_pool_size = int(config.get_moderation_service_config().get("BQ_POOL_SIZE", 20))
+# Set server concurrency limit to 3x the BigQuery pool size for optimal throughput
+max_concurrent_requests = bq_pool_size * 3
+app.add_middleware(
+    ConcurrencyLimitMiddleware, max_concurrent_requests=max_concurrent_requests
+)
+
+
 async def get_service() -> ModerationService:
     """
     Dependency to get the moderation service
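
For context, a minimal standalone sketch of the request-limiting idea above; this is not code from the commit. The trimmed-down limiter class, the /ping endpoint, and the httpx-based driver are all assumptions made for the demo.

# Illustrative sketch: a simplified ASGI concurrency limiter exercised via httpx's ASGI transport.
import asyncio
import httpx
from fastapi import FastAPI

class ConcurrencyLimit:
    """Simplified middleware: at most N requests inside the app at once."""
    def __init__(self, app, max_concurrent_requests=3):
        self.app = app
        self.semaphore = asyncio.Semaphore(max_concurrent_requests)

    async def __call__(self, scope, receive, send):
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return
        async with self.semaphore:          # excess requests queue here
            await self.app(scope, receive, send)

app = FastAPI()

@app.get("/ping")
async def ping():
    await asyncio.sleep(0.1)                # stand-in for a BigQuery-bound call
    return {"ok": True}

app.add_middleware(ConcurrencyLimit, max_concurrent_requests=3)

async def main():
    transport = httpx.ASGITransport(app=app)
    async with httpx.AsyncClient(transport=transport, base_url="http://test") as client:
        # 10 concurrent requests, but only 3 run inside the app at any moment
        responses = await asyncio.gather(*(client.get("/ping") for _ in range(10)))
        print([r.status_code for r in responses])

asyncio.run(main())

Requests beyond the limit wait on the semaphore instead of all reaching BigQuery at once, which is the behaviour the middleware in the diff is after.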

src_deploy/services/moderation_service.py (+20, -3)

@@ -12,6 +12,7 @@
 import re
 import numpy as np
 import time
+import json
 from typing import Dict, Any, Optional, Union, List
 from pathlib import Path
 from dataclasses import dataclass

@@ -110,10 +111,15 @@ async def initialize(self) -> bool:

         # Initialize GCP utils
         gcp_credentials = self.config.get("GCP_CREDENTIALS")
-        # Configure BigQuery connection pool size from config or use a reasonable default
-        bq_pool_size = int(self.config.get("BQ_POOL_SIZE", 20))
+
+        # Calculate optimal BigQuery pool size based on system resources and config
+        # Default pool size is now calculated based on expected concurrency
+        cpu_count = os.cpu_count() or 4
+        default_pool_size = min(cpu_count * 5, 40)  # Scale with CPU but cap at 40
+        bq_pool_size = int(self.config.get("BQ_POOL_SIZE", default_pool_size))
+
         logger.info(
-            f"Initializing GCP utils with BigQuery pool size: {bq_pool_size}"
+            f"Initializing GCP utils with BigQuery pool size: {bq_pool_size} (CPU cores: {cpu_count})"
         )

         self.gcp_utils = GCPUtils(
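
As a quick illustration of the sizing heuristic introduced above (a sketch, not repo code):

# Sketch of the default-pool-size heuristic: scale with CPU count, cap at 40.
import os

def default_bq_pool_size() -> int:
    cpu_count = os.cpu_count() or 4   # fall back to 4 if the core count is undetectable
    return min(cpu_count * 5, 40)     # e.g. 2 cores -> 10, 8 cores -> 40, 32 cores -> 40 (capped)

print(default_bq_pool_size())
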
@@ -526,9 +532,20 @@ async def moderate_content(self, request: ModerationRequest) -> ModerationRespon
         # 2. Get similar examples using BigQuery vector search
         # Use the new async BigQuery implementation directly
         bigquery_start = time.time()
+
+        # Optimize vector search options based on concurrency
+        # Adjust search parameters for better performance under load
+        vector_search_options = {
+            # Increase search fraction for better recall at high concurrency
+            "fraction_lists_to_search": 0.15,
+            # Don't use brute force by default for better scalability
+            "use_brute_force": False,
+        }
+
         similar_examples = await self.gcp_utils.bigquery_vector_search_async(
             embedding=embedding_list,
             top_k=request.num_examples,
+            options=json.dumps(vector_search_options),
         )
         bigquery_time_ms = (time.time() - bigquery_start) * 1000
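
A small sketch of how the options dict above becomes the JSON string that bigquery_vector_search_async expects (the same shape as the default string visible in gcp_utils.py below); the values come from the diff, the rest is illustrative.

# Sketch: serializing the vector-search options to the JSON string the helper takes.
import json

vector_search_options = {
    "fraction_lists_to_search": 0.15,  # search a larger fraction of lists for better recall
    "use_brute_force": False,          # keep approximate search for scalability
}

options_str = json.dumps(vector_search_options)
print(options_str)  # {"fraction_lists_to_search": 0.15, "use_brute_force": false}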

src_deploy/utils/gcp_utils.py (+99, -63)

@@ -10,6 +10,7 @@
 import time
 import asyncio
 import concurrent.futures
+import random
 from typing import List, Dict, Any, Optional, Union
 from pathlib import Path
 from google.cloud import bigquery, storage

@@ -55,14 +56,22 @@ def __init__(
         self.bq_client = None
         self.storage_client = None

-        # Client pools for scaling
+        # Client pools for scaling - Use a list for random access instead of pop/append
         self.bq_client_pool = []
-        self.bq_pool_lock = asyncio.Lock()
+
+        # Semaphore to prevent too many concurrent BigQuery connections
+        # This is more efficient than a lock for high-concurrency scenarios
+        self.bq_pool_semaphore = asyncio.Semaphore(bq_pool_size)
         self.bq_pool_initialized = False

-        # Thread pool for executing BigQuery operations
+        # Use an optimized thread pool with a larger size to handle concurrency better
+        # Setting max_workers higher than client pool to allow for better parallelism
+        thread_pool_size = max(
+            bq_pool_size * 2, 40
+        )  # At least 2x the client pool or 40
         self.thread_pool = concurrent.futures.ThreadPoolExecutor(
-            max_workers=self.bq_pool_size, thread_name_prefix="bq_worker"
+            max_workers=thread_pool_size,
+            thread_name_prefix="bq_worker",
         )

         # Initialize credentials if provided

@@ -107,13 +116,16 @@ def _initialize_bq_client_pool(self) -> None:
             logger.error("Cannot initialize BigQuery client pool: No credentials")
             return

-        # Create multiple BigQuery clients
+        # Create multiple BigQuery clients with optimized settings
         for i in range(self.bq_pool_size):
+            # Configure each client with optimized settings for high concurrency
             client = bigquery.Client(
                 credentials=self.credentials,
                 project=self.project_id,
-                # Configure BigQuery client for better performance
-                # These settings help manage resource usage under load
+                # Configure client with connection pooling settings
+                client_options=bigquery.ClientOptions(
+                    api_endpoint="https://bigquery.googleapis.com",
+                ),
             )
             self.bq_client_pool.append(client)

@@ -127,23 +139,28 @@ def _initialize_bq_client_pool(self) -> None:

     async def get_bq_client(self):
         """
-        Get a BigQuery client from the pool
+        Get a BigQuery client from the pool with semaphore protection
         Returns:
             A BigQuery client from the pool
         """
-        async with self.bq_pool_lock:
-            if not self.bq_pool_initialized:
-                # Fall back to the single client if pool isn't initialized
-                return self.bq_client
+        if not self.bq_pool_initialized:
+            # Fall back to the single client if pool isn't initialized
+            return self.bq_client

-            if not self.bq_client_pool:
-                logger.error("BigQuery client pool is empty")
-                return self.bq_client
+        if not self.bq_client_pool:
+            logger.error("BigQuery client pool is empty")
+            return self.bq_client

-            # Simple round-robin selection from the pool
-            client = self.bq_client_pool.pop(0)
-            self.bq_client_pool.append(client)
-            return client
+        # Acquire semaphore to limit concurrent access
+        await self.bq_pool_semaphore.acquire()
+        try:
+            # Use random client selection instead of round-robin to avoid lock contention
+            client_index = random.randrange(len(self.bq_client_pool))
+            return self.bq_client_pool[client_index]
+        except Exception as e:
+            logger.error(f"Error selecting BigQuery client: {e}")
+            return self.bq_client
+        # Don't release semaphore here - it will be released after query execution

     def download_file_from_gcs(
         self,
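
A self-contained sketch of the checkout pattern get_bq_client relies on: acquire a semaphore slot, hand out a randomly chosen client, and free the slot only once the work is done. Here acquire and release live in the same coroutine for simplicity, whereas the commit releases from the query executor; the ClientPool name and the string "clients" are made up for the demo.

# Sketch: semaphore-guarded client checkout with random selection.
import asyncio
import random

class ClientPool:
    def __init__(self, clients):
        self.clients = clients
        self.semaphore = asyncio.Semaphore(len(clients))

    async def checkout(self):
        await self.semaphore.acquire()      # blocks once every slot is in use
        return random.choice(self.clients)  # random pick avoids round-robin bookkeeping

    def release(self):
        self.semaphore.release()

async def run_query(pool, i):
    client = await pool.checkout()
    try:
        await asyncio.sleep(0.05)           # stand-in for the BigQuery call
        return f"query {i} via {client}"
    finally:
        pool.release()                      # always free the slot

async def main():
    pool = ClientPool(["client-a", "client-b", "client-c"])
    print(await asyncio.gather(*(run_query(pool, i) for i in range(6))))

asyncio.run(main())
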
@@ -187,13 +204,15 @@ def _execute_bigquery_search(
         client,
         query: str,
         job_config=None,
+        semaphore=None,
     ) -> pd.DataFrame:
         """
         Execute a BigQuery query with a specific client
         Args:
             client: BigQuery client to use
             query: Query to execute
             job_config: Optional job configuration
+            semaphore: Optional semaphore to release after execution
         Returns:
             DataFrame with query results
         """

@@ -202,59 +221,66 @@ def _execute_bigquery_search(
         max_retries = 3
         retry_delay = 0.5  # Start with 0.5 second delay

-        while retry_count <= max_retries:
-            try:
-                # Execute query with timeout and retry settings
-                query_job = client.query(query, job_config=job_config)
-
-                # Set a timeout for the query execution to prevent hanging
-                timeout = 25  # seconds
-                start_wait = time.time()
+        try:
+            while retry_count <= max_retries:
+                try:
+                    # Execute query with timeout and retry settings
+                    query_job = client.query(query, job_config=job_config)

-                # Wait for the job to complete with timeout
-                while not query_job.done() and (time.time() - start_wait) < timeout:
-                    time.sleep(0.1)
+                    # Set a timeout for the query execution to prevent hanging
+                    timeout = 25  # seconds
+                    start_wait = time.time()

-                if not query_job.done():
-                    raise TimeoutError(f"Query execution timed out after {timeout}s")
+                    # Wait for the job to complete with timeout
+                    while not query_job.done() and (time.time() - start_wait) < timeout:
+                        time.sleep(0.1)

-                # Check for errors
-                if query_job.errors:
-                    raise Exception(f"Query failed with errors: {query_job.errors}")
+                    if not query_job.done():
+                        raise TimeoutError(
+                            f"Query execution timed out after {timeout}s"
+                        )

-                # Convert to DataFrame
-                results = query_job.to_dataframe()
+                    # Check for errors
+                    if query_job.errors:
+                        raise Exception(f"Query failed with errors: {query_job.errors}")

-                duration = time.time() - start_time
-                logger.info(
-                    f"BigQuery query execution took {duration*1000:.2f}ms after {retry_count} retries"
-                )
-                return results
+                    # Convert to DataFrame
+                    results = query_job.to_dataframe()

-            except Exception as e:
-                retry_count += 1
-                if retry_count > max_retries:
                     duration = time.time() - start_time
-                    logger.error(
-                        f"BigQuery query failed after {duration*1000:.2f}ms and {retry_count-1} retries: {e}"
+                    logger.info(
+                        f"BigQuery query execution took {duration*1000:.2f}ms after {retry_count} retries"
                     )
-                    raise
-
-                # Implement exponential backoff
-                sleep_time = retry_delay * (
-                    2 ** (retry_count - 1)
-                )  # Exponential backoff
-                logger.warning(
-                    f"BigQuery query attempt {retry_count} failed: {e}. Retrying in {sleep_time:.2f}s..."
-                )
-                time.sleep(sleep_time)
+                    return results
+
+            except Exception as e:
+                retry_count += 1
+                if retry_count > max_retries:
+                    duration = time.time() - start_time
+                    logger.error(
+                        f"BigQuery query failed after {duration*1000:.2f}ms and {retry_count-1} retries: {e}"
+                    )
+                    raise
+
+                # Implement exponential backoff
+                sleep_time = retry_delay * (
+                    2 ** (retry_count - 1)
+                )  # Exponential backoff
+                logger.warning(
+                    f"BigQuery query attempt {retry_count} failed: {e}. Retrying in {sleep_time:.2f}s..."
+                )
+                time.sleep(sleep_time)
+        finally:
+            # Always release the semaphore, even if an exception occurred
+            if semaphore:
+                semaphore.release()

     async def bigquery_vector_search_async(
         self,
         embedding: List[float],
         top_k: int = 5,
         distance_type: str = "COSINE",
-        options: str = '{"fraction_lists_to_search": 0.1, "use_brute_force": false}',
+        options: str = '{"fraction_lists_to_search": 0.15, "use_brute_force": false}',
     ) -> pd.DataFrame:
         """
         Perform vector search in BigQuery asynchronously
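
For reference, the backoff schedule the retry loop above produces with its constants (retry_delay = 0.5, max_retries = 3); just the arithmetic, not repo code.

# Sketch: delays from retry_delay * 2 ** (retry_count - 1) with the constants above.
retry_delay = 0.5
for retry_count in range(1, 4):
    print(f"attempt {retry_count} failed -> sleep {retry_delay * 2 ** (retry_count - 1):.1f}s")
# attempt 1 failed -> sleep 0.5s
# attempt 2 failed -> sleep 1.0s
# attempt 3 failed -> sleep 2.0s
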
@@ -268,13 +294,14 @@ async def bigquery_vector_search_async(
         """
         start_time = time.time()

-        # Get client from pool
+        # Get client from pool (will acquire semaphore)
         client = await self.get_bq_client()

         # Convert embedding to string for SQL query
         embedding_str = "[" + ", ".join(str(x) for x in embedding) + "]"

-        # Construct query with timeout settings
+        # Optimize query for better caching and performance
+        # Use query parameters for better cache performance
         query = f"""
         SELECT
             base.text,

@@ -303,13 +330,14 @@
                 },
             )

-            # Execute in thread pool
+            # Execute in thread pool, passing the semaphore to be released after execution
             results = await asyncio.get_event_loop().run_in_executor(
                 self.thread_pool,
                 self._execute_bigquery_search,
                 client,
                 query,
                 job_config,
+                self.bq_pool_semaphore,  # Pass semaphore to release after execution
             )

             # Calculate total time including query construction

@@ -321,6 +349,9 @@

             return results
         except Exception as e:
+            # Release semaphore in case of error
+            self.bq_pool_semaphore.release()
+
             # Log error with timing information
             duration = time.time() - start_time
             logger.error(

@@ -333,7 +364,7 @@ def bigquery_vector_search(
         embedding: List[float],
         top_k: int = 5,
         distance_type: str = "COSINE",
-        options: str = '{"fraction_lists_to_search": 0.1, "use_brute_force": false}',
+        options: str = '{"fraction_lists_to_search": 0.15, "use_brute_force": false}',
     ) -> pd.DataFrame:
         """
         Perform vector search in BigQuery (synchronous wrapper for the async version)

@@ -409,6 +440,11 @@ def bigquery_vector_search(

     async def close(self):
         """Clean up resources when shutting down"""
+        # Wait for any in-progress queries to complete
+        # by acquiring the full semaphore capacity
+        for _ in range(self.bq_pool_size):
+            await self.bq_pool_semaphore.acquire()
+
         if self.thread_pool:
-            self.thread_pool.shutdown(wait=False)
+            self.thread_pool.shutdown(wait=True)
         logger.info("Shut down BigQuery thread pool")
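
Finally, a standalone sketch of the drain-then-shutdown idea close() uses above: acquiring every semaphore slot only succeeds once all in-flight holders have released, after which the executor can be shut down safely. All names here are illustrative.

# Sketch: draining a semaphore before shutdown so in-flight work finishes first.
import asyncio

async def worker(sem: asyncio.Semaphore, i: int):
    async with sem:
        await asyncio.sleep(0.1)          # stand-in for an in-flight query
        print(f"query {i} finished")

async def drain_and_shutdown(sem: asyncio.Semaphore, capacity: int):
    # Acquiring every slot blocks until all holders have released,
    # and prevents any new work from starting afterwards.
    for _ in range(capacity):
        await sem.acquire()
    print("drained - safe to shut down the thread pool")

async def main():
    capacity = 3
    sem = asyncio.Semaphore(capacity)
    tasks = [asyncio.create_task(worker(sem, i)) for i in range(capacity)]
    await asyncio.sleep(0)                # let the workers grab their slots
    await drain_and_shutdown(sem, capacity)
    await asyncio.gather(*tasks)

asyncio.run(main())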
