Fix: Expand the cache cleanup triggers by adding DROP and ROLLBACK to the list of triggering commands

timm4205 · bsharifi · commit ea7df8aec96a · 2025-05-06T09:34:34.000-07:00
diff --git a/redshift_connector/config.py b/redshift_connector/config.py
@@ -28,6 +28,7 @@ def get_name(cls, i: int) -> str:
 
 
 DEFAULT_PROTOCOL_VERSION: int = ClientProtocolVersion.BINARY.value
+DEFAULT_MAX_PREPARED_STATEMENTS: int = 1000
 
 
 class DbApiParamstyle(Enum):
diff --git a/redshift_connector/core.py b/redshift_connector/core.py
@@ -19,6 +19,7 @@
 
 from redshift_connector.config import (
     DEFAULT_PROTOCOL_VERSION,
+    DEFAULT_MAX_PREPARED_STATEMENTS,
     ClientProtocolVersion,
     DbApiParamstyle,
     _client_encoding,
@@ -421,7 +422,7 @@ def __init__(
         ssl: bool = True,
         sslmode: str = "verify-ca",
         timeout: typing.Optional[int] = None,
-        max_prepared_statements: int = 1000,
+        max_prepared_statements: int = DEFAULT_MAX_PREPARED_STATEMENTS,
         tcp_keepalive: typing.Optional[bool] = True,
         application_name: typing.Optional[str] = None,
         replication: typing.Optional[str] = None,
@@ -500,7 +501,7 @@ def __init__(
         self.notifications: deque = deque(maxlen=100)
         self.notices: deque = deque(maxlen=100)
         self.parameter_statuses: deque = deque(maxlen=100)
-        self.max_prepared_statements: int = int(max_prepared_statements)
+        self.max_prepared_statements: int = int(self.get_max_prepared_statement(max_prepared_statements))
         self._run_cursor: Cursor = Cursor(self, paramstyle=DbApiParamstyle.NAMED.value)
         self._client_protocol_version: int = client_protocol_version
         self._database = database
@@ -1845,7 +1846,8 @@ def execute(self: "Connection", cursor: Cursor, operation: str, vals) -> None:
             # consist of "redshift_connector", statement, process id and statement number.
             # e.g redshift_connector_statement_11432_2
             statement_name: str = "_".join(("redshift_connector", "statement", str(pid), str(statement_num)))
-            statement_name_bin: bytes = statement_name.encode("ascii") + NULL_BYTE
+            statement_name_bin: bytes = self.get_statement_name_bin(statement_name)
+
             # row_desc: list that used to store metadata of rows from DB
             # param_funcs: type transform function
             ps = {
@@ -1942,12 +1944,12 @@ def execute(self: "Connection", cursor: Cursor, operation: str, vals) -> None:
 
             ps["bind_2"] = h_pack(len(output_fc)) + pack("!" + "h" * len(output_fc), *output_fc)
 
-            if len(cache["ps"]) > self.max_prepared_statements:
+            if len(cache["ps"]) >= self.max_prepared_statements:
                 for p in cache["ps"].values():
                     self.close_prepared_statement(p["statement_name_bin"])
                 cache["ps"].clear()
-
-            cache["ps"][key] = ps
+            if self.max_prepared_statements > 0:
+                cache["ps"][key] = ps
 
         cursor._cached_rows.clear()
         cursor._row_count = -1
@@ -2118,7 +2120,7 @@ def handle_COMMAND_COMPLETE(self: "Connection", data: bytes, cursor: Cursor) ->
             # cursor object
             cursor._redshift_row_count = len(cursor._cached_rows)
 
-        if command in (b"ALTER", b"CREATE"):
+        if command in (b"ALTER", b"CREATE", b"DROP", b"ROLLBACK"):
             for scache in self._caches.values():
                 for pcache in scache.values():
                     for ps in pcache["ps"].values():
@@ -2638,3 +2640,14 @@ def set_idc_plugins_params(
 
         if idc_client_display_name:
             init_params["idc_client_display_name"] = idc_client_display_name
+
+    def get_statement_name_bin(self, statement_name: str) -> bytes:
+        # When max_prepared_statements is 0, we use an empty statement name. This creates an unnamed
+        # prepared statement that lasts only until the next Parse statement, avoiding "statement already exists" errors
+        return ("" if self.max_prepared_statements == 0 else statement_name).encode("ascii") + NULL_BYTE
+
+    def get_max_prepared_statement(self, max_prepared_statements: int) -> int:
+        if max_prepared_statements < 0:
+            _logger.error("Parameter max_prepared_statements must >= 0. Using default value %d", DEFAULT_MAX_PREPARED_STATEMENTS)
+            return DEFAULT_MAX_PREPARED_STATEMENTS
+        return max_prepared_statements
diff --git a/test/integration/test_query.py b/test/integration/test_query.py
@@ -370,19 +370,144 @@ def test_merge_read(con) -> None:
 
 
 def test_handle_COMMAND_COMPLETE_closed_ps(con, mocker) -> None:
+    """
+    Test the handling of prepared statement cache cleanup for different SQL commands.
+    This test verifies that DDL commands trigger cache cleanup while DML commands preserve the cache.
+
+    The test executes the following sequence:
+    1. DROP TABLE IF EXISTS t1 (should clear cache)
+    2. CREATE TABLE t1 (should clear cache)
+    3. ALTER TABLE t1 (should clear cache)
+    4. INSERT INTO t1 (should preserve cache)
+    5. SELECT FROM t1 (should preserve cache)
+    6. ROLLBACK (should clear cache)
+    7. CREATE TABLE AS SELECT (should preserve cache)
+    8. SELECT FROM t1 (should preserve cache)
+    9. DROP TABLE IF EXISTS t1 (should clear cache)
+
+    Args:
+        con: Database connection fixture
+        mocker: pytest-mock fixture for creating spies
+    """
     with con.cursor() as cursor:
+        # Create spy to track calls to close_prepared_statement
+        spy = mocker.spy(con, "close_prepared_statement")
+
         cursor.execute("drop table if exists t1")
+        assert spy.called
+        # Two calls expected: one for BEGIN transaction, one for DROP TABLE
+        assert spy.call_count == 2
+        spy.reset_mock()
 
-        spy = mocker.spy(con, "close_prepared_statement")
         cursor.execute("create table t1 (a int primary key)")
+        assert spy.called
+        # One call expected for CREATE TABLE
+        assert spy.call_count == 1
+        spy.reset_mock()
 
-        assert len(con._caches) == 1
-        cache_iter = next(iter(con._caches.values()))  # get first transaction
-        assert len(next(iter(cache_iter.values()))["statement"]) == 3  # should be 3 ps in this transaction
-        # begin transaction, drop table t1, create table t1
+        cursor.execute("alter table t1 rename column a to b;")
         assert spy.called
+        # One call expected for ALTER TABLE
+        assert spy.call_count == 1
+        spy.reset_mock()
+
+        cursor.execute("insert into t1 values(1)")
+        assert spy.call_count == 0
+        spy.reset_mock()
+
+        cursor.execute("select * from t1")
+        assert spy.call_count == 0
+        spy.reset_mock()
+
+        cursor.execute("rollback")
+        assert spy.called
+        # Three calls expected: INSERT, SELECT, and ROLLBACK statements
         assert spy.call_count == 3
+        spy.reset_mock()
+
+        cursor.execute("create table t1 as (select 1)")
+        assert spy.call_count == 0
+        spy.reset_mock()
+
+        cursor.execute("select * from t1")
+        assert spy.call_count == 0
+        spy.reset_mock()
+
+        cursor.execute("drop table if exists t1")
+        assert spy.called
+        # Four calls expected: BEGIN, CREATE TABLE AS, SELECT, and DROP
+        assert spy.call_count == 4
+        spy.reset_mock()
+
+        # Ensure there's exactly one process in the cache
+        assert len(con._caches) == 1
+        # get cache for current process
+        cache_iter = next(iter(con._caches.values()))
+
+        # Verify the number of prepared statements in this transaction
+        # Should be 8 statements total from all operations
+        assert len(next(iter(cache_iter.values()))["statement"]) == 8  # should be 8 ps in this process
+
+@pytest.mark.parametrize("test_case", [
+    {
+        "name": "max_prepared_statements_zero",
+        "max_prepared_statements": 0,
+        "queries": ["SELECT 1", "SELECT 2"],
+        "expected_close_calls": 0,
+        "expected_cache_size": 0
+    },
+    {
+        "name": "max_prepared_statements_default",
+        "max_prepared_statements": 1000,
+        "queries": ["SELECT 1", "SELECT 2"],
+        "expected_close_calls": 0,
+        "expected_cache_size": 3
+    },
+    {
+        "name": "max_prepared_statements_limit_1",
+        "max_prepared_statements": 2,
+        "queries": ["SELECT 1", "SELECT 2", "SELECT 3"],
+        "expected_close_calls": 2,
+        "expected_cache_size": 2
+    },
+{
+        "name": "max_prepared_statements_limit_2",
+        "max_prepared_statements": 2,
+        "queries": ["SELECT 1", "SELECT 2"],
+        "expected_close_calls": 2,
+        "expected_cache_size": 1
+    }
+])
+def test_max_prepared_statement(con, mocker, test_case) -> None:
+    """
+    Test the prepared statement cache management functionality.
+    This test verifies the behavior of the cache cleanup mechanism when:
+    1. max_prepared_statements = 0: No statement will be cached
+    2. max_prepared_statements > 0: Statements are cached up to the limit
+
+    :param con: Connection object
+    :param mocker: pytest mocker fixture
+    :param test_case: Dictionary containing test parameters:
+    :return: None
+    """
+    con.max_prepared_statements = test_case["max_prepared_statements"]
+    with con.cursor() as cursor:
+        # Create spy to track calls to close_prepared_statement
+        spy = mocker.spy(con, "close_prepared_statement")
+
+        for query in test_case["queries"]:
+            cursor.execute(query)
+
+        # Ensure there's exactly one process in the cache
+        assert len(con._caches) == 1
+        # Get cache for current process
+        cache_iter = next(iter(con._caches.values()))
+
+        # Verify close_prepared_statement was called the expected number of times
+        assert spy.call_count == test_case["expected_close_calls"]
 
+        # Verify the final cache size matches expected size
+        assert len(next(iter(cache_iter.values()))["ps"]) == test_case["expected_cache_size"]
 
 @pytest.mark.parametrize("_input", ["NO_SCHEMA_UNIVERSAL_QUERY", "EXTERNAL_SCHEMA_QUERY", "LOCAL_SCHEMA_QUERY"])
 def test___get_table_filter_clause_return_empty_result(con, _input) -> None:
diff --git a/test/unit/test_core.py b/test/unit/test_core.py

Original file line number	Diff line number	Diff line change
`@@ -28,6 +28,7 @@ def get_name(cls, i: int) -> str:`
`28`	`28`
`29`	`29`
`30`	`30`	`DEFAULT_PROTOCOL_VERSION: int = ClientProtocolVersion.BINARY.value`
	`31`	`+DEFAULT_MAX_PREPARED_STATEMENTS: int = 1000`
`31`	`32`
`32`	`33`
`33`	`34`	`class DbApiParamstyle(Enum):`