Skip to content

Commit 13f427b

Browse files
waahm7TingDaoKgraebm
authored
Connection Manager Acquisition Timeout (#479)
Co-authored-by: Dengke Tang <[email protected]> Co-authored-by: Michael Graeb <[email protected]>
1 parent a2fb16c commit 13f427b

File tree

6 files changed

+206
-47
lines changed

6 files changed

+206
-47
lines changed

include/aws/http/connection_manager.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,13 @@ struct aws_http_connection_manager_options {
125125
*/
126126
uint64_t max_connection_idle_in_milliseconds;
127127

128+
/**
129+
* If set to a non-zero value, aws_http_connection_manager_acquire_connection() calls
130+
* will give up after waiting this many milliseconds for a connection from the pool,
131+
* failing with error AWS_ERROR_HTTP_CONNECTION_MANAGER_ACQUISITION_TIMEOUT.
132+
*/
133+
uint64_t connection_acquisition_timeout_ms;
134+
128135
/**
129136
* THIS IS AN EXPERIMENTAL AND UNSTABLE API
130137
* (Optional)

include/aws/http/http.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ enum aws_http_errors {
6060
AWS_ERROR_HTTP_MANUAL_WRITE_NOT_ENABLED,
6161
AWS_ERROR_HTTP_MANUAL_WRITE_HAS_COMPLETED,
6262
AWS_ERROR_HTTP_RESPONSE_FIRST_BYTE_TIMEOUT,
63+
AWS_ERROR_HTTP_CONNECTION_MANAGER_ACQUISITION_TIMEOUT,
6364

6465
AWS_ERROR_HTTP_END_RANGE = AWS_ERROR_ENUM_END_RANGE(AWS_C_HTTP_PACKAGE_ID)
6566
};

source/connection_manager.c

Lines changed: 144 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,10 @@ struct aws_http_connection_manager {
193193
struct aws_linked_list idle_connections;
194194

195195
/*
196-
* The set of all incomplete connection acquisition requests
196+
* The set of all incomplete connection acquisition requests.
197+
* This must be a FIFO list. When connections are requested by the user, they are added to the back. When we need to
198+
* complete the acquisition, we pop from the front. In this way, the list is always sorted from the oldest (in terms
199+
* of timeout timestamp) to the newest, and we can cull it the same way as idle_connections.
197200
*/
198201
struct aws_linked_list pending_acquisitions;
199202

@@ -286,6 +289,8 @@ struct aws_http_connection_manager {
286289
*/
287290
uint64_t max_connection_idle_in_milliseconds;
288291

292+
uint64_t connection_acquisition_timeout_ms;
293+
289294
/*
290295
* Task to cull idle connections. This task is run periodically on the cull_event_loop if a non-zero
291296
* culling time interval is specified.
@@ -392,6 +397,7 @@ struct aws_http_connection_acquisition {
392397
struct aws_http_connection *connection;
393398
int error_code;
394399
struct aws_channel_task acquisition_task;
400+
uint64_t timeout_timestamp;
395401
};
396402

397403
static void s_connection_acquisition_task(
@@ -758,26 +764,17 @@ static void s_final_destruction_task(struct aws_task *task, void *arg, enum aws_
758764
}
759765

760766
static void s_cull_task(struct aws_task *task, void *arg, enum aws_task_status status);
761-
static void s_schedule_connection_culling(struct aws_http_connection_manager *manager) {
762-
if (manager->max_connection_idle_in_milliseconds == 0) {
763-
return;
764-
}
765-
766-
if (manager->cull_task == NULL) {
767-
manager->cull_task = aws_mem_calloc(manager->allocator, 1, sizeof(struct aws_task));
768-
aws_task_init(manager->cull_task, s_cull_task, manager, "cull_idle_connections");
769-
/* For the task to properly run and cancel, we need to keep manager alive */
770-
aws_ref_count_acquire(&manager->internal_ref_count);
771-
}
772767

773-
if (manager->cull_event_loop == NULL) {
774-
manager->cull_event_loop = aws_event_loop_group_get_next_loop(manager->bootstrap->event_loop_group);
768+
/*
769+
* Calculates the next timestamp at which the idle connections should be culled. Manager lock must be held somewhere in the call
770+
* stack. Returns UINT64_MAX if max_connection_idle_in_milliseconds is not set.
771+
*/
772+
static uint64_t s_calculate_idle_connection_cull_task_time_synced(struct aws_http_connection_manager *manager) {
773+
if (manager->max_connection_idle_in_milliseconds == 0) {
774+
return UINT64_MAX;
775775
}
776-
AWS_FATAL_ASSERT(manager->cull_event_loop != NULL);
777-
778776
uint64_t cull_task_time = 0;
779777

780-
aws_mutex_lock(&manager->lock);
781778
const struct aws_linked_list_node *end = aws_linked_list_end(&manager->idle_connections);
782779
struct aws_linked_list_node *oldest_node = aws_linked_list_begin(&manager->idle_connections);
783780
if (oldest_node != end) {
@@ -799,11 +796,68 @@ static void s_schedule_connection_culling(struct aws_http_connection_manager *ma
799796
now + aws_timestamp_convert(
800797
manager->max_connection_idle_in_milliseconds, AWS_TIMESTAMP_MILLIS, AWS_TIMESTAMP_NANOS, NULL);
801798
}
802-
aws_mutex_unlock(&manager->lock);
803799

804-
aws_event_loop_schedule_task_future(manager->cull_event_loop, manager->cull_task, cull_task_time);
800+
return cull_task_time;
801+
}
802+
803+
/*
804+
* Calculates the next timestamp at which the pending acquisitions should be culled. Manager lock must be held
805+
* somewhere in the call stack. Returns UINT64_MAX if connection_acquisition_timeout_ms is not set.
806+
*
* When at least one acquisition is pending, the result is the timeout_timestamp of the node at the front of
* pending_acquisitions. When the list is empty, the result is the current high-res clock reading plus
* connection_acquisition_timeout_ms converted from milliseconds to nanoseconds.
*
* NOTE(review): the return value of aws_high_res_clock_get_ticks() is not checked here; if the call fails, `now`
* keeps its initial value of 0 and the computed time is just the converted timeout. Confirm that is acceptable.
* NOTE(review): `now + <converted timeout>` is a plain uint64_t addition with no overflow check, and the
* remainder argument of aws_timestamp_convert() is passed as NULL.
*/
807+
static uint64_t s_calculate_pending_acquisition_cull_task_time_synced(struct aws_http_connection_manager *manager) {
808+
/* Feature disabled: report "never" so the caller's minimum picks the other cull time. */
if (manager->connection_acquisition_timeout_ms == 0) {
809+
return UINT64_MAX;
810+
}
805811

/* NOTE(review): the bare `return;` below carries a `-` gutter marker, i.e. it is a line removed by this commit. */
806-
return;
812+
uint64_t cull_task_time = 0;
813+
814+
const struct aws_linked_list_node *end = aws_linked_list_end(&manager->pending_acquisitions);
815+
struct aws_linked_list_node *oldest_node = aws_linked_list_begin(&manager->pending_acquisitions);
816+
if (oldest_node != end) {
817+
/*
818+
* The front of the list has the closest cull time.
819+
*/
820+
struct aws_http_connection_acquisition *oldest_pending_acquire =
821+
AWS_CONTAINER_OF(oldest_node, struct aws_http_connection_acquisition, node);
822+
cull_task_time = oldest_pending_acquire->timeout_timestamp;
823+
} else {
824+
/*
825+
* There are no acquisitions in the list, so the earliest anything could be culled is the full
826+
* acquisition timeout from now.
827+
*/
828+
uint64_t now = 0;
829+
manager->system_vtable->aws_high_res_clock_get_ticks(&now);
830+
cull_task_time =
831+
now + aws_timestamp_convert(
832+
manager->connection_acquisition_timeout_ms, AWS_TIMESTAMP_MILLIS, AWS_TIMESTAMP_NANOS, NULL);
833+
}
834+
return cull_task_time;
835+
}
836+
837+
/*
* Schedules the manager's single cull task to run at the earlier of the next idle-connection cull time and the
* next pending-acquisition cull time. Does nothing when both max_connection_idle_in_milliseconds and
* connection_acquisition_timeout_ms are zero.
*
* The cull task and the cull event loop are created/selected on first use and then reused on later calls.
* This function takes manager->lock itself while computing the two cull times.
*/
static void s_schedule_culling(struct aws_http_connection_manager *manager) {
838+
/* Neither idle-connection culling nor acquisition timeouts are configured: nothing to schedule. */
if (manager->max_connection_idle_in_milliseconds == 0 && manager->connection_acquisition_timeout_ms == 0) {
839+
return;
840+
}
841+
842+
/* First call: allocate and initialize the task that will run s_cull_task with this manager as its argument. */
if (manager->cull_task == NULL) {
843+
manager->cull_task = aws_mem_calloc(manager->allocator, 1, sizeof(struct aws_task));
844+
/*
* NOTE(review): the task label still reads "cull_idle_connections" although the task now also culls pending
* acquisitions; consider renaming the label.
*/
aws_task_init(manager->cull_task, s_cull_task, manager, "cull_idle_connections");
845+
/* For the task to properly run and cancel, we need to keep manager alive */
846+
aws_ref_count_acquire(&manager->internal_ref_count);
847+
}
848+
849+
/* First call: pick an event loop from the bootstrap's event loop group and keep using it afterwards. */
if (manager->cull_event_loop == NULL) {
850+
manager->cull_event_loop = aws_event_loop_group_get_next_loop(manager->bootstrap->event_loop_group);
851+
}
852+
AWS_FATAL_ASSERT(manager->cull_event_loop != NULL);
853+
854+
/* Both *_synced helpers read manager lists, so they are called with the manager lock held. */
aws_mutex_lock(&manager->lock);
855+
uint64_t idle_cull_time = s_calculate_idle_connection_cull_task_time_synced(manager);
856+
uint64_t acquisition_cull_time = s_calculate_pending_acquisition_cull_task_time_synced(manager);
857+
aws_mutex_unlock(&manager->lock);
858+
859+
/*
* A disabled feature reports UINT64_MAX, so the minimum selects the enabled feature's time (or the earlier of
* the two when both are enabled).
*/
aws_event_loop_schedule_task_future(
860+
manager->cull_event_loop, manager->cull_task, aws_min_u64(idle_cull_time, acquisition_cull_time));
807861
}
808862

809863
struct aws_http_connection_manager *aws_http_connection_manager_new(
@@ -900,6 +954,8 @@ struct aws_http_connection_manager *aws_http_connection_manager_new(
900954
manager->shutdown_complete_user_data = options->shutdown_complete_user_data;
901955
manager->enable_read_back_pressure = options->enable_read_back_pressure;
902956
manager->max_connection_idle_in_milliseconds = options->max_connection_idle_in_milliseconds;
957+
manager->connection_acquisition_timeout_ms = options->connection_acquisition_timeout_ms;
958+
903959
if (options->proxy_ev_settings) {
904960
manager->proxy_ev_settings = *options->proxy_ev_settings;
905961
}
@@ -938,7 +994,7 @@ struct aws_http_connection_manager *aws_http_connection_manager_new(
938994
}
939995

940996
/* NOTHING can fail after here */
941-
s_schedule_connection_culling(manager);
997+
s_schedule_culling(manager);
942998

943999
AWS_LOGF_INFO(AWS_LS_HTTP_CONNECTION_MANAGER, "id=%p: Successfully created", (void *)manager);
9441000

@@ -1227,6 +1283,22 @@ void aws_http_connection_manager_acquire_connection(
12271283
request->user_data = user_data;
12281284
request->manager = manager;
12291285

1286+
if (manager->connection_acquisition_timeout_ms) {
1287+
uint64_t acquire_start_timestamp = 0;
1288+
if (manager->system_vtable->aws_high_res_clock_get_ticks(&acquire_start_timestamp) == AWS_OP_SUCCESS) {
1289+
request->timeout_timestamp =
1290+
acquire_start_timestamp +
1291+
aws_timestamp_convert(
1292+
manager->connection_acquisition_timeout_ms, AWS_TIMESTAMP_MILLIS, AWS_TIMESTAMP_NANOS, NULL);
1293+
} else {
1294+
AWS_LOGF_WARN(
1295+
AWS_LS_HTTP_CONNECTION_MANAGER,
1296+
"id=%p: Failed to get current timestamp using aws_high_res_clock_get_ticks function. Ignoring the "
1297+
"connection_acquisition_timeout_ms value. ",
1298+
(void *)manager);
1299+
}
1300+
}
1301+
12301302
struct aws_connection_management_transaction work;
12311303
s_aws_connection_management_transaction_init(&work, manager);
12321304

@@ -1545,10 +1617,11 @@ static void s_aws_http_connection_manager_on_connection_shutdown(
15451617
s_aws_http_connection_manager_execute_transaction(&work);
15461618
}
15471619

1548-
static void s_cull_idle_connections(struct aws_http_connection_manager *manager) {
1549-
AWS_LOGF_INFO(AWS_LS_HTTP_CONNECTION_MANAGER, "id=%p: culling idle connections", (void *)manager);
1620+
static void s_cull_task_impl(struct aws_http_connection_manager *manager) {
1621+
AWS_LOGF_INFO(
1622+
AWS_LS_HTTP_CONNECTION_MANAGER, "id=%p: culling idle connections and pending acquisitions", (void *)manager);
15501623

1551-
if (manager == NULL || manager->max_connection_idle_in_milliseconds == 0) {
1624+
if (manager == NULL) {
15521625
return;
15531626
}
15541627

@@ -1564,26 +1637,53 @@ static void s_cull_idle_connections(struct aws_http_connection_manager *manager)
15641637

15651638
/* Only if we're not shutting down */
15661639
if (manager->state == AWS_HCMST_READY) {
1567-
const struct aws_linked_list_node *end = aws_linked_list_end(&manager->idle_connections);
1568-
struct aws_linked_list_node *current_node = aws_linked_list_begin(&manager->idle_connections);
1569-
while (current_node != end) {
1570-
struct aws_linked_list_node *node = current_node;
1571-
struct aws_idle_connection *current_idle_connection =
1572-
AWS_CONTAINER_OF(node, struct aws_idle_connection, node);
1573-
if (current_idle_connection->cull_timestamp > now) {
1574-
break;
1640+
/* cull idle connections */
1641+
if (manager->max_connection_idle_in_milliseconds != 0) {
1642+
const struct aws_linked_list_node *idle_connections_end = aws_linked_list_end(&manager->idle_connections);
1643+
struct aws_linked_list_node *idle_connections_current = aws_linked_list_begin(&manager->idle_connections);
1644+
while (idle_connections_current != idle_connections_end) {
1645+
struct aws_linked_list_node *node = idle_connections_current;
1646+
struct aws_idle_connection *current_idle_connection =
1647+
AWS_CONTAINER_OF(node, struct aws_idle_connection, node);
1648+
if (current_idle_connection->cull_timestamp > now) {
1649+
break;
1650+
}
1651+
1652+
idle_connections_current = aws_linked_list_next(idle_connections_current);
1653+
aws_linked_list_remove(node);
1654+
aws_linked_list_push_back(&work.connections_to_release, node);
1655+
--manager->idle_connection_count;
1656+
1657+
AWS_LOGF_DEBUG(
1658+
AWS_LS_HTTP_CONNECTION_MANAGER,
1659+
"id=%p: culling idle connection (%p)",
1660+
(void *)manager,
1661+
(void *)current_idle_connection->connection);
15751662
}
1663+
}
15761664

1577-
current_node = aws_linked_list_next(current_node);
1578-
aws_linked_list_remove(node);
1579-
aws_linked_list_push_back(&work.connections_to_release, node);
1580-
--manager->idle_connection_count;
1581-
1582-
AWS_LOGF_DEBUG(
1583-
AWS_LS_HTTP_CONNECTION_MANAGER,
1584-
"id=%p: culling idle connection (%p)",
1585-
(void *)manager,
1586-
(void *)current_idle_connection->connection);
1665+
/* cull pending acquisitions */
1666+
if (manager->connection_acquisition_timeout_ms != 0) {
1667+
const struct aws_linked_list_node *pending_acquisitions_end =
1668+
aws_linked_list_end(&manager->pending_acquisitions);
1669+
struct aws_linked_list_node *pending_acquisitions_current =
1670+
aws_linked_list_begin(&manager->pending_acquisitions);
1671+
while (pending_acquisitions_current != pending_acquisitions_end) {
1672+
struct aws_linked_list_node *node = pending_acquisitions_current;
1673+
struct aws_http_connection_acquisition *current_pending_acquire =
1674+
AWS_CONTAINER_OF(node, struct aws_http_connection_acquisition, node);
1675+
if (current_pending_acquire->timeout_timestamp > now) {
1676+
break;
1677+
}
1678+
1679+
pending_acquisitions_current = aws_linked_list_next(pending_acquisitions_current);
1680+
s_aws_http_connection_manager_move_front_acquisition(
1681+
manager, NULL, AWS_ERROR_HTTP_CONNECTION_MANAGER_ACQUISITION_TIMEOUT, &work.completions);
1682+
AWS_LOGF_DEBUG(
1683+
AWS_LS_HTTP_CONNECTION_MANAGER,
1684+
"id=%p: Failing pending acquisition due to timeout",
1685+
(void *)manager);
1686+
}
15871687
}
15881688
}
15891689

@@ -1601,10 +1701,9 @@ static void s_cull_task(struct aws_task *task, void *arg, enum aws_task_status s
16011701
}
16021702

16031703
struct aws_http_connection_manager *manager = arg;
1704+
s_cull_task_impl(manager);
16041705

1605-
s_cull_idle_connections(manager);
1606-
1607-
s_schedule_connection_culling(manager);
1706+
s_schedule_culling(manager);
16081707
}
16091708

16101709
void aws_http_connection_manager_fetch_metrics(

source/http.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,9 @@ static struct aws_error_info s_errors[] = {
151151
AWS_DEFINE_ERROR_INFO_HTTP(
152152
AWS_ERROR_HTTP_RESPONSE_FIRST_BYTE_TIMEOUT,
153153
"The server does not begin responding within the configuration after a request is fully sent."),
154+
AWS_DEFINE_ERROR_INFO_HTTP(
155+
AWS_ERROR_HTTP_CONNECTION_MANAGER_ACQUISITION_TIMEOUT,
156+
"Connection Manager failed to acquire a connection within the defined timeout."),
154157
};
155158
/* clang-format on */
156159

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,7 @@ add_net_test_case(connection_customized_alpn_error_with_unknown_return_string)
519519
# unit tests where connections are mocked
520520
add_net_test_case(test_connection_manager_setup_shutdown)
521521
add_net_test_case(test_connection_manager_acquire_release_mix_synchronous)
522+
add_net_test_case(test_connection_manager_acquisition_timeout)
522523
add_net_test_case(test_connection_manager_connect_callback_failure)
523524
add_net_test_case(test_connection_manager_connect_immediate_failure)
524525
add_net_test_case(test_connection_manager_proxy_setup_shutdown)

0 commit comments

Comments
 (0)