Skip to content

Commit f1d1f91

Browse files
RUBY-3429 Retry failed KMS requests (#2907)
1 parent 1924187 commit f1d1f91

File tree

11 files changed

+244
-35
lines changed

11 files changed

+244
-35
lines changed

.evergreen/run-tests.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ if test -n "$FLE"; then
219219
python3 -u .evergreen/csfle/kms_http_server.py --ca_file .evergreen/x509gen/ca.pem --cert_file .evergreen/x509gen/server.pem --port 8002 --require_client_cert &
220220
python3 -u .evergreen/csfle/kms_kmip_server.py &
221221
python3 -u .evergreen/csfle/fake_azure.py &
222+
python3 -u .evergreen/csfle/kms_failpoint_server.py --port 9003 &
222223

223224
# Obtain temporary AWS credentials
224225
PYTHON=python3 . .evergreen/csfle/set-temp-creds.sh

.rubocop.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,6 @@ Style/TrailingCommaInArrayLiteral:
110110

111111
Style/TrailingCommaInHashLiteral:
112112
Enabled: false
113+
114+
RSpec/ExampleLength:
115+
Max: 10

gemfiles/standard.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,6 @@ def standard_dependencies
6868
gem 'ruby-lsp', platforms: :mri
6969
end
7070

71-
gem 'libmongocrypt-helper', '~> 1.11.0' if ENV['FLE'] == 'helper'
71+
gem 'libmongocrypt-helper', '~> 1.12.0' if ENV['FLE'] == 'helper'
7272
end
7373
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength

lib/mongo/crypt/binding.rb

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class Binding
8383
# will cause a `LoadError`.
8484
#
8585
# @api private
86-
MIN_LIBMONGOCRYPT_VERSION = Gem::Version.new("1.7.0")
86+
MIN_LIBMONGOCRYPT_VERSION = Gem::Version.new("1.12.0")
8787

8888
# @!method self.mongocrypt_version(len)
8989
# @api private
@@ -1113,6 +1113,62 @@ def self.check_kms_ctx_status(kms_context)
11131113
end
11141114
end
11151115

1116+
# @!method self.mongocrypt_kms_ctx_usleep(ctx)
1117+
# @api private
1118+
#
1119+
# Indicates how long to sleep before sending KMS request.
1120+
#
1121+
# @param [ FFI::Pointer ] ctx A pointer to a mongocrypt_kms_ctx_t object.
1122+
# @return [ int64 ] The number of microseconds to sleep, as a 64-bit integer.
1123+
attach_function :mongocrypt_kms_ctx_usleep, [:pointer], :int64
1124+
1125+
# Returns the number of microseconds to sleep before sending a KMS request
1126+
# for the given KMS context.
1127+
#
1128+
# @param [ Mongo::Crypt::KmsContext ] kms_context KMS Context we are going
1129+
# to send KMS request for.
1130+
# @return [ Integer ] The number of microseconds to sleep.
1131+
def self.kms_ctx_usleep(kms_context)
1132+
mongocrypt_kms_ctx_usleep(kms_context.kms_ctx_p)
1133+
end
1134+
1135+
# @!method self.mongocrypt_kms_ctx_fail(ctx)
1136+
# @api private
1137+
#
1138+
# Indicate a network-level failure.
1139+
#
1140+
# @param [ FFI::Pointer ] ctx A pointer to a mongocrypt_kms_ctx_t object.
1141+
# @return [ Boolean ] whether the failed request may be retried.
1142+
attach_function :mongocrypt_kms_ctx_fail, [:pointer], :bool
1143+
1144+
# Indicate a network-level failure for the KMS context and check whether the failed request may be retried.
1145+
#
1146+
# @param [ Mongo::Crypt::KmsContext ] kms_context KMS Context
1147+
# @return [ true, false ] whether the failed request may be retried.
1148+
def self.kms_ctx_fail(kms_context)
1149+
mongocrypt_kms_ctx_fail(kms_context.kms_ctx_p)
1150+
end
1151+
1152+
# @!method self.mongocrypt_setopt_retry_kms(crypt, enable)
1153+
# @api private
1154+
#
1155+
# Enable or disable KMS retry behavior.
1156+
#
1157+
# @param [ FFI::Pointer ] crypt A pointer to a mongocrypt_t object
1158+
# @param [ Boolean ] enable A boolean indicating whether to retry operations.
1159+
# @return [ Boolean ] indicating success.
1160+
attach_function :mongocrypt_setopt_retry_kms, [:pointer, :bool], :bool
1161+
1162+
# Enable or disable KMS retry behavior.
1163+
#
1164+
# @param [ Mongo::Crypt::Handle ] handle
1165+
# @param [ true, false ] value whether to retry operations.
1166+
# @return [ true, false ] true if the option was set, otherwise false.
1167+
def self.kms_ctx_setopt_retry_kms(handle, value)
1168+
mongocrypt_setopt_retry_kms(handle.ref, value)
1169+
end
1170+
1171+
11161172
# @!method self.mongocrypt_kms_ctx_done(ctx)
11171173
# @api private
11181174
#

lib/mongo/crypt/context.rb

Lines changed: 55 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ def initialize(mongocrypt_handle, io)
4949
Binding.mongocrypt_ctx_new(@mongocrypt_handle.ref),
5050
Binding.method(:mongocrypt_ctx_destroy)
5151
)
52-
5352
@encryption_io = io
5453
@cached_azure_token = nil
5554
end
@@ -90,35 +89,13 @@ def run_state_machine(timeout_holder)
9089
when :done
9190
return nil
9291
when :need_mongo_keys
93-
filter = Binding.ctx_mongo_op(self)
94-
95-
@encryption_io.find_keys(filter, timeout_ms: timeout_ms).each do |key|
96-
mongocrypt_feed(key) if key
97-
end
98-
99-
mongocrypt_done
92+
provide_keys(timeout_ms)
10093
when :need_mongo_collinfo
101-
filter = Binding.ctx_mongo_op(self)
102-
103-
result = @encryption_io.collection_info(@db_name, filter, timeout_ms: timeout_ms)
104-
mongocrypt_feed(result) if result
105-
106-
mongocrypt_done
94+
provide_collection_info(timeout_ms)
10795
when :need_mongo_markings
108-
cmd = Binding.ctx_mongo_op(self)
109-
110-
result = @encryption_io.mark_command(cmd, timeout_ms: timeout_ms)
111-
mongocrypt_feed(result)
112-
113-
mongocrypt_done
96+
provide_markings(timeout_ms)
11497
when :need_kms
115-
while kms_context = Binding.ctx_next_kms_ctx(self) do
116-
provider = Binding.kms_ctx_get_kms_provider(kms_context)
117-
tls_options = @mongocrypt_handle.kms_tls_options(provider)
118-
@encryption_io.feed_kms(kms_context, tls_options)
119-
end
120-
121-
Binding.ctx_kms_done(self)
98+
feed_kms
12299
when :need_kms_credentials
123100
Binding.ctx_provide_kms_providers(
124101
self,
@@ -134,6 +111,57 @@ def run_state_machine(timeout_holder)
134111

135112
private
136113

114+
def provide_markings(timeout_ms)
115+
cmd = Binding.ctx_mongo_op(self)
116+
117+
result = @encryption_io.mark_command(cmd, timeout_ms: timeout_ms)
118+
mongocrypt_feed(result)
119+
120+
mongocrypt_done
121+
end
122+
123+
def provide_collection_info(timeout_ms)
124+
filter = Binding.ctx_mongo_op(self)
125+
126+
result = @encryption_io.collection_info(@db_name, filter, timeout_ms: timeout_ms)
127+
mongocrypt_feed(result) if result
128+
129+
mongocrypt_done
130+
end
131+
132+
def provide_keys(timeout_ms)
133+
filter = Binding.ctx_mongo_op(self)
134+
135+
@encryption_io.find_keys(filter, timeout_ms: timeout_ms).each do |key|
136+
mongocrypt_feed(key) if key
137+
end
138+
139+
mongocrypt_done
140+
end
141+
142+
def feed_kms
143+
while (kms_context = Binding.ctx_next_kms_ctx(self)) do
144+
begin
145+
delay = Binding.kms_ctx_usleep(kms_context)
146+
sleep(delay / 1_000_000.0) unless delay.nil?
147+
provider = Binding.kms_ctx_get_kms_provider(kms_context)
148+
tls_options = @mongocrypt_handle.kms_tls_options(provider)
149+
@encryption_io.feed_kms(kms_context, tls_options)
150+
rescue Error::KmsError => e
151+
if e.network_error?
152+
if Binding.kms_ctx_fail(kms_context)
153+
next
154+
else
155+
raise
156+
end
157+
else
158+
raise
159+
end
160+
end
161+
end
162+
Binding.ctx_kms_done(self)
163+
end
164+
137165
# Indicate that state machine is done feeding I/O responses back to libmongocrypt
138166
def mongocrypt_done
139167
Binding.mongocrypt_ctx_mongo_done(ctx_p)

lib/mongo/crypt/encryption_io.rb

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,8 +363,10 @@ def with_ssl_socket(endpoint, tls_options, timeout_ms: nil)
363363
tls_options.merge(socket_options)
364364
)
365365
yield(mongo_socket.socket)
366-
rescue => e
367-
raise Error::KmsError, "Error when connecting to KMS provider: #{e.class}: #{e.message}"
366+
rescue Error::KmsError
367+
raise
368+
rescue StandardError => e
369+
raise Error::KmsError.new("Error when connecting to KMS provider: #{e.class}: #{e.message}", network_error: true)
368370
ensure
369371
mongo_socket&.close
370372
end

lib/mongo/crypt/handle.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def initialize(kms_providers, kms_tls_options, options={})
7171
Binding.mongocrypt_new,
7272
Binding.method(:mongocrypt_destroy)
7373
)
74-
74+
Binding.kms_ctx_setopt_retry_kms(self, true)
7575
@kms_providers = kms_providers
7676
@kms_tls_options = kms_tls_options
7777

lib/mongo/error/kms_error.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ class Error
2020

2121
# A KMS-related error during client-side encryption.
2222
class KmsError < CryptError
23+
def initialize(message, code: nil, network_error: nil)
24+
@network_error = network_error
25+
super(message, code: code)
26+
end
27+
end
28+
29+
# @return [ true, false ] whether this error was caused by a network error.
30+
def network_error?
31+
@network_error == true
2332
end
2433
end
2534
end
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# frozen_string_literal: true
2+
3+
require 'spec_helper'
4+
5+
def simulate_failure(type, times = 1)
6+
url = URI.parse("https://localhost:9003/set_failpoint/#{type}")
7+
data = { count: times }.to_json
8+
http = Net::HTTP.new(url.host, url.port)
9+
http.use_ssl = true
10+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
11+
http.ca_file = '.evergreen/x509gen/ca.pem'
12+
request = Net::HTTP::Post.new(url.path, { 'Content-Type' => 'application/json' })
13+
request.body = data
14+
http.request(request)
15+
end
16+
17+
describe 'KMS Retry Prose Spec' do
18+
require_libmongocrypt
19+
require_enterprise
20+
min_server_version '4.2'
21+
22+
include_context 'define shared FLE helpers'
23+
24+
let(:key_vault_client) do
25+
ClientRegistry.instance.new_local_client(SpecConfig.instance.addresses)
26+
end
27+
28+
let(:client_encryption) do
29+
Mongo::ClientEncryption.new(
30+
key_vault_client,
31+
kms_tls_options: {
32+
aws: default_kms_tls_options_for_provider,
33+
gcp: default_kms_tls_options_for_provider,
34+
azure: default_kms_tls_options_for_provider,
35+
},
36+
key_vault_namespace: key_vault_namespace,
37+
# For some reason libmongocrypt ignores custom endpoints for Azure and GCP
38+
# kms_providers: aws_kms_providers.merge(azure_kms_providers).merge(gcp_kms_providers)
39+
kms_providers: aws_kms_providers
40+
)
41+
end
42+
43+
shared_examples 'kms_retry prose spec' do
44+
it 'createDataKey and encrypt with TCP retry' do
45+
simulate_failure('network')
46+
data_key_id = client_encryption.create_data_key(kms_provider, master_key: master_key)
47+
simulate_failure('network')
48+
expect do
49+
client_encryption.encrypt(123, key_id: data_key_id, algorithm: 'AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic')
50+
end.not_to raise_error
51+
end
52+
53+
it 'createDataKey and encrypt with HTTP retry' do
54+
simulate_failure('http')
55+
data_key_id = client_encryption.create_data_key(kms_provider, master_key: master_key)
56+
simulate_failure('http')
57+
expect do
58+
client_encryption.encrypt(123, key_id: data_key_id, algorithm: 'AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic')
59+
end.not_to raise_error
60+
end
61+
62+
it 'createDataKey fails after too many retries' do
63+
simulate_failure('network', 4)
64+
expect do
65+
client_encryption.create_data_key(kms_provider, master_key: master_key)
66+
end.to raise_error(Mongo::Error::KmsError)
67+
end
68+
end
69+
70+
context 'with AWS KMS provider' do
71+
let(:kms_provider) { 'aws' }
72+
73+
let(:master_key) do
74+
{
75+
region: 'foo',
76+
key: 'bar',
77+
endpoint: '127.0.0.1:9003',
78+
}
79+
end
80+
81+
include_examples 'kms_retry prose spec'
82+
end
83+
84+
context 'with GCP KMS provider', skip: 'For some reason libmongocrypt ignores custom endpoints for Azure and CGP' do
85+
let(:kms_provider) { 'gcp' }
86+
87+
let(:master_key) do
88+
{
89+
project_id: 'foo',
90+
location: 'bar',
91+
key_ring: 'baz',
92+
key_name: 'qux',
93+
endpoint: '127.0.0.1:9003'
94+
}
95+
end
96+
97+
include_examples 'kms_retry prose spec'
98+
end
99+
100+
context 'with Azure KMS provider', skip: 'For some reason libmongocrypt ignores custom endpoints for Azure and CGP' do
101+
let(:kms_provider) { 'azure' }
102+
103+
let(:master_key) do
104+
{
105+
key_vault_endpoint: '127.0.0.1:9003',
106+
key_name: 'foo',
107+
}
108+
end
109+
110+
include_examples 'kms_retry prose spec'
111+
end
112+
end

spec/integration/client_side_encryption/kms_tls_options_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@
323323
}
324324
)
325325
rescue Mongo::Error::KmsError => exc
326-
exc.message.should =~ /Error when connecting to KMS provider/
326+
exc.message.should =~ /Error when connecting to KMS provider|Empty KMS response/
327327
exc.message.should =~ /libmongocrypt error code/
328328
exc.message.should_not =~ /CryptError/
329329
else

spec/integration/search_indexes_prose_spec.rb

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,6 @@ def filter_results(result, names)
147147
.first
148148
end
149149

150-
# rubocop:disable RSpec/ExampleLength
151150
it 'succeeds' do
152151
expect(create_index).to be == name
153152
helper.wait_for(name)
@@ -158,7 +157,6 @@ def filter_results(result, names)
158157

159158
expect(index['latestDefinition']).to be == new_definition
160159
end
161-
# rubocop:enable RSpec/ExampleLength
162160
end
163161

164162
# Case 5: dropSearchIndex suppresses namespace not found errors

0 commit comments

Comments
 (0)