Skip to content

RUBY-3429 Retry failed KMS requests #2907

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .evergreen/run-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ if test -n "$FLE"; then
python3 -u .evergreen/csfle/kms_http_server.py --ca_file .evergreen/x509gen/ca.pem --cert_file .evergreen/x509gen/server.pem --port 8002 --require_client_cert &
python3 -u .evergreen/csfle/kms_kmip_server.py &
python3 -u .evergreen/csfle/fake_azure.py &
python3 -u .evergreen/csfle/kms_failpoint_server.py --port 9003 &

# Obtain temporary AWS credentials
PYTHON=python3 . .evergreen/csfle/set-temp-creds.sh
Expand Down
3 changes: 3 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,6 @@ Style/TrailingCommaInArrayLiteral:

Style/TrailingCommaInHashLiteral:
Enabled: false

RSpec/ExampleLength:
Max: 10
2 changes: 1 addition & 1 deletion gemfiles/standard.rb
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,6 @@ def standard_dependencies
gem 'ruby-lsp', platforms: :mri
end

gem 'libmongocrypt-helper', '~> 1.11.0' if ENV['FLE'] == 'helper'
gem 'libmongocrypt-helper', '~> 1.12.0' if ENV['FLE'] == 'helper'
end
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/BlockLength
58 changes: 57 additions & 1 deletion lib/mongo/crypt/binding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class Binding
# will cause a `LoadError`.
#
# @api private
MIN_LIBMONGOCRYPT_VERSION = Gem::Version.new("1.7.0")
MIN_LIBMONGOCRYPT_VERSION = Gem::Version.new("1.12.0")

# @!method self.mongocrypt_version(len)
# @api private
Expand Down Expand Up @@ -1113,6 +1113,62 @@ def self.check_kms_ctx_status(kms_context)
end
end

# @!method self.mongocrypt_kms_ctx_usleep(ctx)
# @api private
#
# Indicates how long to sleep before sending KMS request.
#
# @param [ FFI::Pointer ] ctx A pointer to a mongocrypt_ctx_t object.
# @return [ int64 ] A 64-bit encoded number of microseconds of how long to sleep.
attach_function :mongocrypt_kms_ctx_usleep, [:pointer], :int64

# Returns number of milliseconds to sleep before sending KMS request
# for the given KMS context.
#
# @param [ Mongo::Crypt::KmsContext ] kms_context KMS Context we are going
# to send KMS request for.
# @return [ Integer ] A 64-bit encoded number of microseconds to sleep.
def self.kms_ctx_usleep(kms_context)
mongocrypt_kms_ctx_usleep(kms_context.kms_ctx_p)
end

# @!method self.mongocrypt_kms_ctx_fail(ctx)
# @api private
#
# Indicate a network-level failure.
#
# @param [ FFI::Pointer ] ctx A pointer to a mongocrypt_ctx_t object.
# @return [ Boolean ] whether the failed request may be retried.
attach_function :mongocrypt_kms_ctx_fail, [:pointer], :bool

# Check whether the last failed request for the KMS context may be retried.
#
# @param [ Mongo::Crypt::KmsContext ] kms_context KMS Context
# @return [ true, false ] whether the failed request may be retried.
def self.kms_ctx_fail(kms_context)
mongocrypt_kms_ctx_fail(kms_context.kms_ctx_p)
end

# @!method self.mongocrypt_setopt_retry_kms(crypt, enable)
# @api private
#
# Enable or disable KMS retry behavior.
#
# @param [ FFI::Pointer ] crypt A pointer to a mongocrypt_t object
# @param [ Boolean ] enable A boolean indicating whether to retry operations.
# @return [ Boolean ] indicating success.
attach_function :mongocrypt_setopt_retry_kms, [:pointer, :bool], :bool

# Enable or disable KMS retry behavior.
#
# @param [ Mongo::Crypt::Handle ] handle
# @param [ true, false ] value whether to retry operations.
# @return [ true, fale ] true is the option was set, otherwise false.
def self.kms_ctx_setopt_retry_kms(handle, value)
mongocrypt_setopt_retry_kms(handle.ref, value)
end


# @!method self.mongocrypt_kms_ctx_done(ctx)
# @api private
#
Expand Down
82 changes: 55 additions & 27 deletions lib/mongo/crypt/context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ def initialize(mongocrypt_handle, io)
Binding.mongocrypt_ctx_new(@mongocrypt_handle.ref),
Binding.method(:mongocrypt_ctx_destroy)
)

@encryption_io = io
@cached_azure_token = nil
end
Expand Down Expand Up @@ -90,35 +89,13 @@ def run_state_machine(timeout_holder)
when :done
return nil
when :need_mongo_keys
filter = Binding.ctx_mongo_op(self)

@encryption_io.find_keys(filter, timeout_ms: timeout_ms).each do |key|
mongocrypt_feed(key) if key
end

mongocrypt_done
provide_keys(timeout_ms)
when :need_mongo_collinfo
filter = Binding.ctx_mongo_op(self)

result = @encryption_io.collection_info(@db_name, filter, timeout_ms: timeout_ms)
mongocrypt_feed(result) if result

mongocrypt_done
provide_collection_info(timeout_ms)
when :need_mongo_markings
cmd = Binding.ctx_mongo_op(self)

result = @encryption_io.mark_command(cmd, timeout_ms: timeout_ms)
mongocrypt_feed(result)

mongocrypt_done
provide_markings(timeout_ms)
when :need_kms
while kms_context = Binding.ctx_next_kms_ctx(self) do
provider = Binding.kms_ctx_get_kms_provider(kms_context)
tls_options = @mongocrypt_handle.kms_tls_options(provider)
@encryption_io.feed_kms(kms_context, tls_options)
end

Binding.ctx_kms_done(self)
feed_kms
when :need_kms_credentials
Binding.ctx_provide_kms_providers(
self,
Expand All @@ -134,6 +111,57 @@ def run_state_machine(timeout_holder)

private

def provide_markings(timeout_ms)
cmd = Binding.ctx_mongo_op(self)

result = @encryption_io.mark_command(cmd, timeout_ms: timeout_ms)
mongocrypt_feed(result)

mongocrypt_done
end

def provide_collection_info(timeout_ms)
filter = Binding.ctx_mongo_op(self)

result = @encryption_io.collection_info(@db_name, filter, timeout_ms: timeout_ms)
mongocrypt_feed(result) if result

mongocrypt_done
end

def provide_keys(timeout_ms)
filter = Binding.ctx_mongo_op(self)

@encryption_io.find_keys(filter, timeout_ms: timeout_ms).each do |key|
mongocrypt_feed(key) if key
end

mongocrypt_done
end

def feed_kms
while (kms_context = Binding.ctx_next_kms_ctx(self)) do
begin
delay = Binding.kms_ctx_usleep(kms_context)
sleep(delay / 1_000_000.0) unless delay.nil?
provider = Binding.kms_ctx_get_kms_provider(kms_context)
tls_options = @mongocrypt_handle.kms_tls_options(provider)
@encryption_io.feed_kms(kms_context, tls_options)
rescue Error::KmsError => e
if e.network_error?
if Binding.kms_ctx_fail(kms_context)
next
else
raise
end
else
raise
end
end
end
Binding.ctx_kms_done(self)
end

# Indicate that state machine is done feeding I/O responses back to libmongocrypt
def mongocrypt_done
Binding.mongocrypt_ctx_mongo_done(ctx_p)
Expand Down
6 changes: 4 additions & 2 deletions lib/mongo/crypt/encryption_io.rb
Original file line number Diff line number Diff line change
Expand Up @@ -363,8 +363,10 @@ def with_ssl_socket(endpoint, tls_options, timeout_ms: nil)
tls_options.merge(socket_options)
)
yield(mongo_socket.socket)
rescue => e
raise Error::KmsError, "Error when connecting to KMS provider: #{e.class}: #{e.message}"
rescue Error::KmsError
raise
rescue StandardError => e
raise Error::KmsError.new("Error when connecting to KMS provider: #{e.class}: #{e.message}", network_error: true)
ensure
mongo_socket&.close
end
Expand Down
2 changes: 1 addition & 1 deletion lib/mongo/crypt/handle.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def initialize(kms_providers, kms_tls_options, options={})
Binding.mongocrypt_new,
Binding.method(:mongocrypt_destroy)
)

Binding.kms_ctx_setopt_retry_kms(self, true)
@kms_providers = kms_providers
@kms_tls_options = kms_tls_options

Expand Down
9 changes: 9 additions & 0 deletions lib/mongo/error/kms_error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ class Error

# A KMS-related error during client-side encryption.
class KmsError < CryptError
def initialize(message, code: nil, network_error: nil)
@network_error = network_error
super(message, code: code)
end
end

# @return [ true, false ] whether this error was caused by a network error.
def network_error?
@network_error == true
end
end
end
112 changes: 112 additions & 0 deletions spec/integration/client_side_encryption/kms_retry_prose_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# frozen_string_literal: true

require 'spec_helper'

def simulate_failure(type, times = 1)
url = URI.parse("https://localhost:9003/set_failpoint/#{type}")
data = { count: times }.to_json
http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
http.ca_file = '.evergreen/x509gen/ca.pem'
request = Net::HTTP::Post.new(url.path, { 'Content-Type' => 'application/json' })
request.body = data
http.request(request)
end

describe 'KMS Retry Prose Spec' do
require_libmongocrypt
require_enterprise
min_server_version '4.2'

include_context 'define shared FLE helpers'

let(:key_vault_client) do
ClientRegistry.instance.new_local_client(SpecConfig.instance.addresses)
end

let(:client_encryption) do
Mongo::ClientEncryption.new(
key_vault_client,
kms_tls_options: {
aws: default_kms_tls_options_for_provider,
gcp: default_kms_tls_options_for_provider,
azure: default_kms_tls_options_for_provider,
},
key_vault_namespace: key_vault_namespace,
# For some reason libmongocrypt ignores custom endpoints for Azure and CGP
# kms_providers: aws_kms_providers.merge(azure_kms_providers).merge(gcp_kms_providers)
kms_providers: aws_kms_providers
)
end

shared_examples 'kms_retry prose spec' do
it 'createDataKey and encrypt with TCP retry' do
simulate_failure('network')
data_key_id = client_encryption.create_data_key(kms_provider, master_key: master_key)
simulate_failure('network')
expect do
client_encryption.encrypt(123, key_id: data_key_id, algorithm: 'AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic')
end.not_to raise_error
end

it 'createDataKey and encrypt with HTTP retry' do
simulate_failure('http')
data_key_id = client_encryption.create_data_key(kms_provider, master_key: master_key)
simulate_failure('http')
expect do
client_encryption.encrypt(123, key_id: data_key_id, algorithm: 'AEAD_AES_256_CBC_HMAC_SHA_512-Deterministic')
end.not_to raise_error
end

it 'createDataKey fails after too many retries' do
simulate_failure('network', 4)
expect do
client_encryption.create_data_key(kms_provider, master_key: master_key)
end.to raise_error(Mongo::Error::KmsError)
end
end

context 'with AWS KMS provider' do
let(:kms_provider) { 'aws' }

let(:master_key) do
{
region: 'foo',
key: 'bar',
endpoint: '127.0.0.1:9003',
}
end

include_examples 'kms_retry prose spec'
end

context 'with GCP KMS provider', skip: 'For some reason libmongocrypt ignores custom endpoints for Azure and CGP' do
let(:kms_provider) { 'gcp' }

let(:master_key) do
{
project_id: 'foo',
location: 'bar',
key_ring: 'baz',
key_name: 'qux',
endpoint: '127.0.0.1:9003'
}
end

include_examples 'kms_retry prose spec'
end

context 'with Azure KMS provider', skip: 'For some reason libmongocrypt ignores custom endpoints for Azure and CGP' do
let(:kms_provider) { 'azure' }

let(:master_key) do
{
key_vault_endpoint: '127.0.0.1:9003',
key_name: 'foo',
}
end

include_examples 'kms_retry prose spec'
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@
}
)
rescue Mongo::Error::KmsError => exc
exc.message.should =~ /Error when connecting to KMS provider/
exc.message.should =~ /Error when connecting to KMS provider|Empty KMS response/
exc.message.should =~ /libmongocrypt error code/
exc.message.should_not =~ /CryptError/
else
Expand Down
2 changes: 0 additions & 2 deletions spec/integration/search_indexes_prose_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ def filter_results(result, names)
.first
end

# rubocop:disable RSpec/ExampleLength
it 'succeeds' do
expect(create_index).to be == name
helper.wait_for(name)
Expand All @@ -158,7 +157,6 @@ def filter_results(result, names)

expect(index['latestDefinition']).to be == new_definition
end
# rubocop:enable RSpec/ExampleLength
end

# Case 5: dropSearchIndex suppresses namespace not found errors
Expand Down
Loading