Skip to content

optimize twilio #985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
using BotSharp.Abstraction.MLTasks;

namespace BotSharp.Abstraction.Realtime;

/// <summary>
/// Extension point for the realtime (streaming model) pipeline. Implementations are
/// invoked through the hook emitter so plugins can react to realtime session events.
/// </summary>
public interface IRealtimeHook
{
/// <summary>
/// Called with the active agent and the realtime completer; the realtime hub emits it
/// after the model session has been updated.
/// NOTE(review): the name looks like a typo for "OnModelReady" - renaming would break
/// existing implementers, so confirm before changing.
/// </summary>
/// <param name="agent">The agent driving the realtime session.</param>
/// <param name="completer">The realtime completion client connected to the model.</param>
Task OnModeReady(Agent agent, IRealTimeCompletion completer);
/// <summary>
/// Returns a set of strings for the given agent.
/// NOTE(review): presumably prompt hints passed to the model's transcription step -
/// confirm against the caller.
/// </summary>
/// <param name="agent">The agent the prompt strings are requested for.</param>
string[] OnModelTranscriptPrompt(Agent agent);
/// <summary>
/// Called with a dialog message and its transcription result.
/// NOTE(review): presumably raised once a transcription has finished - confirm
/// against the emitter.
/// </summary>
/// <param name="message">The dialog message associated with the transcription.</param>
/// <param name="data">The transcription payload (text, confidence, language).</param>
Task OnTranscribeCompleted(RoleDialogModel message, TranscriptionData data);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/// <summary>
/// Plain data holder describing the result of a speech transcription.
/// </summary>
public class TranscriptionData
{
/// <summary>
/// The transcribed text. Initialised with <c>null!</c>, so it is only non-null
/// once a caller or deserializer has assigned it.
/// </summary>
public string Transcript { get; set; } = null!;
/// <summary>
/// Confidence score attached to the transcript.
/// NOTE(review): presumably in the range 0..1 - confirm against the producer.
/// </summary>
public float Confidence { get; set; }
/// <summary>
/// Language of the transcript. Initialised with <c>null!</c>, so it is only
/// non-null once assigned.
/// NOTE(review): presumably a language/locale code - confirm against the producer.
/// </summary>
public string Language { get; set; } = null!;
}
36 changes: 8 additions & 28 deletions src/Infrastructure/BotSharp.Core.Realtime/Services/RealtimeHub.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using BotSharp.Core.Infrastructures;

namespace BotSharp.Core.Realtime.Services;

public class RealtimeHub : IRealtimeHub
Expand Down Expand Up @@ -70,8 +72,8 @@ private async Task ConnectToModel(WebSocket userWebSocket)
_conn.CurrentAgentId = agent.Id;

// Set model
var model = agent.LlmConfig.Model;
if (!model.Contains("-realtime-"))
var model = "gpt-4o-mini-realtime";
if (agent.Profiles.Contains("realtime"))
{
var llmProviderService = _services.GetRequiredService<ILlmProviderService>();
model = llmProviderService.GetProviderModel("openai", "gpt-4o", realTime: true).Name;
Expand All @@ -85,14 +87,14 @@ private async Task ConnectToModel(WebSocket userWebSocket)

var storage = _services.GetRequiredService<IConversationStorage>();
var dialogs = convService.GetDialogHistory();
if (dialogs.Count == 0)
/*if (dialogs.Count == 0)
{
dialogs.Add(new RoleDialogModel(AgentRole.User, "Hi"));
storage.Append(_conn.ConversationId, dialogs.First());
}
}*/

routing.Context.SetDialogs(dialogs);
routing.Context.SetMessageId(_conn.ConversationId, dialogs.Last().MessageId);
// routing.Context.SetMessageId(_conn.ConversationId, dialogs.Last().MessageId);

var states = _services.GetRequiredService<IConversationStateService>();

Expand All @@ -102,29 +104,7 @@ await _completer.Connect(_conn,
// Not TriggerModelInference, waiting for user utter.
var instruction = await _completer.UpdateSession(_conn);

// Trigger model inference if there is no audio file in the conversation
if (!states.ContainsState("init_audio_file"))
{
if (dialogs.LastOrDefault()?.Role == AgentRole.Assistant)
{
await _completer.TriggerModelInference($"Rephase your last response:\r\n{dialogs.LastOrDefault()?.Content}");
}
else
{
await _completer.TriggerModelInference("Reply based on the conversation context.");
}
}
else
{
// Append dialogs into model context
var history = "[CONVERSATION HISTORY]\r\n";
foreach (var message in dialogs)
{
history += $"{message.Role}: {message.Content}\r\n";
}

await _completer.TriggerModelInference($"{instruction}\r\n\r\n{history}\r\n\r\nAssist user without repeating your previous statement.");
}
await HookEmitter.Emit<IRealtimeHook>(_services, async hook => await hook.OnModeReady(agent, _completer));
},
onModelAudioDeltaReceived: async (audioDeltaData, itemId) =>
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace BotSharp.Core.Agents.Services;
public partial class AgentService
{
#if !DEBUG
[SharpCache(10, perInstanceCache: true)]
[SharpCache(10)]
#endif
public async Task<PagedItems<Agent>> GetAgents(AgentFilter filter)
{
Expand All @@ -28,7 +28,7 @@ public async Task<PagedItems<Agent>> GetAgents(AgentFilter filter)
}

#if !DEBUG
[SharpCache(10, perInstanceCache: true)]
[SharpCache(10)]
#endif
public async Task<List<IdName>> GetAgentOptions(List<string>? agentIds)
{
Expand All @@ -40,7 +40,7 @@ public async Task<List<IdName>> GetAgentOptions(List<string>? agentIds)
}

#if !DEBUG
[SharpCache(10, perInstanceCache: true)]
[SharpCache(10)]
#endif
public async Task<Agent> GetAgent(string id)
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
using BotSharp.Abstraction.Agents.Models;
using BotSharp.Abstraction.Infrastructures.Enums;
using BotSharp.Core.Infrastructures;
using BotSharp.Plugin.Twilio.Interfaces;
using BotSharp.Plugin.Twilio.Models;
using BotSharp.Plugin.Twilio.Services;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Twilio.Http;
using Conversation = BotSharp.Abstraction.Conversations.Models.Conversation;
using Task = System.Threading.Tasks.Task;

namespace BotSharp.Plugin.Twilio.Controllers;

/// <summary>
/// Controller for the <c>twilio/inbound</c> webhook. It prepares (or resumes) a
/// conversation for the call and answers with the TwiML instructions that decide how
/// the call proceeds: voicemail playback, a bidirectional media stream for realtime
/// agents, or the gather/redirect based voice flow.
/// </summary>
public class TwilioInboundController : TwilioController
{
    private readonly TwilioSetting _settings;
    private readonly IServiceProvider _services;
    private readonly IHttpContextAccessor _context;
    private readonly ILogger _logger;

    /// <summary>
    /// Creates the controller with its injected dependencies.
    /// </summary>
    /// <param name="settings">Twilio plugin settings (the callback host is read from here).</param>
    /// <param name="services">Service provider used to resolve per-request services and hooks.</param>
    /// <param name="context">Accessor for the current HTTP context.</param>
    /// <param name="logger">Logger categorised under this controller.</param>
    public TwilioInboundController(TwilioSetting settings, IServiceProvider services, IHttpContextAccessor context, ILogger<TwilioInboundController> logger)
    {
        _settings = settings;
        _services = services;
        _context = context;
        _logger = logger;
    }

    /// <summary>
    /// Entry point for a phone call reported by Twilio. Loads the agent, emits the
    /// session hooks, initialises the conversation and builds the TwiML response.
    /// </summary>
    /// <param name="request">The voice request bound from the webhook payload.</param>
    /// <returns>The TwiML instructions for the call.</returns>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="request"/> or its <c>CallSid</c> is null.
    /// </exception>
    [ValidateRequest]
    [HttpPost("twilio/inbound")]
    public async Task<TwiMLResult> InitiateStreamConversation(ConversationalVoiceRequest request)
    {
        if (request?.CallSid == null)
        {
            throw new ArgumentNullException(nameof(VoiceRequest.CallSid));
        }

        var twilio = _services.GetRequiredService<TwilioService>();
        VoiceResponse response = default!;

        var instruction = new ConversationalVoiceResponse
        {
            AgentId = request.AgentId,
            ConversationId = request.ConversationId,
            SpeechPaths = [],
            ActionOnEmptyResult = true,
        };

        if (request.InitAudioFile != null)
        {
            instruction.SpeechPaths.Add(request.InitAudioFile);
        }

        // Load agent profile
        var agentService = _services.GetRequiredService<IAgentService>();
        var agent = await agentService.LoadAgent(request.AgentId);

        await HookEmitter.Emit<ITwilioSessionHook>(_services, async hook =>
        {
            await hook.OnSessionCreating(request, instruction);
        });

        // Re-sync the instruction with the request: the conversation id is only known
        // after InitConversation, and the request was handed to the hooks above.
        request.ConversationId = await InitConversation(request, agent);
        instruction.AgentId = request.AgentId;
        instruction.ConversationId = request.ConversationId;

        if (request.AnsweredBy == "machine_start" &&
            request.Direction == "outbound-api")
        {
            // An outbound call was picked up by an answering machine: play the voicemail audio.
            response = new VoiceResponse();

            await HookEmitter.Emit<ITwilioCallStatusHook>(_services, async hook =>
            {
                await hook.OnVoicemailStarting(request);
            });

            var url = twilio.GetSpeechPath(request.ConversationId, "voicemail.mp3");
            response.Play(new Uri(url));
        }
        else
        {
            if (agent.Profiles.Contains("realtime"))
            {
                // Realtime agents talk over a bidirectional media stream.
                response = twilio.ReturnBidirectionalMediaStreamsInstructions(instruction, agent);
            }
            else
            {
                if (string.IsNullOrWhiteSpace(request.Intent))
                {
                    // No initial intent: let the caller speak first, Twilio calls back with the result.
                    instruction.CallbackPath = $"twilio/voice/receive/0?agent-id={request.AgentId}&conversation-id={request.ConversationId}&{twilio.GenerateStatesParameter(request.States)}";
                    response = twilio.ReturnNoninterruptedInstructions(instruction);
                }
                else
                {
                    // An intent was supplied with the call: queue it as the first caller
                    // message (sequence 0) and redirect to the reply endpoint.
                    int seqNum = 0;
                    var messageQueue = _services.GetRequiredService<TwilioMessageQueue>();
                    var sessionManager = _services.GetRequiredService<ITwilioSessionManager>();
                    await sessionManager.StageCallerMessageAsync(request.ConversationId, seqNum, request.Intent);
                    var callerMessage = new CallerMessage()
                    {
                        AgentId = request.AgentId,
                        ConversationId = request.ConversationId,
                        SeqNumber = seqNum,
                        Content = request.Intent,
                        From = request.From,
                        States = ParseStates(request.States)
                    };
                    await messageQueue.EnqueueAsync(callerMessage);
                    response = new VoiceResponse();
                    // Wait 3 seconds so the first reply has time to be produced while the
                    // caller is still hearing the ring-back ("du-du") tone.
                    await Task.Delay(1000 * 3);
                    response.Redirect(new Uri($"{_settings.CallbackHost}/twilio/voice/reply/{seqNum}?agent-id={request.AgentId}&conversation-id={request.ConversationId}&{twilio.GenerateStatesParameter(request.States)}"), HttpMethod.Post);
                }
            }
        }

        await HookEmitter.Emit<ITwilioSessionHook>(_services, async hook =>
        {
            await hook.OnSessionCreated(request);
        });

        return TwiML(response);
    }

    /// <summary>
    /// Converts a list of <c>key:value</c> strings into a dictionary.
    /// </summary>
    /// <remarks>
    /// Only the first colon separates key from value, so values that themselves
    /// contain a colon (for example <c>time:12:30</c>) are preserved. Entries without
    /// both a key and a value are skipped. When a key occurs more than once the last
    /// value wins instead of throwing.
    /// </remarks>
    /// <param name="states">The raw state entries; may be null or empty.</param>
    /// <returns>The parsed states; empty when there is nothing to parse.</returns>
    protected Dictionary<string, string> ParseStates(List<string> states)
    {
        var result = new Dictionary<string, string>();
        if (states is null || states.Count == 0)
        {
            return result;
        }

        foreach (var kvp in states)
        {
            if (string.IsNullOrWhiteSpace(kvp))
            {
                continue;
            }

            // Limit the split to two parts so a colon inside the value is kept.
            var parts = kvp.Split(':', 2, StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length == 2)
            {
                // Indexer assignment tolerates duplicate keys (Add would throw).
                result[parts[0]] = parts[1];
            }
        }

        return result;
    }

    /// <summary>
    /// Looks up the conversation referenced by the request, creating a new phone
    /// conversation when none is found, then stores the call related states on it.
    /// </summary>
    /// <param name="request">The voice request describing the call.</param>
    /// <param name="agent">The loaded agent handling the call.</param>
    /// <returns>The id of the existing or newly created conversation.</returns>
    private async Task<string> InitConversation(ConversationalVoiceRequest request, Agent agent)
    {
        var convService = _services.GetRequiredService<IConversationService>();
        var conversation = await convService.GetConversation(request.ConversationId);
        if (conversation == null)
        {
            var conv = new Conversation
            {
                AgentId = request.AgentId,
                Channel = ConversationChannel.Phone,
                ChannelId = request.CallSid,
                Title = $"Incoming phone call from {request.From}",
                Tags = [],
            };

            conversation = await convService.NewConversation(conv);
        }

        var states = new List<MessageState>
        {
            new("channel", ConversationChannel.Phone),
            new("calling_phone", request.From),
            new("phone_direction", request.Direction),
            new("twilio_call_sid", request.CallSid),
        };

        // Enable lazy routing mode to optimize realtime experience
        if (agent.Profiles.Contains("realtime") && agent.Type == AgentType.Routing)
        {
            states.Add(new(StateConst.ROUTING_MODE, "lazy"));
        }

        if (request.InitAudioFile != null)
        {
            states.Add(new("init_audio_file", request.InitAudioFile));
        }

        convService.SetConversationId(conversation.Id, states);
        convService.SaveStates();

        return conversation.Id;
    }
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
using BotSharp.Abstraction.Agents.Models;
using BotSharp.Core.Infrastructures;
using BotSharp.Plugin.Twilio.Interfaces;
using BotSharp.Plugin.Twilio.Models;
using Microsoft.AspNetCore.Cors.Infrastructure;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Newtonsoft.Json;
using System.IO;

namespace BotSharp.Plugin.Twilio.Controllers;

Expand Down Expand Up @@ -45,31 +41,4 @@ public async Task<ActionResult> PhoneRecordingStatus(ConversationalVoiceRequest

return Ok();
}

/// <summary>
/// Webhook for recording transcription updates. When the request is marked as final,
/// notifies the call status hooks and appends the transcript to the conversation's
/// dialog history as a user message.
/// </summary>
/// <param name="request">The voice request carrying the transcription payload.</param>
/// <returns>Always an empty 200 OK result.</returns>
[ValidateRequest]
[HttpPost("twilio/record/transcribe")]
public async Task<ActionResult> PhoneRecordingTranscribe(ConversationalVoiceRequest request)
{
    // Only the final transcription result is processed.
    if (request.Final != "true")
    {
        return Ok();
    }

    // A completed transcription is a normal event, so log it as information (not as
    // an error) and use a message template rather than string interpolation.
    _logger.LogInformation("Transcription completed for {CallSid}, the transcription is: {TranscriptionData}", request.CallSid, request.TranscriptionData);

    // transcription completed
    await HookEmitter.Emit<ITwilioCallStatusHook>(_services, x => x.OnTranscribeCompleted(request));

    // Nothing to append when no payload was sent; deserializing null would throw.
    if (string.IsNullOrWhiteSpace(request.TranscriptionData))
    {
        return Ok();
    }

    // Append the transcription to the dialog history
    var transcript = JsonConvert.DeserializeObject<TranscriptionData>(request.TranscriptionData);
    if (transcript != null && !string.IsNullOrEmpty(transcript.Transcript))
    {
        var storage = _services.GetRequiredService<IConversationStorage>();
        var message = new RoleDialogModel(AgentRole.User, transcript.Transcript)
        {
            CurrentAgentId = request.AgentId
        };
        storage.Append(request.ConversationId, message);
    }

    return Ok();
}
}
Loading
Loading