Skip to content

WebDriver Playwright. #246

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion BotSharp.sln
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BotSharp.Plugin.ChatHub", "
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BotSharp.Plugin.TelegramBots", "src\Plugins\BotSharp.Plugin.TelegramBots\BotSharp.Plugin.TelegramBots.csproj", "{DCA18996-4D3A-4E98-BCD0-1FB77C59253E}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BotSharp.Logger", "src\Infrastructure\BotSharp.Logger\BotSharp.Logger.csproj", "{5CA3335E-E6AD-46FD-B277-29BBC3A16500}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "BotSharp.Logger", "src\Infrastructure\BotSharp.Logger\BotSharp.Logger.csproj", "{5CA3335E-E6AD-46FD-B277-29BBC3A16500}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Build settings: targets netstandard2.1 with nullable reference types enabled. -->
<PropertyGroup>
<TargetFramework>netstandard2.1</TargetFramework>
<Nullable>enable</Nullable>
<!-- Root namespace is the project name with spaces replaced by underscores, followed by a literal "s". -->
<!-- NOTE(review): the trailing "s" looks deliberate (possibly to avoid a clash with another namespace or type name), but confirm it is not a typo. -->
<RootNamespace>$(MSBuildProjectName.Replace(" ", "_"))s</RootNamespace>
</PropertyGroup>

<!-- The only dependency is the BotSharp.Abstraction project. -->
<ItemGroup>
<ProjectReference Include="..\..\Infrastructure\BotSharp.Abstraction\BotSharp.Abstraction.csproj" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netstandard2.1</TargetFramework>
Expand All @@ -10,7 +10,21 @@
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\..\Infrastructure\BotSharp.Abstraction\BotSharp.Abstraction.csproj" />
<PackageReference Include="Microsoft.Playwright" Version="1.39.0" />
</ItemGroup>

<ItemGroup>
<Compile Remove="agents\**" />
<EmbeddedResource Remove="agents\**" />
<None Remove="agents\**" />
</ItemGroup>

<ItemGroup>
<None Remove="README.md" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\Infrastructure\BotSharp.Core\BotSharp.Core.csproj" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
namespace BotSharp.Plugin.WebDriver.Drivers;

/// <summary>
/// Mutable holder for a Playwright driver, a browser and a page.
/// Each member stays <c>null</c> until the matching <c>Set*</c> method is called.
/// </summary>
public class PlaywrightInstance : IDisposable
{
    IPlaywright _playwright;
    IBrowser _browser;
    IPage _page;

    /// <summary>The Playwright driver, or <c>null</c> if <see cref="SetPlaywright"/> has not been called.</summary>
    public IPlaywright Playwright => _playwright;

    /// <summary>The browser, or <c>null</c> if <see cref="SetBrowser"/> has not been called.</summary>
    public IBrowser Browser => _browser;

    /// <summary>The page, or <c>null</c> if <see cref="SetPage"/> has not been called.</summary>
    public IPage Page => _page;

    /// <summary>Stores the Playwright driver.</summary>
    public void SetPlaywright(IPlaywright playwright) { _playwright = playwright; }

    /// <summary>Stores the browser.</summary>
    public void SetBrowser(IBrowser browser) { _browser = browser; }

    /// <summary>Stores the page.</summary>
    public void SetPage(IPage page) { _page = page; }

    /// <summary>
    /// Disposes the Playwright driver if one was assigned.
    /// </summary>
    public void Dispose()
    {
        // The driver is only assigned through SetPlaywright; disposing an instance that
        // never received one must be a no-op rather than a NullReferenceException.
        _playwright?.Dispose();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
using BotSharp.Plugin.WebDriver.Services;

namespace BotSharp.Plugin.WebDriver.Drivers;

/// <summary>
/// Drives a browser through the Playwright objects held by a <see cref="PlaywrightInstance"/>.
/// </summary>
public class PlaywrightWebDriver
{
    private readonly IServiceProvider _services;
    private readonly PlaywrightInstance _instance;

    public PlaywrightWebDriver(IServiceProvider services, PlaywrightInstance instance)
    {
        _services = services;
        _instance = instance;
    }

    /// <summary>
    /// Ensures a Playwright driver and a Chrome browser exist on the instance and,
    /// when <paramref name="url"/> is not empty, opens a new page and navigates to it.
    /// </summary>
    /// <param name="url">Address to open; no page is created when null or empty.</param>
    /// <returns>The browser stored on the instance.</returns>
    public async Task<IBrowser> LaunchBrowser(string? url)
    {
        if (_instance.Playwright == null)
        {
            var playwright = await Playwright.CreateAsync();
            _instance.SetPlaywright(playwright);
        }

        if (_instance.Browser == null)
        {
            // Launches a visible (non-headless) browser on the "chrome" channel.
            var browser = await _instance.Playwright.Chromium.LaunchAsync(new BrowserTypeLaunchOptions
            {
                Headless = false,
                Channel = "chrome",
            });
            _instance.SetBrowser(browser);
        }

        if (!string.IsNullOrEmpty(url))
        {
            var page = await _instance.Browser.NewPageAsync();
            _instance.SetPage(page);
            await page.GotoAsync(url);
        }

        return _instance.Browser;
    }

    /// <summary>
    /// Collects the text of every anchor and button on the current page, asks
    /// <see cref="WebDriverService"/> which one matches <c>context.ElementName</c>,
    /// and clicks the element it points to.
    /// </summary>
    /// <param name="agent">Agent passed through to <see cref="WebDriverService.FindElement"/>.</param>
    /// <param name="context">Browsing arguments; only <c>ElementName</c> is read here.</param>
    /// <param name="messageId">Message id passed through to <see cref="WebDriverService.FindElement"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// No page has been opened, the page has no body element, or no element could be resolved.
    /// </exception>
    public async Task ClickElement(Agent agent, BrowsingContextIn context, string messageId)
    {
        if (context is null)
        {
            throw new ArgumentNullException(nameof(context));
        }

        // A page only exists after LaunchBrowser was called with a url.
        var page = _instance.Page
            ?? throw new InvalidOperationException("No page is open; launch the browser with a url before clicking an element.");

        // Retrieve the page raw html and infer the element path
        var body = await page.QuerySelectorAsync("body")
            ?? throw new InvalidOperationException("The current page has no <body> element.");

        var candidates = new List<string>();
        await AppendElementTexts(body, "a", candidates);
        await AppendElementTexts(body, "button", candidates);

        var driverService = _services.GetRequiredService<WebDriverService>();
        var htmlElementContextOut = await driverService.FindElement(agent, string.Join("", candidates), context.ElementName, messageId);

        // FindElement returns null when no supported completion provider is available.
        if (htmlElementContextOut is null)
        {
            throw new InvalidOperationException($"Unable to resolve the html element '{context.ElementName}'.");
        }

        // NOTE(review): Locator.Nth is zero-based, so "+ 1" is only correct if the parser output
        // is offset accordingly - confirm against the html_parser template. Also note the locator
        // counts every element of the tag, while the parser only saw the ones with non-empty text.
        var element = page.Locator(htmlElementContextOut.TagName).Nth(htmlElementContextOut.Index + 1);
        await element.ClickAsync();
    }

    // Appends "<tag>text</tag>" to target for every tagName element under root that has non-empty text.
    private static async Task AppendElementTexts(IElementHandle root, string tagName, List<string> target)
    {
        var elements = await root.QuerySelectorAllAsync(tagName);
        foreach (var element in elements)
        {
            var text = await element.TextContentAsync();
            if (!string.IsNullOrEmpty(text))
            {
                target.Add($"<{tagName}>{text}</{tagName}>");
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
using BotSharp.Abstraction.Agents;

namespace BotSharp.Plugin.WebDriver.Functions;

/// <summary>
/// Function callback named <c>click_html_element</c>: loads the current agent and
/// delegates the click to <see cref="PlaywrightWebDriver.ClickElement"/>.
/// </summary>
public class ClickHtmlElementFn : IFunctionCallback
{
    private readonly IServiceProvider _services;
    private readonly PlaywrightWebDriver _driver;

    public ClickHtmlElementFn(IServiceProvider services, PlaywrightWebDriver driver)
    {
        (_services, _driver) = (services, driver);
    }

    /// <summary>Name under which this callback is exposed.</summary>
    public string Name => "click_html_element";

    /// <summary>
    /// Deserializes the function arguments, clicks the requested element and
    /// writes a confirmation into <c>message.Content</c>.
    /// </summary>
    /// <param name="message">Dialog message carrying the function arguments, agent id and message id.</param>
    /// <returns>Always <c>true</c> when no exception is thrown.</returns>
    public async Task<bool> Execute(RoleDialogModel message)
    {
        var browsingContext = JsonSerializer.Deserialize<BrowsingContextIn>(message.FunctionArgs);

        var currentAgent = await _services
            .GetRequiredService<IAgentService>()
            .LoadAgent(message.CurrentAgentId);

        await _driver.ClickElement(currentAgent, browsingContext, message.MessageId);

        message.Content = "Executed successfully.";
        return true;
    }
}
28 changes: 28 additions & 0 deletions src/Plugins/BotSharp.Plugin.WebDriver/Functions/OpenBrowserFn.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
using BotSharp.Plugin.WebDriver.LlmContexts;

namespace BotSharp.Plugin.WebDriver.Functions;

/// <summary>
/// Function callback named <c>open_browser</c>: launches the browser through
/// <see cref="PlaywrightWebDriver.LaunchBrowser"/>, optionally navigating to a url.
/// </summary>
public class OpenBrowserFn : IFunctionCallback
{
    public string Name => "open_browser";

    private readonly IServiceProvider _services;
    private readonly PlaywrightWebDriver _driver;

    public OpenBrowserFn(IServiceProvider services,
        PlaywrightWebDriver driver)
    {
        _services = services;
        _driver = driver;
    }

    /// <summary>
    /// Deserializes the function arguments, launches the browser and writes a
    /// confirmation into <c>message.Content</c>.
    /// </summary>
    /// <param name="message">Dialog message carrying the function arguments.</param>
    /// <returns>Always <c>true</c> when no exception is thrown.</returns>
    public async Task<bool> Execute(RoleDialogModel message)
    {
        var args = JsonSerializer.Deserialize<BrowsingContextIn>(message.FunctionArgs);

        // Deserialize yields null for a JSON "null" payload; LaunchBrowser accepts a
        // null url, so the browser can still be opened without navigating anywhere.
        await _driver.LaunchBrowser(args?.Url);
        message.Content = "Executed successfully.";

        return true;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
using System.Text.Json.Serialization;

namespace BotSharp.Plugin.WebDriver.LlmContexts;

/// <summary>
/// Arguments of a browsing function call, bound from JSON by property name.
/// Every member is optional and is <c>null</c> when absent from the payload.
/// </summary>
public class BrowsingContextIn
{
    /// <summary>Value of the JSON property <c>url</c>.</summary>
    [JsonPropertyName("url")] public string? Url { get; set; }

    /// <summary>Value of the JSON property <c>element_name</c>.</summary>
    [JsonPropertyName("element_name")] public string? ElementName { get; set; }

    /// <summary>Value of the JSON property <c>input_text</c>.</summary>
    [JsonPropertyName("input_text")] public string? InputText { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using System.Text.Json.Serialization;

namespace BotSharp.Plugin.WebDriver.LlmContexts;

/// <summary>
/// Element location bound from JSON: a tag name plus an index among elements of that tag.
/// </summary>
public class HtmlElementContextOut
{
    /// <summary>
    /// Value of the JSON property <c>tag_name</c>. Defaults to an empty string so the
    /// non-nullable property is never null when the payload omits it.
    /// </summary>
    [JsonPropertyName("tag_name")]
    public string TagName { get; set; } = string.Empty;

    /// <summary>Value of the JSON property <c>index</c>; 0 when absent.</summary>
    [JsonPropertyName("index")]
    public int Index { get; set; }
}
10 changes: 10 additions & 0 deletions src/Plugins/BotSharp.Plugin.WebDriver/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Web Driver

The WebDriver project provides a new `Web Browser` agent that plugs into BotSharp's routing mechanism.
When the Router determines that the user intends to operate a web page, the conversation is routed to this agent.
The agent contains a set of predefined action functions.
After analyzing the user's conversation history, it generates the corresponding function call and then calls the browser's API to perform the operation.

## Installation

1. Copy the `agents\f3ae2a0f-e6ba-4ee1-a0b9-75d7431ff32b` folder to `WebStarter\data\agents`.
56 changes: 56 additions & 0 deletions src/Plugins/BotSharp.Plugin.WebDriver/Services/WebDriverService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
using BotSharp.Abstraction.Agents.Enums;
using BotSharp.Abstraction.MLTasks;
using BotSharp.Core.Infrastructures;

namespace BotSharp.Plugin.WebDriver.Services;

/// <summary>
/// Uses the agent's <c>html_parser</c> template and a completion provider to
/// locate an html element by name.
/// </summary>
public class WebDriverService
{
    private readonly IServiceProvider _services;

    public WebDriverService(IServiceProvider services)
    {
        _services = services;
    }

    /// <summary>
    /// Renders the agent's <c>html_parser</c> template with the given html and element name,
    /// sends the prompt to the completion provider and parses the reply as
    /// <see cref="HtmlElementContextOut"/>.
    /// </summary>
    /// <param name="agent">Agent supplying the template, the LLM configuration, id and name.</param>
    /// <param name="html">Html passed to the template as <c>html_content</c>.</param>
    /// <param name="elementName">Element name passed to the template as <c>element_name</c>.</param>
    /// <param name="messageId">Message id attached to the completion request.</param>
    /// <returns>
    /// The parsed element location, or <c>null</c> when the completion provider is neither
    /// an <see cref="ITextCompletion"/> nor an <see cref="IChatCompletion"/>.
    /// </returns>
    /// <exception cref="InvalidOperationException">The agent has no <c>html_parser</c> template.</exception>
    public async Task<HtmlElementContextOut> FindElement(Agent agent, string html, string elementName, string messageId)
    {
        // Same exception type First() would raise, but with a message that names the missing template.
        var template = agent.Templates.FirstOrDefault(x => x.Name == "html_parser");
        if (template is null)
        {
            throw new InvalidOperationException($"Agent '{agent.Id}' does not define the 'html_parser' template.");
        }
        var parserInstruction = template.Content;

        var render = _services.GetRequiredService<ITemplateRender>();
        var prompt = render.Render(parserInstruction, new Dictionary<string, object>
        {
            { "html_content", html },
            { "element_name", elementName }
        });

        var completer = CompletionProvider.GetCompletion(_services,
            agentConfig: agent.LlmConfig);

        if (completer is ITextCompletion textCompleter)
        {
            var result = await textCompleter.GetCompletion(prompt, agent.Id, messageId);
            return result.JsonContent<HtmlElementContextOut>();
        }
        else if (completer is IChatCompletion chatCompleter)
        {
            var dialogs = new List<RoleDialogModel>
            {
                new RoleDialogModel(AgentRole.User, prompt)
                {
                    CurrentAgentId = agent.Id,
                    MessageId = messageId
                }
            };
            // The chat request uses a stripped-down agent carrying only id, name and a fixed
            // instruction. This call is not awaited; its result is used directly.
            var result = chatCompleter.GetChatCompletions(new Agent
            {
                Id = agent.Id,
                Name = agent.Name,
                Instruction = "You're a HTML Parser."
            }, dialogs);
            return result.Content.JsonContent<HtmlElementContextOut>();
        }

        // Unsupported completion provider: callers must handle a null result.
        return null;
    }
}
17 changes: 17 additions & 0 deletions src/Plugins/BotSharp.Plugin.WebDriver/Using.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
global using System;
global using System.Collections.Generic;
global using System.Text;
global using BotSharp.Abstraction.Conversations;
global using BotSharp.Abstraction.Plugins;
global using System.Text.Json;
global using BotSharp.Abstraction.Conversations.Models;
global using Microsoft.Playwright;
global using BotSharp.Plugin.WebDriver.Drivers;
global using System.Threading.Tasks;
global using BotSharp.Abstraction.Functions;
global using BotSharp.Abstraction.Agents.Models;
global using BotSharp.Abstraction.Templating;
global using BotSharp.Plugin.WebDriver.LlmContexts;
global using Microsoft.Extensions.DependencyInjection;
global using System.Linq;
global using BotSharp.Abstraction.Utilities;
11 changes: 7 additions & 4 deletions src/Plugins/BotSharp.Plugin.WebDriver/WebDriverPlugin.cs
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
using BotSharp.Abstraction.Plugins;
using BotSharp.Plugin.WebDriver.Services;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;

namespace BotSharp.Plugin.WebDriver;
namespace BotSharp.Plugin.Playwrights;

/// <summary>
/// BotSharp plugin that registers the Playwright-based web driver services.
/// </summary>
public class WebDriverPlugin : IBotSharpPlugin
{
    public string Name => "Web Driver";
    public string Description => "Manipulate web browser in automation tools.";

    /// <summary>
    /// Adds the driver and the driver service as scoped services and the
    /// Playwright instance holder as a singleton.
    /// </summary>
    public void RegisterDI(IServiceCollection services, IConfiguration config)
    {
        services
            .AddScoped<PlaywrightWebDriver>()
            .AddSingleton<PlaywrightInstance>()
            .AddScoped<WebDriverService>();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"name": "Web Browser",
"description": "Perform a specific action on a web browser",
"createdDateTime": "2024-01-02T00:00:00Z",
"updatedDateTime": "2024-01-02T00:00:00Z",
"id": "f3ae2a0f-e6ba-4ee1-a0b9-75d7431ff32b",
"allowRouting": true,
"isPublic": true
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
[
{
"name": "open_browser",
"description": "open a browser",
"parameters": {
"type": "object",
"properties": {
"url": {
"type": "string",
"description": "website url."
}
},
"required": []
}
},
{
"name": "click_html_element",
"description": "Click the html element in a web page.",
"parameters": {
"type": "object",
"properties": {
"element_name": {
"type": "string",
"description": "the html element name."
}
},
"required": ["element_name"]
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
You are a web browser that can manipulate web elements through automated tools like Playwright and Selenium.

Loading