Skip to content

Commit 6e89dc6

Browse files
authored
Merge pull request #319 from waelhosn/master
Resolved issue related to convertPdfToText
2 parents 32d5fc0 + fce5186 commit 6e89dc6

File tree

2 files changed

+23
-25
lines changed

2 files changed

+23
-25
lines changed

src/Infrastructure/BotSharp.OpenAPI/Controllers/KnowledgeBaseController.cs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
using BotSharp.Abstraction.Knowledges.Models;
22
using BotSharp.Abstraction.Knowledges.Settings;
3+
using Microsoft.AspNetCore.Http;
34

45
namespace BotSharp.OpenAPI.Controllers;
56

@@ -16,13 +17,6 @@ public KnowledgeBaseController(IKnowledgeService knowledgeService, IServiceProvi
1617
_services = services;
1718
}
1819

19-
[HttpPost("/knowledge-base/embed")]
20-
public async Task EmbedKnowledge()
21-
{
22-
var chunks = await _knowledgeService.CollectChunkedKnowledge();
23-
await _knowledgeService.EmbedKnowledge(chunks);
24-
}
25-
2620
[HttpGet("/knowledge/{agentId}")]
2721
public async Task<List<RetrievedResult>> RetrieveKnowledge([FromRoute] string agentId, [FromQuery(Name = "q")] string question)
2822
{
@@ -70,8 +64,13 @@ public async Task<IActionResult> FeedKnowledge([FromRoute] string agentId, List<
7064
foreach (var formFile in files)
7165
{
7266
var filePath = Path.GetTempFileName();
73-
using var stream = System.IO.File.Create(filePath);
74-
await formFile.CopyToAsync(stream);
67+
68+
69+
using (var stream = new FileStream(filePath, FileMode.Create, FileAccess.Write, FileShare.None))
70+
{
71+
await formFile.CopyToAsync(stream);
72+
await stream.FlushAsync(); // Ensure all data is written to the file
73+
}
7574

7675
var content = await textConverter.ConvertPdfToText(filePath, startPageNum, endPageNum);
7776

@@ -84,6 +83,9 @@ await _knowledgeService.Feed(new KnowledgeFeedModel
8483
AgentId = agentId,
8584
Content = content
8685
});
86+
87+
// Delete the temp file after processing to clean up
88+
System.IO.File.Delete(filePath);
8789
}
8890

8991
return Ok(new { count = files.Count, size });

src/Plugins/BotSharp.Plugin.KnowledgeBase/Services/PigPdf2TextConverter.cs

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,24 @@ namespace BotSharp.Plugin.KnowledgeBase.Services;
55

66
public class PigPdf2TextConverter : IPdf2TextConverter
77
{
8-
public async Task<string> ConvertPdfToText(string filePath, int? startPageNum, int? endPageNum)
8+
public Task<string> ConvertPdfToText(string filePath, int? startPageNum, int? endPageNum)
99
{
10-
return await OpenPdfDocumentAsync(filePath, startPageNum, endPageNum);
10+
// Since PdfDocument.Open is not async, we don't need to make this method async.
11+
// If you need this method to run asynchronously, consider wrapping the call in Task.Run for CPU-bound work.
12+
return Task.FromResult(OpenPdfDocument(filePath, startPageNum, endPageNum));
1113
}
1214

13-
private async Task<string> OpenPdfDocumentAsync(string filePath, int? startPageNum, int? endPageNum)
15+
private string OpenPdfDocument(string filePath, int? startPageNum, int? endPageNum)
1416
{
15-
var document = PdfDocument.Open(filePath);
16-
var content = "";
17+
using var fileStream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
18+
using var document = PdfDocument.Open(fileStream);
19+
var content = new StringBuilder();
1720
foreach (Page page in document.GetPages())
1821
{
19-
if (startPageNum.HasValue && page.Number < startPageNum.Value)
20-
{
21-
continue;
22-
}
23-
24-
if (endPageNum.HasValue && page.Number > endPageNum.Value)
25-
{
26-
continue;
27-
}
28-
content += page.Text;
22+
if (startPageNum.HasValue && page.Number < startPageNum.Value) continue;
23+
if (endPageNum.HasValue && page.Number > endPageNum.Value) continue;
24+
content.Append(page.Text);
2925
}
30-
return content;
26+
return content.ToString();
3127
}
3228
}

0 commit comments

Comments
 (0)