diff --git a/spring-ai-core/src/main/java/org/springframework/ai/reader/ExtractedTextFormatter.java b/spring-ai-core/src/main/java/org/springframework/ai/reader/ExtractedTextFormatter.java
index 0ad3e3290b3..95df2f6b02f 100644
--- a/spring-ai-core/src/main/java/org/springframework/ai/reader/ExtractedTextFormatter.java
+++ b/spring-ai-core/src/main/java/org/springframework/ai/reader/ExtractedTextFormatter.java
@@ -32,8 +32,9 @@
  * An instance of this formatter can be customized using the {@link Builder} nested class.
  *
  * @author Christian Tzolov
+ * @author Iryna Kopchak
  */
-public final class ExtractedTextFormatter {
+public class ExtractedTextFormatter {
 
 	/** Flag indicating if the text should be left-aligned */
 	private final boolean leftAlignment;
@@ -84,7 +85,7 @@ public static ExtractedTextFormatter defaults() {
 	 * @return Returns the same text but with blank lines trimmed.
 	 */
 	public static String trimAdjacentBlankLines(String pageText) {
-		return pageText.replaceAll("(?m)(^ *\n)", "\n").replaceAll("(?m)^$([\r\n]+?)(^$[\r\n]+?^)+", "$1");
+		return pageText.replaceAll("(?m)^(?:\\s*\\r?\\n)+", "\n");
 	}
 
 	/**