Skip to content

src: remove regex usage for env file parsing #52406

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 122 additions & 39 deletions src/node_dotenv.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include "node_dotenv.h"
#include <regex> // NOLINT(build/c++11)
#include <unordered_set>
#include "env-inl.h"
#include "node_file.h"
Expand All @@ -12,15 +11,6 @@ using v8::NewStringType;
using v8::Object;
using v8::String;

/**
* The inspiration for this implementation comes from the original dotenv code,
* available at https://github.com/motdotla/dotenv
*/
const std::regex LINE(
"\\s*(?:export\\s+)?([\\w.-]+)(?:\\s*=\\s*?|:\\s+?)(\\s*'(?:\\\\'|[^']"
")*'|\\s*\"(?:\\\\\"|[^\"])*\"|\\s*`(?:\\\\`|[^`])*`|[^#\r\n]+)?\\s*(?"
":#.*)?"); // NOLINT(whitespace/line_length)

std::vector<std::string> Dotenv::GetPathFromArgs(
const std::vector<std::string>& args) {
const auto find_match = [](const std::string& arg) {
Expand Down Expand Up @@ -101,35 +91,137 @@ Local<Object> Dotenv::ToObject(Environment* env) {
return result;
}

void Dotenv::ParseContent(const std::string_view content) {
std::string lines = std::string(content);
lines = std::regex_replace(lines, std::regex("\r\n?"), "\n");
std::string_view trim_spaces(std::string_view input) {
if (input.empty()) return "";
if (input.front() == ' ') {
input.remove_prefix(input.find_first_not_of(' '));
}
if (!input.empty() && input.back() == ' ') {
input = input.substr(0, input.find_last_not_of(' ') + 1);
}
return input;
}

void Dotenv::ParseContent(const std::string_view input) {
std::string lines(input);

// Handle windows newlines "\r\n": remove "\r" and keep only "\n"
lines.erase(std::remove(lines.begin(), lines.end(), '\r'), lines.end());

std::string_view content = lines;
content = trim_spaces(content);

std::string_view key;
std::string_view value;

while (!content.empty()) {
// Skip empty lines and comments
if (content.front() == '\n' || content.front() == '#') {
auto newline = content.find('\n');
if (newline != std::string_view::npos) {
content.remove_prefix(newline + 1);
continue;
}
}

// If there is no equal character, then ignore everything
auto equal = content.find('=');
if (equal == std::string_view::npos) {
break;
}

std::smatch match;
while (std::regex_search(lines, match, LINE)) {
const std::string key = match[1].str();
key = content.substr(0, equal);
content.remove_prefix(equal + 1);
key = trim_spaces(key);

// Default undefined or null to an empty string
std::string value = match[2].str();
if (key.empty()) {
break;
}

// Remove leading whitespaces
value.erase(0, value.find_first_not_of(" \t"));
// Remove export prefix from key
auto have_export = key.compare(0, 7, "export ") == 0;
if (have_export) {
key.remove_prefix(7);
}

// Remove trailing whitespaces
if (!value.empty()) {
value.erase(value.find_last_not_of(" \t") + 1);
// SAFETY: Content is guaranteed to have at least one character
if (content.empty()) {
// In case the last line is a single key without value
// Example: KEY= (without a newline at the EOF)
store_.insert_or_assign(std::string(key), "");
break;
}

if (!value.empty() && value.front() == '"') {
value = std::regex_replace(value, std::regex("\\\\n"), "\n");
value = std::regex_replace(value, std::regex("\\\\r"), "\r");
// Expand new line if \n it's inside double quotes
// Example: EXPAND_NEWLINES = 'expand\nnew\nlines'
if (content.front() == '"') {
auto closing_quote = content.find(content.front(), 1);
if (closing_quote != std::string_view::npos) {
value = content.substr(1, closing_quote - 1);
std::string multi_line_value = std::string(value);

size_t pos = 0;
while ((pos = multi_line_value.find("\\n", pos)) !=
std::string_view::npos) {
multi_line_value.replace(pos, 2, "\n");
pos += 1;
}

store_.insert_or_assign(std::string(key), multi_line_value);
content.remove_prefix(content.find('\n', closing_quote + 1));
continue;
}
}

// Remove surrounding quotes
value = trim_quotes(value);
// Check if the value is wrapped in quotes, single quotes or backticks
if ((content.front() == '\'' || content.front() == '"' ||
content.front() == '`')) {
auto closing_quote = content.find(content.front(), 1);

// Check if the closing quote is not found
// Example: KEY="value
if (closing_quote == std::string_view::npos) {
// Check if newline exist. If it does, take the entire line as the value
// Example: KEY="value\nKEY2=value2
// The value pair should be `"value`
auto newline = content.find('\n');
if (newline != std::string_view::npos) {
value = content.substr(0, newline);
store_.insert_or_assign(std::string(key), value);
content.remove_prefix(newline);
}
} else {
// Example: KEY="value"
value = content.substr(1, closing_quote - 1);
store_.insert_or_assign(std::string(key), value);
// Select the first newline after the closing quotation mark
// since there could be newline characters inside the value.
content.remove_prefix(content.find('\n', closing_quote + 1));
}
} else {
// Regular key value pair.
// Example: `KEY=this is value`
auto newline = content.find('\n');

if (newline != std::string_view::npos) {
value = content.substr(0, newline);
auto hash_character = value.find('#');
// Check if there is a comment in the line
// Example: KEY=value # comment
// The value pair should be `value`
if (hash_character != std::string_view::npos) {
value = content.substr(0, hash_character);
}
content.remove_prefix(newline);
} else {
// In case the last line is a single key/value pair
// Example: KEY=VALUE (without a newline at the EOF)
value = content.substr(0);
}

store_.insert_or_assign(std::string(key), value);
lines = match.suffix();
value = trim_spaces(value);
store_.insert_or_assign(std::string(key), value);
}
}
}

Expand Down Expand Up @@ -179,13 +271,4 @@ void Dotenv::AssignNodeOptionsIfAvailable(std::string* node_options) {
}
}

std::string_view Dotenv::trim_quotes(std::string_view str) {
static const std::unordered_set<char> quotes = {'"', '\'', '`'};
if (str.size() >= 2 && quotes.count(str.front()) &&
quotes.count(str.back())) {
str = str.substr(1, str.size() - 2);
}
return str;
}

} // namespace node
1 change: 0 additions & 1 deletion src/node_dotenv.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ class Dotenv {

private:
std::map<std::string, std::string> store_;
std::string_view trim_quotes(std::string_view str);
};

} // namespace node
Expand Down
2 changes: 2 additions & 0 deletions test/fixtures/dotenv/eof-without-value.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
BASIC=value
EMPTY=
7 changes: 7 additions & 0 deletions test/fixtures/dotenv/multiline.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
JWT_PUBLIC_KEY="-----BEGIN PUBLIC KEY-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAnNl1tL3QjKp3DZWM0T3u
LgGJQwu9WqyzHKZ6WIA5T+7zPjO1L8l3S8k8YzBrfH4mqWOD1GBI8Yjq2L1ac3Y/
bTdfHN8CmQr2iDJC0C6zY8YV93oZB3x0zC/LPbRYpF8f6OqX1lZj5vo2zJZy4fI/
kKcI5jHYc8VJq+KCuRZrvn+3V+KuL9tF9v8ZgjF2PZbU+LsCy5Yqg1M8f5Jp5f6V
u4QuUoobAgMBAAE=
-----END PUBLIC KEY-----"
7 changes: 6 additions & 1 deletion test/fixtures/dotenv/valid.env
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
BASIC=basic

# COMMENTS=work
#BASIC=basic2
#BASIC=basic3

# previous line intentionally left blank
AFTER_LINE=after_line
EMPTY=
Expand Down Expand Up @@ -55,7 +59,8 @@ IS
A
"MULTILINE'S"
STRING`
export EXPORT_EXAMPLE = ignore export

MULTI_NOT_VALID_QUOTE="
MULTI_NOT_VALID=THIS
IS NOT MULTILINE
export EXAMPLE = ignore export
34 changes: 34 additions & 0 deletions test/parallel/test-dotenv-edge-cases.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const common = require('../common');
const assert = require('node:assert');
const path = require('node:path');
const { describe, it } = require('node:test');
const fixtures = require('../common/fixtures');

const validEnvFilePath = '../fixtures/dotenv/valid.env';
const nodeOptionsEnvFilePath = '../fixtures/dotenv/node-options.env';
Expand Down Expand Up @@ -64,4 +65,37 @@ describe('.env supports edge cases', () => {
assert.strictEqual(child.stderr, '');
assert.strictEqual(child.code, 0);
});

it('should handle multiline quoted values', async () => {
// Ref: https://github.com/nodejs/node/issues/52248
const code = `
process.loadEnvFile('./multiline.env');
require('node:assert').ok(process.env.JWT_PUBLIC_KEY);
`.trim();
const child = await common.spawnPromisified(
process.execPath,
[ '--eval', code ],
{ cwd: fixtures.path('dotenv') },
);
assert.strictEqual(child.stdout, '');
assert.strictEqual(child.stderr, '');
assert.strictEqual(child.code, 0);
});

it('should handle empty value without a newline at the EOF', async () => {
// Ref: https://github.com/nodejs/node/issues/52466
const code = `
process.loadEnvFile('./eof-without-value.env');
require('assert').strictEqual(process.env.BASIC, 'value');
require('assert').strictEqual(process.env.EMPTY, '');
`.trim();
const child = await common.spawnPromisified(
process.execPath,
[ '--eval', code ],
{ cwd: fixtures.path('dotenv') },
);
assert.strictEqual(child.stdout, '');
assert.strictEqual(child.stderr, '');
assert.strictEqual(child.code, 0);
});
});
6 changes: 1 addition & 5 deletions test/parallel/test-dotenv.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,6 @@ assert.strictEqual(process.env.COMMENTS, undefined);
assert.strictEqual(process.env.EQUAL_SIGNS, 'equals==');
// Retains inner quotes
assert.strictEqual(process.env.RETAIN_INNER_QUOTES, '{"foo": "bar"}');
// Respects equals signs in values
assert.strictEqual(process.env.EQUAL_SIGNS, 'equals==');
// Retains inner quotes
assert.strictEqual(process.env.RETAIN_INNER_QUOTES, '{"foo": "bar"}');
assert.strictEqual(process.env.RETAIN_INNER_QUOTES_AS_STRING, '{"foo": "bar"}');
assert.strictEqual(process.env.RETAIN_INNER_QUOTES_AS_BACKTICKS, '{"foo": "bar\'s"}');
// Retains spaces in string
Expand All @@ -83,4 +79,4 @@ assert.strictEqual(process.env.EXPAND_NEWLINES, 'expand\nnew\nlines');
assert.strictEqual(process.env.DONT_EXPAND_UNQUOTED, 'dontexpand\\nnewlines');
assert.strictEqual(process.env.DONT_EXPAND_SQUOTED, 'dontexpand\\nnewlines');
// Ignore export before key
assert.strictEqual(process.env.EXAMPLE, 'ignore export');
assert.strictEqual(process.env.EXPORT_EXAMPLE, 'ignore export');
2 changes: 1 addition & 1 deletion test/parallel/util-parse-env.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ const fs = require('node:fs');
EMPTY_DOUBLE_QUOTES: '',
EMPTY_SINGLE_QUOTES: '',
EQUAL_SIGNS: 'equals==',
EXAMPLE: 'ignore export',
EXPORT_EXAMPLE: 'ignore export',
EXPAND_NEWLINES: 'expand\nnew\nlines',
INLINE_COMMENTS: 'inline comments',
INLINE_COMMENTS_BACKTICKS: 'inline comments outside of #backticks',
Expand Down