Skip to content

Commit 3a64fd4

Browse files
authored
[WasmFS] Add virtual backend utilities (#18248)
Add generic subclasses of DataFile, Directory, and Symlink that wrap other files and pass operations through to them. Use the new utilities in a rewrite of the ignore-case virtual backend that cleanly separates the underlying "real" file system from the virtual file system. This also fixes outstanding issues with the ignore-case backend in which the original, non-normalized file names were not preserved.
1 parent b80f2bf commit 3a64fd4

File tree

6 files changed

+341
-103
lines changed

6 files changed

+341
-103
lines changed

system/lib/wasmfs/backends/ignore_case_backend.cpp

Lines changed: 171 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,19 @@
33
// University of Illinois/NCSA Open Source License. Both these licenses can be
44
// found in the LICENSE file.
55

6-
// This file defines the Ignore Case Backend of the new file system.
7-
// It is a virtual backend that normalizes all file paths to lower case.
6+
// A virtual backend that adapts any underlying backend to be
7+
// case-insensitive. IgnoreCaseDirectory intercepts all directory operations,
8+
// normalizes paths to be lower case, then forwards the operations with the new
9+
// paths to the underlying backend. It stores the original, non-normalized names
10+
// internally so they can be returned later, giving the appearance of a
11+
// case-insensitive but case-preserving file system.
12+
//
13+
// See the comment in virtual.h for an explanation of why DataFiles and Symlinks
14+
// must have no-op wrappers.
815

916
#include "backend.h"
1017
#include "file.h"
11-
#include "memory_backend.h"
18+
#include "virtual.h"
1219
#include "wasmfs.h"
1320

1421
namespace {
@@ -20,105 +27,178 @@ std::string normalize(const std::string& name) {
2027
}
2128
return result;
2229
}
23-
} // namespace
30+
31+
} // anonymous namespace
2432

2533
namespace wasmfs {
2634

27-
// Problem: Child entries are stored both in IgnoreCaseDirectory and in baseDirectory.
28-
//
29-
// Original name case preservation could be possible if MemoryDirectory::entries
30-
// were accesible. Then, we would store the original case there but search
31-
// ignoring case.
32-
//
33-
class IgnoreCaseDirectory : public MemoryDirectory {
34-
using BaseClass = MemoryDirectory;
35-
std::shared_ptr<Directory> baseDirectory;
35+
class IgnoreCaseDirectory : public VirtualDirectory {
36+
37+
struct ChildInfo {
38+
std::string originalName;
39+
std::shared_ptr<File> child;
40+
};
41+
42+
// Map normalized names to virtual files and their non-normalized names.
43+
std::map<std::string, ChildInfo> children;
3644

3745
public:
38-
IgnoreCaseDirectory(std::shared_ptr<Directory> base, backend_t proxyBackend)
39-
: BaseClass(base->locked().getMode(), proxyBackend), baseDirectory(base) {}
46+
IgnoreCaseDirectory(std::shared_ptr<Directory> real, backend_t backend)
47+
: VirtualDirectory(real, backend) {}
48+
49+
std::shared_ptr<File> getChild(const std::string& name) override;
50+
std::shared_ptr<DataFile> insertDataFile(const std::string& name,
51+
mode_t mode) override;
52+
std::shared_ptr<Directory> insertDirectory(const std::string& name,
53+
mode_t mode) override;
54+
std::shared_ptr<Symlink> insertSymlink(const std::string& name,
55+
const std::string& target) override;
56+
int insertMove(const std::string& name, std::shared_ptr<File> file) override;
57+
int removeChild(const std::string& name) override;
58+
ssize_t getNumEntries() override { return real->locked().getNumEntries(); }
59+
Directory::MaybeEntries getEntries() override;
60+
std::string getName(std::shared_ptr<File> file) override;
61+
bool maintainsFileIdentity() override { return true; }
62+
};
4063

41-
std::shared_ptr<File> getChild(const std::string& name) override {
42-
return BaseClass::getChild(normalize(name));
64+
// Wrap a real file in an IgnoreCase virtual file of the same kind.
65+
std::shared_ptr<DataFile> virtualize(std::shared_ptr<DataFile> data,
66+
backend_t backend) {
67+
return std::make_shared<VirtualDataFile>(data, backend);
68+
}
69+
70+
std::shared_ptr<Directory> virtualize(std::shared_ptr<Directory> dir,
71+
backend_t backend) {
72+
return std::make_shared<IgnoreCaseDirectory>(dir, backend);
73+
}
74+
75+
std::shared_ptr<Symlink> virtualize(std::shared_ptr<Symlink> link,
76+
backend_t backend) {
77+
return std::make_shared<VirtualSymlink>(link, backend);
78+
}
79+
80+
std::shared_ptr<File> virtualize(std::shared_ptr<File> file,
81+
backend_t backend) {
82+
if (auto data = file->dynCast<DataFile>()) {
83+
return virtualize(data, backend);
84+
} else if (auto dir = file->dynCast<Directory>()) {
85+
return virtualize(dir, backend);
86+
} else if (auto link = file->dynCast<Symlink>()) {
87+
return virtualize(link, backend);
4388
}
89+
WASMFS_UNREACHABLE("unexpected file kind");
90+
}
4491

45-
std::shared_ptr<DataFile> insertDataFile(const std::string& name,
46-
mode_t mode) override {
47-
auto name2 = normalize(name);
48-
auto baseDirLocked = baseDirectory->locked();
49-
auto child = baseDirLocked.insertDataFile(name2, mode);
50-
if (child) {
51-
insertChild(name2, child);
52-
// Directory::Hanlde needs a parent
53-
child->locked().setParent(cast<Directory>());
54-
}
55-
return child;
92+
std::shared_ptr<File> IgnoreCaseDirectory::getChild(const std::string& name) {
93+
auto normalized = normalize(name);
94+
if (auto it = children.find(normalized); it != children.end()) {
95+
return it->second.child;
5696
}
97+
auto child = real->locked().getChild(normalized);
98+
if (!child) {
99+
return nullptr;
100+
}
101+
child = virtualize(child, getBackend());
102+
children[normalized] = {name, child};
103+
return child;
104+
}
57105

58-
std::shared_ptr<Directory> insertDirectory(const std::string& name,
59-
mode_t mode) override {
60-
auto name2 = normalize(name);
61-
auto baseDirLocked = baseDirectory->locked();
62-
if (!baseDirLocked.getParent())
63-
baseDirLocked.setParent(parent.lock()); // Directory::Hanlde needs a parent
64-
auto baseChild = baseDirLocked.insertDirectory(name2, mode);
65-
auto child = std::make_shared<IgnoreCaseDirectory>(baseChild, getBackend());
66-
insertChild(name2, child);
67-
return child;
106+
std::shared_ptr<DataFile>
107+
IgnoreCaseDirectory::insertDataFile(const std::string& name, mode_t mode) {
108+
auto normalized = normalize(name);
109+
auto file = real->locked().insertDataFile(normalized, mode);
110+
if (!file) {
111+
return nullptr;
68112
}
113+
file = virtualize(file, getBackend());
114+
children[normalized] = {name, file};
115+
return file;
116+
}
69117

70-
std::shared_ptr<Symlink> insertSymlink(const std::string& name,
71-
const std::string& target) override {
72-
auto name2 = normalize(name);
73-
auto child = baseDirectory->locked().insertSymlink(name2, target);
74-
if (child) {
75-
insertChild(name2, child);
76-
// Directory::Hanlde needs a parent
77-
child->locked().setParent(cast<Directory>());
78-
}
79-
return child;
118+
std::shared_ptr<Directory>
119+
IgnoreCaseDirectory::insertDirectory(const std::string& name, mode_t mode) {
120+
auto normalized = normalize(name);
121+
auto dir = real->locked().insertDirectory(normalized, mode);
122+
if (!dir) {
123+
return nullptr;
80124
}
125+
dir = virtualize(dir, getBackend());
126+
children[normalized] = {name, dir};
127+
return dir;
128+
}
81129

82-
int insertMove(const std::string& name, std::shared_ptr<File> file) override {
83-
auto newName = normalize(name);
84-
// Remove entry with the new name (if any) from this directory.
85-
if (auto err = removeChild(newName))
86-
return err;
87-
auto oldParent = file->locked().getParent()->locked();
88-
auto oldName = normalize(oldParent.getName(file));
89-
// Move in underlying directory.
90-
if (auto err = baseDirectory->locked().insertMove(newName, file))
91-
return err;
92-
// Ensure old file was removed.
93-
if (auto err = oldParent.removeChild(oldName))
94-
return err;
95-
// Cache file with the new name in this directory.
96-
insertChild(newName, file);
97-
file->locked().setParent(cast<Directory>());
98-
return 0;
130+
std::shared_ptr<Symlink>
131+
IgnoreCaseDirectory::insertSymlink(const std::string& name,
132+
const std::string& target) {
133+
auto normalized = normalize(name);
134+
auto link = real->locked().insertSymlink(normalized, target);
135+
if (!link) {
136+
return nullptr;
99137
}
138+
link = virtualize(link, getBackend());
139+
children[normalized] = {name, link};
140+
return link;
141+
}
100142

101-
int removeChild(const std::string& name) override {
102-
auto name2 = normalize(name);
103-
if (auto err = BaseClass::removeChild(name2))
104-
return err;
105-
return baseDirectory->locked().removeChild(name2);
143+
int IgnoreCaseDirectory::insertMove(const std::string& name,
144+
std::shared_ptr<File> file) {
145+
auto normalized = normalize(name);
146+
if (auto err = real->locked().insertMove(normalized, devirtualize(file))) {
147+
return err;
148+
}
149+
auto oldParent =
150+
std::static_pointer_cast<IgnoreCaseDirectory>(file->locked().getParent());
151+
auto& oldChildren = oldParent->children;
152+
// Delete the entry in the old parent.
153+
for (auto it = oldChildren.begin(); it != oldChildren.end(); ++it) {
154+
if (it->second.child == file) {
155+
oldChildren.erase(it);
156+
break;
157+
}
158+
}
159+
// Unlink the overwritten entry if it exists.
160+
auto [it, inserted] = children.insert({normalized, {name, file}});
161+
if (!inserted) {
162+
it->second.child->locked().setParent(nullptr);
163+
it->second = {name, file};
106164
}
107165

108-
ssize_t getNumEntries() override { return baseDirectory->locked().getNumEntries(); }
166+
return 0;
167+
}
109168

110-
// TODO: preserve original name, denormalize, and use it here
111-
Directory::MaybeEntries getEntries() override {
112-
return baseDirectory->locked().getEntries();
169+
int IgnoreCaseDirectory::removeChild(const std::string& name) {
170+
auto normalized = normalize(name);
171+
if (auto err = real->locked().removeChild(normalized)) {
172+
return err;
113173
}
174+
auto it = children.find(normalized);
175+
assert(it != children.end());
176+
it->second.child->locked().setParent(nullptr);
177+
children.erase(it);
178+
return 0;
179+
}
114180

115-
// TODO: preserve original name, denormalize, and use it here
116-
std::string getName(std::shared_ptr<File> file) override {
117-
return BaseClass::getName(file);
181+
Directory::MaybeEntries IgnoreCaseDirectory::getEntries() {
182+
auto entries = real->locked().getEntries();
183+
if (auto err = entries.getError()) {
184+
return entries;
118185
}
186+
for (auto& entry : *entries) {
187+
if (auto it = children.find(entry.name); it != children.end()) {
188+
entry.name = it->second.originalName;
189+
}
190+
}
191+
return entries;
192+
}
119193

120-
bool maintainsFileIdentity() override { return true; }
121-
};
194+
std::string IgnoreCaseDirectory::getName(std::shared_ptr<File> file) {
195+
for (auto& [_, info] : children) {
196+
if (info.child == file) {
197+
return info.originalName;
198+
}
199+
}
200+
return "";
201+
}
122202

123203
class IgnoreCaseBackend : public Backend {
124204
backend_t backend;
@@ -129,16 +209,21 @@ class IgnoreCaseBackend : public Backend {
129209
}
130210

131211
std::shared_ptr<DataFile> createFile(mode_t mode) override {
132-
return backend->createFile(mode);
212+
return virtualize(backend->createFile(mode), this);
133213
}
134214

135215
std::shared_ptr<Directory> createDirectory(mode_t mode) override {
136-
return std::make_shared<IgnoreCaseDirectory>(backend->createDirectory(mode),
137-
this);
216+
auto real = backend->createDirectory(mode);
217+
// Inserts into the real backing directory won't work if it doesn't appear
218+
// to be linked, so give it a parent.
219+
// TODO: Break this reference cycle in a destructor somewhere.
220+
real->locked().setParent(real);
221+
auto ret = virtualize(real, this);
222+
return ret;
138223
}
139224

140225
std::shared_ptr<Symlink> createSymlink(std::string target) override {
141-
return backend->createSymlink(normalize(target));
226+
return virtualize(backend->createSymlink(target), this);
142227
}
143228
};
144229

@@ -148,12 +233,14 @@ backend_t createIgnoreCaseBackend(std::function<backend_t()> createBackend) {
148233
}
149234

150235
extern "C" {
236+
151237
// C API for creating ignore case backend.
152238
backend_t wasmfs_create_icase_backend(backend_constructor_t create_backend,
153239
void* arg) {
154240
return createIgnoreCaseBackend(
155241
[create_backend, arg]() { return create_backend(arg); });
156242
}
157-
}
243+
244+
} // extern "C"
158245

159246
} // namespace wasmfs

system/lib/wasmfs/file.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -178,12 +178,13 @@ int Directory::Handle::insertMove(const std::string& name,
178178
it->second.file->locked().setParent(nullptr);
179179
it->second = entry;
180180
}
181-
file->locked().setParent(getDir());
182181
} else {
183182
// This backend doesn't use the dcache.
184183
assert(getDir()->maintainsFileIdentity());
185184
}
186185

186+
file->locked().setParent(getDir());
187+
187188
// TODO: Moving mount points probably shouldn't update the mtime.
188189
auto now = time(NULL);
189190
oldParent->locked().setMTime(now);

system/lib/wasmfs/file.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,10 @@ class Directory : public File {
181181
return 0;
182182
}
183183

184+
std::vector<Entry>& operator*() {
185+
return *std::get_if<std::vector<Entry>>(this);
186+
}
187+
184188
std::vector<Entry>* operator->() {
185189
return std::get_if<std::vector<Entry>>(this);
186190
}
@@ -253,7 +257,7 @@ class Directory : public File {
253257
// 1. Ensuring that all insert* and getChild calls returning a particular
254258
// file return the same File object.
255259
//
256-
// 2. Clearing the File's parent field in `removeChild`.
260+
// 2. Clearing unlinked Files' parents in `removeChild` and `insertMove`.
257261
//
258262
// 3. Implementing `getName`, since it cannot be implemented in terms of the
259263
// dcache.

system/lib/wasmfs/memory_backend.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
#include <emscripten/threading.h>
1616

1717
namespace wasmfs {
18+
1819
// This class describes a file that lives in Wasm Memory.
1920
class MemoryDataFile : public DataFile {
2021
std::vector<uint8_t> buffer;

0 commit comments

Comments
 (0)