Skip to content

Commit d1d631d

Browse files
authored
Support building recursive types (WebAssembly#3602)
Updates TypeBuilder to support recursive types. Recursive types are particularly problematic because under the current scheme it is necessary to canonicalize the uses of a type's immediate children before canonicalizing the type itself to avoid leaking non-canonical, temporary types out of the TypeBuilder and into the global type stores. In the case of recursive types, it is not possible to do this because of their cyclic nature. In principle this could be overcome by hashing recursive types based on their structure rather than their contents, but that would be complicated. Instead, this PR takes the shortcut of not canonicalizing self-referential HeapTypes at all, but rather moving them out of the TypeBuilder and into the global type store without changing their addresses or needing to update any of their use sites. This breaks all cycles and makes it possible to canonicalize the other types correctly. Note that this PR only adds support for building recursive types. Doing almost anything with the types, such as printing, comparing, or emitting them will certainly lead to infinite recursions. A follow up PR will update all these operations to work correctly with recursive types.
1 parent 95ccf32 commit d1d631d

2 files changed

Lines changed: 183 additions & 93 deletions

File tree

src/wasm/wasm-type.cpp

Lines changed: 112 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,25 +1089,30 @@ std::ostream& operator<<(std::ostream& os, HeapTypeInfo info) {
10891089

10901090
struct TypeBuilder::Impl {
10911091
TypeStore typeStore;
1092-
HeapTypeStore heapTypeStore;
10931092

10941093
struct Entry {
1095-
// HeapTypeInfo has no default constructor, so pick an arbitrary default.
1096-
HeapTypeInfo info = Signature();
1094+
std::unique_ptr<HeapTypeInfo> info;
10971095
bool initialized = false;
1096+
Entry() {
1097+
// We need to eagerly allocate the HeapTypeInfo so we have a TypeID to use
1098+
// to refer to it before it is initialized. Arbitrarily choose a default
1099+
// value.
1100+
info = std::make_unique<HeapTypeInfo>(Signature());
1101+
}
10981102
void set(HeapTypeInfo&& hti) {
1099-
info = hti;
1103+
*info = hti;
11001104
initialized = true;
11011105
}
1102-
HeapType get() { return HeapType(TypeID(&info)); }
1106+
HeapType get() { return HeapType(TypeID(info.get())); }
11031107
};
11041108

11051109
std::vector<Entry> entries;
1110+
1111+
Impl(size_t n) : entries(n) {}
11061112
};
11071113

11081114
TypeBuilder::TypeBuilder(size_t n) {
1109-
impl = std::make_unique<TypeBuilder::Impl>();
1110-
impl->entries.resize(n);
1115+
impl = std::make_unique<TypeBuilder::Impl>(n);
11111116
}
11121117

11131118
TypeBuilder::~TypeBuilder() = default;
@@ -1175,15 +1180,18 @@ struct Canonicalizer {
11751180
// The work list of Types and HeapTypes remaining to be scanned.
11761181
std::vector<Item> scanList;
11771182

1178-
// Maps Type and HeapType IDs to the IDs of their ancestor Types and HeapTypes
1183+
// Maps Type and HeapType IDs to the IDs of their child Types and HeapTypes
11791184
// in the type graph. Only considers compound Types and HeapTypes.
1180-
std::unordered_map<TypeID, std::unordered_set<TypeID>> ancestors;
1185+
std::unordered_map<TypeID, std::unordered_set<TypeID>> children;
11811186

11821187
// Maps each temporary Type and HeapType to the locations where they will have
11831188
// to be replaced with canonical Types and HeapTypes.
11841189
std::unordered_map<Type, std::vector<Type*>> typeLocations;
11851190
std::unordered_map<HeapType, std::vector<HeapType*>> heapTypeLocations;
11861191

1192+
// These heap types will not participate in canonicalization.
1193+
std::unordered_set<TypeID> selfReferentialHeapTypes;
1194+
11871195
// Maps Types and HeapTypes backed by the TypeBuilder's Stores to globally
11881196
// canonical Types and HeapTypes.
11891197
std::unordered_map<Type, Type> canonicalTypes;
@@ -1196,7 +1204,7 @@ struct Canonicalizer {
11961204
template<typename T1, typename T2> void noteChild(T1 parent, T2* child);
11971205
void scanHeapType(HeapType* ht);
11981206
void scanType(Type* child);
1199-
void makeAncestorsFixedPoint();
1207+
void findSelfReferentialHeapTypes();
12001208
std::vector<Item> getOrderedItems();
12011209

12021210
// Replaces the pointee Type or HeapType of `type` with its globally canonical
@@ -1206,6 +1214,10 @@ struct Canonicalizer {
12061214
void canonicalize(T* type, std::unordered_map<T, T>& canonicals);
12071215
};
12081216

1217+
// Used to extend the lifetime of self-referential HeapTypes so they don't need
1218+
// to be canonicalized.
1219+
std::vector<std::unique_ptr<HeapTypeInfo>> noncanonicalHeapTypes;
1220+
12091221
// Traverse the type graph rooted at the initialized HeapTypeInfos in reverse
12101222
// postorder, replacing in place all Types and HeapTypes backed by the
12111223
// TypeBuilder's Stores with equivalent globally canonicalized Types and
@@ -1224,8 +1236,7 @@ Canonicalizer::Canonicalizer(TypeBuilder& builder) : builder(builder) {
12241236

12251237
// Traverse the type graph reachable from the heap types, calculating
12261238
// reachability and collecting a list of types and heap types that need to be
1227-
// canonicalized. We must scan in depth-first order so that we can do a
1228-
// postorder traversal later.
1239+
// canonicalized.
12291240
while (scanList.size() != 0) {
12301241
auto item = scanList.back();
12311242
scanList.pop_back();
@@ -1239,17 +1250,11 @@ Canonicalizer::Canonicalizer(TypeBuilder& builder) : builder(builder) {
12391250
}
12401251
}
12411252

1242-
// Check for recursive types and heap types. TODO: pre-canonicalize these into
1243-
// their minimal finite representations.
1244-
makeAncestorsFixedPoint();
1245-
for (auto& kv : ancestors) {
1246-
if (kv.second.count(kv.first) != 0) {
1247-
WASM_UNREACHABLE("TODO: support recursive types");
1248-
}
1249-
}
1253+
findSelfReferentialHeapTypes();
12501254

1251-
// Visit the types and heap types in reverse postorder, replacing them with
1252-
// their canonicalized versions.
1255+
// Visit the use sites of Types and HeapTypes, replacing them with their
1256+
// canonicalized versions in an order that guarantees no temporary types will
1257+
// be leaked into the global stores.
12531258
for (auto it : getOrderedItems()) {
12541259
switch (it.kind) {
12551260
case Item::TypeKind:
@@ -1260,12 +1265,21 @@ Canonicalizer::Canonicalizer(TypeBuilder& builder) : builder(builder) {
12601265
break;
12611266
}
12621267
}
1268+
1269+
// Now that all other Types and HeapTypes have been canonicalized, move
1270+
// self-referential HeapTypes to the global store so that they will outlive
1271+
// the TypeBuilder without their IDs changing.
1272+
for (auto& entry : builder.impl->entries) {
1273+
if (selfReferentialHeapTypes.count(entry.get().getID())) {
1274+
noncanonicalHeapTypes.emplace_back(std::move(entry.info));
1275+
}
1276+
}
12631277
}
12641278

12651279
template<typename T1, typename T2>
12661280
void Canonicalizer::noteChild(T1 parent, T2* child) {
12671281
if (child->isCompound()) {
1268-
ancestors[child->getID()].insert(parent.getID());
1282+
children[parent.getID()].insert(child->getID());
12691283
scanList.push_back(child);
12701284
}
12711285
}
@@ -1319,85 +1333,112 @@ void Canonicalizer::scanType(Type* type) {
13191333
}
13201334
}
13211335

1322-
void Canonicalizer::makeAncestorsFixedPoint() {
1323-
// Naively calculate the transitive closure of the reachability graph.
1336+
void Canonicalizer::findSelfReferentialHeapTypes() {
1337+
// Calculate the fixed point of the reachability relation.
1338+
auto fixedPoint = children;
13241339
bool changed;
13251340
do {
13261341
changed = false;
1327-
for (auto& entry : ancestors) {
1342+
for (auto& entry : fixedPoint) {
13281343
auto& succs = entry.second;
1329-
std::unordered_set<TypeID> nextAncestors;
1344+
std::unordered_set<TypeID> newSuccs;
13301345
for (auto& other : succs) {
1331-
auto& otherAncestors = ancestors[other];
1332-
nextAncestors.insert(otherAncestors.begin(), otherAncestors.end());
1346+
auto& otherSuccs = fixedPoint[other];
1347+
newSuccs.insert(otherSuccs.begin(), otherSuccs.end());
13331348
}
13341349
size_t oldSize = succs.size();
1335-
succs.insert(nextAncestors.begin(), nextAncestors.end());
1350+
succs.insert(newSuccs.begin(), newSuccs.end());
13361351
if (succs.size() != oldSize) {
13371352
changed = true;
13381353
}
13391354
}
13401355
} while (changed);
1356+
1357+
// Find HeapTypes that reach themselves
1358+
for (auto& entry : builder.impl->entries) {
1359+
TypeID id = entry.get().getID();
1360+
auto it = fixedPoint.find(id);
1361+
if (it != fixedPoint.end() && it->second.count(id)) {
1362+
selfReferentialHeapTypes.insert(id);
1363+
}
1364+
}
13411365
}
13421366

13431367
std::vector<Canonicalizer::Item> Canonicalizer::getOrderedItems() {
1344-
// Topologically sort the Types and HeapTypes so that all children are
1345-
// canonicalized before their parents.
1368+
// In order to canonicalize Types and HeapTypes without leaking any temporary
1369+
// types into the global type store, we need to canonicalize children before
1370+
// parents. In the case of recursive types, this is not possible due to cycles
1371+
// in the parent-child relation. In principle that can be overcome by
1372+
// canonicalizing type structures rather than type contents, but for now we
1373+
// instead cut corners and break the cycles by skipping canonicalization of
1374+
// self-referential HeapTypes. This works because the path from any Type or
1375+
// HeapType to itself in the parent-child relation must go through some
1376+
// self-referential HeapType; it is not possible to have a cycle composed only
1377+
// of Types, for example. In effect this means that we have a nominal type
1378+
// system for self-referential HeapTypes and a structural type system for
1379+
// everything else. Eventually we will have to implement something more
1380+
// consistent, but this is good enough for getting prototype toolchains up and
1381+
// running.
1382+
1383+
// TODO: None of this is particularly optimized. Benchmark to see if this is a
1384+
// significant bottleneck and investigate using better data structures and
1385+
// algorithms.
1386+
1387+
// Remove self-referential HeapTypes to cut cycles.
1388+
auto childrenDAG = children;
1389+
for (TypeID id : selfReferentialHeapTypes) {
1390+
childrenDAG.erase(id);
1391+
for (auto& kv : childrenDAG) {
1392+
kv.second.erase(id);
1393+
}
1394+
}
1395+
1396+
// Collect the remaining types that will be sorted.
1397+
std::unordered_set<TypeID> toSort;
1398+
for (auto& entry : builder.impl->entries) {
1399+
TypeID id = entry.get().getID();
1400+
if (!selfReferentialHeapTypes.count(id)) {
1401+
toSort.insert(id);
1402+
}
1403+
}
1404+
for (auto& kv : childrenDAG) {
1405+
toSort.insert(kv.first);
1406+
for (auto& child : kv.second) {
1407+
toSort.insert(child);
1408+
}
1409+
}
1410+
1411+
// Topologically sort so that children come before their parents.
13461412
std::vector<TypeID> sorted;
13471413
std::unordered_set<TypeID> seen;
1348-
1349-
// Topologically sort so that all parents are pushed before their children.
1350-
// This sort will be reversed later to have children before their parents.
1351-
std::function<void(TypeID)> visit = [&](TypeID i) {
1352-
if (seen.count(i)) {
1414+
std::function<void(TypeID)> visit = [&](TypeID id) {
1415+
if (seen.count(id)) {
13531416
return;
13541417
}
1355-
// Push ancestors of the current type before pushing the current type.
1356-
auto it = ancestors.find(i);
1357-
if (it != ancestors.end()) {
1358-
for (auto ancestor : it->second) {
1359-
visit(ancestor);
1418+
// Push children of the current type before pushing the current type.
1419+
auto it = childrenDAG.find(id);
1420+
if (it != childrenDAG.end()) {
1421+
for (auto child : it->second) {
1422+
visit(child);
13601423
}
13611424
}
1362-
seen.insert(i);
1363-
sorted.push_back(i);
1425+
seen.insert(id);
1426+
sorted.push_back(id);
13641427
};
1365-
1366-
// Collect the items to be sorted, including all the result HeapTypes and any
1367-
// type that participates in the ancestor relation.
1368-
std::unordered_set<TypeID> allIDs;
1369-
for (auto ht : results) {
1370-
allIDs.insert(ht.getID());
1371-
}
1372-
for (auto& kv : ancestors) {
1373-
allIDs.insert(kv.first);
1374-
for (auto& id : kv.second) {
1375-
allIDs.insert(id);
1376-
}
1377-
}
1378-
1379-
// Perform the sort.
1380-
for (TypeID i : allIDs) {
1428+
for (TypeID i : toSort) {
13811429
visit(i);
13821430
}
13831431

1384-
// Swap order to have all children before their parents.
1385-
std::reverse(sorted.begin(), sorted.end());
1386-
1387-
// Create a list of Items to canonicalize in place according to the
1388-
// topological ordering of types and heap types.
1432+
// Expand the ordered types into ordered type use sites.
13891433
std::vector<Item> items;
13901434
for (TypeID id : sorted) {
13911435
// IDs may be Types or HeapTypes, so just try both.
1392-
auto typeIt = typeLocations.find(Type(id));
1393-
if (typeIt != typeLocations.end()) {
1394-
for (Type* loc : typeIt->second) {
1436+
if (typeLocations.count(Type(id))) {
1437+
for (Type* loc : typeLocations[Type(id)]) {
13951438
items.emplace_back(loc);
13961439
}
13971440
} else {
1398-
auto heapTypeIt = heapTypeLocations.find(HeapType(id));
1399-
assert(heapTypeIt != heapTypeLocations.end());
1400-
for (HeapType* loc : heapTypeIt->second) {
1441+
for (HeapType* loc : heapTypeLocations[HeapType(id)]) {
14011442
items.emplace_back(loc);
14021443
}
14031444
}

0 commit comments

Comments
 (0)