@@ -1089,25 +1089,30 @@ std::ostream& operator<<(std::ostream& os, HeapTypeInfo info) {
10891089
10901090struct TypeBuilder ::Impl {
10911091 TypeStore typeStore;
1092- HeapTypeStore heapTypeStore;
10931092
10941093 struct Entry {
1095- // HeapTypeInfo has no default constructor, so pick an arbitrary default.
1096- HeapTypeInfo info = Signature();
1094+ std::unique_ptr<HeapTypeInfo> info;
10971095 bool initialized = false ;
1096+ Entry () {
1097+ // We need to eagerly allocate the HeapTypeInfo so we have a TypeID to use
1098+ // to refer to it before it is initialized. Arbitrarily choose a default
1099+ // value.
1100+ info = std::make_unique<HeapTypeInfo>(Signature ());
1101+ }
10981102 void set (HeapTypeInfo&& hti) {
1099- info = hti;
1103+ * info = hti;
11001104 initialized = true ;
11011105 }
1102- HeapType get () { return HeapType (TypeID (& info)); }
1106+ HeapType get () { return HeapType (TypeID (info. get () )); }
11031107 };
11041108
11051109 std::vector<Entry> entries;
1110+
1111+ Impl (size_t n) : entries(n) {}
11061112};
11071113
11081114TypeBuilder::TypeBuilder (size_t n) {
1109- impl = std::make_unique<TypeBuilder::Impl>();
1110- impl->entries .resize (n);
1115+ impl = std::make_unique<TypeBuilder::Impl>(n);
11111116}
11121117
11131118TypeBuilder::~TypeBuilder () = default ;
@@ -1175,15 +1180,18 @@ struct Canonicalizer {
11751180 // The work list of Types and HeapTypes remaining to be scanned.
11761181 std::vector<Item> scanList;
11771182
1178- // Maps Type and HeapType IDs to the IDs of their ancestor Types and HeapTypes
1183+ // Maps Type and HeapType IDs to the IDs of their child Types and HeapTypes
11791184 // in the type graph. Only considers compound Types and HeapTypes.
1180- std::unordered_map<TypeID, std::unordered_set<TypeID>> ancestors ;
1185+ std::unordered_map<TypeID, std::unordered_set<TypeID>> children ;
11811186
11821187 // Maps each temporary Type and HeapType to the locations where they will have
11831188 // to be replaced with canonical Types and HeapTypes.
11841189 std::unordered_map<Type, std::vector<Type*>> typeLocations;
11851190 std::unordered_map<HeapType, std::vector<HeapType*>> heapTypeLocations;
11861191
1192+ // These heap types will not participate in canonicalization.
1193+ std::unordered_set<TypeID> selfReferentialHeapTypes;
1194+
11871195 // Maps Types and HeapTypes backed by the TypeBuilder's Stores to globally
11881196 // canonical Types and HeapTypes.
11891197 std::unordered_map<Type, Type> canonicalTypes;
@@ -1196,7 +1204,7 @@ struct Canonicalizer {
11961204 template <typename T1 , typename T2 > void noteChild (T1 parent, T2 * child);
11971205 void scanHeapType (HeapType* ht);
11981206 void scanType (Type* child);
1199- void makeAncestorsFixedPoint ();
1207+ void findSelfReferentialHeapTypes ();
12001208 std::vector<Item> getOrderedItems ();
12011209
12021210 // Replaces the pointee Type or HeapType of `type` with its globally canonical
@@ -1206,6 +1214,10 @@ struct Canonicalizer {
12061214 void canonicalize (T* type, std::unordered_map<T, T>& canonicals);
12071215};
12081216
1217+ // Used to extend the lifetime of self-referential HeapTypes so they don't need
1218+ // to be canonicalized.
1219+ std::vector<std::unique_ptr<HeapTypeInfo>> noncanonicalHeapTypes;
1220+
12091221// Traverse the type graph rooted at the initialized HeapTypeInfos in reverse
12101222// postorder, replacing in place all Types and HeapTypes backed by the
12111223// TypeBuilder's Stores with equivalent globally canonicalized Types and
@@ -1224,8 +1236,7 @@ Canonicalizer::Canonicalizer(TypeBuilder& builder) : builder(builder) {
12241236
12251237 // Traverse the type graph reachable from the heap types, calculating
12261238 // reachability and collecting a list of types and heap types that need to be
1227- // canonicalized. We must scan in depth-first order so that we can do a
1228- // postorder traversal later.
1239+ // canonicalized.
12291240 while (scanList.size () != 0 ) {
12301241 auto item = scanList.back ();
12311242 scanList.pop_back ();
@@ -1239,17 +1250,11 @@ Canonicalizer::Canonicalizer(TypeBuilder& builder) : builder(builder) {
12391250 }
12401251 }
12411252
1242- // Check for recursive types and heap types. TODO: pre-canonicalize these into
1243- // their minimal finite representations.
1244- makeAncestorsFixedPoint ();
1245- for (auto & kv : ancestors) {
1246- if (kv.second .count (kv.first ) != 0 ) {
1247- WASM_UNREACHABLE (" TODO: support recursive types" );
1248- }
1249- }
1253+ findSelfReferentialHeapTypes ();
12501254
1251- // Visit the types and heap types in reverse postorder, replacing them with
1252- // their canonicalized versions.
1255+ // Visit the use sites of Types and HeapTypes, replacing them with their
1256+ // canonicalized versions in an order that guarantees no temporary types will
1257+ // be leaked into the global stores.
12531258 for (auto it : getOrderedItems ()) {
12541259 switch (it.kind ) {
12551260 case Item::TypeKind:
@@ -1260,12 +1265,21 @@ Canonicalizer::Canonicalizer(TypeBuilder& builder) : builder(builder) {
12601265 break ;
12611266 }
12621267 }
1268+
1269+ // Now that all other Types and HeapTypes have been canonicalized, move
1270+ // self-referential HeapTypes to the global store so that they will outlive
1271+ // the TypeBuilder without their IDs changing.
1272+ for (auto & entry : builder.impl ->entries ) {
1273+ if (selfReferentialHeapTypes.count (entry.get ().getID ())) {
1274+ noncanonicalHeapTypes.emplace_back (std::move (entry.info ));
1275+ }
1276+ }
12631277}
12641278
12651279template <typename T1 , typename T2 >
12661280void Canonicalizer::noteChild (T1 parent, T2 * child) {
12671281 if (child->isCompound ()) {
1268- ancestors[child-> getID ()].insert (parent. getID ());
1282+ children[parent. getID ()].insert (child-> getID ());
12691283 scanList.push_back (child);
12701284 }
12711285}
@@ -1319,85 +1333,112 @@ void Canonicalizer::scanType(Type* type) {
13191333 }
13201334}
13211335
1322- void Canonicalizer::makeAncestorsFixedPoint () {
1323- // Naively calculate the transitive closure of the reachability graph.
1336+ void Canonicalizer::findSelfReferentialHeapTypes () {
1337+ // Calculate the fixed point of the reachability relation.
1338+ auto fixedPoint = children;
13241339 bool changed;
13251340 do {
13261341 changed = false ;
1327- for (auto & entry : ancestors ) {
1342+ for (auto & entry : fixedPoint ) {
13281343 auto & succs = entry.second ;
1329- std::unordered_set<TypeID> nextAncestors ;
1344+ std::unordered_set<TypeID> newSuccs ;
13301345 for (auto & other : succs) {
1331- auto & otherAncestors = ancestors [other];
1332- nextAncestors .insert (otherAncestors .begin (), otherAncestors .end ());
1346+ auto & otherSuccs = fixedPoint [other];
1347+ newSuccs .insert (otherSuccs .begin (), otherSuccs .end ());
13331348 }
13341349 size_t oldSize = succs.size ();
1335- succs.insert (nextAncestors .begin (), nextAncestors .end ());
1350+ succs.insert (newSuccs .begin (), newSuccs .end ());
13361351 if (succs.size () != oldSize) {
13371352 changed = true ;
13381353 }
13391354 }
13401355 } while (changed);
1356+
1357+ // Find HeapTypes that reach themselves
1358+ for (auto & entry : builder.impl ->entries ) {
1359+ TypeID id = entry.get ().getID ();
1360+ auto it = fixedPoint.find (id);
1361+ if (it != fixedPoint.end () && it->second .count (id)) {
1362+ selfReferentialHeapTypes.insert (id);
1363+ }
1364+ }
13411365}
13421366
13431367std::vector<Canonicalizer::Item> Canonicalizer::getOrderedItems () {
1344- // Topologically sort the Types and HeapTypes so that all children are
1345- // canonicalized before their parents.
1368+ // In order to canonicalize Types and HeapTypes without leaking any temporary
1369+ // types into the global type store, we need to canonicalize children before
1370+ // parents. In the case of recursive types, this is not possible due to cycles
1371+ // in the parent-child relation. In principle that can be overcome by
1372+ // canonicalizing type structures rather than type contents, but for now we
1373+ // instead cut corners and break the cycles by skipping canonicalization of
1374+ // self-referential HeapTypes. This works because the path from any Type or
1375+ // HeapType to itself in the parent-child relation must go through some
1376+ // self-referential HeapType; it is not possible to have a cycle composed only
1377+ // of Types, for example. In effect this means that we have a nominal type
1378+ // system for self-referential HeapTypes and a structural type system for
1379+ // everything else. Eventually we will have to implement something more
1380+ // consistent, but this is good enough for getting prototype toolchains up and
1381+ // running.
1382+
1383+ // TODO: None of this is particularly optimized. Benchmark to see if this is a
1384+ // significant bottleneck and investigate using better data structures and
1385+ // algorithms.
1386+
1387+ // Remove self-referential HeapTypes to cut cycles.
1388+ auto childrenDAG = children;
1389+ for (TypeID id : selfReferentialHeapTypes) {
1390+ childrenDAG.erase (id);
1391+ for (auto & kv : childrenDAG) {
1392+ kv.second .erase (id);
1393+ }
1394+ }
1395+
1396+ // Collect the remaining types that will be sorted.
1397+ std::unordered_set<TypeID> toSort;
1398+ for (auto & entry : builder.impl ->entries ) {
1399+ TypeID id = entry.get ().getID ();
1400+ if (!selfReferentialHeapTypes.count (id)) {
1401+ toSort.insert (id);
1402+ }
1403+ }
1404+ for (auto & kv : childrenDAG) {
1405+ toSort.insert (kv.first );
1406+ for (auto & child : kv.second ) {
1407+ toSort.insert (child);
1408+ }
1409+ }
1410+
1411+ // Topologically sort so that children come before their parents.
13461412 std::vector<TypeID> sorted;
13471413 std::unordered_set<TypeID> seen;
1348-
1349- // Topologically sort so that all parents are pushed before their children.
1350- // This sort will be reversed later to have children before their parents.
1351- std::function<void (TypeID)> visit = [&](TypeID i) {
1352- if (seen.count (i)) {
1414+ std::function<void (TypeID)> visit = [&](TypeID id) {
1415+ if (seen.count (id)) {
13531416 return ;
13541417 }
1355- // Push ancestors of the current type before pushing the current type.
1356- auto it = ancestors .find (i );
1357- if (it != ancestors .end ()) {
1358- for (auto ancestor : it->second ) {
1359- visit (ancestor );
1418+ // Push children of the current type before pushing the current type.
1419+ auto it = childrenDAG .find (id );
1420+ if (it != childrenDAG .end ()) {
1421+ for (auto child : it->second ) {
1422+ visit (child );
13601423 }
13611424 }
1362- seen.insert (i );
1363- sorted.push_back (i );
1425+ seen.insert (id );
1426+ sorted.push_back (id );
13641427 };
1365-
1366- // Collect the items to be sorted, including all the result HeapTypes and any
1367- // type that participates in the ancestor relation.
1368- std::unordered_set<TypeID> allIDs;
1369- for (auto ht : results) {
1370- allIDs.insert (ht.getID ());
1371- }
1372- for (auto & kv : ancestors) {
1373- allIDs.insert (kv.first );
1374- for (auto & id : kv.second ) {
1375- allIDs.insert (id);
1376- }
1377- }
1378-
1379- // Perform the sort.
1380- for (TypeID i : allIDs) {
1428+ for (TypeID i : toSort) {
13811429 visit (i);
13821430 }
13831431
1384- // Swap order to have all children before their parents.
1385- std::reverse (sorted.begin (), sorted.end ());
1386-
1387- // Create a list of Items to canonicalize in place according to the
1388- // topological ordering of types and heap types.
1432+ // Expand the ordered types into ordered type use sites.
13891433 std::vector<Item> items;
13901434 for (TypeID id : sorted) {
13911435 // IDs may be Types or HeapTypes, so just try both.
1392- auto typeIt = typeLocations.find (Type (id));
1393- if (typeIt != typeLocations.end ()) {
1394- for (Type* loc : typeIt->second ) {
1436+ if (typeLocations.count (Type (id))) {
1437+ for (Type* loc : typeLocations[Type (id)]) {
13951438 items.emplace_back (loc);
13961439 }
13971440 } else {
1398- auto heapTypeIt = heapTypeLocations.find (HeapType (id));
1399- assert (heapTypeIt != heapTypeLocations.end ());
1400- for (HeapType* loc : heapTypeIt->second ) {
1441+ for (HeapType* loc : heapTypeLocations[HeapType (id)]) {
14011442 items.emplace_back (loc);
14021443 }
14031444 }
0 commit comments