From 4f3455f573b3f34cfe02c960dede352b12a08ba8 Mon Sep 17 00:00:00 2001 From: Takanori MAEHARA Date: Wed, 21 Mar 2018 10:03:58 +0900 Subject: [PATCH 1/4] Disjoint Sparse Table --- data_structure/disjoint_sparse_table.cc | 78 +++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 data_structure/disjoint_sparse_table.cc diff --git a/data_structure/disjoint_sparse_table.cc b/data_structure/disjoint_sparse_table.cc new file mode 100644 index 0000000..87dced4 --- /dev/null +++ b/data_structure/disjoint_sparse_table.cc @@ -0,0 +1,78 @@ +// +// Disjoint Sparse Table +// +// Description: +// +// Let `otimes` be a binary associative operator. +// The disjoint sparse table is a data structure for a +// sequence xs that admits a query +// prod(i,j) = xs[i] `otimes` ... `otimes` xs[j-1] +// in time O(1). +// +// The structure is a segment tree whose node maintains +// prod(i,m) and prod(m,j) for all i, j in the segment. +// Then prod(i,j) is evaluated by finding the node that +// splits [i,j) and returning prod(i,m)*prod(m,j). 
+// +// Complexity: +// +// preprocessing O(n log n) +// query O(1) +// +#include + +using namespace std; + +#define fst first +#define snd second +#define all(c) ((c).begin()), ((c).end()) +#define TEST(s) if (!(s)) { cout << __LINE__ << " " << #s << endl; exit(-1); } + +template +struct DisjointSparseTable { + vector> ys; + Op otimes; + DisjointSparseTable(vector xs, Op otimes_) : otimes(otimes_) { + int n = 1; + while (n <= xs.size()) n *= 2; + xs.resize(n); + ys.push_back(xs); + for (int h = 1; ; ++h) { + int range = (2 << h), half = (range /= 2); + if (range > n) break; + ys.push_back(xs); + for (int i = half; i < n; i += range) { + for (int j = i-2; j >= i-half; --j) + ys[h][j] = otimes(ys[h][j], ys[h][j+1]); + for (int j = i+1; j < min(n, i+half); ++j) + ys[h][j] = otimes(ys[h][j-1], ys[h][j]); + } + } + } + T prod(int i, int j) { // [i, j) query + --j; + int h = sizeof(int)*__CHAR_BIT__-1-__builtin_clz(i ^ j); + return otimes(ys[h][i], ys[h][j]); + } +}; +template +auto makeDisjointSparseTable(vector xs, Op op) { + return DisjointSparseTable(xs, op); +} + +int main() { + vector xs = {3,1,4,1,5,1}; + int n = xs.size(); + auto otimes = [](int a, int b) { return max(a, b); }; + auto dst = makeDisjointSparseTable(xs, otimes); + + for (int i = 0; i < n; ++i) { + for (int j = i+1; j <= n; ++j) { + cout << i << " " << j << " " << dst.prod(i, j) << " "; + int a = xs[i]; + for (int k = i+1; k < j; ++k) + a = otimes(a, xs[k]); + cout << a << endl; + } + } +} From 9cca6b826f19ed7e42dd326a4fbbb9f4d34f04d3 Mon Sep 17 00:00:00 2001 From: Takanori MAEHARA Date: Thu, 7 Jun 2018 03:36:21 +0900 Subject: [PATCH 2/4] Segment Recognizer (evaluate automaton run in O(|M|) time) --- data_structure/segment_recognizer.cc | 149 +++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 data_structure/segment_recognizer.cc diff --git a/data_structure/segment_recognizer.cc b/data_structure/segment_recognizer.cc new file mode 100644 index 0000000..6e87b4b --- 
/dev/null +++ b/data_structure/segment_recognizer.cc @@ -0,0 +1,149 @@ +// +// Segment Recognizer +// +// Description: +// Let M be an automaton and x be a sequence of letters. +// The segment recognizer computes the transitioned state +// starting from s and reading x[i,j) in O(|M|) time. +// The preprocessing requires O(|M| |x|) time and space. +// +// The same method is implemented by the segment tree, +// where the time complexity is O(log n) and the space +// complexity is O(n log n). Thus, the segment recognizer +// is efficient if |M| is small. +// +// Algorithm: +// Basically, it stores all the runs from every initial +// position i and initial state s. To reduce the space, +// it merges two runs if they yield the same state. +// +// Reference +// Mikolaj Bojanczyk (2009): "Factorization forests", +// International Conference on Developments in Language Theory, +// pp. 1--17. +// +#include + +using namespace std; + +#define fst first +#define snd second +#define all(c) ((c).begin()), ((c).end()) +#define TEST(s) if (!(s)) { cout << __LINE__ << " " << #s << endl; exit(-1); } + + +// === tick a time === +#include +double tick() { /* returns the clock() time, in seconds, elapsed since the previous call; oldtick is a zero-initialized static, so the first call measures from clock() == 0 */ + static clock_t oldtick; + clock_t newtick = clock(); + double diff = 1.0*(newtick - oldtick) / CLOCKS_PER_SEC; + oldtick = newtick; + return diff; +} + +template +struct ModuloAutomaton { /* states are the residues 0..MOD-1 of the running sum; state 0 is accepting */ + const int init = 0; + int size() const { return MOD; } + int next(int s, int d) const { return (s+d)%MOD; } + int accept(int s) const { return s==0; } +}; + +// 0: free +// 1: selected +// 2: bottom +struct IndependenceAutomaton { /* for letters d in {0, 1}: rejects as soon as two consecutive 1s are read */ + const int init = 0; + int size() const { return 3; } + int next(int s, int d) const { + if (s == 0) return d; + if (s == 1) return 2*d; + return 2; /* s == 2 (bottom) is absorbing; the unconditional return also keeps control from flowing off the end of the function */ + } + int accept(int s) const { return s!=2; } +}; + +template +struct SegmentRecognizer { + Automaton M; + vector x; + + struct Tape { + int begin; + vector sequence; + }; + vector> index; + vector tapes; /* tapes[t] is one stored run, sequence[k] being its state at position begin+k; index[i][s] is the id of the tape that is in state s at position i */ + + SegmentRecognizer(Automaton M, vector x) : M(M), 
x(x) { /* builds the tapes left to right; stripe[r] is the id of the tape currently extended by slot r */ + index.assign(x.size()+1, vector(M.size())); + vector stripe; + for (int r = 0; r < M.size(); ++r) { + stripe.push_back(r); + index[0][r] = stripe[r]; + tapes.push_back({0, {r}}); + } + for (int i = 0; i < x.size(); ++i) { + unordered_set available; /* states at position i+1 not yet claimed by a tape */ + for (int s = 0; s < M.size(); ++s) + available.insert(s); + vector reallocate; + for (int r = 0; r < M.size(); ++r) { + int next = M.next(tapes[stripe[r]].sequence.back(), x[i]); + if (available.count(next)) { /* the first run to reach this state keeps extending its tape */ + available.erase(next); + index[i+1][next] = stripe[r]; + tapes[stripe[r]].sequence.push_back(next); + } else { + reallocate.push_back(r); /* run merged into an earlier one: its tape ends here and slot r restarts below */ + } + } + for (int r: reallocate) { /* one fresh tape, starting at position i+1, for each state that no run reached */ + int s = *available.begin(); + stripe[r] = tapes.size(); + index[i+1][s] = stripe[r]; + tapes.push_back({i+1, {s}}); + available.erase(s); + } + } + } + + int getState(int i, int s, int j) { /* state reached from state s at position i after reading x[i..j); follows the tape through (i, s) and, whenever that tape ends before position j, continues on the tape of the run it merged into */ + while (1) { + auto &tape = tapes[index[i][s]]; + if (j - tape.begin < tape.sequence.size()) { + return tape.sequence[j - tape.begin]; + } else { + i = tape.begin + tape.sequence.size(); /* first position past the end of this tape */ + s = M.next(tape.sequence.back(), x[i-1]); + } + } + } +}; +template +SegmentRecognizer makeSegmentRecognizer(Automaton M, vector s) { + return SegmentRecognizer(M, s); +} + +int main() { /* benchmark: prints n and the average tick() time per getState query for doubling n */ + IndependenceAutomaton M; + + for (int n = 2; n < (1<<24); n*=2) { + vector x(n); + for (int i = 0; i < n; ++i) { + x[i] = (rand() % 10 == 0); + } + auto recognizer = makeSegmentRecognizer(M, x); + + tick(); + int count = 0; /* accumulated below but never read afterwards */ + for (int iter = 0; iter < n; ++iter) { + int v = (rand() % n) + 1; + int u = rand() % v; + count += recognizer.getState(u, 0, v); + } + double t = tick(); + cout << n << " " << t / n << endl; + } +} From 3a44bc12aa076c73a03184ef45e0cfb974e40161 Mon Sep 17 00:00:00 2001 From: Takanori MAEHARA Date: Thu, 9 Aug 2018 18:25:29 +0900 Subject: [PATCH 3/4] Create roc-auc.cc --- machine_learning/roc-auc.cc | 40 +++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 machine_learning/roc-auc.cc diff --git 
a/machine_learning/roc-auc.cc b/machine_learning/roc-auc.cc new file mode 100644 index 0000000..5af7947 --- /dev/null +++ b/machine_learning/roc-auc.cc @@ -0,0 +1,40 @@ +#include + +using namespace std; + +double trapezoid(double x1, double x2, double y1, double y2) { /* area of the trapezoid with parallel sides y1 and y2 and width |x2 - x1| */ + return (y2+y1)/2 * abs(x2-x1); +} + +double auc(vector test, vector pred) { /* area under the ROC curve: test[i] == 1 counts as a positive and any other value as a negative; samples are visited in decreasing order of pred[i] and the (fp, tp) curve is integrated with one vertex per distinct score */ + int n = test.size(); + assert(n == pred.size()); + + vector idx(n); + for (int i = 0; i < n; ++i) idx[i] = i; + sort(idx.begin(), idx.end(), [&](int i, int j) { return pred[i] > pred[j]; }); /* indices by decreasing score */ + + double a = 0.0; + double fp = 0, tp = 0, fp_prev = 0, tp_prev = 0; + double prev_score = -1.0/0.0; /* NOTE(review): meant as negative infinity; relies on floating-point division by zero, which yields -inf on IEEE-754 targets - confirm this is acceptable for the toolchain */ + for (int i: idx) { + if (pred[i] != prev_score) { /* score changed: close the trapezoid for the previous group of equal scores */ + a += trapezoid(fp, fp_prev, tp, tp_prev); + prev_score = pred[i]; + fp_prev = fp; + tp_prev = tp; + } + if (test[i] == 1) { + tp += 1; + } else { + fp += 1; + } + } + a += trapezoid(fp, fp_prev, tp, tp_prev); /* last group */ + return a / (tp * fp); /* normalize by positives * negatives; NOTE(review): this is 0/0 when either class is absent, including empty input (presumably NaN on IEEE-754 targets) */ +} +int main() { + vector test = {0, 1, 0, 1, 1}; + vector pred = {0.2, 0.3, 0.4, 0.5, 0.6}; + cout << auc(test, pred) << endl; +} From 4fdac8202e26def25c1baf9127aaaed6a2c9f7c7 Mon Sep 17 00:00:00 2001 From: Takanori MAEHARA Date: Mon, 7 Jan 2019 09:03:05 +0900 Subject: [PATCH 4/4] debug incorrect implementation of undoable union find. 
(path compression cannot be undo) --- data_structure/union_find_undo.cc | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/data_structure/union_find_undo.cc b/data_structure/union_find_undo.cc index 4aff857..208ff82 100644 --- a/data_structure/union_find_undo.cc +++ b/data_structure/union_find_undo.cc @@ -27,25 +27,29 @@ struct UndoableUnionFind { UndoableUnionFind(int n) : parent(n, -1) { }; bool unite(int u, int v) { u = root(u); v = root(v); - if (u == v) return false; - if (parent[u] > parent[v]) swap(u, v); - history.push_back(make_tuple(u, v, parent[v])); - parent[u] += parent[v]; parent[v] = u; - return true; + if (u == v) { + history.push_back(make_tuple(-1,-1,-1)); + return false; + } else { + if (parent[u] > parent[v]) swap(u, v); + history.push_back(make_tuple(u, v, parent[v])); + parent[u] += parent[v]; parent[v] = u; + return true; + } } void undo() { int u, v, w; tie(u, v, w) = history.back(); history.pop_back(); + if (u == -1) return; parent[v] = w; parent[u] -= parent[v]; } bool find(int u, int v) { return root(u) == root(v); } - int root(int u) { return parent[u] < 0 ? u : parent[u] = root(parent[u]); } + int root(int u) { while (parent[u] >= 0) u = parent[u]; return u; } int size(int u) { return -parent[root(u)]; } }; - struct OfflineDynamicConnectivity { int n; UndoableUnionFind uf;