Skip to content

Commit b65982b

Browse files
committed
Optimize "diff-index --cached" using cache-tree
When running "diff-index --cached" after making a change to only a small portion of the index, there is no point unpacking unchanged subtrees into the index recursively, only to find that all entries match anyway.

Tweak unpack_trees() logic that is used to read in the tree object to catch the case where the tree entry we are looking at matches the index as a whole by looking at the cache-tree.

As an exercise, after modifying a few paths in the kernel tree, here are a few numbers on my Athlon 64X2 3800+:

(without patch, hot cache)
    $ /usr/bin/time git diff --cached --raw
    :100644 100644 b57e1f5... e69de29... M Makefile
    :100644 000000 8c86b72... 0000000... D arch/x86/Makefile
    :000000 100644 0000000... e69de29... A arche
    0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+0outputs (0major+9407minor)pagefaults 0swaps

(with patch, hot cache)
    $ /usr/bin/time ../git.git/git-diff --cached --raw
    :100644 100644 b57e1f5... e69de29... M Makefile
    :100644 000000 8c86b72... 0000000... D arch/x86/Makefile
    :000000 100644 0000000... e69de29... A arche
    0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+0outputs (0major+2446minor)pagefaults 0swaps

Cold cache numbers are very impressive, but it does not matter very much in practice:

(without patch, cold cache)
    $ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
    $ /usr/bin/time git diff --cached --raw
    :100644 100644 b57e1f5... e69de29... M Makefile
    :100644 000000 8c86b72... 0000000... D arch/x86/Makefile
    :000000 100644 0000000... e69de29... A arche
    0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
    247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps

(with patch, cold cache)
    $ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
    $ /usr/bin/time ../git.git/git-diff --cached --raw
    :100644 100644 b57e1f5... e69de29... M Makefile
    :100644 000000 8c86b72... 0000000... D arch/x86/Makefile
    :000000 100644 0000000... e69de29... A arche
    0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
    18440inputs+0outputs (79major+2369minor)pagefaults 0swaps

This of course helps "git status" as well.

(without patch, hot cache)
    $ /usr/bin/time ../git.git/git-status >/dev/null
    0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+5336outputs (0major+10970minor)pagefaults 0swaps

(with patch, hot cache)
    $ /usr/bin/time ../git.git/git-status >/dev/null
    0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
    0inputs+5336outputs (0major+3921minor)pagefaults 0swaps

Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent aceae2e commit b65982b

File tree

5 files changed

+55
-0
lines changed

5 files changed

+55
-0
lines changed

cache-tree.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,3 +631,35 @@ void prime_cache_tree(struct cache_tree **it, struct tree *tree)
631631
*it = cache_tree();
632632
prime_cache_tree_rec(*it, tree);
633633
}
634+
635+
/*
636+
* find the cache_tree that corresponds to the current level without
637+
* exploding the full path into textual form. The root of the
638+
* cache tree is given as "root", and our current level is "info".
639+
* (1) When at root level, info->prev is NULL, so it is "root" itself.
640+
* (2) Otherwise, find the cache_tree that corresponds to one level
641+
* above us, and find ourselves in there.
642+
*/
643+
static struct cache_tree *find_cache_tree_from_traversal(struct cache_tree *root,
644+
struct traverse_info *info)
645+
{
646+
struct cache_tree *our_parent;
647+
648+
/* No previous level recorded: this is the top of the traversal. */
if (!info->prev)
649+
return root;
650+
/* Resolve the level above us first; one recursive call per level of nesting. */
our_parent = find_cache_tree_from_traversal(root, info->prev);
651+
/*
 * Look up this level's own name inside its parent's cache_tree.
 * NOTE(review): cache_tree_find() presumably returns NULL when no such
 * subtree exists (cache_tree_matches_traversal checks its result for
 * NULL), so our_parent may be NULL here -- confirm that
 * cache_tree_find() accepts a NULL tree.
 */
return cache_tree_find(our_parent, info->name.path);
652+
}
653+
654+
/*
 * Check whether the tree entry "ent", seen at traversal level "info",
 * has a node in the cache tree rooted at "root" that records the same
 * sha1.
 *
 * Returns that node's entry_count (a positive number) when a node for
 * ent->path is found below the current level, its entry_count is
 * greater than zero, and hashcmp() of ent->sha1 against the node's sha1
 * returns zero.  Returns 0 in every other case.
 */
int cache_tree_matches_traversal(struct cache_tree *root,
655+
struct name_entry *ent,
656+
struct traverse_info *info)
657+
{
658+
struct cache_tree *it;
659+
660+
/* First the node for the directory being traversed, then the entry inside it. */
it = find_cache_tree_from_traversal(root, info);
661+
it = cache_tree_find(it, ent->path);
662+
/*
 * NOTE(review): the entry_count > 0 test presumably rejects nodes that
 * are invalidated or empty -- confirm against the cache_tree definition.
 */
if (it && it->entry_count > 0 && !hashcmp(ent->sha1, it->sha1))
663+
return it->entry_count;
664+
return 0;
665+
}

cache-tree.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define CACHE_TREE_H
33

44
#include "tree.h"
5+
#include "tree-walk.h"
56

67
struct cache_tree;
78
struct cache_tree_sub {
@@ -42,4 +43,6 @@ int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int, int)
4243
int write_cache_as_tree(unsigned char *sha1, int flags, const char *prefix);
4344
void prime_cache_tree(struct cache_tree **, struct tree *);
4445

46+
extern int cache_tree_matches_traversal(struct cache_tree *, struct name_entry *ent, struct traverse_info *info);
47+
4548
#endif

diff-lib.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,7 @@ int run_diff_index(struct rev_info *revs, int cached)
446446
memset(&opts, 0, sizeof(opts));
447447
opts.head_idx = 1;
448448
opts.index_only = cached;
449+
opts.diff_index_cached = cached;
449450
opts.merge = 1;
450451
opts.fn = oneway_diff;
451452
opts.unpack_data = revs;
@@ -502,6 +503,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt)
502503
memset(&opts, 0, sizeof(opts));
503504
opts.head_idx = 1;
504505
opts.index_only = 1;
506+
opts.diff_index_cached = 1;
505507
opts.merge = 1;
506508
opts.fn = oneway_diff;
507509
opts.unpack_data = &revs;

unpack-trees.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,23 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
326326
if (src[0])
327327
conflicts |= 1;
328328
}
329+
330+
/* special case: "diff-index --cached" looking at a tree */
331+
if (o->diff_index_cached &&
332+
n == 1 && dirmask == 1 && S_ISDIR(names->mode)) {
333+
int matches;
334+
matches = cache_tree_matches_traversal(o->src_index->cache_tree,
335+
names, info);
336+
/*
337+
* Everything under the name matches. Adjust o->pos to
338+
* skip the entire hierarchy.
339+
*/
340+
if (matches) {
341+
o->pos += matches;
342+
return mask;
343+
}
344+
}
345+
329346
if (traverse_trees_recursive(n, dirmask, conflicts,
330347
names, info) < 0)
331348
return -1;

unpack-trees.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct unpack_trees_options {
2727
aggressive:1,
2828
skip_unmerged:1,
2929
initial_checkout:1,
30+
diff_index_cached:1,
3031
gently:1;
3132
const char *prefix;
3233
int pos;

0 commit comments

Comments
 (0)