Skip to content

Commit eb4d0e3

Browse files
torvaldsgitster
authored andcommitted
optimize diffcore-delta by sorting hash entries.
Here's a test-patch. I don't guarantee anything, except that when I did the timings I also did a "wc" on the result, and they matched.. Before: [torvalds@woody linux]$ time git diff -l0 --stat -C v2.6.22.. | wc 7104 28574 438020 real 0m10.526s user 0m10.401s sys 0m0.136s After: [torvalds@woody linux]$ time ~/git/git diff -l0 --stat -C v2.6.22.. | wc 7104 28574 438020 real 0m8.876s user 0m8.761s sys 0m0.128s but the diff is fairly simple, so if somebody will go over it and say whether it's likely to be *correct* too, that 15% may well be worth it. [ Side note, without rename detection, that diff takes just under three seconds for me, so in that sense the improvement to the rename detection itself is larger than the overall 15% - it brings the cost of just rename detection from 7.5s to 5.9s, which would be on the order of just over a 20% performance improvement. ] Hmm. The patch depends on half-way subtle issues like the fact that the hashtables are guaranteed to not be full => we're guaranteed to have zero counts at the end => we don't need to do any steenking iterator count in the loop. A few comments might in order. Linus
1 parent 4c75136 commit eb4d0e3

1 file changed

Lines changed: 30 additions & 24 deletions

File tree

diffcore-delta.c

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -46,22 +46,6 @@ struct spanhash_top {
4646
struct spanhash data[FLEX_ARRAY];
4747
};
4848

49-
static struct spanhash *spanhash_find(struct spanhash_top *top,
50-
unsigned int hashval)
51-
{
52-
int sz = 1 << top->alloc_log2;
53-
int bucket = hashval & (sz - 1);
54-
while (1) {
55-
struct spanhash *h = &(top->data[bucket++]);
56-
if (!h->cnt)
57-
return NULL;
58-
if (h->hashval == hashval)
59-
return h;
60-
if (sz <= bucket)
61-
bucket = 0;
62-
}
63-
}
64-
6549
static struct spanhash_top *spanhash_rehash(struct spanhash_top *orig)
6650
{
6751
struct spanhash_top *new;
@@ -122,6 +106,20 @@ static struct spanhash_top *add_spanhash(struct spanhash_top *top,
122106
}
123107
}
124108

109+
static int spanhash_cmp(const void *a_, const void *b_)
110+
{
111+
const struct spanhash *a = a_;
112+
const struct spanhash *b = b_;
113+
114+
/* A count of zero compares at the end.. */
115+
if (!a->cnt)
116+
return !b->cnt ? 0 : 1;
117+
if (!b->cnt)
118+
return -1;
119+
return a->hashval < b->hashval ? -1 :
120+
a->hashval > b->hashval ? 1 : 0;
121+
}
122+
125123
static struct spanhash_top *hash_chars(struct diff_filespec *one)
126124
{
127125
int i, n;
@@ -158,6 +156,10 @@ static struct spanhash_top *hash_chars(struct diff_filespec *one)
158156
n = 0;
159157
accum1 = accum2 = 0;
160158
}
159+
qsort(hash->data,
160+
1ul << hash->alloc_log2,
161+
sizeof(hash->data[0]),
162+
spanhash_cmp);
161163
return hash;
162164
}
163165

@@ -169,7 +171,7 @@ int diffcore_count_changes(struct diff_filespec *src,
169171
unsigned long *src_copied,
170172
unsigned long *literal_added)
171173
{
172-
int i, ssz;
174+
struct spanhash *s, *d;
173175
struct spanhash_top *src_count, *dst_count;
174176
unsigned long sc, la;
175177

@@ -190,22 +192,26 @@ int diffcore_count_changes(struct diff_filespec *src,
190192
}
191193
sc = la = 0;
192194

193-
ssz = 1 << src_count->alloc_log2;
194-
for (i = 0; i < ssz; i++) {
195-
struct spanhash *s = &(src_count->data[i]);
196-
struct spanhash *d;
195+
s = src_count->data;
196+
d = dst_count->data;
197+
for (;;) {
197198
unsigned dst_cnt, src_cnt;
198199
if (!s->cnt)
199-
continue;
200+
break; /* we checked all in src */
201+
while (d->cnt) {
202+
if (d->hashval >= s->hashval)
203+
break;
204+
d++;
205+
}
200206
src_cnt = s->cnt;
201-
d = spanhash_find(dst_count, s->hashval);
202-
dst_cnt = d ? d->cnt : 0;
207+
dst_cnt = d->hashval == s->hashval ? d->cnt : 0;
203208
if (src_cnt < dst_cnt) {
204209
la += dst_cnt - src_cnt;
205210
sc += src_cnt;
206211
}
207212
else
208213
sc += dst_cnt;
214+
s++;
209215
}
210216

211217
if (!src_count_p)

0 commit comments

Comments
 (0)