Skip to content

Commit eede7b7

Browse files
peff authored and gitster committed
diffcore-rename: cache file deltas
We find rename candidates by computing a fingerprint hash of each file, and then comparing those fingerprints. There are inherently O(n^2) comparisons, so it pays in CPU time to hoist the (rather expensive) computation of the fingerprint out of that loop (or to cache it once we have computed it once).

Previously, we didn't keep the filespec information around because then we had the potential to consume a great deal of memory. However, instead of keeping all of the filespec data, we can instead just keep the fingerprint.

This patch implements and uses diff_free_filespec_data_large to accomplish that goal. We also have to change estimate_similarity not to needlessly repopulate the filespec data when we already have the hash.

Practical tests showed 4.5x speedup for a 10% memory usage increase.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
1 parent 2ff5e18 commit eede7b7

File tree

3 files changed

+11
-4
lines changed

3 files changed

+11
-4
lines changed

diff.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1675,7 +1675,7 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
16751675
return 0;
16761676
}
16771677

1678-
void diff_free_filespec_data(struct diff_filespec *s)
1678+
void diff_free_filespec_data_large(struct diff_filespec *s)
16791679
{
16801680
if (s->should_free)
16811681
free(s->data);
@@ -1686,6 +1686,11 @@ void diff_free_filespec_data(struct diff_filespec *s)
16861686
s->should_free = s->should_munmap = 0;
16871687
s->data = NULL;
16881688
}
1689+
}
1690+
1691+
void diff_free_filespec_data(struct diff_filespec *s)
1692+
{
1693+
diff_free_filespec_data_large(s);
16891694
free(s->cnt_data);
16901695
s->cnt_data = NULL;
16911696
}

diffcore-rename.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,8 @@ static int estimate_similarity(struct diff_filespec *src,
184184
if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
185185
return 0;
186186

187-
if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
187+
if ((!src->cnt_data && diff_populate_filespec(src, 0))
188+
|| (!dst->cnt_data && diff_populate_filespec(dst, 0)))
188189
return 0; /* error but caught downstream */
189190

190191

@@ -377,10 +378,10 @@ void diffcore_rename(struct diff_options *options)
377378
m->score = estimate_similarity(one, two,
378379
minimum_score);
379380
m->name_score = basename_same(one, two);
380-
diff_free_filespec_data(one);
381+
diff_free_filespec_data_large(one);
381382
}
382383
/* We do not need the text anymore */
383-
diff_free_filespec_data(two);
384+
diff_free_filespec_data_large(two);
384385
dst_cnt++;
385386
}
386387
/* cost matrix sorted by most to least similar pair */

diffcore.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ extern void fill_filespec(struct diff_filespec *, const unsigned char *,
4848

4949
extern int diff_populate_filespec(struct diff_filespec *, int);
5050
extern void diff_free_filespec_data(struct diff_filespec *);
51+
extern void diff_free_filespec_data_large(struct diff_filespec *);
5152
extern int diff_filespec_is_binary(struct diff_filespec *);
5253

5354
struct diff_filepair {

0 commit comments

Comments
 (0)