Skip to content
This repository was archived by the owner on Aug 31, 2021. It is now read-only.

Commit 28b78ec

Browse files
author
Fraser J. Gordon
committed
Optimised the blur code for approx 40% speed-up
1 parent f20ff64 commit 28b78ec

File tree

1 file changed

+134
-121
lines changed

1 file changed

+134
-121
lines changed

libgraphics/src/spread.cpp

Lines changed: 134 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -58,141 +58,154 @@ static int dilateMask(const uint8_t *src, int src_y_stride, uint8_t *dst, int ra
5858
// radii is 254 pixels.
5959
void dilateDistanceXY(const uint8_t *src, uint8_t *dst, int xradius, int yradius, int width, int height, int& r_new_width, int& r_new_height)
6060
{
61-
int new_width = width + 2 * xradius;
61+
int new_width = width + 2 * xradius;
6262
int new_height = height + 2 * yradius;
6363

64-
// Compute the x distance of each pixel from the nearest set pixel.
65-
uint8_t *xd;
66-
xd = new uint8_t[new_width * height];
64+
// Allocate memory for the loop
65+
size_t t_mem_size = new_width * new_height;
66+
uint8_t *buffer = new uint8_t[t_mem_size];
67+
68+
// All the destination pixels are set to the infinite distance initially
69+
memset(buffer, 255, t_mem_size);
70+
71+
// Only this part of the buffer can have non-zero x distances
72+
uint8_t *xd = buffer + new_width * yradius;
73+
74+
// Compute the x distance of each pixel from the nearest set pixel
6775
for(int y = 0; y < height; y++)
6876
{
6977
uint8_t *xdptr;
7078
xdptr = xd + new_width * y;
7179

7280
const uint8_t *sptr;
7381
sptr = src + width * y;
74-
for(int x = 0; x < width; x++)
75-
{
76-
// If there is a value at x, take it
77-
if (sptr[x] != 0)
78-
{
79-
xdptr[x + xradius] = 0;
80-
continue;
81-
}
82-
83-
// Otherwise search back...
84-
int db;
85-
db = 255;
86-
for(int z = x; z >= 0 && (x - z) < 255; z--)
87-
{
88-
if (sptr[z] != 0)
89-
{
90-
db = x - z;
91-
break;
92-
}
93-
}
94-
95-
// Now search forwards...
96-
int df;
97-
df = 255;
98-
for(int z = x; (z < width) && (z - x) < 255; z++)
99-
{
100-
if (sptr[z] != 0)
101-
{
102-
df = z - x;
103-
break;
104-
}
105-
}
106-
107-
// Distance is the minimum.
108-
int d;
109-
d = SkMin32(db, df);
110-
xdptr[x + xradius] = d;
111-
}
112-
113-
// Now expand the fringes.
114-
for(int x = 0; x < xradius; x++)
115-
{
116-
if (xdptr[xradius] + (xradius - x) < 255)
117-
xdptr[x] = ((xradius - x) + xdptr[xradius]);
118-
else
119-
xdptr[x] = 255;
120-
121-
if (xdptr[width + xradius - 1] + x + 1 < 255)
122-
xdptr[width + xradius + x] = xdptr[width + xradius - 1] + x + 1;
123-
else
124-
xdptr[width + xradius + x] = 255;
125-
}
82+
83+
int x = 0;
84+
int next = 0;
85+
while (x < new_width)
86+
{
87+
// Scan along the line for the next pixel that is set in the source mask
88+
while (next < width && sptr[next] == 0)
89+
next++;
90+
91+
// If no more set pixels in the source mask, go to the end of the line
92+
int dnext;
93+
if (next >= width)
94+
dnext = new_width;
95+
else
96+
dnext = next + xradius; // Account for position shift between src and dst
97+
98+
// If we're starting at the left edge, there isn't a mask point there
99+
int distance = dnext - x;
100+
int halfway = distance / 2;
101+
if (x == 0)
102+
halfway = 0;
103+
104+
// If we're ending at the right edge, there isn't a mask point there
105+
if (dnext >= new_width)
106+
halfway = distance;
107+
108+
int i;
109+
for (i = 0; i < halfway; i++)
110+
xdptr[x + i] = SkMin32(i, 255);
111+
for (; i < distance; i++)
112+
xdptr[x + i] = SkMin32(distance - i, 255);
113+
114+
// Move on to the next pixel
115+
//
116+
// Optimisation: scan for the next clear pixel in the mask
117+
// Avoids excessive processing for rows of non-zero pixels
118+
x = dnext;
119+
next = next + 1;
120+
while (next < width && sptr[next + 1] != 0)
121+
{
122+
xdptr[x] = 0;
123+
x++, next++;
124+
}
125+
}
126126
}
127+
128+
// Destination pointer stride
129+
int ydstride = new_width;
127130

128-
unsigned int rf;
129-
rf = xradius * xradius * yradius * yradius;
131+
// a-squared, b-squared and a-squared*b-squared
132+
uint32_t aa, bb, aabb;
133+
aa = xradius * xradius;
134+
bb = yradius * yradius;
135+
aabb = aa*bb;
136+
137+
// Look-up tables for pre-calculated multiplications
138+
uint32_t t_xlut[256], t_ylut[256];
139+
for (int i = 0; i < 256; i++)
140+
t_xlut[i] = bb*i*i, t_ylut[i] = aa*i*i;
141+
142+
// Ensure that the "infinite" distance never compares < anything else
143+
t_xlut[255] = 0xFFFFFFFF;
130144

131145
memset(dst, 0, new_width * new_height);
146+
147+
// X distance array
148+
const uint8_t *xdist = buffer;
132149

133-
// Now use xd to compute the spread mask.
134-
for(int x = 0; x < new_width; x++)
135-
{
136-
for(int y = 0; y < new_height; y++)
137-
{
138-
uint8_t *dptr = dst + y * new_width + x;
139-
140-
// If the distance at x, y is 0 then we are done.
141-
if (y >= yradius && y < (new_height - yradius) && xd[(y - yradius) * new_width + x] == 0)
142-
{
143-
*dptr = 255;
144-
continue;
145-
}
146-
147-
// Otherwise, search up.
148-
*dptr = 0;
149-
for(int z = y; z >= 0; z--)
150-
{
151-
unsigned int yf;
152-
yf = (y - z) * xradius;
153-
yf *= yf;
154-
if (yf >= rf)
155-
break;
156-
157-
unsigned int xf;
158-
if (z >= yradius && z < (new_height - yradius))
159-
xf = yradius * xd[(z - yradius) * new_width + x];
160-
else
161-
xf = yradius * 255;
162-
xf *= xf;
163-
if (xf < rf - yf)
164-
{
165-
*dptr = 255;
166-
break;
167-
}
168-
}
169-
170-
if (*dptr == 255)
171-
continue;
172-
173-
// Search downwards
174-
for(int z = y; z < new_height; z++)
175-
{
176-
unsigned int yf;
177-
yf = (z - y) * xradius;
178-
yf *= yf;
179-
if (yf >= rf)
180-
break;
181-
182-
unsigned int xf;
183-
if (z >= yradius && z < (new_height - yradius))
184-
xf = yradius * xd[(z - yradius) * new_width + x];
185-
else
186-
xf = yradius * 255;
187-
xf *= xf;
188-
if (xf < rf - yf)
189-
{
190-
*dptr = 255;
191-
break;
192-
}
193-
}
194-
}
195-
}
150+
// Be careful:
151+
//
152+
// Minor, seemingly inconsequential, changes to this loop can slow it down
153+
// considerably. On the other hand, they could speed it up...
154+
int offset = 0;
155+
for (int y = 0; y < new_height; y++)
156+
{
157+
for (int x = 0; x < new_width; x++, offset++)
158+
{
159+
// Starting at this position, scan down for a pixel that is within
160+
// the dilation radius.
161+
int ny = y;
162+
int noffset = offset;
163+
int max = SkMin32(new_height, y + yradius);
164+
do
165+
{
166+
uint8_t xval, yval;
167+
xval = xdist[noffset];
168+
yval = ny - y;
169+
170+
// Note the ordering of the comparison to avoid overflow
171+
if (t_xlut[xval] < aabb - t_ylut[yval])
172+
{
173+
dst[offset] = 255;
174+
break;
175+
}
176+
177+
ny += 1;
178+
noffset += ydstride;
179+
}
180+
while (ny < max);
181+
182+
// Early escape to avoid processing another loop
183+
if (dst[offset])
184+
continue;
185+
186+
// Starting at this position, scan up for a pixel that is within
187+
// the dilation radius
188+
ny = y - 1;
189+
noffset = offset - ydstride;
190+
int min = SkMax32(0, y - yradius);
191+
while (ny >= min)
192+
{
193+
uint8_t xval, yval;
194+
xval = xdist[noffset];
195+
yval = y - ny;
196+
197+
// Note the ordering of the comparison to avoid overflow
198+
if (t_xlut[xval] < aabb - t_ylut[yval])
199+
{
200+
dst[offset] = 255;
201+
break;
202+
}
203+
204+
ny -= 1;
205+
noffset -= ydstride;
206+
}
207+
}
208+
}
196209

197210
r_new_width = new_width;
198211
r_new_height = new_height;

0 commit comments

Comments
 (0)