// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE2 variant of alpha filters
//
// Author: Skal (pascal.massimino@gmail.com)

#include "src/dsp/dsp.h"

#if defined(WEBP_USE_SSE2)
#include <assert.h>
#include <emmintrin.h>
#include <stdlib.h>
#include <string.h>
//------------------------------------------------------------------------------
// Helpful macro.
// Debug-build validation of the arguments shared by the Do*Filter_SSE2()
// helpers. Besides 'in' and 'out', the expansion reads 'width', 'height',
// 'stride', 'row' and 'num_rows' from the enclosing scope, so it can only be
// used where all of those names are visible.
// The body is wrapped in do { } while (0) so that 'SANITY_CHECK(in, out);'
// is exactly one statement and stays correct under an unbraced if/else.
#define SANITY_CHECK(in, out)                                                  \
  do {                                                                         \
    assert((in) != NULL);                                                      \
    assert((out) != NULL);                                                     \
    assert(width > 0);                                                         \
    assert(height > 0);                                                        \
    assert(stride >= width);                                                   \
    assert(row >= 0 && num_rows > 0 && row + num_rows <= height);              \
    (void)height;  /* Silence unused warning. */                               \
  } while (0)

// Stores the byte-wise difference src[i] - pred[i] (modulo 256) into dst[i]
// for every i in [0, length). 'length' must be non-negative.
// Bytes are handled 32 at a time with unaligned SSE2 loads/stores; the
// remaining (length % 32) bytes go through the scalar loop.
static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
                                uint8_t* dst, int length) {
  int i;
  const int max_pos = length & ~31;  // largest multiple of 32 <= length
  assert(length >= 0);
  for (i = 0; i < max_pos; i += 32) {
    const __m128i A0 = _mm_loadu_si128((const __m128i*)&src[i +  0]);
    const __m128i A1 = _mm_loadu_si128((const __m128i*)&src[i + 16]);
    const __m128i B0 = _mm_loadu_si128((const __m128i*)&pred[i +  0]);
    const __m128i B1 = _mm_loadu_si128((const __m128i*)&pred[i + 16]);
    const __m128i C0 = _mm_sub_epi8(A0, B0);
    const __m128i C1 = _mm_sub_epi8(A1, B1);
    _mm_storeu_si128((__m128i*)&dst[i +  0], C0);
    _mm_storeu_si128((__m128i*)&dst[i + 16], C1);
  }
  // Scalar tail; the cast makes the intended wrap-around explicit.
  for (; i < length; ++i) dst[i] = (uint8_t)(src[i] - pred[i]);
}
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
// Stores src[i] - src[i - 1] (modulo 256) into dst[i] for i in [0, length).
// 'length' must be non-negative and, when it is non-zero, src[-1] must be
// readable since it is the predictor of the first sample.
// Bytes are handled 32 at a time with unaligned SSE2 loads/stores; the
// remaining (length % 32) bytes go through the scalar loop.
static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
  int i;
  const int max_pos = length & ~31;  // largest multiple of 32 <= length
  assert(length >= 0);
  for (i = 0; i < max_pos; i += 32) {
    // Bx is the same data as Ax, loaded one byte earlier (left neighbours).
    const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + i +  0    ));
    const __m128i B0 = _mm_loadu_si128((const __m128i*)(src + i +  0 - 1));
    const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + i + 16    ));
    const __m128i B1 = _mm_loadu_si128((const __m128i*)(src + i + 16 - 1));
    const __m128i C0 = _mm_sub_epi8(A0, B0);
    const __m128i C1 = _mm_sub_epi8(A1, B1);
    _mm_storeu_si128((__m128i*)(dst + i +  0), C0);
    _mm_storeu_si128((__m128i*)(dst + i + 16), C1);
  }
  // Scalar tail; the cast makes the intended wrap-around explicit.
  for (; i < length; ++i) dst[i] = (uint8_t)(src[i] - src[i - 1]);
}
//------------------------------------------------------------------------------
// Horizontal filter.
Oct 26, 2018
Oct 26, 2018
74
75
76
77
78
static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
int width, int height,
int stride,
int row, int num_rows,
uint8_t* out) {
79
80
81
82
83
84
85
86
87
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
if (row == 0) {
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
Oct 26, 2018
Oct 26, 2018
88
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
89
90
91
92
93
94
95
96
97
row = 1;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
// Leftmost pixel is predicted from above.
out[0] = in[0] - in[-stride];
Oct 26, 2018
Oct 26, 2018
98
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
99
100
101
102
103
104
105
106
107
++row;
in += stride;
out += stride;
}
}
//------------------------------------------------------------------------------
// Vertical filter.
Oct 26, 2018
Oct 26, 2018
108
109
110
111
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
uint8_t* out) {
112
113
114
115
116
117
118
119
120
121
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
if (row == 0) {
// Very first top-left pixel is copied.
out[0] = in[0];
// Rest of top scan-line is left-predicted.
Oct 26, 2018
Oct 26, 2018
122
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
123
124
125
126
127
128
129
row = 1;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
Oct 26, 2018
Oct 26, 2018
130
PredictLineTop_SSE2(in, in - stride, out, width);
131
132
133
134
135
136
137
138
139
++row;
in += stride;
out += stride;
}
}
//------------------------------------------------------------------------------
// Gradient filter.
Oct 26, 2018
Oct 26, 2018
140
static WEBP_INLINE int GradientPredictor_SSE2(uint8_t a, uint8_t b, uint8_t c) {
141
142
143
144
const int g = a + b - c;
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
}

// Writes the gradient-filter residuals of one row segment:
//   out[i] = row[i] - clip(row[i - 1] + top[i] - top[i - 1])   (modulo 256)
// for i in [0, length). 'top' is the matching segment of the row above.
// row[-1] and top[-1] are read, so both must be valid when length > 0.
// Eight samples are processed per SIMD iteration; the remaining (length % 8)
// samples use the scalar predictor.
static void GradientPredictDirect_SSE2(const uint8_t* const row,
                                       const uint8_t* const top,
                                       uint8_t* const out, int length) {
  const int max_pos = length & ~7;  // largest multiple of 8 <= length
  int i;
  const __m128i zero = _mm_setzero_si128();
  for (i = 0; i < max_pos; i += 8) {
    const __m128i A0 = _mm_loadl_epi64((const __m128i*)&row[i - 1]);  // left
    const __m128i B0 = _mm_loadl_epi64((const __m128i*)&top[i]);      // top
    const __m128i C0 = _mm_loadl_epi64((const __m128i*)&top[i - 1]);  // top-left
    const __m128i D = _mm_loadl_epi64((const __m128i*)&row[i]);       // current
    // Widen to 16 bits so that a + b - c cannot wrap.
    const __m128i A1 = _mm_unpacklo_epi8(A0, zero);
    const __m128i B1 = _mm_unpacklo_epi8(B0, zero);
    const __m128i C1 = _mm_unpacklo_epi8(C0, zero);
    const __m128i E = _mm_add_epi16(A1, B1);
    const __m128i F = _mm_sub_epi16(E, C1);
    // Unsigned-saturating pack performs the clip to [0, 255].
    const __m128i G = _mm_packus_epi16(F, zero);
    const __m128i H = _mm_sub_epi8(D, G);
    _mm_storel_epi64((__m128i*)(out + i), H);
  }
  for (; i < length; ++i) {
    out[i] = (uint8_t)(row[i] -
                       GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]));
  }
}
Oct 26, 2018
Oct 26, 2018
170
171
172
173
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
uint8_t* out) {
174
175
176
177
178
179
180
181
182
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
// left prediction for top scan-line
if (row == 0) {
out[0] = in[0];
Oct 26, 2018
Oct 26, 2018
183
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
184
185
186
187
188
189
190
191
row = 1;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
out[0] = in[0] - in[-stride];
Oct 26, 2018
Oct 26, 2018
192
GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1);
193
194
195
196
197
198
199
200
201
202
++row;
in += stride;
out += stride;
}
}
#undef SANITY_CHECK
//------------------------------------------------------------------------------

// Horizontally filters the whole width x height plane 'data' (rows 'stride'
// bytes apart) into 'filtered_data', i.e. rows [0, height).
static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
                                  int stride, uint8_t* filtered_data) {
  DoHorizontalFilter_SSE2(data, width, height, stride, 0, height,
                          filtered_data);
}

// Vertically filters the whole width x height plane 'data' (rows 'stride'
// bytes apart) into 'filtered_data', i.e. rows [0, height).
static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
                                int stride, uint8_t* filtered_data) {
  DoVerticalFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
}

// Gradient-filters the whole width x height plane 'data' (rows 'stride' bytes
// apart) into 'filtered_data', i.e. rows [0, height).
static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
                                int stride, uint8_t* filtered_data) {
  DoGradientFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
}

//------------------------------------------------------------------------------
// Inverse transforms

// Reverses the horizontal filter for one row of 'width' residuals:
//   out[0] = in[0] + (prev != NULL ? prev[0] : 0)
//   out[i] = in[i] + out[i - 1]                     for i in [1, width)
// with all sums taken modulo 256. 'prev' may be NULL; only prev[0] is read.
// out[0] is written unconditionally, so 'in' and 'out' must hold at least one
// sample.
static void HorizontalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
                                    uint8_t* out, int width) {
  int i;
  __m128i last;
  out[0] = (uint8_t)(in[0] + (prev == NULL ? 0 : prev[0]));
  if (width <= 1) return;
  // 'last' carries the previously reconstructed sample in its lowest byte.
  last = _mm_set_epi32(0, 0, 0, out[0]);
  for (i = 1; i + 8 <= width; i += 8) {
    const __m128i A0 = _mm_loadl_epi64((const __m128i*)(in + i));
    // Inject the carry, then build an 8-byte prefix sum with shifts of
    // 1, 2 and 4 bytes.
    const __m128i A1 = _mm_add_epi8(A0, last);
    const __m128i A2 = _mm_slli_si128(A1, 1);
    const __m128i A3 = _mm_add_epi8(A1, A2);
    const __m128i A4 = _mm_slli_si128(A3, 2);
    const __m128i A5 = _mm_add_epi8(A3, A4);
    const __m128i A6 = _mm_slli_si128(A5, 4);
    const __m128i A7 = _mm_add_epi8(A5, A6);
    _mm_storel_epi64((__m128i*)(out + i), A7);
    // Byte 7 (the last sample just written) becomes the next carry.
    last = _mm_srli_epi64(A7, 56);
  }
  for (; i < width; ++i) out[i] = (uint8_t)(in[i] + out[i - 1]);
}

// Reverses the vertical filter for one row of 'width' residuals:
// out[i] = in[i] + prev[i] (modulo 256), where 'prev' is the row above.
// When 'prev' is NULL the row is handed to HorizontalUnfilter_SSE2() instead.
static void VerticalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
                                  uint8_t* out, int width) {
  if (prev == NULL) {
    HorizontalUnfilter_SSE2(NULL, in, out, width);
  } else {
    int i;
    const int max_pos = width & ~31;  // largest multiple of 32 <= width
    assert(width >= 0);
    for (i = 0; i < max_pos; i += 32) {
      const __m128i A0 = _mm_loadu_si128((const __m128i*)&in[i +  0]);
      const __m128i A1 = _mm_loadu_si128((const __m128i*)&in[i + 16]);
      const __m128i B0 = _mm_loadu_si128((const __m128i*)&prev[i +  0]);
      const __m128i B1 = _mm_loadu_si128((const __m128i*)&prev[i + 16]);
      const __m128i C0 = _mm_add_epi8(A0, B0);
      const __m128i C1 = _mm_add_epi8(A1, B1);
      _mm_storeu_si128((__m128i*)&out[i +  0], C0);
      _mm_storeu_si128((__m128i*)&out[i + 16], C1);
    }
    // Scalar tail; the cast makes the intended wrap-around explicit.
    for (; i < width; ++i) out[i] = (uint8_t)(in[i] + prev[i]);
  }
}

// Reverses the gradient filter for one row segment, in place in 'row':
//   row[i] = in[i] + clip(row[i - 1] + top[i] - top[i - 1])   (modulo 256)
// for i in [0, length). 'top' is the matching segment of the row above.
// row[-1] and top[-1] are read, so both must be valid when length > 0.
// Each output depends on the previous one, so the eight samples of a SIMD
// group are still resolved one after the other (inner loop); only the
// top - top-left term is computed for all eight at once.
static void GradientPredictInverse_SSE2(const uint8_t* const in,
                                        const uint8_t* const top,
                                        uint8_t* const row, int length) {
  if (length > 0) {
    int i;
    const int max_pos = length & ~7;  // largest multiple of 8 <= length
    const __m128i zero = _mm_setzero_si128();
    __m128i A = _mm_set_epi32(0, 0, 0, row[-1]);   // left sample
    for (i = 0; i < max_pos; i += 8) {
      const __m128i tmp0 = _mm_loadl_epi64((const __m128i*)&top[i]);
      const __m128i tmp1 = _mm_loadl_epi64((const __m128i*)&top[i - 1]);
      const __m128i B = _mm_unpacklo_epi8(tmp0, zero);
      const __m128i C = _mm_unpacklo_epi8(tmp1, zero);
      const __m128i D = _mm_loadl_epi64((const __m128i*)&in[i]);  // base input
      const __m128i E = _mm_sub_epi16(B, C);  // unclipped gradient basis B - C
      __m128i out = zero;                     // accumulator for output
      // Selects the single byte being resolved in the current inner step.
      __m128i mask_hi = _mm_set_epi32(0, 0, 0, 0xff);
      int k = 8;
      while (1) {
        const __m128i tmp3 = _mm_add_epi16(A, E);           // delta = A + B - C
        const __m128i tmp4 = _mm_packus_epi16(tmp3, zero);  // saturate delta
        const __m128i tmp5 = _mm_add_epi8(tmp4, D);         // add to in[]
        A = _mm_and_si128(tmp5, mask_hi);                // 1-complement clip
        out = _mm_or_si128(out, A);                      // accumulate output
        if (--k == 0) break;
        // Move the byte just resolved one position up and widen it, so it
        // sits in the 16-bit lane of the next sample as its left neighbour.
        A = _mm_slli_si128(A, 1);                        // rotate left sample
        mask_hi = _mm_slli_si128(mask_hi, 1);            // rotate mask
        A = _mm_unpacklo_epi8(A, zero);                  // convert 8b->16b
      }
      A = _mm_srli_si128(A, 7);       // prepare left sample for next iteration
      _mm_storel_epi64((__m128i*)&row[i], out);
    }
    for (; i < length; ++i) {
      row[i] = (uint8_t)(in[i] + GradientPredictor_SSE2(row[i - 1], top[i],
                                                       top[i - 1]));
    }
  }
}

// Reverses the gradient filter for one row of 'width' residuals. 'prev' is
// the row above; when it is NULL the row is handed to
// HorizontalUnfilter_SSE2() instead. Otherwise the first sample is predicted
// from above and the remaining width - 1 samples use the clipped gradient
// predictor.
static void GradientUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
                                  uint8_t* out, int width) {
  if (prev == NULL) {
    HorizontalUnfilter_SSE2(NULL, in, out, width);
  } else {
    out[0] = (uint8_t)(in[0] + prev[0]);  // predict from above
    GradientPredictInverse_SSE2(in + 1, prev + 1, out + 1, width - 1);
  }
}

//------------------------------------------------------------------------------
// Entry point
extern void VP8FiltersInitSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {
Oct 26, 2018
Oct 26, 2018
320
321
322
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2;
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2;
323
Oct 26, 2018
Oct 26, 2018
324
325
326
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_SSE2;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_SSE2;
327
328
329
330
331
332
333
}
#else // !WEBP_USE_SSE2
// NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file; presumably it
// emits a no-op VP8FiltersInitSSE2() so the symbol still exists when SSE2 is
// not enabled -- confirm against src/dsp/dsp.h.
WEBP_DSP_INIT_STUB(VP8FiltersInitSSE2)
#endif // WEBP_USE_SSE2