## ffmpeg / libavcodec / simple_idct.c @ d4c5d2ad

History | View | Annotate | Download (15.6 KB)

1 | 37e8dcda | Arpi | ```
/*
``` |
---|---|---|---|

2 | ff4ec49e | Fabrice Bellard | ```
* Simple IDCT
``` |

3 | ```
*
``` |
||

4 | ```
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
``` |
||

5 | ```
*
``` |
||

6 | ```
* This library is free software; you can redistribute it and/or
``` |
||

7 | ```
* modify it under the terms of the GNU Lesser General Public
``` |
||

8 | ```
* License as published by the Free Software Foundation; either
``` |
||

9 | ```
* version 2 of the License, or (at your option) any later version.
``` |
||

10 | ```
*
``` |
||

11 | ```
* This library is distributed in the hope that it will be useful,
``` |
||

12 | ```
* but WITHOUT ANY WARRANTY; without even the implied warranty of
``` |
||

13 | ```
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
``` |
||

14 | ```
* Lesser General Public License for more details.
``` |
||

15 | ```
*
``` |
||

16 | ```
* You should have received a copy of the GNU Lesser General Public
``` |
||

17 | ```
* License along with this library; if not, write to the Free Software
``` |
||

18 | ```
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
``` |
||

19 | ```
*/
``` |
||

20 | 983e3246 | Michael Niedermayer | |

21 | ```
/**
``` |
||

22 | ```
* @file simple_idct.c
``` |
||

23 | ```
* simpleidct in C.
``` |
||

24 | ```
*/
``` |
||

25 | |||

26 | 37e8dcda | Arpi | ```
/*
``` |

27 | ff4ec49e | Fabrice Bellard | ```
based upon some commented-out C code from mpeg2dec (idct_mmx.c
``` |

28 | ```
written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
``` |
||

29 | ```
*/
``` |
||

30 | 6000abfa | Fabrice Bellard | #include "avcodec.h" |

31 | d36a2466 | Fabrice Bellard | #include "dsputil.h" |

32 | 37e8dcda | Arpi | #include "simple_idct.h" |

33 | |||

/*
 * Fixed-point cosine weights and output shifts for the 8-point IDCT.
 *
 * The "#if 0" branch is a disabled, lower-precision set scaled by 2048;
 * only the "#else" branch (scaled by 1<<14) is compiled.
 * ROW_SHIFT is the right shift applied after the row pass, COL_SHIFT the
 * right shift applied after the column pass.
 */
#if 0
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
/* Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5, truncated to an integer */
#define W1  22725  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W2  21407  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W3  19266  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
/* NOTE(review): the formula yields 16384 for i=4; 16383 looks deliberate - confirm */
#define W4  16383  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W5  12873  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W6  8867   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W7  4520   //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define ROW_SHIFT 11
#define COL_SHIFT 20 // 6
#endif

55 | 4973971e | Michael Niedermayer | |

/*
 * 16x16 -> 32 bit multiply helpers used by the IDCT butterflies.
 *
 * Both macros expand to a single expression/statement WITHOUT a trailing
 * semicolon, so "MAC16(...);" behaves like an ordinary statement and can be
 * used safely as the body of an if/else.
 *
 *   MUL16(rt, ra, rb)  stores ra * rb into rt
 *   MAC16(rt, ra, rb)  adds   ra * rb to   rt
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb))

#else

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif

75 | 37e8dcda | Arpi | |

76 | 0e15384d | Michael Niedermayer | static inline void idctRowCondDC (DCTELEM * row) |

77 | 37e8dcda | Arpi | { |

78 | ```
int a0, a1, a2, a3, b0, b1, b2, b3;
``` |
||

79 | 412ba501 | Fabrice Bellard | ```
#ifdef FAST_64BIT
``` |

80 | uint64_t temp; |
||

81 | ```
#else
``` |
||

82 | uint32_t temp; |
||

83 | ```
#endif
``` |
||

84 | 4973971e | Michael Niedermayer | |

85 | 412ba501 | Fabrice Bellard | ```
#ifdef FAST_64BIT
``` |

86 | ```
#ifdef WORDS_BIGENDIAN
``` |
||

87 | #define ROW0_MASK 0xffff000000000000LL |
||

88 | ```
#else
``` |
||

89 | #define ROW0_MASK 0xffffLL |
||

90 | ```
#endif
``` |
||

91 | 0e15384d | Michael Niedermayer | if(sizeof(DCTELEM)==2){ |

92 | if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | |
||

93 | ((uint64_t *)row)[1]) == 0) { |
||

94 | temp = (row[0] << 3) & 0xffff; |
||

95 | ```
temp += temp << 16;
``` |
||

96 | ```
temp += temp << 32;
``` |
||

97 | ```
((uint64_t *)row)[0] = temp;
``` |
||

98 | ```
((uint64_t *)row)[1] = temp;
``` |
||

99 | ```
return;
``` |
||

100 | } |
||

101 | ```
}else{
``` |
||

102 | if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { |
||

103 | row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; |
||

104 | ```
return;
``` |
||

105 | } |
||

106 | } |
||

107 | 412ba501 | Fabrice Bellard | ```
#else
``` |

108 | 0e15384d | Michael Niedermayer | if(sizeof(DCTELEM)==2){ |

109 | if (!(((uint32_t*)row)[1] | |
||

110 | ```
((uint32_t*)row)[2] |
``` |
||

111 | ```
((uint32_t*)row)[3] |
``` |
||

112 | ```
row[1])) {
``` |
||

113 | temp = (row[0] << 3) & 0xffff; |
||

114 | ```
temp += temp << 16;
``` |
||

115 | ((uint32_t*)row)[0]=((uint32_t*)row)[1] = |
||

116 | ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; |
||

117 | ```
return;
``` |
||

118 | } |
||

119 | ```
}else{
``` |
||

120 | if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { |
||

121 | row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; |
||

122 | ```
return;
``` |
||

123 | } |
||

124 | } |
||

125 | 412ba501 | Fabrice Bellard | ```
#endif
``` |

126 | 4973971e | Michael Niedermayer | |

127 | 412ba501 | Fabrice Bellard | a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |

128 | a1 = a0; |
||

129 | a2 = a0; |
||

130 | a3 = a0; |
||

131 | |||

132 | ```
/* no need to optimize : gcc does it */
``` |
||

133 | ```
a0 += W2 * row[2];
``` |
||

134 | ```
a1 += W6 * row[2];
``` |
||

135 | ```
a2 -= W6 * row[2];
``` |
||

136 | ```
a3 -= W2 * row[2];
``` |
||

137 | |||

138 | ```
MUL16(b0, W1, row[1]);
``` |
||

139 | ```
MAC16(b0, W3, row[3]);
``` |
||

140 | ```
MUL16(b1, W3, row[1]);
``` |
||

141 | ```
MAC16(b1, -W7, row[3]);
``` |
||

142 | ```
MUL16(b2, W5, row[1]);
``` |
||

143 | ```
MAC16(b2, -W1, row[3]);
``` |
||

144 | ```
MUL16(b3, W7, row[1]);
``` |
||

145 | ```
MAC16(b3, -W5, row[3]);
``` |
||

146 | |||

147 | ```
#ifdef FAST_64BIT
``` |
||

148 | ```
temp = ((uint64_t*)row)[1];
``` |
||

149 | ```
#else
``` |
||

150 | temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3]; |
||

151 | ```
#endif
``` |
||

152 | if (temp != 0) { |
||

153 | a0 += W4*row[4] + W6*row[6]; |
||

154 | a1 += - W4*row[4] - W2*row[6]; |
||

155 | a2 += - W4*row[4] + W2*row[6]; |
||

156 | a3 += W4*row[4] - W6*row[6]; |
||

157 | |||

158 | ```
MAC16(b0, W5, row[5]);
``` |
||

159 | ```
MAC16(b0, W7, row[7]);
``` |
||

160 | |||

161 | ```
MAC16(b1, -W1, row[5]);
``` |
||

162 | ```
MAC16(b1, -W5, row[7]);
``` |
||

163 | |||

164 | ```
MAC16(b2, W7, row[5]);
``` |
||

165 | ```
MAC16(b2, W3, row[7]);
``` |
||

166 | |||

167 | ```
MAC16(b3, W3, row[5]);
``` |
||

168 | ```
MAC16(b3, -W1, row[7]);
``` |
||

169 | 4973971e | Michael Niedermayer | } |

170 | |||

171 | ```
row[0] = (a0 + b0) >> ROW_SHIFT;
``` |
||

172 | ```
row[7] = (a0 - b0) >> ROW_SHIFT;
``` |
||

173 | ```
row[1] = (a1 + b1) >> ROW_SHIFT;
``` |
||

174 | ```
row[6] = (a1 - b1) >> ROW_SHIFT;
``` |
||

175 | ```
row[2] = (a2 + b2) >> ROW_SHIFT;
``` |
||

176 | ```
row[5] = (a2 - b2) >> ROW_SHIFT;
``` |
||

177 | ```
row[3] = (a3 + b3) >> ROW_SHIFT;
``` |
||

178 | ```
row[4] = (a3 - b3) >> ROW_SHIFT;
``` |
||

179 | } |
||

180 | |||

181 | 0c1a9eda | Zdenek Kabelac | static inline void idctSparseColPut (uint8_t *dest, int line_size, |

182 | 0e15384d | Michael Niedermayer | DCTELEM * col) |

183 | d36a2466 | Fabrice Bellard | { |

184 | ```
int a0, a1, a2, a3, b0, b1, b2, b3;
``` |
||

185 | 0c1a9eda | Zdenek Kabelac | uint8_t *cm = cropTbl + MAX_NEG_CROP; |

186 | d36a2466 | Fabrice Bellard | |

187 | ```
/* XXX: I did that only to give same values as previous code */
``` |
||

188 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
||

189 | a1 = a0; |
||

190 | a2 = a0; |
||

191 | a3 = a0; |
||

192 | |||

193 | a0 += + W2*col[8*2]; |
||

194 | a1 += + W6*col[8*2]; |
||

195 | a2 += - W6*col[8*2]; |
||

196 | a3 += - W2*col[8*2]; |
||

197 | |||

198 | MUL16(b0, W1, col[8*1]); |
||

199 | MUL16(b1, W3, col[8*1]); |
||

200 | MUL16(b2, W5, col[8*1]); |
||

201 | MUL16(b3, W7, col[8*1]); |
||

202 | |||

203 | MAC16(b0, + W3, col[8*3]); |
||

204 | MAC16(b1, - W7, col[8*3]); |
||

205 | MAC16(b2, - W1, col[8*3]); |
||

206 | MAC16(b3, - W5, col[8*3]); |
||

207 | |||

208 | if(col[8*4]){ |
||

209 | a0 += + W4*col[8*4]; |
||

210 | a1 += - W4*col[8*4]; |
||

211 | a2 += - W4*col[8*4]; |
||

212 | a3 += + W4*col[8*4]; |
||

213 | } |
||

214 | |||

215 | if (col[8*5]) { |
||

216 | MAC16(b0, + W5, col[8*5]); |
||

217 | MAC16(b1, - W1, col[8*5]); |
||

218 | MAC16(b2, + W7, col[8*5]); |
||

219 | MAC16(b3, + W3, col[8*5]); |
||

220 | } |
||

221 | |||

222 | if(col[8*6]){ |
||

223 | a0 += + W6*col[8*6]; |
||

224 | a1 += - W2*col[8*6]; |
||

225 | a2 += + W2*col[8*6]; |
||

226 | a3 += - W6*col[8*6]; |
||

227 | } |
||

228 | |||

229 | if (col[8*7]) { |
||

230 | MAC16(b0, + W7, col[8*7]); |
||

231 | MAC16(b1, - W5, col[8*7]); |
||

232 | MAC16(b2, + W3, col[8*7]); |
||

233 | MAC16(b3, - W1, col[8*7]); |
||

234 | } |
||

235 | |||

236 | ```
dest[0] = cm[(a0 + b0) >> COL_SHIFT];
``` |
||

237 | dest += line_size; |
||

238 | ```
dest[0] = cm[(a1 + b1) >> COL_SHIFT];
``` |
||

239 | dest += line_size; |
||

240 | ```
dest[0] = cm[(a2 + b2) >> COL_SHIFT];
``` |
||

241 | dest += line_size; |
||

242 | ```
dest[0] = cm[(a3 + b3) >> COL_SHIFT];
``` |
||

243 | dest += line_size; |
||

244 | ```
dest[0] = cm[(a3 - b3) >> COL_SHIFT];
``` |
||

245 | dest += line_size; |
||

246 | ```
dest[0] = cm[(a2 - b2) >> COL_SHIFT];
``` |
||

247 | dest += line_size; |
||

248 | ```
dest[0] = cm[(a1 - b1) >> COL_SHIFT];
``` |
||

249 | dest += line_size; |
||

250 | ```
dest[0] = cm[(a0 - b0) >> COL_SHIFT];
``` |
||

251 | } |
||

252 | |||

253 | 0c1a9eda | Zdenek Kabelac | static inline void idctSparseColAdd (uint8_t *dest, int line_size, |

254 | 0e15384d | Michael Niedermayer | DCTELEM * col) |

255 | 37e8dcda | Arpi | { |

256 | 4973971e | Michael Niedermayer | ```
int a0, a1, a2, a3, b0, b1, b2, b3;
``` |

257 | 0c1a9eda | Zdenek Kabelac | uint8_t *cm = cropTbl + MAX_NEG_CROP; |

258 | 37e8dcda | Arpi | |

259 | 412ba501 | Fabrice Bellard | ```
/* XXX: I did that only to give same values as previous code */
``` |

260 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
||

261 | a1 = a0; |
||

262 | a2 = a0; |
||

263 | a3 = a0; |
||

264 | 37e8dcda | Arpi | |

265 | 412ba501 | Fabrice Bellard | a0 += + W2*col[8*2]; |

266 | a1 += + W6*col[8*2]; |
||

267 | a2 += - W6*col[8*2]; |
||

268 | a3 += - W2*col[8*2]; |
||

269 | 4973971e | Michael Niedermayer | |

270 | 412ba501 | Fabrice Bellard | MUL16(b0, W1, col[8*1]); |

271 | MUL16(b1, W3, col[8*1]); |
||

272 | MUL16(b2, W5, col[8*1]); |
||

273 | MUL16(b3, W7, col[8*1]); |
||

274 | 37e8dcda | Arpi | |

275 | 412ba501 | Fabrice Bellard | MAC16(b0, + W3, col[8*3]); |

276 | MAC16(b1, - W7, col[8*3]); |
||

277 | MAC16(b2, - W1, col[8*3]); |
||

278 | MAC16(b3, - W5, col[8*3]); |
||

279 | 4973971e | Michael Niedermayer | |

280 | if(col[8*4]){ |
||

281 | 412ba501 | Fabrice Bellard | a0 += + W4*col[8*4]; |

282 | a1 += - W4*col[8*4]; |
||

283 | a2 += - W4*col[8*4]; |
||

284 | a3 += + W4*col[8*4]; |
||

285 | 4973971e | Michael Niedermayer | } |

286 | |||

287 | 412ba501 | Fabrice Bellard | if (col[8*5]) { |

288 | MAC16(b0, + W5, col[8*5]); |
||

289 | MAC16(b1, - W1, col[8*5]); |
||

290 | MAC16(b2, + W7, col[8*5]); |
||

291 | MAC16(b3, + W3, col[8*5]); |
||

292 | 4973971e | Michael Niedermayer | } |

293 | |||

294 | 412ba501 | Fabrice Bellard | if(col[8*6]){ |

295 | a0 += + W6*col[8*6]; |
||

296 | a1 += - W2*col[8*6]; |
||

297 | a2 += + W2*col[8*6]; |
||

298 | a3 += - W6*col[8*6]; |
||

299 | } |
||

300 | |||

301 | if (col[8*7]) { |
||

302 | MAC16(b0, + W7, col[8*7]); |
||

303 | MAC16(b1, - W5, col[8*7]); |
||

304 | MAC16(b2, + W3, col[8*7]); |
||

305 | MAC16(b3, - W1, col[8*7]); |
||

306 | } |
||

307 | |||

308 | d36a2466 | Fabrice Bellard | dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; |

309 | dest += line_size; |
||

310 | dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; |
||

311 | dest += line_size; |
||

312 | dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; |
||

313 | dest += line_size; |
||

314 | dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; |
||

315 | dest += line_size; |
||

316 | dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; |
||

317 | dest += line_size; |
||

318 | dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; |
||

319 | dest += line_size; |
||

320 | dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; |
||

321 | dest += line_size; |
||

322 | dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; |
||

323 | 37e8dcda | Arpi | } |

324 | |||

325 | 0e15384d | Michael Niedermayer | static inline void idctSparseCol (DCTELEM * col) |

326 | 86748dbc | Michael Niedermayer | { |

327 | ```
int a0, a1, a2, a3, b0, b1, b2, b3;
``` |
||

328 | |||

329 | ```
/* XXX: I did that only to give same values as previous code */
``` |
||

330 | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
||

331 | a1 = a0; |
||

332 | a2 = a0; |
||

333 | a3 = a0; |
||

334 | |||

335 | a0 += + W2*col[8*2]; |
||

336 | a1 += + W6*col[8*2]; |
||

337 | a2 += - W6*col[8*2]; |
||

338 | a3 += - W2*col[8*2]; |
||

339 | |||

340 | MUL16(b0, W1, col[8*1]); |
||

341 | MUL16(b1, W3, col[8*1]); |
||

342 | MUL16(b2, W5, col[8*1]); |
||

343 | MUL16(b3, W7, col[8*1]); |
||

344 | |||

345 | MAC16(b0, + W3, col[8*3]); |
||

346 | MAC16(b1, - W7, col[8*3]); |
||

347 | MAC16(b2, - W1, col[8*3]); |
||

348 | MAC16(b3, - W5, col[8*3]); |
||

349 | |||

350 | if(col[8*4]){ |
||

351 | a0 += + W4*col[8*4]; |
||

352 | a1 += - W4*col[8*4]; |
||

353 | a2 += - W4*col[8*4]; |
||

354 | a3 += + W4*col[8*4]; |
||

355 | } |
||

356 | |||

357 | if (col[8*5]) { |
||

358 | MAC16(b0, + W5, col[8*5]); |
||

359 | MAC16(b1, - W1, col[8*5]); |
||

360 | MAC16(b2, + W7, col[8*5]); |
||

361 | MAC16(b3, + W3, col[8*5]); |
||

362 | } |
||

363 | |||

364 | if(col[8*6]){ |
||

365 | a0 += + W6*col[8*6]; |
||

366 | a1 += - W2*col[8*6]; |
||

367 | a2 += + W2*col[8*6]; |
||

368 | a3 += - W6*col[8*6]; |
||

369 | } |
||

370 | |||

371 | if (col[8*7]) { |
||

372 | MAC16(b0, + W7, col[8*7]); |
||

373 | MAC16(b1, - W5, col[8*7]); |
||

374 | MAC16(b2, + W3, col[8*7]); |
||

375 | MAC16(b3, - W1, col[8*7]); |
||

376 | } |
||

377 | |||

378 | ```
col[0 ] = ((a0 + b0) >> COL_SHIFT);
``` |
||

379 | ```
col[8 ] = ((a1 + b1) >> COL_SHIFT);
``` |
||

380 | ```
col[16] = ((a2 + b2) >> COL_SHIFT);
``` |
||

381 | ```
col[24] = ((a3 + b3) >> COL_SHIFT);
``` |
||

382 | ```
col[32] = ((a3 - b3) >> COL_SHIFT);
``` |
||

383 | ```
col[40] = ((a2 - b2) >> COL_SHIFT);
``` |
||

384 | ```
col[48] = ((a1 - b1) >> COL_SHIFT);
``` |
||

385 | ```
col[56] = ((a0 - b0) >> COL_SHIFT);
``` |
||

386 | } |
||

387 | |||

388 | 0c1a9eda | Zdenek Kabelac | void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |

389 | d36a2466 | Fabrice Bellard | { |

390 | ```
int i;
``` |
||

391 | for(i=0; i<8; i++) |
||

392 | ```
idctRowCondDC(block + i*8);
``` |
||

393 | |||

394 | for(i=0; i<8; i++) |
||

395 | idctSparseColPut(dest + i, line_size, block + i); |
||

396 | } |
||

397 | |||

398 | 0c1a9eda | Zdenek Kabelac | void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block) |

399 | 412ba501 | Fabrice Bellard | { |

400 | ```
int i;
``` |
||

401 | for(i=0; i<8; i++) |
||

402 | ```
idctRowCondDC(block + i*8);
``` |
||

403 | |||

404 | for(i=0; i<8; i++) |
||

405 | d36a2466 | Fabrice Bellard | idctSparseColAdd(dest + i, line_size, block + i); |

406 | 4973971e | Michael Niedermayer | } |

407 | cd4af68a | Zdenek Kabelac | |

408 | 0e15384d | Michael Niedermayer | ```
void simple_idct(DCTELEM *block)
``` |

409 | 86748dbc | Michael Niedermayer | { |

410 | ```
int i;
``` |
||

411 | for(i=0; i<8; i++) |
||

412 | ```
idctRowCondDC(block + i*8);
``` |
||

413 | |||

414 | for(i=0; i<8; i++) |
||

415 | idctSparseCol(block + i); |
||

416 | } |
||

417 | |||

418 | 9bf71516 | Fabrice Bellard | ```
/* 2x4x8 idct */
``` |

419 | |||

/* number of fractional bits in the C1/C2 fixed-point constants */
#define CN_SHIFT 12
/* convert x to fixed point with CN_SHIFT fractional bits (+0.5 rounds) */
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824) /* cos(pi/8) / sqrt(2) */
#define C2 C_FIX(0.2705980501) /* sin(pi/8) / sqrt(2) */

/* row idct is multiplied by 16 * sqrt(2.0), col idct4 is normalized,
   and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
#define C_SHIFT (4+1+12)

428 | 9bf71516 | Fabrice Bellard | |

429 | 0c1a9eda | Zdenek Kabelac | static inline void idct4col(uint8_t *dest, int line_size, const DCTELEM *col) |

430 | 9bf71516 | Fabrice Bellard | { |

431 | ```
int c0, c1, c2, c3, a0, a1, a2, a3;
``` |
||

432 | 0c1a9eda | Zdenek Kabelac | ```
const uint8_t *cm = cropTbl + MAX_NEG_CROP;
``` |

433 | 9bf71516 | Fabrice Bellard | |

434 | a0 = col[8*0]; |
||

435 | a1 = col[8*2]; |
||

436 | a2 = col[8*4]; |
||

437 | a3 = col[8*6]; |
||

438 | 652f0197 | Fabrice Bellard | c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |

439 | c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
||

440 | 9bf71516 | Fabrice Bellard | c1 = a1 * C1 + a3 * C2; |

441 | c3 = a1 * C2 - a3 * C1; |
||

442 | ```
dest[0] = cm[(c0 + c1) >> C_SHIFT];
``` |
||

443 | dest += line_size; |
||

444 | ```
dest[0] = cm[(c2 + c3) >> C_SHIFT];
``` |
||

445 | dest += line_size; |
||

446 | ```
dest[0] = cm[(c2 - c3) >> C_SHIFT];
``` |
||

447 | dest += line_size; |
||

448 | ```
dest[0] = cm[(c0 - c1) >> C_SHIFT];
``` |
||

449 | } |
||

450 | |||

/*
 * Butterfly on column k of two consecutive 8-element rows addressed by the
 * in-scope pointer `ptr`: ptr[k] becomes the sum and ptr[8 + k] the
 * difference of the two original values.
 * Wrapped in do { } while (0) so that "BF(k);" is a single statement.
 */
#define BF(k) \
do {\
    int a0, a1;\
    a0 = ptr[k];\
    a1 = ptr[8 + (k)];\
    ptr[k] = a0 + a1;\
    ptr[8 + (k)] = a0 - a1;\
} while (0)

459 | |||

460 | ```
/* only used by DV codec. The input must be interlaced. 128 is added
``` |
||

461 | ```
to the pixels before clamping to avoid systematic error
``` |
||

462 | ```
(1024*sqrt(2)) offset would be needed otherwise. */
``` |
||

463 | ```
/* XXX: I think a 1.0/sqrt(2) normalization should be needed to
``` |
||

464 | ```
compensate the extra butterfly stage - I don't have the full DV
``` |
||

465 | ```
specification */
``` |
||

466 | 0c1a9eda | Zdenek Kabelac | void simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block) |

467 | 9bf71516 | Fabrice Bellard | { |

468 | ```
int i;
``` |
||

469 | 0e15384d | Michael Niedermayer | DCTELEM *ptr; |

470 | 9bf71516 | Fabrice Bellard | |

471 | ```
/* butterfly */
``` |
||

472 | ptr = block; |
||

473 | for(i=0;i<4;i++) { |
||

474 | ```
BF(0);
``` |
||

475 | ```
BF(1);
``` |
||

476 | ```
BF(2);
``` |
||

477 | ```
BF(3);
``` |
||

478 | ```
BF(4);
``` |
||

479 | ```
BF(5);
``` |
||

480 | ```
BF(6);
``` |
||

481 | ```
BF(7);
``` |
||

482 | ptr += 2 * 8; |
||

483 | } |
||

484 | |||

485 | ```
/* IDCT8 on each line */
``` |
||

486 | for(i=0; i<8; i++) { |
||

487 | ```
idctRowCondDC(block + i*8);
``` |
||

488 | } |
||

489 | |||

490 | ```
/* IDCT4 and store */
``` |
||

491 | for(i=0;i<8;i++) { |
||

492 | ```
idct4col(dest + i, 2 * line_size, block + i);
``` |
||

493 | idct4col(dest + line_size + i, 2 * line_size, block + 8 + i); |
||

494 | } |
||

495 | } |
||

496 | 1457ab52 | Michael Niedermayer | |

/* 8x4 & 4x8 WMV2 IDCT */
/* drop the 2x4x8 definitions so the names can be reused with new values */
#undef CN_SHIFT
#undef C_SHIFT
#undef C_FIX
#undef C1
#undef C2
/* number of fractional bits in the column constants */
#define CN_SHIFT 12
/* fixed-point value of x * sqrt(2) with CN_SHIFT fractional bits (+0.5 rounds) */
#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824) /* argument is cos(pi/8) / sqrt(2) */
#define C2 C_FIX(0.2705980501) /* argument is sin(pi/8) / sqrt(2) */
#define C3 C_FIX(0.5)
/* right shift applied to the column results in idct4col_add() */
#define C_SHIFT (4+1+12)

509 | 0c1a9eda | Zdenek Kabelac | static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col) |

510 | 1457ab52 | Michael Niedermayer | { |

511 | ```
int c0, c1, c2, c3, a0, a1, a2, a3;
``` |
||

512 | 0c1a9eda | Zdenek Kabelac | ```
const uint8_t *cm = cropTbl + MAX_NEG_CROP;
``` |

513 | 1457ab52 | Michael Niedermayer | |

514 | a0 = col[8*0]; |
||

515 | a1 = col[8*1]; |
||

516 | a2 = col[8*2]; |
||

517 | a3 = col[8*3]; |
||

518 | c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); |
||

519 | c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); |
||

520 | c1 = a1 * C1 + a3 * C2; |
||

521 | c3 = a1 * C2 - a3 * C1; |
||

522 | dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)]; |
||

523 | dest += line_size; |
||

524 | dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)]; |
||

525 | dest += line_size; |
||

526 | dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)]; |
||

527 | dest += line_size; |
||

528 | dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)]; |
||

529 | } |
||

530 | |||

/* number of fractional bits in the 4-point row constants */
#define RN_SHIFT 15
/* fixed-point value of x * sqrt(2) with RN_SHIFT fractional bits (+0.5 rounds) */
#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
#define R1 R_FIX(0.6532814824) /* argument is cos(pi/8) / sqrt(2) */
#define R2 R_FIX(0.2705980501) /* argument is sin(pi/8) / sqrt(2) */
#define R3 R_FIX(0.5)
/* right shift applied to the row results in idct4row() */
#define R_SHIFT 11

537 | 0e15384d | Michael Niedermayer | static inline void idct4row(DCTELEM *row) |

538 | 1457ab52 | Michael Niedermayer | { |

539 | ```
int c0, c1, c2, c3, a0, a1, a2, a3;
``` |
||

540 | 0c1a9eda | Zdenek Kabelac | ```
//const uint8_t *cm = cropTbl + MAX_NEG_CROP;
``` |

541 | 1457ab52 | Michael Niedermayer | |

542 | ```
a0 = row[0];
``` |
||

543 | ```
a1 = row[1];
``` |
||

544 | ```
a2 = row[2];
``` |
||

545 | ```
a3 = row[3];
``` |
||

546 | c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); |
||

547 | c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); |
||

548 | c1 = a1 * R1 + a3 * R2; |
||

549 | c3 = a1 * R2 - a3 * R1; |
||

550 | ```
row[0]= (c0 + c1) >> R_SHIFT;
``` |
||

551 | ```
row[1]= (c2 + c3) >> R_SHIFT;
``` |
||

552 | ```
row[2]= (c2 - c3) >> R_SHIFT;
``` |
||

553 | ```
row[3]= (c0 - c1) >> R_SHIFT;
``` |
||

554 | } |
||

555 | |||

556 | 0c1a9eda | Zdenek Kabelac | void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block) |

557 | 1457ab52 | Michael Niedermayer | { |

558 | ```
int i;
``` |
||

559 | |||

560 | ```
/* IDCT8 on each line */
``` |
||

561 | for(i=0; i<4; i++) { |
||

562 | ```
idctRowCondDC(block + i*8);
``` |
||

563 | } |
||

564 | |||

565 | ```
/* IDCT4 and store */
``` |
||

566 | for(i=0;i<8;i++) { |
||

567 | idct4col_add(dest + i, line_size, block + i); |
||

568 | } |
||

569 | } |
||

570 | |||

571 | 0c1a9eda | Zdenek Kabelac | void simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block) |

572 | 1457ab52 | Michael Niedermayer | { |

573 | ```
int i;
``` |
||

574 | |||

575 | ```
/* IDCT4 on each line */
``` |
||

576 | for(i=0; i<8; i++) { |
||

577 | ```
idct4row(block + i*8);
``` |
||

578 | } |
||

579 | |||

580 | ```
/* IDCT8 and store */
``` |
||

581 | for(i=0; i<4; i++){ |
||

582 | idctSparseColAdd(dest + i, line_size, block + i); |
||

583 | } |
||

584 | } |